0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DataGeneratorColumn.h
Go to the documentation of this file.
1 /* -*- c++ -*-
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
22 #ifndef Hypertable_Lib_DataGeneratorColumn_h
23 #define Hypertable_Lib_DataGeneratorColumn_h
24 
25 #include "Cell.h"
26 #include "DataGeneratorRandom.h"
28 #include "DataGeneratorQualifier.h"
29 
30 #include <Common/Config.h>
31 #include <Common/FileUtils.h>
32 #include <Common/String.h>
33 #include <Common/WordStream.h>
34 
35 #include <iostream>
36 #include <iterator>
37 #include <sstream>
38 #include <string>
39 
40 extern "C" {
41 #include <limits.h>
42 #include <stdlib.h>
43 }
44 
45 using namespace Hypertable;
46 using namespace Hypertable::Config;
47 using namespace std;
48 
49 namespace Hypertable {
50 
/**
 * Plain description of one generated column. Column copies it as its base
 * class and ColumnString reads the fields below to decide how values are
 * produced.
 * NOTE(review): listing lines 53-54 are absent from this rendering; Column
 * reads `spec.qualifier`, so a `qualifier` member exists but is not shown.
 */
51  class ColumnSpec {
52  public:
    /// Length of each value; -1 is treated as "not specified" by the
    /// word-stream check in ColumnString.
55  int size {-1};
    /// Compared against RANDOM when ColumnString constructs its WordStream.
56  int order {RANDOM};
    /// File name of the value source; when empty ColumnString falls back to
    /// cooked_source, and when both are empty it generates random characters.
57  std::string source;
    /// When non-empty, ColumnString::next() builds each value from two
    /// newline-delimited words of this file joined by a single space.
58  std::string cooked_source;
    /// Not referenced in this listing -- presumably the target column family
    /// name; confirm against the callers.
59  std::string column_family;
    /// Seed handed to the WordStream constructor.
60  unsigned seed {1};
    /// Not referenced in this listing.
61  std::string distribution;
    /// When true, values are pulled from a WordStream built on the source file.
62  bool word_stream {};
    /// When true, ColumnString::next() points the value straight at the source
    /// (or cooked) data instead of escaping it into the render buffer.
63  bool to_stdout {};
    /// When true, no random offset into the source data is chosen and the
    /// generated random buffer is exactly `size` characters long.
64  bool fixed {};
65  };
66 
/**
 * Abstract base for generated columns: a copy of the ColumnSpec plus the
 * list of qualifier generators, with pure-virtual accessors for the current
 * qualifier and value.
 * NOTE(review): listing line 81 is absent from this rendering; the
 * constructor assigns `m_next_qualifier`, whose declaration is not shown.
 */
67  class Column : public ColumnSpec {
68  public:
    /// Copies @a spec and, when spec.qualifier.type is not -1, creates one
    /// qualifier generator through QualifierFactory::create().
69  Column(ColumnSpec &spec) : ColumnSpec(spec) {
70  if (spec.qualifier.type != -1)
71  m_qualifiers.push_back( QualifierFactory::create(spec.qualifier) );
    // Initialised to the number of qualifiers created above (0 or 1).
72  m_next_qualifier = m_qualifiers.size();
73  }
    // NOTE(review): the Qualifier pointers held in m_qualifiers are not
    // deleted here -- possible leak; confirm who owns them.
74  virtual ~Column() { }
    /// Advances the column to its next cell.
75  virtual bool next() = 0;
    /// Returns the current column qualifier.
76  virtual std::string &qualifier() = 0;
    /// Returns a pointer to the current value.
77  virtual const char *value() = 0;
    /// Returns the length of the current value.
78  virtual uint32_t value_len() = 0;
79  protected:
    /// Qualifier generators as returned by QualifierFactory::create().
80  std::vector<Qualifier *> m_qualifiers;
82  };
83 
84  class ColumnString : public Column {
85  public:
86  ColumnString(ColumnSpec &spec, bool keys_only = false)
87  : Column(spec), m_keys_only(keys_only), m_first_offset(0), m_size(0),
88  m_second_offset(0) {
89  std::string s = source;
90  if (s.empty())
91  s = cooked_source;
92 
93  if (word_stream) {
94  if (s.empty())
95  HT_FATAL("Source file not specified for word stream");
96  if (size == -1)
97  HT_FATAL("Size not specified for word stream");
98  m_word_stream = make_shared<WordStream>(s, seed, size, order == RANDOM);
99  }
100  else {
101 
102  if (s.empty()) {
103  m_value_data_len = size;
104  if (!fixed)
105  m_value_data_len *= 50;
106  m_value_data.reset( new char [ m_value_data_len+1 ] );
107  random_fill_with_chars((char *)m_value_data.get(), m_value_data_len);
108  ((char *)m_value_data.get())[m_value_data_len] = 0;
109  m_source = (const char *)m_value_data.get();
110 
111  }
112  else {
113  m_source = (const char *)FileUtils::mmap(s, &m_value_data_len);
114  HT_ASSERT(m_value_data_len >= size);
115  }
116  m_value = m_source;
117  m_value_data_len -= size;
118  if (cooked_source.empty())
119  m_render_buf.reset( new char [size * 2 + 1] );
120  else
121  m_render_buf.reset( new char [1024 * 10] );
122  }
123  }
124 
/// Destructor; performs no explicit cleanup.
125  virtual ~ColumnString() { }
126 
127  virtual bool next() {
128  // "cooked mode": we have two pointers. move the second pointer forward.
129  // if it reaches eof then restart at the beginning, and move the first
130  // pointer forward
131  off_t offset {};
132  const char *p;
133  size_t first_word_size {};
134  size_t second_word_size {};
135  if (!cooked_source.empty()) {
136  m_cooked.clear();
137  p = m_source + m_first_offset;
138  while (*p && *p != '\n') {
139  p++;
140  first_word_size++;
141  }
142  p = m_source + m_second_offset;
143  while (*p && *p != '\n') {
144  p++;
145  second_word_size++;
146  }
147  // add the first word
148  m_cooked.insert(m_cooked.end(), m_source + m_first_offset,
149  m_source + m_first_offset + first_word_size);
150  m_cooked += " ";
151  m_cooked.insert(m_cooked.end(), m_source + m_second_offset,
152  m_source + m_second_offset + second_word_size);
153 
154  // update the offsets
155  m_second_offset += second_word_size + 1;
156  if (m_second_offset >= m_value_data_len) {
157  m_second_offset = 0;
158  m_first_offset += first_word_size + 1;
159  if (m_first_offset >= m_value_data_len)
160  m_first_offset = 0;
161  }
162  m_size = m_cooked.size();
163  }
164  else if (!m_word_stream) {
165  if (!fixed)
166  offset = random_int32((int32_t)m_value_data_len);
167  }
168 
169  if (m_qualifiers.empty())
170  m_next_qualifier = 0;
171  else
172  m_next_qualifier = (m_next_qualifier + 1) % m_qualifiers.size();
173 
174  if (m_next_qualifier == 0 && !m_keys_only) {
175  if (m_word_stream) {
176  m_value = m_word_stream->next();
177  }
178  else {
179  if (to_stdout) {
180  if (!m_cooked.empty())
181  m_value = m_cooked.c_str();
182  else
183  m_value = m_source + offset;
184  }
185  else if (!fixed) {
186  const char *src = m_source + offset;
187  if (!m_cooked.empty())
188  src = m_cooked.c_str();
189  char *dst = m_render_buf.get();
190  for (size_t i=0; i<(size_t)value_len(); i++) {
191  if (*src == '\n') {
192  *dst++ = '\\';
193  *dst++ = 'n';
194  }
195  else if (*src == '\t') {
196  *dst++ = '\\';
197  *dst++ = 't';
198  }
199  else if (*src == '\0') {
200  *dst++ = '\\';
201  *dst++ = '0';
202  }
203  else
204  *dst++ = *src;
205  src++;
206  }
207  *dst = 0;
208  m_value = m_render_buf.get();
209  }
210  }
211  }
212  if (m_qualifiers.empty())
213  return false;
214  m_qualifiers[m_next_qualifier]->next();
215  if (m_next_qualifier == (m_qualifiers.size()-1))
216  return false;
217  return true;
218  }
219 
220  virtual std::string &qualifier() {
221  if (m_qualifiers.empty())
222  return m_qualifier;
223  return m_qualifiers[m_next_qualifier]->get();
224  }
225 
/// Returns the current value pointer as last set by the constructor or next().
226  virtual const char *value() {
227  return m_value;
228  }
229 
230  virtual uint32_t value_len() {
231  if (word_stream)
232  return strlen(m_value);
233  return m_size ? m_size : size;
234  }
235 
// NOTE(review): listing lines 237, 243-244, 246 and 248 are absent from this
// rendering; the members used above but not declared here (m_keys_only,
// m_value_data_len, m_word_stream, m_first_offset, m_second_offset) are
// presumably declared on those lines.
236  private:
    /// Current value as returned by value(); points into the source data,
    /// the cooked string, the render buffer or word-stream storage.
238  const char *m_value;
    /// Returned by qualifier() when no qualifier generators exist.
239  std::string m_qualifier;
    /// Scratch buffer that receives the escaped copy of a value in next().
240  boost::shared_array<char> m_render_buf;
    /// Owns the randomly generated source data when no source file is given.
241  boost::shared_array<const char> m_value_data;
    /// Start of the source data: the generated buffer or the mapped file.
242  const char *m_source;
    /// Size of the current cooked value; stays 0 outside cooked mode.
245  size_t m_size;
    /// Current cooked value ("<first word> <second word>").
247  std::string m_cooked;
249  };
250 
251 }
252 
253 #endif // Hypertable_Lib_DataGeneratorColumn_h
boost::shared_array< const char > m_value_data
A class generating a stream of words; the words are retrieved from a file and can be randomized...
int32_t random_int32(int32_t maximum)
Generate random 32-bit integer.
static Qualifier * create(QualifierSpec &spec)
STL namespace.
#define HT_FATAL(msg)
Definition: Logger.h:339
virtual const char * value()
virtual std::string & qualifier()
#define HT_ASSERT(_e_)
Definition: Logger.h:396
File system utility functions.
virtual uint32_t value_len()
std::shared_ptr< WordStream > WordStreamPtr
Definition: WordStream.h:106
std::vector< Qualifier * > m_qualifiers
Hypertable definitions
void random_fill_with_chars(char *buf, size_t len, const char *charset=nullptr)
Fills a buffer with random values from a set of characters.
static void * mmap(const String &fname, off_t *lenp)
Maps a full file into memory using mmap; the mapping will be released when the application terminates...
Definition: FileUtils.cc:343
boost::shared_array< char > m_render_buf
A String class based on std::string.
Column(ColumnSpec &spec)
ColumnString(ColumnSpec &spec, bool keys_only=false)
Configuration settings.