0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DataGenerator.h
Go to the documentation of this file.
1 /* -*- c++ -*-
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
22 #ifndef Hypertable_Lib_DataGenerator_h
23 #define Hypertable_Lib_DataGenerator_h
24 
25 #include "Cell.h"
26 #include "DataGeneratorRandom.h"
28 #include "DataGeneratorQualifier.h"
29 #include "DataGeneratorColumn.h"
30 
31 #include <Common/Config.h>
32 #include <Common/Random.h>
33 #include <Common/String.h>
34 
35 #include <iostream>
36 #include <iterator>
37 #include <sstream>
38 #include <string>
39 
40 extern "C" {
41 #include <limits.h>
42 #include <stdlib.h>
43 }
44 
45 namespace Hypertable {
46  using namespace Hypertable::Config;
47  using namespace std;
48 
// Declares the DataGenerator options on the config-file options description
// returned by file_desc(). Each entry gives the option name, its value type
// (i32/i64/str/boo) and a help string.
// Explicit defaults set here via default_value(): DataGenerator.Seed=1,
// rowkey.order="random", rowkey.distribution="uniform", rowkey.seed=1.
// NOTE(review): this listing skips line numbers 51 and 94, so statements of
// this function may be missing from view — check against the real header.
50  static void init_options() {
52  file_desc().add_options()
53  ("DataGenerator.DeletePercentage", i32(),
54  "When generating update workload make this percentage of them deletes")
55  ("DataGenerator.MaxBytes", i64(),
56  "Maximum number of bytes of key and value data to generate")
57  ("DataGenerator.MaxKeys", i64(),
58  "Maximum number of keys to generate for query load")
59  ("DataGenerator.Seed", i32()->default_value(1),
60  "Pseudo-random number generator seed")
61  ("rowkey.order", str()->default_value("random"), "Order in which to "
62  "generate row keys (random, ascending)")
63  ("rowkey.distribution", str()->default_value("uniform"),
64  "Default rowkey distribution (uniform, zipf)")
65  ("rowkey.seed", i32()->default_value(1),
66  "Default rowkey random number generator seed")
// The "<n>" and "<column>" parts of the names below are written literally;
// presumably they are placeholders documenting a family of options rather
// than real option names — TODO confirm how they are matched at parse time.
67  ("rowkey.component.<n>.type", str(),
68  "Type of rowkey component <n> (string, integer)")
69  ("rowkey.component.<n>.format", str(), "printf-style format string "
70  "for rendering rowkey component <n>.")
71  ("rowkey.component.<n>.distribution", str(),
72  "Distribution for rowkey component <n>")
73  ("rowkey.component.<n>.order", str(),
74  "Order in which to generate row keys for component <n>")
75  ("rowkey.component.<n>.min", str(),
76  "Minimum value for rowkey component <n>.")
77  ("rowkey.component.<n>.max", str(),
78  "Maximum value for rowkey component <n>.")
79  ("rowkey.component.<n>.length.min", str(),
80  "Minimum length of randomly generated row component <n>.")
81  ("rowkey.component.<n>.length.max", str(),
82  "Maximum length of randomly generated row component <n>.")
83  ("<column>.qualifier.type", str(), "Type of qualifier")
84  ("<column>.qualifier.size", i32(), "Size of qualifier")
85  ("<column>.qualifier.charset", str(),
86  "Set of characters to use when generating string qualifiers")
// The "(default=...)" values quoted in the value.random / value.fixed help
// strings are not set with default_value() here; presumably they are applied
// where the properties are read — TODO confirm.
87  ("<column>.value.random", boo(), "Generate random values (default=true)")
88  ("<column>.value.seed", i32(), "Pseudo-random generator seed")
89  ("<column>.value.size", i32(), "Size of value")
// NOTE(review): value.source is described as a source file yet declared
// i32(), and value.source.words reads like an on/off flag yet is also i32().
// str() and boo() look like the intended types — confirm before changing.
90  ("<column>.value.source", i32(), "Source file to pull value data from")
91  ("<column>.value.source.words", i32(), "Interpret source as word stream; value.size treated as word count")
// NOTE(review): help text below says "Used"; "Use" looks intended.
92  ("<column>.value.fixed", boo(), "Used the same fixed value for each cell (default=false)")
93  ;
95  }
96  };
97 
98  class DataGenerator;
99 
// STL-style iterator over the cells produced by a DataGenerator; derives
// from boost::iterator<forward_iterator_tag, Cell>.
// NOTE(review): this listing skips some line numbers inside the class
// (124, 126, 129-130). m_generator and m_cell are used below but their
// declarations are not visible here, and DataGenerator::begin() constructs
// this type from a DataGenerator pointer through a constructor that is also
// not visible.
103  class DataGeneratorIterator : public boost::iterator<forward_iterator_tag, Cell> {
104 
105  friend class DataGenerator;
106 
107  public:
108 
// Returns a mutable reference to the iterator's m_cell member.
109  Cell& operator*() { return m_cell; }
110 
// next() and both increment operators are only declared here; their
// definitions live outside this header listing.
111  void next();
112  DataGeneratorIterator& operator++();
// NOTE(review): the postfix form returns a reference, whereas postfix
// increment conventionally returns the previous value by copy — confirm
// this is intentional before relying on `it++` semantics.
113  DataGeneratorIterator& operator++(int n);
114 
// Returns the current value of m_last_data_size.
115  unsigned long last_data_size() { return m_last_data_size; }
116 
// Not a symmetric inequality: if `other` carries a count other than
// INT64_MAX, the result is `m_count <= other.m_count`; otherwise it is
// `m_amount < other.m_amount`. Presumably `other` is the end() iterator and
// this acts as a "limit not yet reached" test — verify against callers.
117  bool operator!=(const DataGeneratorIterator& other) const {
118  if (other.m_count != std::numeric_limits< ::int64_t >::max())
119  return m_count <= other.m_count;
120  return m_amount < other.m_amount;
121  }
122 
123  private:
// Private constructor (DataGenerator is a friend and uses it in end()).
// Sets m_generator to 0 and stores the two limits; m_last_data_size and
// m_next_column are not initialised by this constructor.
125  DataGeneratorIterator(int64_t amount, int64_t count) : m_generator(0), m_amount(amount), m_count(count) { }
127  std::vector<RowComponent *> m_row_components;
128  std::vector<Column *> m_columns;
// m_amount and m_count are the two values compared in operator!=.
131  int64_t m_amount;
132  int64_t m_count;
133  unsigned long m_last_data_size;
134  std::string m_row;
135  int32_t m_next_column;
136  };
137 
138 
143 
// Body of class DataGenerator. NOTE(review): the `class DataGenerator {`
// line and the declaration of the `iterator` type used below are not visible
// in this listing (line numbers 139-143 and 145 are skipped).
144  public:
146  friend class DataGeneratorIterator;
147 
148  public:
// Constructor is only declared here; keys_only defaults to false.
149  DataGenerator(PropertiesPtr &props, bool keys_only=false);
// Calls random_generator_set_seed(m_seed) and then returns an iterator
// constructed from `this`, so each begin() call sets the seed again.
150  iterator begin() { random_generator_set_seed(m_seed); return DataGeneratorIterator(this); }
// Returns an iterator built from the two limits (m_max_bytes, m_max_keys)
// via DataGeneratorIterator's (amount, count) constructor.
151  iterator end() { return DataGeneratorIterator(m_max_bytes, m_max_keys); }
// Plain accessors for the two limits.
152  int64_t get_max_bytes() { return m_max_bytes; }
153  int64_t get_max_keys() { return m_max_keys; }
154 
155  protected:
158  int64_t m_max_bytes;
159  int64_t m_max_keys;
// Seed passed to random_generator_set_seed() in begin().
160  uint32_t m_seed;
161  std::vector<RowComponentSpec> m_row_component_specs;
162  std::vector<ColumnSpec> m_column_specs;
163 
164  private:
// Declared only; the definition and the meaning of the int result are not
// visible in this header listing.
165  int parse_order(const std::string &str);
166  };
167 
168 }
169 
170 #endif // Hypertable_Lib_DataGenerator_h
Interface and base of config policy.
Definition: Config.h:149
DataGeneratorIterator(int64_t amount, int64_t count)
std::vector< RowComponent * > m_row_components
bool operator!=(const DataGeneratorIterator &other) const
std::vector< Column * > m_columns
Po::typed_value< String > * str(String *v=0)
Definition: Properties.h:166
STL namespace.
std::vector< RowComponentSpec > m_row_component_specs
std::vector< ColumnSpec > m_column_specs
Po::typed_value< int64_t > * i64(int64_t *v=0)
Definition: Properties.h:182
Po::typed_value< int32_t > * i32(int32_t *v=0)
Definition: Properties.h:178
std::shared_ptr< Properties > PropertiesPtr
Definition: Properties.h:447
Desc & file_desc(const char *usage)
Get the config file options description.
Definition: Config.cc:108
DataGeneratorIterator iterator
Po::typed_value< bool > * boo(bool *v=0)
Definition: Properties.h:162
bool allow_unregistered_options(bool choice)
Toggle allow unregistered options.
Definition: Config.cc:654
Hypertable definitions
Provides an STL-style iterator on DataGenerator objects.
Random number generator for int32, int64, double and ascii arrays.
A String class based on std::string.
Configuration settings.
Desc & cmdline_hidden_desc()
Get the command line hidden options description (for positional options)
Definition: Config.cc:81
Encapsulates decomposed key and value.
Definition: Cell.h:32
void random_generator_set_seed(unsigned seed)
Sets random number generator seed.