0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ScanSpec.h
Go to the documentation of this file.
1 /* -*- c++ -*-
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
22 #ifndef Hypertable_Lib_ScanSpec_h
23 #define Hypertable_Lib_ScanSpec_h
24 
27 #include <Hypertable/Lib/Key.h>
30 
32 #include <Common/Serializable.h>
33 
34 #include <boost/noncopyable.hpp>
35 
36 #include <vector>
37 
38 namespace Hypertable {
39 namespace Lib {
40 
41  using namespace std;
42 
44  typedef vector<RowInterval, RowIntervalAlloc> RowIntervals;
45 
47  typedef vector<CellInterval, CellIntervalAlloc> CellIntervals;
48 
50  typedef vector<const char *, CstrAlloc> CstrColumns;
51 
53  typedef vector<ColumnPredicate, ColumnPredicateAlloc> ColumnPredicates;
54 
56  class ScanSpec : public Serializable {
57  public:
58  ScanSpec() : time_interval(TIMESTAMP_MIN, TIMESTAMP_MAX) { }
60  : columns(CstrAlloc(arena)),
61  row_intervals(RowIntervalAlloc(arena)),
62  cell_intervals(CellIntervalAlloc(arena)),
63  column_predicates(ColumnPredicateAlloc(arena)),
64  time_interval(TIMESTAMP_MIN, TIMESTAMP_MAX) { }
65  ScanSpec(CharArena &arena, const ScanSpec &);
66  ScanSpec(const uint8_t **bufp, size_t *remainp) { decode(bufp, remainp); }
67 
71  const string render_hql(const string &table) const;
72 
73  void clear() {
74  row_limit = 0;
75  cell_limit = 0;
76  cell_limit_per_family = 0;
77  row_offset = 0;
78  cell_offset = 0;
79  max_versions = 0;
80  columns.clear();
81  row_intervals.clear();
82  cell_intervals.clear();
83  column_predicates.clear();
84  time_interval.first = TIMESTAMP_MIN;
85  time_interval.second = TIMESTAMP_MAX;
86  rebuild_indices.clear();
87  keys_only = false;
88  return_deletes = false;
89  row_regexp = 0;
90  value_regexp = 0;
91  scan_and_filter_rows = false;
92  do_not_cache = false;
93  and_column_predicates = false;
94  }
95 
100  void base_copy(ScanSpec &other) const {
101  other.row_limit = row_limit;
102  other.cell_limit = cell_limit;
103  other.cell_limit_per_family = cell_limit_per_family;
104  other.row_offset = row_offset;
105  other.cell_offset = cell_offset;
106  other.max_versions = max_versions;
107  other.columns = columns;
108  other.time_interval = time_interval;
109  other.keys_only = keys_only;
110  other.return_deletes = return_deletes;
111  other.row_intervals.clear();
112  other.cell_intervals.clear();
113  other.column_predicates.clear();
114  other.row_regexp = row_regexp;
115  other.value_regexp = value_regexp;
116  other.scan_and_filter_rows = scan_and_filter_rows;
117  other.do_not_cache = do_not_cache;
118  other.column_predicates = column_predicates;
119  other.and_column_predicates = and_column_predicates;
120  other.rebuild_indices = rebuild_indices;
121  }
122 
123  bool cacheable() const {
124  if (do_not_cache || rebuild_indices)
125  return false;
126  else if (row_intervals.size() == 1) {
127  HT_ASSERT(row_intervals[0].start && row_intervals[0].end);
128  if (!strcmp(row_intervals[0].start, row_intervals[0].end))
129  return true;
130  }
131  else if (cell_intervals.size() == 1) {
132  HT_ASSERT(cell_intervals[0].start_row && cell_intervals[0].end_row);
133  if (!strcmp(cell_intervals[0].start_row, cell_intervals[0].end_row))
134  return true;
135  }
136  return false;
137  }
138 
139  const char *cache_key() const {
140  if (!row_intervals.empty())
141  return row_intervals[0].start;
142  else if (!cell_intervals.empty())
143  return cell_intervals[0].start_row;
144  HT_ASSERT(!"cache key not found");
145  return 0;
146  }
147 
148  void add_column(CharArena &arena, const string &str) {
149  columns.push_back(arena.dup(str));
150  }
151 
163  static void parse_column(const char *column_str, string &family,
164  const char **qualifier, size_t *qualifier_len,
165  bool *has_qualifier, bool *is_regexp,
166  bool *is_prefix);
167 
168  void add_row(CharArena &arena, const string &str) {
169  if (cell_intervals.size())
170  HT_THROW(Error::BAD_SCAN_SPEC, "cell spec excludes rows");
171 
172  RowInterval ri;
173  ri.start = ri.end = arena.dup(str);
174  ri.start_inclusive = ri.end_inclusive = true;
175  row_intervals.push_back(ri);
176  }
177 
178  void set_row_regexp(CharArena &arena, const char *regexp) {
179  if (row_regexp != 0)
180  HT_THROWF(Error::BAD_SCAN_SPEC, "row_regexp already set to '%s'", row_regexp);
181  row_regexp = arena.dup(regexp);
182  }
183 
184  void set_value_regexp(CharArena &arena, const char *regexp) {
185  if (value_regexp != 0)
186  HT_THROWF(Error::BAD_SCAN_SPEC, "value_regexp already set to '%s'", value_regexp);
187  value_regexp = arena.dup(regexp);
188  }
189 
191  const string &start, bool start_inclusive,
192  const string &end, bool end_inclusive) {
193  if (cell_intervals.size())
194  HT_THROW(Error::BAD_SCAN_SPEC, "cell spec excludes rows");
195 
196  RowInterval ri;
197  ri.start = arena.dup(start);
198  ri.start_inclusive = start_inclusive;
199  ri.end = !end.empty() ? arena.dup(end) : Key::END_ROW_MARKER;
200  ri.end_inclusive = end_inclusive;
201  row_intervals.push_back(ri);
202  }
203 
204  void add_cell(CharArena &arena, const string &row, const string &column) {
205  if (row_intervals.size())
206  HT_THROW(Error::BAD_SCAN_SPEC, "row spec excludes cells");
207 
208  CellInterval ci;
209  ci.start_row = ci.end_row = arena.dup(row);
210  ci.start_column = ci.end_column = arena.dup(column);
211  ci.start_inclusive = ci.end_inclusive = true;
212  cell_intervals.push_back(ci);
213  }
214 
216  const string &start_row, const string &start_column,
217  bool start_inclusive, const string &end_row,
218  const string &end_column, bool end_inclusive) {
219  if (row_intervals.size())
220  HT_THROW(Error::BAD_SCAN_SPEC, "row spec excludes cells");
221 
222  CellInterval ci;
223  ci.start_row = arena.dup(start_row);
224  ci.start_column = arena.dup(start_column);
225  ci.start_inclusive = start_inclusive;
226  ci.end_row = !end_row.empty() ? arena.dup(end_row) : Key::END_ROW_MARKER;
227  ci.end_column = arena.dup(end_column);
228  ci.end_inclusive = end_inclusive;
229  cell_intervals.push_back(ci);
230  }
231 
232  void add_column_predicate(CharArena &arena, const string &column_family,
233  const char *column_qualifier, uint32_t operation,
234  const char *value, uint32_t value_len = 0) {
235 
236  // As soon as we're building with C++11 and can replace the bitset<32> in
237  // the CellPredicate class with bitset<64>, then we should change the
238  // following expression to check for size of 64
239  if (column_predicates.size() == 32)
240  HT_THROW(Error::FAILED_EXPECTATION, "Column predicate limit of 32 has been exceeded!");
241 
242  ColumnPredicate cp;
243  cp.column_family = arena.dup(column_family);
244  cp.column_qualifier = arena.dup(column_qualifier);
245  cp.column_qualifier_len = column_qualifier ? strlen(column_qualifier) : 0;
246  cp.operation = operation;
247  if (value) {
248  // if value_len have not been specified assume zero terminating string
249  cp.value_len = value_len ? value_len : strlen(value);
250  cp.value = arena.dup(value, value_len ? value_len : cp.value_len + 1);
251  }
252  column_predicates.push_back(cp);
253  }
254 
255  void set_time_interval(int64_t start, int64_t end) {
256  time_interval.first = start;
257  time_interval.second = end;
258  }
259 
260  void set_start_time(int64_t start) {
261  time_interval.first = start;
262  }
263 
264  void set_end_time(int64_t end) {
265  time_interval.second = end;
266  }
267 
268  int32_t row_limit {};
269  int32_t cell_limit {};
270  int32_t cell_limit_per_family {};
271  int32_t row_offset {};
272  int32_t cell_offset {};
273  uint32_t max_versions {};
274  CstrColumns columns;
275  RowIntervals row_intervals;
276  CellIntervals cell_intervals;
277  ColumnPredicates column_predicates;
278  pair<int64_t,int64_t> time_interval;
279  const char *row_regexp {};
280  const char *value_regexp {};
281  bool return_deletes {};
282  bool keys_only {};
283  bool scan_and_filter_rows {};
284  bool do_not_cache {};
285  bool and_column_predicates {};
287 
288  private:
289 
292  uint8_t encoding_version() const override;
293 
297  size_t encoded_length_internal() const override;
298 
301  void encode_internal(uint8_t **bufp) const override;
302 
309  void decode_internal(uint8_t version, const uint8_t **bufp,
310  size_t *remainp) override;
311 
312  };
313 
318  class ScanSpecBuilder : boost::noncopyable {
319  public:
320 
323  ScanSpecBuilder(size_t page_size=8192) :
324  m_arena(page_size), m_scan_spec(m_arena) { }
325 
329  ScanSpecBuilder(const ScanSpec &ss, size_t page_size=8192) :
330  m_arena(page_size), m_scan_spec(m_arena, ss) {}
331 
333  m_scan_spec = ScanSpec(m_arena, ss);
334  return *this;
335  }
336 
342  void set_row_limit(int32_t n) { m_scan_spec.row_limit = n; }
343 
349  void set_cell_limit(int32_t n) { m_scan_spec.cell_limit = n; }
350 
356  void set_cell_limit_per_family(int32_t n) { m_scan_spec.cell_limit_per_family = n; }
357 
363  void set_row_offset(int32_t n) {
364  if (n && m_scan_spec.cell_offset)
365  HT_THROW(Error::BAD_SCAN_SPEC, "predicate row_offset not allowed in "
366  "combination with cell_offset");
367  m_scan_spec.row_offset = n;
368  }
369 
375  void set_cell_offset(int32_t n) {
376  if (n && m_scan_spec.row_offset)
377  HT_THROW(Error::BAD_SCAN_SPEC, "predicate cell_offset not allowed in "
378  "combination with row_offset");
379  m_scan_spec.cell_offset = n;
380  }
381 
387  void set_max_versions(uint32_t n) { m_scan_spec.max_versions = n; }
388 
394  void set_row_regexp(const char* regexp) { m_scan_spec.set_row_regexp(m_arena, regexp); }
395 
401  void set_value_regexp(const char* regexp) { m_scan_spec.set_value_regexp(m_arena, regexp); }
402 
408  void add_column(const string &str) {
409  m_scan_spec.add_column(m_arena, str);
410  }
411 
412  void reserve_columns(size_t s) { m_scan_spec.columns.reserve(s); }
413 
425  void
426  add_column_predicate(const string &column_family, const char *column_qualifier,
427  uint32_t operation, const char *value,
428  uint32_t value_len = 0) {
429  m_scan_spec.add_column_predicate(m_arena, column_family, column_qualifier,
430  operation, value, value_len);
431  }
432 
433  void reserve_column_predicates(size_t s) { m_scan_spec.column_predicates.reserve(s); }
434 
435 
441  void add_row(const string &str) {
442  m_scan_spec.add_row(m_arena, str);
443  }
444 
445  void reserve_rows(size_t s) { m_scan_spec.row_intervals.reserve(s); }
446 
455  void add_row_interval(const string &start, bool start_inclusive,
456  const string &end, bool end_inclusive) {
457  m_scan_spec.add_row_interval(m_arena, start, start_inclusive,
458  end, end_inclusive);
459  }
460 
467  void add_cell(const string &row, const string &column) {
468  m_scan_spec.add_cell(m_arena, row, column);
469  }
470 
471  void reserve_cells(size_t s) { m_scan_spec.cell_intervals.reserve(s); }
472 
485  void add_cell_interval(const string &start_row, const string &start_column,
486  bool start_inclusive, const string &end_row,
487  const string &end_column, bool end_inclusive) {
488  m_scan_spec.add_cell_interval(m_arena, start_row, start_column,
489  start_inclusive, end_row, end_column, end_inclusive);
490  }
491 
499  void set_time_interval(int64_t start, int64_t end) {
500  m_scan_spec.set_time_interval(start, end);
501  }
502 
503  void set_start_time(int64_t start) {
504  m_scan_spec.time_interval.first = start;
505  }
506 
507  void set_end_time(int64_t end) {
508  m_scan_spec.time_interval.second = end;
509  }
510 
514  void set_keys_only(bool val) {
515  m_scan_spec.keys_only = val;
516  }
517 
521  void set_return_deletes(bool val) {
522  m_scan_spec.return_deletes = val;
523  }
524 
528  void set_scan_and_filter_rows(bool val) {
529  m_scan_spec.scan_and_filter_rows = val;
530  }
531 
535  void set_do_not_cache(bool val) {
536  m_scan_spec.do_not_cache = val;
537  }
538 
542  m_scan_spec.rebuild_indices = parts;
543  }
544 
548  void set_and_column_predicates(bool val) {
549  m_scan_spec.and_column_predicates = val;
550  }
551 
555  void clear() {
556  m_scan_spec.clear();
557  // Don't call m_arena.free() here, as for stl containers (vector etc.),
558  // clear() assumes underlying storage is still intact!
559  }
560 
566  ScanSpec &get() { return m_scan_spec; }
567 
568  private:
571  };
572 
573  ostream &operator<<(ostream &os, const ScanSpec &scan_spec);
574 
575 }}
576 
577 #endif // Hypertable_Lib_ScanSpec_h
void set_end_time(int64_t end)
Definition: ScanSpec.h:264
void add_column_predicate(CharArena &arena, const string &column_family, const char *column_qualifier, uint32_t operation, const char *value, uint32_t value_len=0)
Definition: ScanSpec.h:232
const char * row_regexp
Definition: ScanSpec.h:279
void set_row_offset(int32_t n)
Sets the number of rows to be skipped at the beginning of the query.
Definition: ScanSpec.h:363
PageArenaAllocator< const char * > CstrAlloc
Definition: ScanSpec.h:49
void clear()
Clears the state.
Definition: ScanSpec.h:555
ColumnPredicates column_predicates
Definition: ScanSpec.h:277
void add_cell(CharArena &arena, const string &row, const string &column)
Definition: ScanSpec.h:204
const char * cache_key() const
Definition: ScanSpec.h:139
Po::typed_value< String > * str(String *v=0)
Definition: Properties.h:166
pair< int64_t, int64_t > time_interval
Definition: ScanSpec.h:278
const char * value_regexp
Definition: ScanSpec.h:280
void add_column(CharArena &arena, const string &str)
Definition: ScanSpec.h:148
void add_row(const string &str)
Adds a row to be returned in the scan.
Definition: ScanSpec.h:441
PageArena memory allocator for STL classes.
The PageArenaAllocator is a STL allocator based on PageArena.
STL namespace.
ScanSpecBuilder(size_t page_size=8192)
Constructor.
Definition: ScanSpec.h:323
void set_start_time(int64_t start)
Definition: ScanSpec.h:503
vector< ColumnPredicate, ColumnPredicateAlloc > ColumnPredicates
Definition: ScanSpec.h:53
void set_start_time(int64_t start)
Definition: ScanSpec.h:260
void set_value_regexp(const char *regexp)
Sets the regexp to filter cell values by.
Definition: ScanSpec.h:401
Represents a set of table parts (sub-tables).
Definition: TableParts.h:47
static const int64_t TIMESTAMP_MIN
Definition: KeySpec.h:34
Represents a row interval.
Definition: RowInterval.h:38
#define HT_ASSERT(_e_)
Definition: Logger.h:396
void set_row_regexp(CharArena &arena, const char *regexp)
Definition: ScanSpec.h:178
vector< RowInterval, RowIntervalAlloc > RowIntervals
Definition: ScanSpec.h:44
Scan predicate and control specification.
Definition: ScanSpec.h:56
vector< CellInterval, CellIntervalAlloc > CellIntervals
Definition: ScanSpec.h:47
void set_cell_limit(int32_t n)
Sets the maximum number of cells to return.
Definition: ScanSpec.h:349
void set_end_time(int64_t end)
Definition: ScanSpec.h:507
CharT * dup(const CharT *s)
Duplicate a null terminated string; memory is allocated from the pool.
Definition: PageArena.h:274
void set_row_regexp(const char *regexp)
Sets the regexp to filter rows by.
Definition: ScanSpec.h:394
void set_time_interval(int64_t start, int64_t end)
Definition: ScanSpec.h:255
void set_keys_only(bool val)
Return only keys (no values)
Definition: ScanSpec.h:514
void base_copy(ScanSpec &other) const
Initialize another ScanSpec object with this copy sans the intervals.
Definition: ScanSpec.h:100
ostream & operator<<(ostream &os, const CellInterval &ci)
TableParts rebuild_indices
Definition: ScanSpec.h:286
void reserve_column_predicates(size_t s)
Definition: ScanSpec.h:433
void add_cell(const string &row, const string &column)
Adds a cell to be returned in the scan.
Definition: ScanSpec.h:467
The PageArena allocator is simple and fast, avoiding individual mallocs/frees.
Definition: PageArena.h:69
void set_return_deletes(bool val)
Internal use only.
Definition: ScanSpec.h:521
Helper class for building a ScanSpec.
Definition: ScanSpec.h:318
void set_cell_limit_per_family(int32_t n)
Sets the maximum number of cells to return per column family.
Definition: ScanSpec.h:356
void set_do_not_cache(bool val)
Don't cache.
Definition: ScanSpec.h:535
Represents a column predicate.
vector< const char *, CstrAlloc > CstrColumns
Definition: ScanSpec.h:50
PageArenaAllocator< RowInterval > RowIntervalAlloc
Definition: ScanSpec.h:43
Declarations for Serializable.
Hypertable definitions
void add_row_interval(CharArena &arena, const string &start, bool start_inclusive, const string &end, bool end_inclusive)
Definition: ScanSpec.h:190
void set_time_interval(int64_t start, int64_t end)
Sets the time interval of the scan.
Definition: ScanSpec.h:499
PageArenaAllocator< CellInterval > CellIntervalAlloc
Definition: ScanSpec.h:46
static const int64_t TIMESTAMP_MAX
Definition: KeySpec.h:35
void add_column(const string &str)
Adds a column family to be returned by the scan.
Definition: ScanSpec.h:408
void add_row_interval(const string &start, bool start_inclusive, const string &end, bool end_inclusive)
Adds a row interval to be returned in the scan.
Definition: ScanSpec.h:455
void set_rebuild_indices(TableParts parts)
Rebuild indices.
Definition: ScanSpec.h:541
Mixin class that provides a standard serialization interface.
Definition: Serializable.h:65
ScanSpec(CharArena &arena)
Definition: ScanSpec.h:59
#define HT_THROWF(_code_, _fmt_,...)
Definition: Error.h:490
ScanSpec(const uint8_t **bufp, size_t *remainp)
Definition: ScanSpec.h:66
RowIntervals row_intervals
Definition: ScanSpec.h:275
bool cacheable() const
Definition: ScanSpec.h:123
void set_value_regexp(CharArena &arena, const char *regexp)
Definition: ScanSpec.h:184
void add_cell_interval(CharArena &arena, const string &start_row, const string &start_column, bool start_inclusive, const string &end_row, const string &end_column, bool end_inclusive)
Definition: ScanSpec.h:215
void set_and_column_predicates(bool val)
AND together the column predicates.
Definition: ScanSpec.h:548
Represents a cell interval.
Definition: CellInterval.h:38
void add_cell_interval(const string &start_row, const string &start_column, bool start_inclusive, const string &end_row, const string &end_column, bool end_inclusive)
Adds a cell interval to be returned in the scan.
Definition: ScanSpec.h:485
void add_column_predicate(const string &column_family, const char *column_qualifier, uint32_t operation, const char *value, uint32_t value_len=0)
Adds a column predicate to the scan.
Definition: ScanSpec.h:426
CellIntervals cell_intervals
Definition: ScanSpec.h:276
ScanSpecBuilder & operator=(const ScanSpec &ss)
Definition: ScanSpec.h:332
PageArenaAllocator< ColumnPredicate > ColumnPredicateAlloc
Definition: ScanSpec.h:52
void set_row_limit(int32_t n)
Sets the maximum number of rows to return in the scan.
Definition: ScanSpec.h:342
void set_max_versions(uint32_t n)
Sets the maximum number of revisions of each cell to return in the scan.
Definition: ScanSpec.h:387
void reserve_columns(size_t s)
Definition: ScanSpec.h:412
ScanSpecBuilder(const ScanSpec &ss, size_t page_size=8192)
Copy constructor from a ScanSpec.
Definition: ScanSpec.h:329
#define HT_THROW(_code_, _msg_)
Definition: Error.h:478
static const char * END_ROW_MARKER
Definition: Key.h:49
void set_scan_and_filter_rows(bool val)
Scan and filter rows.
Definition: ScanSpec.h:528
void set_cell_offset(int32_t n)
Sets the number of cells to be skipped at the beginning of the query.
Definition: ScanSpec.h:375
void add_row(CharArena &arena, const string &str)
Definition: ScanSpec.h:168
Declarations for TableParts.