0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ScanSpec.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
22 #include <Common/Compat.h>
23 
24 #include "ScanSpec.h"
25 
26 #include <Hypertable/Lib/KeySpec.h>
27 
28 #include <Common/Serialization.h>
29 
30 #include <boost/algorithm/string.hpp>
31 
32 #include <cstring>
33 #include <iostream>
34 
35 using namespace Hypertable;
36 using namespace Hypertable::Lib;
37 using namespace std;
38 
39 uint8_t ScanSpec::encoding_version() const {
40  return 1;
41 }
42 
44  size_t len = Serialization::encoded_length_vi32(row_offset) +
48  Serialization::encoded_length_vi32(cell_limit_per_family) +
50  Serialization::encoded_length_vi32(columns.size()) +
51  Serialization::encoded_length_vi32(row_intervals.size()) +
52  Serialization::encoded_length_vi32(cell_intervals.size()) +
53  Serialization::encoded_length_vi32(column_predicates.size()) +
56  rebuild_indices.encoded_length();
57  for (auto c : columns)
59  for (auto &ri : row_intervals)
60  len += ri.encoded_length();
61  for (auto &ci : cell_intervals)
62  len += ci.encoded_length();
63  for (auto &cp : column_predicates)
64  len += cp.encoded_length();
65  return len + 8 + 8 + 5;
66 }
67 
101 void ScanSpec::encode_internal(uint8_t **bufp) const {
102  Serialization::encode_vi32(bufp, row_offset);
103  Serialization::encode_vi32(bufp, row_limit);
104  Serialization::encode_vi32(bufp, cell_offset);
105  Serialization::encode_vi32(bufp, cell_limit);
106  Serialization::encode_vi32(bufp, cell_limit_per_family);
107  Serialization::encode_vi32(bufp, max_versions);
108  Serialization::encode_i64(bufp, time_interval.first);
109  Serialization::encode_i64(bufp, time_interval.second);
110  Serialization::encode_vi32(bufp, columns.size());
111  for (auto column : columns) Serialization::encode_vstr(bufp, column);
112  Serialization::encode_vi32(bufp, row_intervals.size());
113  for (auto & ri : row_intervals) ri.encode(bufp);
114  Serialization::encode_vi32(bufp, cell_intervals.size());
115  for (auto & ci : cell_intervals) ci.encode(bufp);
116  Serialization::encode_vi32(bufp, column_predicates.size());
117  for (auto & cp : column_predicates) cp.encode(bufp);
118  Serialization::encode_vstr(bufp, row_regexp);
119  Serialization::encode_vstr(bufp, value_regexp);
120  Serialization::encode_bool(bufp, return_deletes);
121  Serialization::encode_bool(bufp, keys_only);
122  Serialization::encode_bool(bufp, scan_and_filter_rows);
123  Serialization::encode_bool(bufp, do_not_cache);
124  Serialization::encode_bool(bufp, and_column_predicates);
125  rebuild_indices.encode(bufp);
126 }
127 
128 void ScanSpec::decode_internal(uint8_t version, const uint8_t **bufp,
129  size_t *remainp) {
130  RowInterval ri;
131  CellInterval ci;
132  ColumnPredicate cp;
133  HT_TRY("decoding scan spec",
134  row_offset = Serialization::decode_vi32(bufp, remainp);
135  row_limit = Serialization::decode_vi32(bufp, remainp);
136  cell_offset = Serialization::decode_vi32(bufp, remainp);
137  cell_limit = Serialization::decode_vi32(bufp, remainp);
138  cell_limit_per_family = Serialization::decode_vi32(bufp, remainp);
139  max_versions = Serialization::decode_vi32(bufp, remainp);
140  time_interval.first = Serialization::decode_i64(bufp, remainp);
141  time_interval.second = Serialization::decode_i64(bufp, remainp);
142  for (size_t i = Serialization::decode_vi32(bufp, remainp); i--;)
143  columns.push_back(Serialization::decode_vstr(bufp, remainp));
144  for (size_t i = Serialization::decode_vi32(bufp, remainp); i--;) {
145  ri.decode(bufp, remainp);
146  row_intervals.push_back(ri);
147  }
148  for (size_t i = Serialization::decode_vi32(bufp, remainp); i--;) {
149  ci.decode(bufp, remainp);
150  cell_intervals.push_back(ci);
151  }
152  for (size_t i = Serialization::decode_vi32(bufp, remainp); i--;) {
153  cp.decode(bufp, remainp);
154  column_predicates.push_back(cp);
155  }
156  row_regexp = Serialization::decode_vstr(bufp, remainp);
157  value_regexp = Serialization::decode_vstr(bufp, remainp);
158  return_deletes = Serialization::decode_bool(bufp, remainp);
159  keys_only = Serialization::decode_bool(bufp, remainp);
160  scan_and_filter_rows = Serialization::decode_bool(bufp, remainp);
161  do_not_cache = Serialization::decode_bool(bufp, remainp);
162  and_column_predicates = Serialization::decode_bool(bufp, remainp);
163  rebuild_indices.decode(bufp, remainp));
164 }
165 
166 const string ScanSpec::render_hql(const string &table) const {
167  string hql;
168 
169  hql.append("SELECT ");
170 
171  if (columns.empty())
172  hql.append("*");
173  else {
174  bool first = true;
175  for (auto column : columns) {
176  if (first)
177  first = false;
178  else
179  hql.append(",");
180  hql.append("\"");
181  hql.append(column);
182  hql.append("\"");
183  }
184  }
185 
186  hql.append(" FROM ");
187  hql.append(table);
188 
189  char const *bool_op = " AND ";
190  bool first = true;
191 
192  // row intervals
193  for (auto & ri : row_intervals) {
194  if (first) {
195  hql.append(" WHERE ");
196  first = false;
197  }
198  else
199  hql.append(bool_op);
200  hql.append(ri.render_hql());
201  }
202 
203  if (row_regexp) {
204  if (first) {
205  hql.append(" WHERE ");
206  first = false;
207  }
208  else
209  hql.append(bool_op);
210  hql.append(format("ROW REGEXP \"%s\"", row_regexp));
211  }
212 
213  if (value_regexp) {
214  if (first) {
215  hql.append(" WHERE ");
216  first = false;
217  }
218  else
219  hql.append(bool_op);
220  hql.append(format("VALUE REGEXP \"%s\"", value_regexp));
221  }
222 
223 
224  // cell intervals
225  for (auto & ci : cell_intervals) {
226  if (first) {
227  hql.append(" WHERE ");
228  first = false;
229  }
230  else
231  hql.append(bool_op);
232  hql.append(ci.render_hql());
233  }
234 
235  if (!and_column_predicates)
236  bool_op = " OR ";
237 
238  // column predicates
239  for (auto & cp : column_predicates) {
240  if (first) {
241  hql.append(" WHERE ");
242  first = false;
243  }
244  else
245  hql.append(bool_op);
246  hql.append(cp.render_hql());
247  }
248 
249  // time interval
250  if (time_interval.first != TIMESTAMP_MIN ||
251  time_interval.second != TIMESTAMP_MAX) {
252  hql.append(" AND ");
253  if (time_interval.first != TIMESTAMP_MIN) {
254  hql.append(format("%lld", (Lld)time_interval.first));
255  hql.append(" <= ");
256  }
257  hql.append("TIMESTAMP");
258  if (time_interval.second != TIMESTAMP_MAX) {
259  hql.append(" < ");
260  hql.append(format("%lld", (Lld)time_interval.second));
261  }
262  }
263 
264  if (row_offset)
265  hql.append(format(" ROW_OFFSET %d", (int)row_offset));
266 
267  if (row_limit)
268  hql.append(format(" ROW_LIMIT %d", (int)row_limit));
269 
270  if (cell_offset)
271  hql.append(format(" CELL_OFFSET %d", (int)cell_offset));
272 
273  if (cell_limit)
274  hql.append(format(" CELL_LIMIT %d", (int)cell_limit));
275 
276  if (cell_limit_per_family)
277  hql.append(format(" CELL_LIMIT_PER_FAMILY %d", (int)cell_limit_per_family));
278 
279  if (max_versions)
280  hql.append(format(" MAX_VERSIONS %d", (int)max_versions));
281 
282  if (return_deletes)
283  hql.append(" RETURN_DELETES");
284 
285  if (keys_only)
286  hql.append(" KEYS_ONLY");
287 
288  if (scan_and_filter_rows)
289  hql.append(" SCAN_AND_FILTER_ROWS");
290 
291  if (do_not_cache)
292  hql.append(" DO_NOT_CACHE");
293 
294  if (rebuild_indices)
295  hql.append(format(" REBUILD_INDICES %s", rebuild_indices.to_string().c_str()));
296 
297  return hql;
298 }
299 
300 
302 ostream &Hypertable::Lib::operator<<(ostream &os, const ScanSpec &scan_spec) {
303  os <<"{ScanSpec:";
304 
305  // columns
306  os << " columns=";
307  if (scan_spec.columns.empty())
308  os << '*';
309  else {
310  os << '(';
311  bool first = true;
312  for (auto column : scan_spec.columns) {
313  if (first)
314  first = false;
315  else
316  os << ",";
317  os << column;
318  }
319  os <<')';
320  }
321 
322  // row intervals
323  for (auto & ri : scan_spec.row_intervals)
324  os << " " << ri;
325 
326  // cell intervals
327  for (auto & ci : scan_spec.cell_intervals)
328  os << " " << ci;
329 
330  // column predicates
331  for (auto & cp : scan_spec.column_predicates)
332  os << " " << cp;
333 
334  // time interval
335  if (scan_spec.time_interval.first != TIMESTAMP_MIN ||
336  scan_spec.time_interval.second != TIMESTAMP_MAX) {
337  if (scan_spec.time_interval.first != TIMESTAMP_MIN)
338  os << scan_spec.time_interval.first << " <= ";
339  os << "TIMESTAMP";
340  if (scan_spec.time_interval.second != TIMESTAMP_MAX)
341  os << " < " << scan_spec.time_interval.second;
342  }
343 
344  if (scan_spec.row_offset)
345  os <<" row_offset=" << scan_spec.row_offset;
346 
347  if (scan_spec.row_limit)
348  os << " row_limit="<< scan_spec.row_limit;
349 
350  if (scan_spec.cell_offset)
351  os <<" cell_offset=" << scan_spec.cell_offset;
352 
353  if (scan_spec.cell_limit)
354  os <<" cell_limit=" << scan_spec.cell_limit;
355 
356  if (scan_spec.cell_limit_per_family)
357  os << " cell_limit_per_family=" << scan_spec.cell_limit_per_family;
358 
359  if (scan_spec.max_versions)
360  os << " max_versions=" << scan_spec.max_versions;
361 
362  if (scan_spec.return_deletes)
363  os << " return_deletes";
364 
365  if (scan_spec.keys_only)
366  os << " keys_only";
367 
368  if (scan_spec.row_regexp)
369  os << " row_regexp=" << scan_spec.row_regexp;
370 
371  if (scan_spec.value_regexp)
372  os << " value_regexp=" << scan_spec.value_regexp;
373 
374  if (scan_spec.scan_and_filter_rows)
375  os << " scan_and_filter_rows";
376 
377  if (scan_spec.do_not_cache)
378  os << " do_not_cache";
379 
380  if (scan_spec.and_column_predicates)
381  os << " and_column_predicates";
382 
383  if (scan_spec.rebuild_indices)
384  os << " rebuild_indices=" << scan_spec.rebuild_indices.to_string();
385 
386  os << "}";
387 
388  return os;
389 }
390 
391 
393  : row_limit(ss.row_limit), cell_limit(ss.cell_limit),
394  cell_limit_per_family(ss.cell_limit_per_family),
395  row_offset(ss.row_offset), cell_offset(ss.cell_offset),
396  max_versions(ss.max_versions), columns(CstrAlloc(arena)),
397  row_intervals(RowIntervalAlloc(arena)),
398  cell_intervals(CellIntervalAlloc(arena)),
399  column_predicates(ColumnPredicateAlloc(arena)),
400  time_interval(ss.time_interval.first, ss.time_interval.second),
401  row_regexp(arena.dup(ss.row_regexp)), value_regexp(arena.dup(ss.value_regexp)),
402  return_deletes(ss.return_deletes), keys_only(ss.keys_only),
403  scan_and_filter_rows(ss.scan_and_filter_rows),
404  do_not_cache(ss.do_not_cache), and_column_predicates(ss.and_column_predicates),
405  rebuild_indices(ss.rebuild_indices) {
406  columns.reserve(ss.columns.size());
407  row_intervals.reserve(ss.row_intervals.size());
408  cell_intervals.reserve(ss.cell_intervals.size());
409  column_predicates.reserve(ss.column_predicates.size());
410 
411  for (auto c : ss.columns)
412  add_column(arena, c);
413 
414  for (const auto &ri : ss.row_intervals)
415  add_row_interval(arena, ri.start, ri.start_inclusive,
416  ri.end, ri.end_inclusive);
417 
418  for (const auto &ci : ss.cell_intervals)
419  add_cell_interval(arena, ci.start_row, ci.start_column, ci.start_inclusive,
420  ci.end_row, ci.end_column, ci.end_inclusive);
421 
422  for (const auto &cp : ss.column_predicates)
423  add_column_predicate(arena, cp.column_family, cp.column_qualifier,
424  cp.operation, cp.value, cp.value_len);
425 }
426 
427 void
428 ScanSpec::parse_column(const char *column_str, string &family,
429  const char **qualifier, size_t *qualifier_len,
430  bool *has_qualifier, bool *is_regexp, bool *is_prefix)
431 {
432  const char *raw_qualifier;
433  size_t raw_qualifier_len;
434  const char *colon = strchr(column_str, ':');
435  *is_regexp = false;
436  *is_prefix = false;
437  *qualifier = "";
438  *qualifier_len = 0;
439 
440  if (colon == 0) {
441  *has_qualifier = false;
442  family = column_str;
443  return;
444  }
445  *has_qualifier = true;
446 
447  family = String(column_str, (size_t)(colon-column_str));
448 
449  raw_qualifier = colon+1;
450  raw_qualifier_len = strlen(raw_qualifier);
451 
452  if (raw_qualifier_len == 0)
453  return;
454 
455  if (raw_qualifier_len > 2 &&
456  raw_qualifier[0] == '/' && raw_qualifier[raw_qualifier_len-1] == '/') {
457  *is_regexp = true;
458  *qualifier = raw_qualifier+1;
459  *qualifier_len = raw_qualifier_len - 2;
460  }
461  else if (*raw_qualifier == '*') {
462  *is_prefix = true;
463  }
464  else {
465  if (*raw_qualifier == '^') {
466  *is_prefix = true;
467  strip_enclosing_quotes(raw_qualifier+1, raw_qualifier_len-1,
468  qualifier, qualifier_len);
469  }
470  else
471  strip_enclosing_quotes(raw_qualifier, raw_qualifier_len,
472  qualifier, qualifier_len);
473  }
474 }
void add_column_predicate(CharArena &arena, const string &column_family, const char *column_qualifier, uint32_t operation, const char *value, uint32_t value_len=0)
Definition: ScanSpec.h:232
char * decode_vstr(const uint8_t **bufp, size_t *remainp)
Decode a vstr (vint64, data, null).
const char * row_regexp
Definition: ScanSpec.h:279
size_t encoded_length_internal() const override
Returns internal serialized length.
Definition: ScanSpec.cc:43
std::string String
A String is simply a typedef to std::string.
Definition: String.h:44
ColumnPredicates column_predicates
Definition: ScanSpec.h:277
String format(const char *fmt,...)
Returns a String using printf-like format facilities. Vanilla snprintf is about 1.5x faster than this...
Definition: String.cc:37
pair< int64_t, int64_t > time_interval
Definition: ScanSpec.h:278
const char * value_regexp
Definition: ScanSpec.h:280
void add_column(CharArena &arena, const string &str)
Definition: ScanSpec.h:148
The PageArenaAllocator is a STL allocator based on PageArena.
STL namespace.
size_t encoded_length_vstr(size_t len)
Computes the encoded length of vstr (vint64, data, null)
static const int64_t TIMESTAMP_MIN
Definition: KeySpec.h:34
Represents a row interval.
Definition: RowInterval.h:38
const string render_hql(const string &table) const
Renders scan spec as an HQL SELECT statement.
Definition: ScanSpec.cc:166
Scan predicate and control specification.
Definition: ScanSpec.h:56
uint64_t decode_i64(const uint8_t **bufp, size_t *remainp)
Decode a 64-bit integer in little-endian order.
int encoded_length_vi32(uint32_t val)
Length of a variable length encoded 32-bit integer (up to 5 bytes)
bool strip_enclosing_quotes(const char *input, size_t input_len, const char **output, size_t *output_len)
Strips enclosing quotes.
Definition: String.h:129
bool decode_bool(const uint8_t **bufp, size_t *remainp)
Decodes a boolean value from the given buffer.
Definition: Serialization.h:96
ostream & operator<<(ostream &os, const CellInterval &ci)
TableParts rebuild_indices
Definition: ScanSpec.h:286
Compatibility Macros for C/C++.
The PageArena allocator is simple and fast, avoiding individual mallocs/frees.
Definition: PageArena.h:69
void encode_i64(uint8_t **bufp, uint64_t val)
Encode a 64-bit integer in little-endian order.
Functions to serialize/deserialize primitives to/from a memory buffer.
uint8_t encoding_version() const override
Returns encoding version.
Definition: ScanSpec.cc:39
Represents a column predicate (e.g.
virtual void decode(const uint8_t **bufp, size_t *remainp)
Reads serialized representation of object from a buffer.
Definition: Serializable.cc:70
static void parse_column(const char *column_str, string &family, const char **qualifier, size_t *qualifier_len, bool *has_qualifier, bool *is_regexp, bool *is_prefix)
Parses a column string into column family, qualifier and whether the qualifier is a regexp or not...
Definition: ScanSpec.cc:428
Hypertable library.
Definition: CellInterval.h:30
void encode_vstr(uint8_t **bufp, const void *buf, size_t len)
Encode a buffer as variable length string (vint64, data, null)
Hypertable definitions
void add_row_interval(CharArena &arena, const string &start, bool start_inclusive, const string &end, bool end_inclusive)
Definition: ScanSpec.h:190
long long int Lld
Shortcut for printf formats.
Definition: String.h:53
void encode_vi32(uint8_t **bufp, uint32_t val)
Encode an integer (up to 32-bit) in variable length encoding.
static const int64_t TIMESTAMP_MAX
Definition: KeySpec.h:35
void encode_bool(uint8_t **bufp, bool bval)
Encodes a boolean into the given buffer.
Definition: Serialization.h:84
RowIntervals row_intervals
Definition: ScanSpec.h:275
const std::string to_string() const
Returns human readable string describing table parts.
Definition: TableParts.cc:63
void decode_internal(uint8_t version, const uint8_t **bufp, size_t *remainp) override
Reads serialized representation of object from a buffer.
Definition: ScanSpec.cc:128
void add_cell_interval(CharArena &arena, const string &start_row, const string &start_column, bool start_inclusive, const string &end_row, const string &end_column, bool end_inclusive)
Definition: ScanSpec.h:215
Represents a cell interval.
Definition: CellInterval.h:38
CellIntervals cell_intervals
Definition: ScanSpec.h:276
#define HT_TRY(_s_, _code_)
Definition: Error.h:517
uint32_t decode_vi32(const uint8_t **bufp, size_t *remainp)
Decode a variable length encoded integer up to 32-bit.
void encode_internal(uint8_t **bufp) const override
Writes serialized representation of object to a buffer.
Definition: ScanSpec.cc:101