0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ScanContext.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
26 
27 #include <Common/Compat.h>
28 #include "ScanContext.h"
29 
31 
32 #include <Hypertable/Lib/Key.h>
33 
34 #include <Common/Logger.h>
35 #include <Common/Time.h>
36 
37 #include <algorithm>
38 #include <cassert>
39 #include <re2/re2.h>
40 
41 
42 using namespace std;
43 using namespace Hypertable;
44 
45 
/**
 * Initializes this scan context from a scan specification, a range
 * specification and a schema.
 *
 * In order, this function: records the revision and time interval; builds
 * the column family mask and per-family cell predicates from the schema;
 * derives the start/end row (or cell) bounds and the single_row /
 * has_cell_interval / restricted_range flags from the scan spec; serializes
 * the start and end keys into dbuf; and finally compiles the row/value
 * regular expressions and registers the column predicates from the spec.
 *
 * Errors are raised through HT_THROW / HT_THROWF, e.g. with
 * RANGESERVER_BAD_SCAN_SPEC when the start bound sorts after the end bound
 * and BAD_SCAN_SPEC when a regular expression cannot be compiled.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering directly
 * after several guard conditions (marked inline below); the guarded
 * statements appear to be missing and must be restored from the original
 * file before this text can be compiled.
 *
 * @param rev Scan revision; TIMESTAMP_NULL is mapped to TIMESTAMP_MAX
 * @param ss Scan specification; may be null, in which case every column
 *        family is enabled in the mask and the full row range is used
 * @param range_spec Range specification, stored in the range member
 * @param sp Schema; the column family setup is skipped when it is null
 * @param columns Optional output set which receives the IDs of the column
 *        families named in ss->columns
 */
46 void
47 ScanContext::initialize(int64_t rev, const ScanSpec *ss,
48  const RangeSpec *range_spec, SchemaPtr &sp,
49  std::set<uint8_t> *columns) {
50  ColumnFamilySpec *cf_spec;
51  int32_t max_versions = 0;
52  String family;
53  const char *qualifier;
54  size_t qualifier_len;
    // Running ID handed to add_column_predicate() for each predicate added
55  size_t id = 0;
56  bool is_regexp, is_prefix;
57 
    // Current time; used below to compute the per-family TTL cutoff times
58  int64_t now = get_ts64();
59 
    // A null revision is replaced by the maximum timestamp
60  revision = (rev == TIMESTAMP_NULL) ? TIMESTAMP_MAX : rev;
61 
62  // set time interval
63  if (ss) {
64  time_interval.first = ss->time_interval.first;
65  time_interval.second = ss->time_interval.second;
66  }
67  else {
68  time_interval.first = TIMESTAMP_MIN;
69  time_interval.second = TIMESTAMP_MAX;
70  }
71 
72  spec = ss;
73  range = range_spec;
74 
    // Without a scan spec every column family passes the mask; with one,
    // start with all families masked out and take max_versions from the spec
75  if (spec == 0)
76  memset(family_mask, true, 256*sizeof(bool));
77  else {
78  memset(family_mask, false, 256*sizeof(bool));
79  max_versions = spec->max_versions;
80  }
81 
82  if (sp) {
83  schema = sp;
84 
    // Explicit column list: enable only the requested column families
85  if (spec && spec->columns.size() > 0) {
86  bool has_qualifier;
87 
88  for (auto cfstr : spec->columns) {
89 
    // Copy the column string into the arena; presumably so the qualifier
    // pointer stored in the predicate below stays valid - TODO confirm
90  cfstr = (const char *)arena.dup(cfstr);
91 
92  ScanSpec::parse_column(cfstr, family, &qualifier, &qualifier_len,
93  &has_qualifier, &is_regexp, &is_prefix);
94  cf_spec = schema->get_column_family(family.c_str());
95 
    // NOTE(review): listing line 97 is missing, so the statement guarded by
    // this null check is absent (presumably a throw for an unknown column
    // family - confirm against the original file). As written, the check
    // would instead guard the "if (columns)" statement that follows.
96  if (cf_spec == 0)
98 
99  if (columns)
100  columns->insert(cf_spec->get_id());
101 
102  family_mask[cf_spec->get_id()] = true;
    // A qualifier in the column string becomes a regex, prefix or exact
    // qualifier-match predicate on this family
103  if (has_qualifier) {
104  ColumnPredicate cp;
105  cp.operation =
106  is_regexp ? ColumnPredicate::QUALIFIER_REGEX_MATCH :
107  (is_prefix ? ColumnPredicate::QUALIFIER_PREFIX_MATCH :
108  ColumnPredicate::QUALIFIER_EXACT_MATCH);
109  cp.column_qualifier = qualifier;
110  cp.column_qualifier_len = qualifier_len;
111  cell_predicates[cf_spec->get_id()].add_column_predicate(cp, id++);
112  }
113 
    // A TTL of 0 sets the cutoff to TIMESTAMP_MIN; otherwise the cutoff is
    // now minus the TTL scaled by 1e9 (assumes TTL is in seconds, since
    // get_ts64() is documented as nanoseconds - TODO confirm)
114  if (cf_spec->get_option_ttl() == 0)
115  cell_predicates[cf_spec->get_id()].cutoff_time = TIMESTAMP_MIN;
116  else
117  cell_predicates[cf_spec->get_id()].cutoff_time = now
118  - ((int64_t)cf_spec->get_option_ttl() * 1000000000LL);
    // Effective max_versions: the schema value when the spec gives none (0),
    // the spec value when the schema gives none (0), otherwise the smaller
119  if (max_versions == 0)
120  cell_predicates[cf_spec->get_id()].max_versions = cf_spec->get_option_max_versions();
121  else {
122  if (cf_spec->get_option_max_versions() == 0)
123  cell_predicates[cf_spec->get_id()].max_versions = max_versions;
124  else
125  cell_predicates[cf_spec->get_id()].max_versions = max_versions < cf_spec->get_option_max_versions()
126  ? max_versions : cf_spec->get_option_max_versions();
127  }
128  if (cf_spec->get_option_counter())
129  cell_predicates[cf_spec->get_id()].counter = true;
130  cell_predicates[cf_spec->get_id()].indexed = cf_spec->get_value_index() || cf_spec->get_qualifier_index();
131  }
132  }
133  else {
134 
    // No explicit column list: walk every column family of every access
    // group in the schema
135  family_mask[0] = true; // ROW_DELETE records have 0 column family, so
136  // this allows them to pass through
137  for (auto ag_spec : schema->get_access_groups()) {
138  for (auto cf_spec : ag_spec->columns()) {
    // NOTE(review): listing line 140 is missing; the line that follows this
    // check is only the tail of the guarded statement (presumably the
    // arguments of an HT_THROWF call - confirm against the original file)
139  if (cf_spec->get_id() == 0)
141  "Bad ID for Column Family '%s'", cf_spec->get_name().c_str());
    // Deleted column families are masked out and get no cell predicate setup
142  if (cf_spec->get_deleted()) {
143  family_mask[cf_spec->get_id()] = false;
144  continue;
145  }
146  family_mask[cf_spec->get_id()] = true;
    // Same TTL cutoff rule as in the explicit column list branch above
147  if (cf_spec->get_option_ttl() == 0)
148  cell_predicates[cf_spec->get_id()].cutoff_time = TIMESTAMP_MIN;
149  else
150  cell_predicates[cf_spec->get_id()].cutoff_time = now
151  - ((int64_t)cf_spec->get_option_ttl() * 1000000000LL);
152 
    // Same max_versions rule as in the explicit column list branch above
153  if (max_versions == 0)
154  cell_predicates[cf_spec->get_id()].max_versions = cf_spec->get_option_max_versions();
155  else {
156  if (cf_spec->get_option_max_versions() == 0)
157  cell_predicates[cf_spec->get_id()].max_versions = max_versions;
158  else
159  cell_predicates[cf_spec->get_id()].max_versions =
160  (max_versions < cf_spec->get_option_max_versions())
161  ? max_versions : cf_spec->get_option_max_versions();
162  }
163  if (cf_spec->get_option_counter())
164  cell_predicates[cf_spec->get_id()].counter = true;
165  cell_predicates[cf_spec->get_id()].indexed = cf_spec->get_value_index() || cf_spec->get_qualifier_index();
166  }
167  }
168  }
169  }
170 
    // Default values for the range flags; refined below from the scan spec
175  single_row = false;
176  has_cell_interval = false;
177  has_start_cf_qualifier = false;
178  start_inclusive = end_inclusive = true;
179  restricted_range = true;
180 
181  if (spec) {
182  const char *ptr = 0;
183 
    // Row interval scan: start bound comes from the first interval, end bound
    // from the last one
184  if (!spec->row_intervals.empty()) {
185  // start row
186  start_row = spec->row_intervals.front().start;
187  start_inclusive = spec->row_intervals.front().start_inclusive;
188 
189  // end row (if scan_and_filter_rows the rows are ordered ascending, otherwise only one row interval)
    // An empty end row string selects the end-row marker
190  if (spec->row_intervals.back().end[0] == 0)
191  end_row = Key::END_ROW_MARKER;
192  else {
193  end_row = spec->row_intervals.back().end;
194  end_inclusive = spec->row_intervals.back().end_inclusive;
195 
196  if (!strcmp(spec->row_intervals.front().start, spec->row_intervals.back().end))
197  single_row = true;
198  }
199 
    // In scan-and-filter mode the start row of every interval is copied into
    // rowset, and the last element of rowset becomes the inclusive end row
200  if (spec->scan_and_filter_rows) {
201  for (const auto &ri : spec->row_intervals) {
202  rowset.insert(arena.dup(ri.start)); // ri.end is set to "" in order to save space
203  }
204  end_row = *rowset.rbegin();
205  end_inclusive = true;
206  single_row = rowset.size() == 1;
207  }
208  }
    // Cell interval scan: only the first cell interval is consulted here
209  else if (!spec->cell_intervals.empty()) {
210  String column_family_str;
211  ColumnFamilySpec *cf_spec;
212 
213  has_cell_interval = true;
214 
    // Split start_column at the first ':' into family name and qualifier;
    // without a ':' the whole string is the family name and the qualifier is
    // empty
215  if (*spec->cell_intervals[0].start_column) {
216  ptr = strchr(spec->cell_intervals[0].start_column, ':');
217  if (ptr == 0) {
218  ptr = spec->cell_intervals[0].start_column
219  + strlen(spec->cell_intervals[0].start_column);
220  start_qualifier = "";
221  }
222  else {
223  start_qualifier = ptr+1;
224  start_key.column_qualifier = start_qualifier.c_str();
225  start_key.column_qualifier_len = start_qualifier.length();
226  has_start_cf_qualifier = true;
227  }
228  column_family_str = String(spec->cell_intervals[0].start_column,
229  ptr - spec->cell_intervals[0].start_column);
    // NOTE(review): listing line 231 is missing; the line that follows this
    // check is only the tail of the guarded statement (presumably the message
    // argument of an HT_THROW call - confirm against the original file)
230  if ((cf_spec = schema->get_column_family(column_family_str)) == 0)
232  format("Bad column family (%s)", column_family_str.c_str()));
233 
234  start_key.column_family_code = cf_spec->get_id();
235 
236  start_row = spec->cell_intervals[0].start_row;
237  start_inclusive = spec->cell_intervals[0].start_inclusive;
238  }
239  else {
240  start_row = "";
241  start_qualifier = "";
242  start_inclusive = true;
243  }
244 
    // Same family/qualifier split for end_column
245  if (*spec->cell_intervals[0].end_column) {
246  ptr = strchr(spec->cell_intervals[0].end_column, ':');
247  if (ptr == 0) {
248  ptr = spec->cell_intervals[0].end_column
249  + strlen(spec->cell_intervals[0].end_column);
250  end_qualifier = "";
251  }
252  else {
253  end_qualifier = ptr+1;
254  end_key.column_qualifier = end_qualifier.c_str();
255  end_key.column_qualifier_len = end_qualifier.length();
256  }
257 
258  column_family_str = String(spec->cell_intervals[0].end_column,
259  ptr - spec->cell_intervals[0].end_column);
260  if ((cf_spec = schema->get_column_family(column_family_str)) == 0)
261  HT_THROWF(Error::RANGESERVER_BAD_SCAN_SPEC, "Bad column family (%s)",
262  column_family_str.c_str());
263 
264  end_key.column_family_code = cf_spec->get_id();
265 
266  end_row = spec->cell_intervals[0].end_row;
267  end_inclusive = spec->cell_intervals[0].end_inclusive;
268  }
269  else {
270  end_row = Key::END_ROW_MARKER;
271  end_qualifier = "";
272  }
273 
274  if (!strcmp(spec->cell_intervals[0].start_row,
275  spec->cell_intervals[0].end_row))
276  single_row = true;
277 
    // Within a single row the start cell must not come after the end cell:
    // compare family codes first, then qualifiers when the families match
278  if (single_row && ((end_key.column_family_code == start_key.column_family_code
279  && start_qualifier.compare(end_qualifier) > 0)
280  || start_key.column_family_code > end_key.column_family_code))
281  HT_THROW(Error::RANGESERVER_BAD_SCAN_SPEC, "start_cell > end_cell");
282 
283  }
    // Neither row nor cell intervals: scan the full row range
284  else {
285  start_row = "";
286  end_row = Key::END_ROW_MARKER;
287  }
288 
289  if (start_row.compare(end_row) > 0)
290  HT_THROW(Error::RANGESERVER_BAD_SCAN_SPEC, "start_row > end_row");
291  }
292  else {
293  start_row = "";
294  end_row = Key::END_ROW_MARKER;
295  }
296 
    // The range counts as unrestricted only when it runs from the empty row
    // to the end-row marker
297  if (start_row == "" && end_row == Key::END_ROW_MARKER)
298  restricted_range = false;
299 
300  assert(start_row <= end_row);
301 
302  start_key.row = start_row.c_str();
303  start_key.row_len = start_row.length();
304 
305  end_key.row = end_row.c_str();
306  end_key.row_len = end_row.length();
307 
    // Reserve room in dbuf for both serialized keys (64 bytes of slack)
308  dbuf.reserve(start_row.length() + start_qualifier.length()
309  + end_row.length() + end_qualifier.length() + 64);
310 
311  String tmp_str;
312 
    // Cell interval scan: the serialized keys carry family code and qualifier
313  if (spec && !spec->cell_intervals.empty()) {
314  if (start_inclusive)
315  // DELETE_ROW and DELETE_CF will be handled by the scanner
316  create_key_and_append(dbuf, FLAG_DELETE_CELL, start_key.row, start_key.column_family_code,
317  start_key.column_qualifier, TIMESTAMP_MAX, revision);
318  else {
    // Exclusive start: use the start qualifier with one byte of value 1
    // appended, or the end-row marker when there is no start qualifier
319  if (start_key.column_qualifier == 0)
320  tmp_str = Key::END_ROW_MARKER;
321  else {
322  tmp_str = start_key.column_qualifier;
323  tmp_str.append(1, 1);
324  }
325  // DELETE_ROW and DELETE_CF will be handled by the scanner
326  create_key_and_append(dbuf, FLAG_DELETE_CELL, start_key.row,
327  start_key.column_family_code,
328  tmp_str.c_str(), TIMESTAMP_MAX, revision);
329  }
    // The start key sits at the base of dbuf; the end key is appended next,
    // at the current write position
330  start_serkey.ptr = dbuf.base;
331  end_serkey.ptr = dbuf.ptr;
332 
333  if (!end_inclusive)
334  create_key_and_append(dbuf, 0, end_key.row, end_key.column_family_code,
335  end_key.column_qualifier, TIMESTAMP_MAX, revision);
336  else {
    // Inclusive end: use the end qualifier with one byte of value 1 appended,
    // or the end-row marker when there is no end qualifier
337  if (end_key.column_qualifier == 0)
338  tmp_str = Key::END_ROW_MARKER;
339  else {
340  tmp_str = end_key.column_qualifier;
341  tmp_str.append(1, 1);
342  }
343  create_key_and_append(dbuf, 0, end_key.row, end_key.column_family_code,
344  tmp_str.c_str(), TIMESTAMP_MAX, revision);
345  }
346  }
    // Row scan: the serialized keys carry only the row, with family code 0
    // and an empty qualifier
347  else {
348  if (start_inclusive || start_key.row_len == 0)
349  create_key_and_append(dbuf, 0, start_key.row, 0, "", TIMESTAMP_MAX, revision);
350  else {
    // Exclusive start: use the start row with one byte of value 1 appended
351  tmp_str = start_key.row;
352  tmp_str.append(1, 1);
353  create_key_and_append(dbuf, 0, tmp_str.c_str(), 0, "", TIMESTAMP_MAX, revision);
354  }
355  start_serkey.ptr = dbuf.base;
356  end_serkey.ptr = dbuf.ptr;
357  if (!end_inclusive)
358  create_key_and_append(dbuf, 0, end_key.row, 0, "", TIMESTAMP_MAX, revision);
359  else {
    // Inclusive end: use the end row with one byte of value 1 appended
360  tmp_str = end_key.row;
361  tmp_str.append(1, 1);
362  create_key_and_append(dbuf, 0, tmp_str.c_str(), 0, "", TIMESTAMP_MAX, revision);
363  }
364  }
365 
    // Compile the row and value regular expressions (when non-empty) and
    // register the column predicates from the spec
367  if (spec) {
    // NOTE(review): the RE2 objects are allocated with new before the ok()
    // check; confirm the owner of row_regexp / value_regexp releases them
    // when HT_THROW is raised from here
368  if (spec->row_regexp && *spec->row_regexp != 0) {
369  row_regexp = new RE2(spec->row_regexp);
370  if (!row_regexp->ok()) {
371  HT_THROW(Error::BAD_SCAN_SPEC, (String)"Can't convert row_regexp "
372  + spec->row_regexp + " to regexp -" + row_regexp->error_arg());
373  }
374  }
375  if (spec->value_regexp && *spec->value_regexp != 0) {
376  value_regexp = new RE2(spec->value_regexp);
377  if (!value_regexp->ok()) {
378  HT_THROW(Error::BAD_SCAN_SPEC, (String)"Can't convert value_regexp "
379  + spec->value_regexp + " to regexp -" + value_regexp->error_arg());
380  }
381  }
382 
    // Column predicates with a null or empty column family are skipped.
    // NOTE(review): schema is only assigned in this function when sp is
    // non-null, yet it is dereferenced here unconditionally - confirm that
    // callers always supply a schema when column predicates are present
383  for (const auto& cp : spec->column_predicates) {
384  if (cp.column_family && *cp.column_family) {
385  cf_spec = schema->get_column_family(cp.column_family);
    // NOTE(review): listing lines 387 and 391 are missing; the two guarded
    // statements below show only their trailing arguments (presumably
    // HT_THROWF calls - confirm against the original file)
386  if (cf_spec == 0) {
388  "Invalid column family '%s'", cp.column_family);
389  }
390  if (cf_spec->get_id() == 0) {
392  "Bad id for column family '%s'", cf_spec->get_name().c_str());
393  }
394  if (cf_spec->get_option_counter()) {
395  HT_THROW(Error::BAD_SCAN_SPEC, "Counters are not supported for column predicates" );
396  }
397  cell_predicates[cf_spec->get_id()].add_column_predicate(cp, id++);
398  cell_predicates[cf_spec->get_id()].indexed = cf_spec->get_value_index() || cf_spec->get_qualifier_index();
399  }
400  }
401  }
402 }
void initialize(const String &name)
Public initialization function - creates a singleton instance of LogWriter.
Definition: Logger.cc:45
Range specification.
Definition: RangeSpec.h:40
std::string String
A String is simply a typedef to std::string.
Definition: String.h:44
bool get_option_counter() const
Gets the counter option.
String format(const char *fmt,...)
Returns a String using printf-like format facilities. Vanilla snprintf is about 1.5x faster than this...
Definition: String.cc:37
time_t get_option_ttl() const
Gets ttl option.
pair< int64_t, int64_t > time_interval
Definition: ScanSpec.h:278
bool get_value_index() const
Gets value index flag.
Column family specification.
static const uint32_t FLAG_DELETE_CELL
Definition: KeySpec.h:42
STL namespace.
static const int64_t TIMESTAMP_MIN
Definition: KeySpec.h:34
Scan predicate and control specification.
Definition: ScanSpec.h:56
Declarations for ScanContext.
bool get_qualifier_index() const
Gets qualifier index flag.
Logging routines and macros.
Compatibility Macros for C/C++.
int32_t get_id() const
Gets column ID.
static const int64_t TIMESTAMP_NULL
Definition: KeySpec.h:36
Represents a column predicate (e.g.
Time related declarations.
Hypertable definitions
const std::string & get_name() const
Gets column family name.
void create_key_and_append(DynamicBuffer &dst_buf, const Key &key, bool time_order_asc)
Definition: Key.cc:105
int32_t get_option_max_versions() const
Gets max versions option.
static const int64_t TIMESTAMP_MAX
Definition: KeySpec.h:35
#define HT_THROWF(_code_, _fmt_,...)
Definition: Error.h:490
bool get_deleted() const
Gets deleted flag.
std::shared_ptr< Schema > SchemaPtr
Smart pointer to Schema.
Definition: Schema.h:465
#define HT_THROW(_code_, _msg_)
Definition: Error.h:478
int64_t get_ts64()
Returns the current time in nanoseconds as a 64bit number.
Definition: Time.cc:40