0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
count_stored.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
22 #include "Common/Compat.h"
23 #include <fstream>
24 #include <iostream>
25 #include <string>
26 
27 #include "Common/Init.h"
28 #include "Common/System.h"
29 #include "Common/Usage.h"
30 
31 #include "FsBroker/Lib/Client.h"
32 
33 #include "Hypertable/Lib/Client.h"
34 #include "Hypertable/Lib/Key.h"
35 #include "Hypertable/Lib/KeySpec.h"
37 
38 #include "Config.h"
39 #include "CellStoreFactory.h"
40 #include "CellStore.h"
41 #include "Global.h"
42 
43 
44 using namespace Hypertable;
45 using namespace Config;
46 using namespace std;
47 
48 namespace {
49 
50 struct MyPolicy : Config::Policy {
51  static void init_options() {
52  cmdline_desc("Usage: %s [options] <table>\n\n"
53  " This program counts the number of cells that exist in CellStores\n"
54  " for a table. It does this by reading the Files columns for the\n"
55  " table in the METADATA table to learn of all the CellStores. It\n"
56  " then does a linear scan of each CellStore file, counting the\n"
57  " number of cells...\nOptions");
58  cmdline_hidden_desc().add_options()
59  ("table", str(), "name of the table to scan")
60  ;
61  cmdline_positional_desc().add("table", -1);
62  }
63 };
64 
65 typedef Cons<MyPolicy, DefaultClientPolicy> AppPolicy;
66 
67 struct RangeCellStoreInfo {
68  String start_row;
69  String end_row;
70  std::vector<String> cell_stores;
71 };
72 
73 struct cell_store_info {
74  String start_row;
75  String end_row;
76  String file;
77 };
78 
79 void
80 fill_cell_store_vector(ClientPtr &client, NamespacePtr &ns, const char *table_name,
81  std::vector<cell_store_info> &file_vector);
82 
83 } // local namespace
84 
85 
86 int main(int argc, char **argv) {
87  try {
88  init_with_policy<AppPolicy>(argc, argv);
89 
90  String table_name = get("table", String());
91 
92  if (table_name.empty()) {
93  HT_ERROR_OUT <<"table name is required"<< HT_END;
94  cout << cmdline_desc() << endl;
95  return 1;
96  }
97 
98  bool hit_start = false;
99  uint64_t total_count = 0;
100  uint64_t store_count = 0;
101  int timeout = get_i32("FsBroker.Timeout");
102 
103  // Create Hypertable client object
104  ClientPtr hypertable_client = make_shared<Hypertable::Client>(argv[0]);
105  NamespacePtr ns = hypertable_client->open_namespace("/");
106  ConnectionManagerPtr conn_mgr = make_shared<ConnectionManager>();
107  FsBroker::Lib::ClientPtr dfs = std::make_shared<FsBroker::Lib::Client>(conn_mgr, properties);
108 
109  Global::dfs = dfs;
110 
111  if (!dfs->wait_for_connection(timeout)) {
112  cerr << "error: timed out waiting for FS broker" << endl;
113  exit(EXIT_FAILURE);
114  }
115 
117 
118  std::vector<cell_store_info> file_vector;
119 
120  fill_cell_store_vector(hypertable_client, ns, table_name.c_str(), file_vector);
121 
122  ScanContextPtr scan_context_ptr(new ScanContext());
123  Key key;
124  ByteString value;
125 
126  for (size_t i=0; i<file_vector.size(); i++) {
130  CellStorePtr cell_store_ptr = CellStoreFactory::open(file_vector[i].file, 0, 0);
131  CellListScannerPtr scanner;
132 
133  hit_start = (file_vector[i].start_row == "") ? true : false;
134  store_count = 0;
135  scanner = cell_store_ptr->create_scanner(scan_context_ptr.get());
136 
137  while (scanner->get(key, value)) {
138  if (!hit_start) {
139  if (strcmp(key.row, file_vector[i].start_row.c_str()) <= 0) {
140  scanner->forward();
141  continue;
142  }
143  hit_start = true;
144  }
145  if (strcmp(key.row, file_vector[i].end_row.c_str()) > 0)
146  break;
147 
148  store_count++;
149  scanner->forward();
150  }
151 
152  cout << store_count << "\t" << file_vector[i].file << "["
153  << file_vector[i].start_row << ".." << file_vector[i].end_row << "]"
154  << endl;
155  total_count += store_count;
156  }
157  cout << total_count << "\tTOTAL" << endl;
158  }
159  catch (Exception &e) {
160  HT_ERROR_OUT << e << HT_END;
161  return 1;
162  }
163  return 0;
164 }
165 
166 
167 namespace {
168 
169 void
170 fill_cell_store_vector(ClientPtr &client, NamespacePtr &ns, const char *table_name,
171  std::vector<cell_store_info> &file_vector) {
172  TablePtr table_ptr;
173  NamespacePtr ns_system;
174  TableScannerPtr scanner_ptr;
175  ScanSpec scan_spec;
176  RowInterval ri;
177  Cell cell;
178  char start_row[16];
179  char end_row[16];
180  RangeCellStoreInfo range_cell_store_info;
181  cell_store_info cell_store_info;
182  String table_id;
183 
184  try {
185  ns_system = client->open_namespace("sys");
186  // Open the 'METADATA' table
187  table_ptr = ns_system->open_table("METADATA");
188  table_id = ns->get_table_id(table_name);
189 
190  // Set up the scan specification
191  scan_spec.max_versions = 1;
192  sprintf(start_row, "%s:", table_id.c_str());
193  ri.start = start_row;
194  sprintf(end_row, "%s:%s", table_id.c_str(), Key::END_ROW_MARKER);
195  ri.end = end_row;
196  scan_spec.row_intervals.push_back(ri);
197  scan_spec.columns.clear();
198  scan_spec.columns.push_back("Files");
199  scan_spec.columns.push_back("StartRow");
200 
201  // Create a scanner on the 'METADATA' table
202  scanner_ptr.reset( table_ptr->create_scanner(scan_spec) );
203 
204  }
205  catch (std::exception &e) {
206  cerr << "error: " << e.what() << endl;
207  exit(EXIT_FAILURE);
208  }
209 
210  range_cell_store_info.start_row = "";
211  range_cell_store_info.end_row = "";
212  range_cell_store_info.cell_stores.clear();
213 
214  // Iterate through the cells returned by the scanner
215  while (scanner_ptr->next(cell)) {
216  if (strcmp(cell.row_key, range_cell_store_info.end_row.c_str())) {
217  if (range_cell_store_info.end_row != "") {
218  const char *end_row_cstr =
219  strchr(range_cell_store_info.end_row.c_str(), ':');
220  if (end_row_cstr == 0) {
221  cerr << "error: mal-formed end row (missing colon) - "
222  << range_cell_store_info.end_row << endl;
223  exit(EXIT_FAILURE);
224  }
225  end_row_cstr++;
226  cell_store_info.start_row = range_cell_store_info.start_row;
227  cell_store_info.end_row = end_row_cstr;
228  for (size_t i=0; i<range_cell_store_info.cell_stores.size(); i++) {
229  cell_store_info.file = range_cell_store_info.cell_stores[i];
230  file_vector.push_back(cell_store_info);
231  }
232  }
233  range_cell_store_info.start_row = "";
234  range_cell_store_info.end_row = cell.row_key;
235  range_cell_store_info.cell_stores.clear();
236  }
237 
238  if (!strcmp(cell.column_family, "StartRow"))
239  range_cell_store_info.start_row =
240  String((const char *)cell.value, cell.value_len);
241  else if (!strcmp(cell.column_family, "Files")) {
242  String files = String((const char *)cell.value, cell.value_len);
243  char *ptr, *save_ptr;
244  ptr = strtok_r((char *)files.c_str(), "\n\r;", &save_ptr);
245  while (ptr) {
246  range_cell_store_info.cell_stores.push_back(ptr);
247  ptr = strtok_r(0, "\n\r;", &save_ptr);
248  }
249  }
250  else {
251  cerr << "Unexpected column family encountered: '" << cell.column_family
252  << endl;
253  exit(EXIT_FAILURE);
254  }
255  }
256 
257  if (!range_cell_store_info.cell_stores.empty()) {
258  const char *end_row_cstr = strchr(range_cell_store_info.end_row.c_str(),
259  ':');
260  if (end_row_cstr == 0) {
261  cerr << "error: mal-formed end row (missing colon) - "
262  << range_cell_store_info.end_row << endl;
263  exit(EXIT_FAILURE);
264  }
265  end_row_cstr++;
266  cell_store_info.start_row = range_cell_store_info.start_row;
267  cell_store_info.end_row = end_row_cstr;
268  for (size_t i=0; i<range_cell_store_info.cell_stores.size(); i++) {
269  cell_store_info.file = range_cell_store_info.cell_stores[i];
270  file_vector.push_back(cell_store_info);
271  }
272  }
273 }
274 
275 } // local namespace
Retrieves system information (hardware, installation directory, etc)
Interface and base of config policy.
Definition: Config.h:149
const char * row
Definition: Key.h:129
Declarations for CellStoreFactory.
PropertiesPtr properties
This singleton map stores all options.
Definition: Config.cc:47
std::string String
A String is simply a typedef to std::string.
Definition: String.h:44
Helper class for printing usage banners on the command line.
Po::typed_value< String > * str(String *v=0)
Definition: Properties.h:166
STL namespace.
Scan context information.
Definition: ScanContext.h:52
std::shared_ptr< TableScanner > TableScannerPtr
Smart pointer to TableScanner.
Definition: TableScanner.h:124
Tracks range server memory used.
Definition: MemoryTracker.h:42
Desc & cmdline_desc(const char *usage)
A macro which defines global functions like get_bool(), get_str(), get_i16() etc. ...
Definition: Config.cc:72
Represents a row interval.
Definition: RowInterval.h:38
std::shared_ptr< Namespace > NamespacePtr
Shared smart pointer to Namespace.
Definition: Namespace.h:333
std::shared_ptr< Client > ClientPtr
Definition: Client.h:156
A class managing one or more serializable ByteStrings.
Definition: ByteString.h:47
Declarations for CellStore.
Scan predicate and control specification.
Definition: ScanSpec.h:56
static Hypertable::MemoryTracker * memory_tracker
Definition: Global.h:94
std::shared_ptr< Client > ClientPtr
Smart pointer to Client.
Definition: Client.h:233
Helpers to compose init policies; allow to combine two policies into one.
Definition: Config.h:174
Compatibility Macros for C/C++.
const char * row_key
Definition: Cell.h:66
Initialization helper for applications.
#define HT_END
Definition: Logger.h:220
static Hypertable::FilesystemPtr dfs
Definition: Global.h:64
#define HT_ERROR_OUT
Definition: Logger.h:301
std::shared_ptr< CellStore > CellStorePtr
Smart pointer to CellStore.
Definition: CellStore.h:340
Hypertable definitions
const char * column_family
Definition: Cell.h:67
Provides access to internal components of opaque key.
Definition: Key.h:40
RowIntervals row_intervals
Definition: ScanSpec.h:275
This is a generic exception class for Hypertable.
Definition: Error.h:314
uint32_t value_len
Definition: Cell.h:72
std::shared_ptr< CellListScanner > CellListScannerPtr
Definition: CellList.h:35
std::shared_ptr< ConnectionManager > ConnectionManagerPtr
Smart pointer to ConnectionManager.
static CellStorePtr open(const String &name, const char *start_row, const char *end_row)
Creates a CellStore object from a given cell store file.
Desc & cmdline_hidden_desc()
Get the command line hidden options description (for positional options)
Definition: Config.cc:81
int main(int argc, char **argv)
Definition: count_stored.cc:86
Encapsulates decomposed key and value.
Definition: Cell.h:32
PositionalDesc & cmdline_positional_desc()
Get the command line positional options description.
Definition: Config.cc:90
static const char * END_ROW_MARKER
Definition: Key.h:49
Declarations for Client.
std::shared_ptr< Table > TablePtr
Definition: Table.h:53
const uint8_t * value
Definition: Cell.h:71
std::shared_ptr< ScanContext > ScanContextPtr
Definition: ScanContext.h:169