0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
csdump.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
22 #include <Common/Compat.h>
23 
29 
30 #include <FsBroker/Lib/Client.h>
31 
33 #include <Hypertable/Lib/Key.h>
34 
35 #include <AsyncComm/Comm.h>
38 
39 #include <Common/ByteString.h>
40 #include <Common/InetAddr.h>
41 #include <Common/Init.h>
42 #include <Common/Logger.h>
43 #include <Common/System.h>
44 #include <Common/Usage.h>
45 
46 #include <boost/algorithm/string.hpp>
47 
48 #include <iostream>
49 #include <string>
50 #include <unordered_map>
51 #include <vector>
52 
53 using namespace Hypertable;
54 using namespace Config;
55 using namespace std;
56 
57 namespace {
58 
59  struct AppPolicy : Config::Policy {
60  static void init_options() {
61  cmdline_desc("Usage: %s [options] <filename>\n\n"
62  "Dumps the contents of the CellStore contained in the FS <filename>."
63  "\n\nOptions").add_options()
64  ("all,a", "Dump everything, including key/value pairs")
65  ("compact,c", "Only prints the cellstore name and a status ('ok' or 'corrupt')")
66  ("count,c", "Count the number of key/value pairs")
67  ("column-id-map", str(), "Column family id to name map, format = <id>=<name>[,<id>=<name>...]")
68  ("end-key", str(), "Ignore keys that are greater than <arg>")
69  ("start-key", str(), "Ignore keys that are less than or equal to <arg>")
70  ("tsv-format", "Output data in TSV format")
71  ;
72  cmdline_hidden_desc().add_options()("filename", str(), "");
73  cmdline_positional_desc().add("filename", -1);
74  }
75  static void init() {
76  if (!has("filename")) {
77  HT_ERROR_OUT <<"filename required" << HT_END;
78  cout << cmdline_desc() << endl;
79  exit(EXIT_FAILURE);
80  }
81  }
82  };
83 
84  typedef Meta::list<AppPolicy, FsClientPolicy, DefaultCommPolicy> Policies;
85 
86  typedef std::unordered_map<uint32_t, String> ColumnIdMapT;
87 
88 
89 } // local namespace
90 
91 
92 int main(int argc, char **argv) {
93  try {
94  init_with_policies<Policies>(argc, argv);
95 
96  bool dump_compact = has("compact");
97  bool dump_all = has("all");
98  bool count_keys = has("count");
99  String start_key = get("start-key", String());
100  String end_key = get("end-key", String());
101  bool got_end_key = has("end-key");
102  bool hit_start = start_key.empty();
103  int timeout = get_i32("timeout");
104  String fname = get_str("filename");
105  bool tsv_format = has("tsv-format");
106  char *column_id_map[256];
107 
108  ConnectionManagerPtr conn_mgr = make_shared<ConnectionManager>();
109 
110  FsBroker::Lib::ClientPtr dfs = std::make_shared<FsBroker::Lib::Client>(conn_mgr, properties);
111 
112  if (!dfs->wait_for_connection(timeout)) {
113  cerr << "error: timed out waiting for FS broker" << endl;
114  exit(EXIT_FAILURE);
115  }
116 
117  Global::dfs = dfs;
118 
120 
121  if (dump_compact) {
122  try {
123  CellStorePtr cellstore = CellStoreFactory::open(fname, 0, 0);
124  std::cout << fname << ": ok" << std::endl;
125  }
126  catch (Exception &ex) {
127  std::cout << fname << ": corrupt" << std::endl;
128  quick_exit(EXIT_FAILURE);
129  }
130  quick_exit(EXIT_SUCCESS);
131  }
132 
133  memset(column_id_map, 0, 256*sizeof(char *));
134 
135  if (has("column-id-map")) {
136  char *key, *value;
137  int id;
138  String str = get_str("column-id-map");
139  key = strtok((char *)str.c_str(), ",=");
140  if (key) {
141  value = strtok(0, ",=");
142  id = atoi(key);
143  column_id_map[id] = value;
144  }
145  while (key) {
146  key = strtok(0, ",=");
147  if (key) {
148  value = strtok(0, ",=");
149  id = atoi(key);
150  column_id_map[id] = value;
151  }
152  }
153  }
154 
155  /***
156  for (size_t i=0; i<256; i++) {
157  if (column_id_map[i])
158  cout << i << " = " << column_id_map[i] << endl;
159  }
160  quick_exit(EXIT_SUCCESS);
161  **/
162 
166  CellStorePtr cellstore = CellStoreFactory::open(fname, 0, 0);
167  CellListScannerPtr scanner;
168 
172  uint64_t key_count = 0;
173  ByteString key, value;
174  uint8_t *bsptr;
175  size_t bslen;
176  char *buf = new char [ 1024 ];
177  size_t buf_len = 1024;
178  Key key_comps;
179 
183  if (tsv_format || dump_all || count_keys) {
184  LoadDataEscape row_escaper;
185  LoadDataEscape escaper;
186  ScanContextPtr scan_ctx(new ScanContext());
187  const char *unescaped_buf, *row_unescaped_buf;
188  size_t unescaped_len, row_unescaped_len;
189 
190  if (tsv_format)
191  cout << "#timestamp\trow\tcolumn\tvalue\n";
192 
193  scanner = cellstore->create_scanner(scan_ctx.get());
194  while (scanner->get(key_comps, value)) {
195 
196  if (!hit_start) {
197  if (strcmp(key_comps.row, start_key.c_str()) <= 0) {
198  scanner->forward();
199  continue;
200  }
201  hit_start = true;
202  }
203  if (got_end_key && strcmp(key_comps.row, end_key.c_str()) > 0)
204  break;
205  if (count_keys)
206  key_count++;
207  else {
208  if (tsv_format) {
209  row_escaper.escape(key_comps.row, key_comps.row_len,
210  &row_unescaped_buf, &row_unescaped_len);
211  if (column_id_map[key_comps.column_family_code])
212  cout << key_comps.timestamp << "\t" << row_unescaped_buf << "\t" << column_id_map[key_comps.column_family_code];
213  else
214  cout << key_comps.timestamp << "\t" << row_unescaped_buf << "\t" << (unsigned int)key_comps.column_family_code;
215  if (key_comps.column_qualifier && *key_comps.column_qualifier) {
216  escaper.escape(key_comps.column_qualifier, key_comps.column_qualifier_len,
217  &unescaped_buf, &unescaped_len);
218  cout << ":" << unescaped_buf;
219  }
220  bslen = value.decode_length((const uint8_t **)&bsptr);
221  if (bslen >= buf_len) {
222  delete [] buf;
223  buf_len = bslen + 256;
224  buf = new char [ buf_len ];
225  }
226  memcpy(buf, bsptr, bslen);
227  buf[bslen] = 0;
228  escaper.escape(buf, bslen, &unescaped_buf, &unescaped_len);
229  cout << "\t" << (char *)unescaped_buf << "\n";
230  }
231  else
232  cout << key_comps << endl;
233  }
234  scanner->forward();
235  }
236  }
237 
238  if (tsv_format)
239  return 0;
240 
241  if (count_keys) {
242  cout << key_count << endl;
243  return 0;
244  }
245 
249  cout << endl;
250  cout << "BLOCK INDEX:" << endl;
251  cellstore->display_block_info();
252 
256  cout << endl;
257  cout << "BLOOM FILTER SIZE: "
258  << cellstore->bloom_filter_size() << endl;
259 
263  cout << endl;
264  const vector<String> &replaced_files = cellstore->get_replaced_files();
265  cout << "REPLACED FILES: " << endl;
266  for(size_t ii=0; ii < replaced_files.size(); ++ii)
267  cout << replaced_files[ii] << endl;
268 
272  CellStoreTrailer *trailer = cellstore->get_trailer();
273 
274  cout << endl;
275  cout << "TRAILER:" << endl;
276  trailer->display_multiline(cout);
277  cout << endl;
278  }
279  catch (Exception &e) {
280  HT_ERROR_OUT << e << HT_END;
281  return 1;
282  }
283 
284  return 0;
285 }
Retrieves system information (hardware, installation directory, etc)
Abstract base class for cell store trailer.
Interface and base of config policy.
Definition: Config.h:149
int64_t timestamp
Definition: Key.h:134
const char * row
Definition: Key.h:129
Declarations for CellStoreFactory.
PropertiesPtr properties
This singleton map stores all options.
Definition: Config.cc:47
std::string String
A String is simply a typedef to std::string.
Definition: String.h:44
Helper class for printing usage banners on the command line.
void init(int argc, char *argv[], const Desc *desc=NULL)
Initialize with default policy.
Definition: Init.h:95
Po::typed_value< String > * str(String *v=0)
Definition: Properties.h:166
int main(int argc, char **argv)
Definition: csdump.cc:92
STL namespace.
bool escape(const char *in_buf, size_t in_len, const char **out_bufp, size_t *out_lenp)
Scan context information.
Definition: ScanContext.h:52
Tracks range server memory used.
Definition: MemoryTracker.h:42
Desc & cmdline_desc(const char *usage)
A macro which defines global functions like get_bool(), get_str(), get_i16() etc. ...
Definition: Config.cc:72
bool has(const String &name)
Check existence of a configuration value.
Definition: Config.h:57
A class managing one or more serializable ByteStrings.
Definition: ByteString.h:47
Declarations for CellStore.
uint32_t row_len
Definition: Key.h:131
static Hypertable::MemoryTracker * memory_tracker
Definition: Global.h:94
std::shared_ptr< Client > ClientPtr
Smart pointer to Client.
Definition: Client.h:233
Logging routines and macros.
Compatibility Macros for C/C++.
Initialization helper for applications.
#define HT_END
Definition: Logger.h:220
static Hypertable::FilesystemPtr dfs
Definition: Global.h:64
#define HT_ERROR_OUT
Definition: Logger.h:301
std::shared_ptr< CellStore > CellStorePtr
Smart pointer to CellStore.
Definition: CellStore.h:340
Hypertable definitions
Declarations for ConnectionManager.
Declarations for Comm.
size_t decode_length(const uint8_t **dptr) const
Retrieves the decoded length and returns a pointer to the string.
Definition: ByteString.h:83
Provides access to internal components of opaque key.
Definition: Key.h:40
uint32_t column_qualifier_len
Definition: Key.h:132
Internet address wrapper classes and utility functions.
Meta::list< MyPolicy, DefaultPolicy > Policies
Declarations for ReactorFactory.
This is a generic exception class for Hypertable.
Definition: Error.h:314
A serializable ByteString.
virtual void display_multiline(std::ostream &os)=0
Prints the trailer, one member per line, to the given ostream.
uint8_t column_family_code
Definition: Key.h:127
std::shared_ptr< CellListScanner > CellListScannerPtr
Definition: CellList.h:35
std::shared_ptr< ConnectionManager > ConnectionManagerPtr
Smart pointer to ConnectionManager.
static CellStorePtr open(const String &name, const char *start_row, const char *end_row)
Creates a CellStore object from a given cell store file.
Desc & cmdline_hidden_desc()
Get the command line hidden options description (for positional options)
Definition: Config.cc:81
const char * column_qualifier
Definition: Key.h:130
PositionalDesc & cmdline_positional_desc()
Get the command line positional options description.
Definition: Config.cc:90
Declarations for Client.
std::shared_ptr< ScanContext > ScanContextPtr
Definition: ScanContext.h:169