0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
GcWorker.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
22 #include <Common/Compat.h>
23 
24 #include "GcWorker.h"
25 
26 #include <boost/algorithm/string.hpp>
27 
28 extern "C" {
29 #include <unistd.h>
30 }
31 
32 using namespace Hypertable;
33 using namespace std;
34 
35 GcWorker::GcWorker(ContextPtr &context) : m_context(context) {
36  m_tables_dir = context->props->get_str("Hypertable.Directory");
37  boost::trim_if(m_tables_dir, boost::is_any_of("/"));
38  m_tables_dir = String("/") + m_tables_dir + "/tables/";
39 }
40 
41 void GcWorker::gc() {
42  try {
43  CountMap files_map;
44  scan_metadata(files_map);
45  // TODO: scan_directories(files_map); // fsckish, slower
46  reap(files_map);
47  }
48  catch (Exception &e) {
49  HT_ERRORF("Error: caught exception while gc'ing: %s", e.what());
50  }
51 }
52 
53 
55  ScanSpec scan_spec;
56 
57  scan_spec.columns.clear();
58  scan_spec.columns.push_back("Files");
59 
60  TableScannerPtr scanner(m_context->metadata_table->create_scanner(scan_spec));
61 
62  TableMutatorPtr mutator(m_context->metadata_table->create_mutator());
63 
64  Cell cell;
65  string last_row;
66  string last_cq;
67  int64_t last_time = 0;
68  bool found_valid_files = true;
69 
70  HT_DEBUG("MasterGc: scanning metadata...");
71 
72  while (scanner->next(cell)) {
73  if (strcmp("Files", cell.column_family)) {
74  HT_ERRORF("Unexpected column family '%s', while scanning METADATA",
75  cell.column_family);
76  continue;
77  }
78  if (last_row != cell.row_key) {
79  // new row
80  if (!found_valid_files)
81  delete_row(last_row, mutator);
82 
83  last_row = cell.row_key;
84  last_cq = cell.column_qualifier;
85  last_time = cell.timestamp;
86  found_valid_files = *cell.value != '!';
87 
88  if (found_valid_files)
89  insert_files(files_map, (char *)cell.value, cell.value_len, 1);
90  }
91  else if (last_cq != cell.column_qualifier) {
92  // new access group
93  last_cq = cell.column_qualifier;
94  last_time = cell.timestamp;
95  bool is_valid_files = (cell.value_len == 0) || (*cell.value != '!');
96  found_valid_files |= is_valid_files;
97 
98  if (is_valid_files)
99  insert_files(files_map, (char *)cell.value, cell.value_len, 1);
100  }
101  else {
102  // cruft to delete
103  if (cell.timestamp > last_time) {
104  HT_ERROR("Unexpected timestamp order while scanning METADATA");
105  continue;
106  }
107  if (cell.value_len == 0 || *cell.value != '!') {
108  insert_files(files_map, (char *)cell.value, cell.value_len);
109  delete_cell(cell, mutator);
110  }
111  }
112  }
113  // for last table
114  if (!found_valid_files)
115  delete_row(last_row, mutator);
116 
117  mutator->flush();
118 }
119 
120 void GcWorker::delete_row(const std::string &row, TableMutatorPtr &mutator) {
121  KeySpec key;
122 
123  if (row.empty())
124  return;
125 
126  key.row = row.c_str();
127  key.row_len = row.length();
128  key.flag = FLAG_DELETE_ROW;
129 
130  HT_DEBUGF("MasterGc: Deleting row %s", (char *)key.row);
131 
132  mutator->set_delete(key);
133 }
134 
135 void GcWorker::delete_cell(const Cell &cell, TableMutatorPtr &mutator) {
136  HT_DEBUG_OUT <<"MasterGc: Deleting cell: ("<< cell.row_key <<", "
137  << cell.column_family <<", "<< cell.column_qualifier <<", "
138  << cell.timestamp <<')'<< HT_END;
139 
140  KeySpec key(cell.row_key, cell.column_family, cell.column_qualifier,
142  mutator->set_delete(key);
143 }
144 
145 
146 void GcWorker::insert_files(CountMap &map, const char *buf, size_t len, int c) {
147  const char *p = buf, *pn = p, *endp = p + len - 1;
148 
149  while (p < endp) {
150  while (p < endp && (*p != ';' || p[1] != '\n'))
151  ++p;
152 
153  if (p == endp)
154  break;
155 
156  string name(pn, p - pn);
157  p += 2;
158  insert_file(map, name.c_str(), c);
159  pn = p;
160  }
161 }
162 
163 void GcWorker::insert_file(CountMap &map, const char *fname, int c) {
164  if (*fname == '#')
165  ++fname;
166 
167  CountMap::InsRet ret = map.insert(fname, c);
168 
169  if (!ret.second)
170  (*ret.first).second += c;
171 }
172 
178 void GcWorker::reap(CountMap &files_map) {
179  size_t nf = 0, nf_done = 0, nd = 0, nd_done = 0;
180 
181  for (const auto &v : files_map) {
182  if (!v.second) {
183  HT_INFOF("MasterGc: removing file %s", v.first);
184  try {
185  m_context->dfs->remove(m_tables_dir + v.first);
186  ++nf_done;
187  }
188  catch (Exception &e) {
189  HT_WARNF("%s", e.what());
190  }
191  ++nf;
192  }
193  }
194 
195  HT_DEBUGF("MasterGc: removed %lu/%lu files; %lu/%lu directories",
196  (Lu)nf_done, (Lu)nf, (Lu)nd_done, (Lu)nd);
197 }
198 
void delete_cell(const Cell &cell, TableMutatorPtr &mutator)
Definition: GcWorker.cc:135
#define HT_WARNF(msg,...)
Definition: Logger.h:290
static const uint32_t FLAG_DELETE_ROW
Definition: KeySpec.h:40
std::string String
A String is simply a typedef to std::string.
Definition: String.h:44
std::pair< iterator, bool > InsRet
Definition: CstrHashMap.h:55
ContextPtr m_context
Definition: GcWorker.h:48
A hash map for storing and lookup char * strings efficiently.
Definition: CstrHashMap.h:44
const char * column_qualifier
Definition: Cell.h:68
static const uint32_t FLAG_DELETE_CELL
Definition: KeySpec.h:42
STL namespace.
GcWorker(ContextPtr &context)
Definition: GcWorker.cc:35
const void * row
Definition: KeySpec.h:125
std::shared_ptr< TableScanner > TableScannerPtr
Smart pointer to TableScanner.
Definition: TableScanner.h:124
void reap(CountMap &files_map)
Currently only stale cs files and range directories are reaped Table directories probably should be o...
Definition: GcWorker.cc:178
std::shared_ptr< Context > ContextPtr
Smart pointer to Context.
Definition: Context.h:265
Scan predicate and control specification.
Definition: ScanSpec.h:56
std::shared_ptr< TableMutator > TableMutatorPtr
Smart pointer to TableMutator.
Definition: TableMutator.h:257
#define HT_DEBUG(msg)
Definition: Logger.h:259
Compatibility Macros for C/C++.
const char * row_key
Definition: Cell.h:66
#define HT_END
Definition: Logger.h:220
InsRet insert(const char *key, const DataT &data)
Inserts a new string/data pair in the map.
Definition: CstrHashMap.h:76
Hypertable definitions
#define HT_DEBUGF(msg,...)
Definition: Logger.h:260
#define HT_ERROR(msg)
Definition: Logger.h:299
const char * column_family
Definition: Cell.h:67
#define HT_INFOF(msg,...)
Definition: Logger.h:272
void scan_metadata(CountMap &files_map)
Definition: GcWorker.cc:54
This is a generic exception class for Hypertable.
Definition: Error.h:314
long unsigned int Lu
Shortcut for printf formats.
Definition: String.h:47
#define HT_ERRORF(msg,...)
Definition: Logger.h:300
void delete_row(const std::string &row, TableMutatorPtr &mutator)
Definition: GcWorker.cc:120
Encapsulates decomposed key and value.
Definition: Cell.h:32
void insert_files(CountMap &map, const char *buf, size_t len, int c=0)
Definition: GcWorker.cc:146
#define HT_DEBUG_OUT
Definition: Logger.h:261
void insert_file(CountMap &map, const char *fname, int c)
Definition: GcWorker.cc:163
int64_t timestamp
Definition: Cell.h:69