0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
AccessGroupGarbageTracker.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
27 
28 #include <Common/Compat.h>
30 
32 
33 #include <Common/Config.h>
34 #include <Common/Logger.h>
35 
36 #include <ctime>
37 
38 using namespace Hypertable;
39 using namespace Config;
40 using namespace std;
41 
43  CellCacheManagerPtr &cell_cache_manager, AccessGroupSpec *ag_spec)
44  : m_cell_cache_manager(cell_cache_manager) {
45  SubProperties cfg(props, "Hypertable.RangeServer.");
47  = cfg.get_i32("AccessGroup.GarbageThreshold.Percentage") / 100.0;
48  m_accum_data_target = cfg.get_i64("Range.SplitSize") / 10;
50  m_last_collection_time = time(0);
51  update_schema(ag_spec);
52 }
53 
54 
56  lock_guard<mutex> lock(m_mutex);
57  m_have_max_versions = false;
58  m_min_ttl = 0;
59  m_in_memory = ag_spec->get_option_in_memory();
60  for (auto cf_spec : ag_spec->columns()) {
61  if (cf_spec->get_option_max_versions() > 0)
62  m_have_max_versions = true;
63  if (cf_spec->get_option_ttl() > 0) {
64  if (m_min_ttl == 0)
65  m_min_ttl = (time_t)cf_spec->get_option_ttl();
66  else if (cf_spec->get_option_ttl() < m_min_ttl)
67  m_min_ttl = cf_spec->get_option_ttl();
68  }
69  }
71 }
72 
73 void
75  time_t t,
76  bool collection_performed) {
77  lock_guard<mutex> lock(m_mutex);
78  m_stored_deletes = 0;
81  for (auto csi : stores) {
82  m_stored_expirable += csi.expirable_data();
83  m_stored_deletes += csi.delete_count();
84  m_current_disk_usage += csi.cs->disk_usage() / csi.cs->compression_ratio();
85  }
86  if (m_in_memory)
88  if (collection_performed) {
89  m_last_collection_time = (t==0) ? time(0) : t;
91  }
92 }
93 
95  const std::string &label) {
96  lock_guard<mutex> lock(m_mutex);
97  out << label << "\telapsed_target\t" << m_elapsed_target << "\n";
98  out << label << "\tlast_collection_time\t" << m_last_collection_time << "\n";
99  out << label << "\tstored_deletes\t" << m_stored_deletes << "\n";
100  out << label << "\tstored_expirable\t" << m_stored_expirable << "\n";
101  out << label << "\tlast_collection_disk_usage\t"
102  << m_last_collection_disk_usage << "\n";
103  out << label << "\tcurrent_disk_usage\t" << m_current_disk_usage << "\n";
104  out << label << "\taccum_data_target\t" << m_accum_data_target << "\n";
105  out << label << "\tmin_ttl\t" << m_min_ttl << "\n";
106  out << label << "\thave_max_versions\t"
107  << (m_have_max_versions ? "true" : "false") << "\n";
108  out << label << "\tin_memory\t" << (m_in_memory ? "true" : "false") << "\n";
109  out << label << "\tdelete_count\t" << compute_delete_count() << "\n";
110  out << label << "\tmemory_accumulated\t"
112 }
113 
114 
116  lock_guard<mutex> lock(m_mutex);
118  return check_needed_deletes() || check_needed_ttl(now);
119  return false;
120 }
121 
122 
123 void
125  MergeScannerAccessGroup *mscanner) {
126  if (mscanner && (mscanner->get_flags() &
128  double input = (double)mscanner->get_input_bytes();
129  double garbage = input - (double)mscanner->get_output_bytes();
130  adjust_targets(now, input, garbage);
131  }
132 }
133 
134 void
136  double garbage) {
137  lock_guard<mutex> lock(m_mutex);
138 
140 
141  double garbage_ratio = garbage / total;
142  bool gc_needed = garbage_ratio >= m_garbage_threshold;
143  bool check_deletes = check_needed_deletes();
144  bool check_ttl = check_needed_ttl(now);
145 
146  // If check matches actual need, targets are ok so just return
147  if (gc_needed == (check_deletes||check_ttl))
148  return;
149 
150  // Recompute DATA target
151  bool have_garbage {m_have_max_versions || compute_delete_count() > 0};
152  if (have_garbage && check_deletes != gc_needed) {
153  if (garbage_ratio > 0) {
154  int64_t new_accum_data_target =
156  / garbage_ratio;
157  if (!gc_needed)
158  new_accum_data_target *= 1.15;
159  if (new_accum_data_target < m_accum_data_target_minimum)
161  else if (new_accum_data_target > (m_accum_data_target*2))
162  m_accum_data_target *= 2;
163  else
164  m_accum_data_target = new_accum_data_target;
165  }
166  else
167  m_accum_data_target *= 2;
168  }
169 
170  // Recompute ELAPSED target
171  if (m_min_ttl > 0 && check_ttl != gc_needed) {
172  if (garbage_ratio > 0) {
173  time_t new_elapsed_target =
174  ((now-m_last_collection_time) * m_garbage_threshold) / garbage_ratio;
175  if (!gc_needed)
176  new_elapsed_target *= 1.15;
177  if (new_elapsed_target < m_elapsed_target_minimum)
179  else if (new_elapsed_target > (m_elapsed_target*2))
180  m_elapsed_target *= 2;
181  else
182  m_elapsed_target = new_elapsed_target;
183  }
184  else
185  m_elapsed_target *= 2;
186  }
187 }
188 
190  int64_t accum;
191  if (m_cell_cache_manager->immutable_cache())
192  accum = m_cell_cache_manager->immutable_cache()->logical_size();
193  else
194  accum = m_cell_cache_manager->logical_size();
195  if (m_in_memory)
197  return (accum < 0) ? 0 : accum;
198 }
199 
201  int64_t accum {memory_accumulated_since_collection()};
204  return accum;
205 }
206 
208  int64_t delete_count {m_stored_deletes};
209  if (m_cell_cache_manager->immutable_cache())
210  delete_count += m_cell_cache_manager->immutable_cache()->delete_count();
211  else
212  delete_count += m_cell_cache_manager->delete_count();
213  return delete_count;
214 }
215 
217  if ((m_have_max_versions || compute_delete_count() > 0) &&
219  return true;
220  return false;
221 }
222 
224  int64_t memory_accum {memory_accumulated_since_collection()};
225  int64_t total_size {m_current_disk_usage + memory_accum};
226  double possible_garbage = m_stored_expirable + memory_accum;
227  double possible_garbage_ratio = possible_garbage / total_size;
228  time_t elapsed {now - m_last_collection_time};
229  if (m_min_ttl > 0 && possible_garbage_ratio >= m_garbage_threshold &&
230  elapsed >= m_elapsed_target)
231  return true;
232  return false;
233 }
int64_t m_accum_data_target
Amount of data to accumulate before signaling GC likely needed (adaptive)
void update_cellstore_info(std::vector< CellStoreInfo > &stores, time_t t=0, bool collection_performed=true)
Updates stored data statistics from current set of CellStores.
int64_t m_current_disk_usage
Current disk usage, updated by update_cellstore_info()
int64_t m_accum_data_target_minimum
Minimum amount of data to accumulate before signaling GC likely needed.
time_t m_min_ttl
Minimum TTL found in access group schema.
int64_t m_stored_expirable
Amount of data accumulated in cell stores that could expire due to TTL.
bool m_have_max_versions
true if any column families have non-zero MAX_VERSIONS
bool check_needed_deletes()
Signals if GC is likely needed due to MAX_VERSIONS or deletes.
STL namespace.
bool m_in_memory
true if access group is in memory
int64_t memory_accumulated_since_collection()
Computes the amount of in-memory data accumulated since last collection.
int64_t total_accumulated_since_collection()
Computes the total amount of data accumulated since last collection.
#define HT_ASSERT(_e_)
Definition: Logger.h:396
bool get_option_in_memory() const
Gets in memory option.
time_t m_last_collection_time
Time of last garbage collection
std::shared_ptr< Properties > PropertiesPtr
Definition: Properties.h:447
bool check_needed(time_t now)
Signals if garbage collection is likely needed.
Logging routines and macros.
Compatibility Macros for C/C++.
void update_schema(AccessGroupSpec *ag_spec)
Updates control variables from access group schema definition.
AccessGroupGarbageTracker(PropertiesPtr &props, CellCacheManagerPtr &cell_cache_manager, AccessGroupSpec *ag_spec)
Constructor.
CellCacheManagerPtr m_cell_cache_manager
Cell cache manager
int64_t compute_delete_count()
Computes number of delete records in access group.
Access group specification.
Hypertable definitions
double m_garbage_threshold
Fraction of accumulated garbage that triggers collection.
Helper class to access parts of the properties.
Definition: Properties.h:458
ColumnFamilySpecs & columns()
Returns reference to column specifications.
int64_t m_last_collection_disk_usage
Disk usage at the time the last garbage collection was performed.
uint32_t m_stored_deletes
Number of delete records accumulated in cell stores.
std::mutex m_mutex
Mutex to serialize access to data members
void output_state(std::ofstream &out, const std::string &label)
Prints a human-readable representation of internal state to an output stream.
bool check_needed_ttl(time_t now)
Signals if GC is likely needed due to TTL.
time_t m_elapsed_target_minimum
Minimum elapsed seconds required before signaling TTL GC likely needed.
Configuration settings.
time_t m_elapsed_target
Elapsed seconds required before signaling TTL GC likely needed (adaptive)
std::shared_ptr< CellCacheManager > CellCacheManagerPtr
Smart pointer to CellCacheManager.
void adjust_targets(time_t now, double total, double garbage)
Adjusts targets based on measured garbage.
Declarations for AccessGroupGarbageTracker.
Merge scanner for access groups.