0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
HostSpecification.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 3
9  * of the License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
26 
27 #include <Common/Compat.h>
28 
29 #include "HostSpecification.h"
30 
31 #include <Common/Error.h>
32 #include <Common/Logger.h>
33 
34 #include <algorithm>
35 #include <cctype>
36 #include <memory>
37 #include <set>
38 #include <stack>
39 #include <string>
40 
41 using namespace Hypertable;
42 using namespace std;
43 
44 namespace {
45 
/// Hostname broken into the pieces used for numeric-aware ordering.
///
/// A name such as <code>rack07.example.com</code> is split into the text that
/// precedes the first decimal digit (<i>prefix</i>), the integer value of the
/// digit run that follows (<i>num</i>) and whatever comes after that run
/// (<i>postfix</i>).  Names without any digit keep <code>num == -1</code> and
/// an empty postfix.
class ClusterHostname {
public:
  /// Builds an entry from components that the caller has already separated.
  /// @param name Complete hostname
  /// @param prefix Portion of the name preceding the numeric component
  /// @param num Value of the numeric component
  /// @param postfix Portion of the name following the numeric component
  ClusterHostname(const std::string &name, const std::string &prefix,
                  int num, const std::string &postfix)
    : name(name), prefix(prefix), postfix(postfix), num(num) { }

  /// Builds an entry by parsing <code>name</code>.
  /// Everything up to the first decimal digit becomes the prefix; the digit
  /// run is converted with <code>strtol</code> (base 10) and the remainder
  /// becomes the postfix.
  /// @param name Complete hostname
  explicit ClusterHostname(const std::string &name) : name(name) {
    const char *base = name.c_str();
    const char *ptr = base;
    // The <cctype> classifiers are undefined for negative arguments, so bytes
    // >= 0x80 must be widened through unsigned char first.
    while (*ptr && !std::isdigit(static_cast<unsigned char>(*ptr)))
      ++ptr;
    prefix.assign(base, static_cast<size_t>(ptr - base));
    if (*ptr) {
      char *end = nullptr;
      num = static_cast<int>(strtol(ptr, &end, 10));
      postfix.assign(end);
    }
  }

  /// Complete hostname
  const std::string name;
  /// Text preceding the numeric component
  std::string prefix;
  /// Text following the numeric component
  std::string postfix;
  /// Numeric component, or -1 if the name contains no digits
  int num {-1};
};
83 
92  inline bool operator<(const ClusterHostname &ch1, const ClusterHostname &ch2) {
93  if (ch1.prefix.length() == ch2.prefix.length() &&
94  ch1.postfix.length() == ch2.postfix.length() &&
95  ch1.num != -1 && ch2.num != -1) {
96  int cmp = ch1.prefix.compare(ch2.prefix);
97  if (cmp)
98  return cmp < 0;
99  if (ch1.num != ch2.num)
100  return ch1.num < ch2.num;
101  cmp = ch1.postfix.compare(ch2.postfix);
102  return cmp < 0;
103  }
104  return ch1.name.compare(ch2.name) < 0;
105  }
106 
108  class Frame {
109  public:
111  bool subtract {};
113  set<ClusterHostname> hosts;
114  };
115 
117  typedef std::shared_ptr<Frame> FramePtr;
118 
123  class Token {
124  public:
126  char value {};
128  set<ClusterHostname> hosts;
129  };
130 
132  class Tokenizer {
133  public:
137  Tokenizer(const std::string &spec) : m_spec(spec) {
138  m_ptr = m_spec.c_str();
139  }
145  bool next(Token &token);
148  int current_position() {
149  return (m_ptr - m_spec.c_str()) + 1;
150  }
151  private:
154  void skip_whitespace() {
155  while (*m_ptr && isspace(*m_ptr))
156  m_ptr++;
157  }
160  void save_position() { m_saved_ptr = m_ptr; }
163  int saved_position() {
164  return (m_saved_ptr - m_spec.c_str()) + 1;
165  }
167  const char *current_character() {
168  int index {};
169  if (*m_ptr == '\'' || *m_ptr == '\"' || *m_ptr == '\?' ||
170  *m_ptr == '\\' || *m_ptr == '\a' || *m_ptr == '\b' ||
171  *m_ptr == '\f' || *m_ptr == '\n' || *m_ptr == '\r' ||
172  *m_ptr == '\t' || *m_ptr == '\v') {
173  character_buf[index++] = '\'';
174  character_buf[index++] = '\\';
175  if (*m_ptr == '\'' || *m_ptr == '\"' || *m_ptr == '\?' ||
176  *m_ptr == '\\')
177  character_buf[index++] = *m_ptr;
178  else if (*m_ptr == '\a')
179  character_buf[index++] = 'a';
180  else if (*m_ptr == '\b')
181  character_buf[index++] = 'b';
182  else if (*m_ptr == '\f')
183  character_buf[index++] = 'f';
184  else if (*m_ptr == '\n')
185  character_buf[index++] = 'n';
186  else if (*m_ptr == '\r')
187  character_buf[index++] = 'r';
188  else if (*m_ptr == '\t')
189  character_buf[index++] = 't';
190  else if (*m_ptr == '\v')
191  character_buf[index++] = 'v';
192  character_buf[index++] = '\'';
193  character_buf[index++] = '\0';
194  }
195  else if (*m_ptr >= 32) {
196  character_buf[index++] = '\'';
197  character_buf[index++] = *m_ptr;
198  character_buf[index++] = '\'';
199  character_buf[index++] = '\0';
200  }
201  else
202  sprintf(character_buf, "0x%x", *m_ptr);
203  return character_buf;
204  }
205 
206  std::string extract_numeric_range(int *beginp, int *endp);
208  std::string m_spec {};
210  const char *m_ptr {};
212  const char *m_saved_ptr {};
214  char character_buf[8];
215  };
216 
217  bool Tokenizer::next(Token &token) {
218 
219  skip_whitespace();
220 
221  if (*m_ptr == 0)
222  return false;
223 
224  if (*m_ptr == '(' || *m_ptr == ')' || *m_ptr == '+' ||
225  *m_ptr == ',' || *m_ptr == '-') {
226  token.value = *m_ptr++;
227  return true;
228  }
229 
230  if (!isalnum(*m_ptr) && *m_ptr == ']')
231  HT_THROWF(Error::BAD_FORMAT, "Invalid character encountered at position %d",
232  current_position());
233 
234  string prefix {};
235  string host_pattern {};
236  int postfix_len {};
237  int range_begin {};
238  int range_end {};
239 
240  if (isalnum(*m_ptr)) {
241  const char *base = m_ptr;
242  while (*m_ptr != 0 && (isalnum(*m_ptr) || *m_ptr == '.' || *m_ptr == '-'))
243  m_ptr++;
244  prefix = string(base, m_ptr-base);
245  }
246 
247  if (*m_ptr == '[') {
248  host_pattern = prefix;
249  host_pattern += extract_numeric_range(&range_begin, &range_end);
250  if (isalnum(*m_ptr) || *m_ptr == '.' || *m_ptr == '-') {
251  const char *base = m_ptr;
252  while (*m_ptr != 0 && (isalnum(*m_ptr) || *m_ptr == '.' || *m_ptr == '-'))
253  m_ptr++;
254  postfix_len = m_ptr - base;
255  host_pattern.append(base, postfix_len);
256  }
257  }
258 
259  if (*m_ptr != 0 && !isspace(*m_ptr) && *m_ptr != '(' && *m_ptr != ')' &&
260  *m_ptr != '+' && *m_ptr != ',' && *m_ptr != '-')
261  HT_THROWF(Error::BAD_FORMAT, "Invalid character %s encountered at position %d",
262  current_character(), current_position());
263 
264  token.hosts.clear();
265  token.value = 0;
266  if (!host_pattern.empty()) {
267  for (int i= range_begin; i<= range_end; i++) {
268  string name = format(host_pattern.c_str(), i);
269  string postfix = name.substr( name.length() - postfix_len);
270  token.hosts.insert( ClusterHostname(name, prefix, i, postfix) );
271  }
272  }
273  else
274  token.hosts.insert( ClusterHostname(prefix) );
275 
276  return true;
277  }
278 
279  string Tokenizer::extract_numeric_range(int *beginp, int *endp) {
280 
281  save_position();
282 
283  HT_ASSERT(*m_ptr == '[');
284  m_ptr++;
285 
286  skip_whitespace();
287 
288  // sanity check to make sure the next thing we see is a number
289  if (*m_ptr == '\0')
291  "Truncated range pattern at position %d",
292  current_position());
293  else if (!isdigit(*m_ptr))
295  "Invalid character %s in range pattern at position %d",
296  current_character(), current_position());
297  bool range_begin_leading_zero = *m_ptr == '0';
298 
299  // skip over begin number
300  const char *base = m_ptr;
301  while (*m_ptr && isdigit(*m_ptr))
302  m_ptr++;
303  size_t range_begin_width = m_ptr - base;
304  *beginp = atoi(base);
305 
306  skip_whitespace();
307 
308  // Verify the range contains '-' separation character
309  if (*m_ptr != '-') {
310  string msg;
311  if (*m_ptr == ']')
312  msg = format("Missing range separation character '-' at position %d",
313  current_position());
314  else if (*m_ptr == '\0')
315  msg = format("Truncated range pattern at position %d",
316  current_position());
317  else
318  msg = format("Invalid character %s in range pattern at position %d",
319  current_character(), current_position());
321  }
322 
323  m_ptr++;
324 
325  skip_whitespace();
326 
327  // sanity check to make sure the next thing we see is a number
328  if (*m_ptr == '\0')
330  "Truncated range pattern at position %d",
331  current_position());
332  else if (!isdigit(*m_ptr))
334  "Invalid character %s in range pattern at position %d",
335  current_character(), current_position());
336  bool range_end_leading_zero = *m_ptr == '0';
337 
338  // skip over end number
339  base = m_ptr;
340  while (*m_ptr && isdigit(*m_ptr))
341  m_ptr++;
342  size_t range_end_width = m_ptr - base;
343  *endp = atoi(base);
344 
345  skip_whitespace();
346 
347  if (*m_ptr == '\0')
349  "Truncated range pattern at position %d",
350  current_position());
351  else if (*m_ptr != ']')
353  "Invalid character %s in range pattern at position %d",
354  current_character(), current_position());
355  else if (range_begin_width != range_end_width &&
356  (range_begin_leading_zero || range_end_leading_zero))
357  HT_THROWF(Error::BAD_FORMAT, "Fixed-width numeric range specifiers must be of equal length (position %d)",
358  saved_position());
359  m_ptr++;
360 
361  if (range_begin_width != range_end_width)
362  return "%d";
363 
364  return format("%%0%dd", (int)range_begin_width);
365 
366  }
367 }
368 
369 HostSpecification::operator std::vector<std::string>() {
370  char last_token {(char)255};
371  stack<FramePtr> frame_stack;
372 
373  frame_stack.push(std::make_shared<Frame>());
374 
375  Token token;
376  Tokenizer tokenizer(m_spec);
377  while (tokenizer.next(token)) {
378  if (token.value == 0) {
379  if (frame_stack.top()->subtract) {
380  for (auto & host : token.hosts)
381  frame_stack.top()->hosts.erase(host);
382  }
383  else {
384  for (auto & host : token.hosts)
385  frame_stack.top()->hosts.insert(host);
386  }
387  }
388  else if (token.value == '(')
389  frame_stack.push(std::make_shared<Frame>());
390  else if (token.value == ')') {
391  if (last_token == '+' || last_token == ',' || last_token == '-')
392  HT_THROWF(Error::BAD_FORMAT, "Missing operand for '%c' operator at position %d",
393  last_token, tokenizer.current_position() - 1);
394  if (frame_stack.size() == 1)
395  HT_THROW(Error::BAD_FORMAT, "Mis-matched parenthesis");
396  FramePtr top = frame_stack.top();
397  frame_stack.pop();
398  if (frame_stack.top()->subtract) {
399  for (auto & host : top->hosts)
400  frame_stack.top()->hosts.erase(host);
401  }
402  else {
403  for (auto & host : top->hosts)
404  frame_stack.top()->hosts.insert(host);
405  }
406  }
407  else if (token.value == '+' || token.value == ',') {
408  if (last_token != 0 && last_token != ')')
409  HT_THROWF(Error::BAD_FORMAT, "Missing operand for '%c' operator at position %d",
410  token.value, tokenizer.current_position() - 1);
411  frame_stack.top()->subtract = false;
412  }
413  else if (token.value == '-') {
414  if (last_token != 0 && last_token != ')')
415  HT_THROWF(Error::BAD_FORMAT, "Missing operand for '-' operator at position %d",
416  tokenizer.current_position() - 1);
417  frame_stack.top()->subtract = true;
418  }
419  else
420  HT_THROWF(Error::BAD_FORMAT, "Unrecognized character '%c'", token.value);
421  last_token = token.value;
422  }
423 
424  if (frame_stack.size() != 1)
425  HT_THROW(Error::BAD_FORMAT, "Mis-matched parenthesis");
426 
427  if (last_token == '+' || last_token == ',' || last_token == '-')
428  HT_THROWF(Error::BAD_FORMAT, "Missing operand for '%c' operator at position %d",
429  last_token, tokenizer.current_position() - 1);
430 
431  vector<string> hosts;
432 
433  hosts.reserve(frame_stack.top()->hosts.size());
434  for (auto & ch : frame_stack.top()->hosts)
435  hosts.push_back(ch.name);
436 
437  return move(hosts);
438 }
Declarations for HostSpecification.
String format(const char *fmt,...)
Returns a String using printf-like format facilities. Vanilla snprintf is about 1.5x faster than this...
Definition: String.cc:37
STL namespace.
#define HT_ASSERT(_e_)
Definition: Logger.h:396
Logging routines and macros.
Compatibility Macros for C/C++.
bool operator<(const directory_entry< _Key, _Tp > &lhs, const directory_entry< _Key, _Tp > &rhs)
Definition: directory.h:128
Hypertable definitions
#define HT_THROWF(_code_, _fmt_,...)
Definition: Error.h:490
Error codes, Exception handling, error logging.
#define HT_THROW(_code_, _msg_)
Definition: Error.h:478