Tokenizer.cc
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
26 
27 #include <Common/Compat.h>
28 
29 #include "Tokenizer.h"
30 #include "TokenizerTools.h"
31 
32 #include <Common/Error.h>
33 #include <Common/FileUtils.h>
34 #include <Common/Logger.h>
35 
36 #include <cerrno>
37 #include <ctime>
38 #include <iostream>
39 
40 extern "C" {
41 #include <pwd.h>
42 #include <sys/stat.h>
43 #include <sys/types.h>
44 #include <unistd.h>
45 }
46 
47 #include <cctype>
48 #include <stack>
49 
50 using namespace Hypertable;
51 using namespace Hypertable::ClusterDefinitionFile;
52 using namespace std;
53 
54 Tokenizer::Tokenizer(const string &fname)
55  : m_fname(fname) {
56  if (!FileUtils::read(m_fname, m_content))
57  exit(1);
58  m_next = m_content.c_str();
59 }
60 
61 Tokenizer::Tokenizer(const string &fname, const string &content)
62  : m_fname(fname), m_content(content) {
63  m_next = m_content.c_str();
64 }
65 
66 string Tokenizer::dirname() {
67  size_t lastslash = m_fname.find_last_of('/');
68  if (lastslash != string::npos)
69  return m_fname.substr(0, lastslash);
70  return ".";
71 }
72 
73 bool Tokenizer::next(Token &token) {
74  const char *base = m_next;
75  const char *end;
76  const char *ptr;
77  token.clear();
78 
79  if (*base == 0)
80  return false;
81 
82  token.line = m_line + 1;
83  token.fname = m_fname;
84 
85  while (*base) {
86 
87  end = base;
88 
89  while (*end && *end != '\n')
90  end++;
91 
92  m_line++;
93 
94  int line_type = identify_line_type(base, end);
95 
96  switch (line_type) {
97 
98  case (Token::VARIABLE):
99  ptr = strchr(base, '=');
100  ptr++;
101  if (*ptr == '\'' || *ptr == '"' || *ptr == '`') {
102  int starting_line = (int)m_line;
103  if (!TokenizerTools::find_end_char(ptr, &end, &m_line))
104  HT_THROWF(Error::SYNTAX_ERROR,
105  "Unterminated string starting on line %d", starting_line);
106  TokenizerTools::skip_to_newline(&end);
107  }
108  accumulate(&base, end, Token::VARIABLE, token);
109  return true;
110 
111  case (Token::ROLE):
112  if (accumulate(&base, end, Token::ROLE, token))
113  return true;
114  break;
115 
116  case (Token::TASK):
117  if ((ptr = strchr(base, '{')) == 0)
118  HT_THROWF(Error::SYNTAX_ERROR,
119  "Mal-formed task: statement starting on line %d",(int)m_line);
120  {
121  int starting_line = (int)m_line;
122  if (!TokenizerTools::find_end_char(ptr, &end, &m_line))
123  HT_THROWF(Error::SYNTAX_ERROR, "Missing terminating '}' character in "
124  "task: statement on line %d of file '%s'",
125  starting_line, m_fname.c_str());
126  TokenizerTools::skip_to_newline(&end);
127  }
128  accumulate(&base, end, Token::TASK, token);
129  return true;
130 
131  case (Token::FUNCTION):
132  if ((ptr = strchr(base, '{')) == 0)
133  HT_THROWF(Error::SYNTAX_ERROR,
134  "Mal-formed function starting on line %d",(int)m_line);
135  {
136  int starting_line = (int)m_line;
137  if (!TokenizerTools::find_end_char(ptr, &end, &m_line))
138  HT_THROWF(Error::SYNTAX_ERROR, "Missing terminating '}' character in "
139  "function on line %d of file '%s'",
140  starting_line, m_fname.c_str());
141  TokenizerTools::skip_to_newline(&end);
142  }
143  if (accumulate(&base, end, Token::FUNCTION, token))
144  return true;
145  break;
146 
147  case (Token::CONTROLFLOW):
148  {
149  end = base;
150  if (!TokenizerTools::skip_control_flow_statement(&end))
151  HT_THROWF(Error::SYNTAX_ERROR,
152  "Incomplete control flow statement on line %d of file '%s'",
153  (int)m_line, m_fname.c_str());
154  m_line += TokenizerTools::count_newlines(base, end);
155  TokenizerTools::skip_to_newline(&end);
156  }
157  if (accumulate(&base, end, Token::CONTROLFLOW, token))
158  return true;
159  break;
160 
161  case (Token::COMMENT):
162  if (accumulate(&base, end, Token::COMMENT, token))
163  return true;
164  break;
165 
166  case (Token::CODE):
167  if (accumulate(&base, end, Token::CODE, token))
168  return true;
169  break;
170 
171  case (Token::BLANKLINE):
172  if (accumulate(&base, end, Token::BLANKLINE, token))
173  return true;
174  break;
175 
176  case (Token::INCLUDE):
177  if (accumulate(&base, end, Token::INCLUDE, token))
178  return true;
179  break;
180 
181  default:
182  HT_FATALF("Unknown token type - %u", (unsigned int)line_type);
183 
184  }
185  }
186  return true;
187 }
188 
189 
190 int Tokenizer::identify_line_type(const char *base, const char *end) {
191  const char *ptr = base;
192 
193  // skip leading whitespace
194  while (ptr < end && isspace(*ptr))
195  ptr++;
196 
197  if (ptr == end)
198  return Token::BLANKLINE;
199 
200  if (TokenizerTools::is_identifier_start_character(*ptr)) {
201  ptr++;
202  while (TokenizerTools::is_identifier_character(*ptr))
203  ptr++;
204  if (*ptr == '=')
205  return Token::VARIABLE;
206  else if (*ptr == ':') {
207  if (!strncmp(base, "include", 7))
208  return Token::INCLUDE;
209  if (!strncmp(base, "role", 4))
210  return Token::ROLE;
211  else if (!strncmp(base, "task", 4))
212  return Token::TASK;
213  else {
214  string tag(base, ptr-base);
215  HT_THROWF(Error::SYNTAX_ERROR,
216  "Unrecognized meta tag '%s:' on line %u",
217  tag.c_str(), (unsigned int)m_line);
218  }
219  }
220  else if (isspace(*ptr)) {
221  if (!strncmp(base, "if", 2) || !strncmp(base, "while", 5) ||
222  !strncmp(base, "for", 3) || !strncmp(base, "until", 5) ||
223  !strncmp(base, "case", 4))
224  return Token::CONTROLFLOW;
225  else if (!strncmp(base, "function", 8))
226  return Token::FUNCTION;
227  else if (!strncmp(base, "role", 4))
228  HT_THROWF(Error::SYNTAX_ERROR,
229  "Invalid role: statement on line %d",(int)m_line);
230  else if (!strncmp(base, "task", 4))
231  HT_THROWF(Error::SYNTAX_ERROR,
232  "Invalid task: statement on line %d",(int)m_line);
233  else if (!strncmp(base, "include", 4))
234  HT_THROWF(Error::SYNTAX_ERROR,
235  "Invalid include: statement on line %d",(int)m_line);
236  ptr++;
237  while (ptr < end && isspace(*ptr))
238  ptr++;
239  if (*ptr == '(')
240  return Token::FUNCTION;
241  return Token::CODE;
242  }
243  }
244  else if (*ptr == '#')
245  return Token::COMMENT;
246 
247  return Token::CODE;
248 }
249 
250 
251 bool Tokenizer::accumulate(const char **basep,
252  const char *end,
253  int type, Token &token) {
254 
255  if (token.type == Token::ROLE && type == Token::CODE)
256  type = Token::ROLE;
257  else {
258  if (token.type == Token::COMMENT && type == Token::TASK)
259  token.type = Token::TASK;
260  else if (type == Token::FUNCTION || type == Token::BLANKLINE ||
261  type == Token::CONTROLFLOW)
262  type = Token::CODE;
263 
264  if (token.type != Token::NONE &&
265  (type != token.type || type == Token::ROLE)) {
266  m_next = *basep;
267  m_line--;
268  token.create_translator();
269  return true;
270  }
271  }
272 
273  if (*end)
274  end++;
275  token.text.append(*basep, end-*basep);
276  token.type = type;
277  *basep = end;
278  m_next = end;
279  if (type == Token::VARIABLE || type == Token::TASK || *m_next == 0) {
280  token.create_translator();
281  return true;
282  }
283  return false;
284 }
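The listing above implements the tokenizer used when translating cluster definition files: next() scans the content line by line, identify_line_type() classifies each line (variable assignment, role:, task:, include:, control flow, function, comment, blank line, or plain code), and accumulate() merges consecutive lines of the same kind into a single Token and creates a translator for it. The sketch below is not part of the original source; it shows one way a caller might drive the class, using the two-argument constructor that takes the definition content directly. The include paths, the sample definition text, and the type_name() helper are illustrative assumptions; only the Tokenizer and Token interfaces are taken from the code above.

#include "Tokenizer.h"   // assumed include paths; adjust to the source tree
#include "Token.h"

#include <iostream>
#include <string>

using namespace Hypertable::ClusterDefinitionFile;

// Illustrative helper (not part of the library): maps Token type codes to names.
static const char *type_name(int type) {
  switch (type) {
  case Token::INCLUDE:     return "INCLUDE";
  case Token::VARIABLE:    return "VARIABLE";
  case Token::ROLE:        return "ROLE";
  case Token::TASK:        return "TASK";
  case Token::FUNCTION:    return "FUNCTION";
  case Token::CONTROLFLOW: return "CONTROLFLOW";
  case Token::COMMENT:     return "COMMENT";
  case Token::CODE:        return "CODE";
  case Token::BLANKLINE:   return "BLANKLINE";
  default:                 return "NONE";
  }
}

int main() {
  // Sample definition text (assumed syntax, for illustration only).
  std::string content =
    "# installation layout\n"
    "INSTALL_PREFIX=/opt/hypertable\n"
    "\n"
    "role: master host00\n"
    "task: start_master {\n"
    "  echo \"starting master\"\n"
    "}\n";

  Tokenizer tokenizer("example.def", content);
  Token token;

  // next() returns false once the end of the content has been reached.
  while (tokenizer.next(token))
    std::cout << type_name(token.type) << " token starting at line "
              << token.line << ":\n" << token.text;
  return 0;
}

With this input the loop yields a COMMENT token, a VARIABLE token for the assignment, a CODE token for the blank line, a ROLE token, and finally a TASK token covering the whole braced block. The comment is returned separately here, but a comment placed directly above a task: statement is folded into the TASK token by accumulate().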
Cross-references:

Tokenizer::Tokenizer(const string &fname): Constructor. (Tokenizer.cc:54)
bool Tokenizer::next(Token &token): Returns the next token from the cluster definition file. (Tokenizer.cc:73)
int Tokenizer::identify_line_type(const char *base, const char *end): Identifies token type of line starting at base. (Tokenizer.cc:190)
bool Tokenizer::accumulate(const char **basep, const char *end, int type, Token &token): Accumulates the next token. (Tokenizer.cc:251)
string Tokenizer::dirname(): Returns the directory path containing the cluster definition file. (Tokenizer.cc:66)
string Tokenizer::m_fname: Pathname of cluster definition file. (Tokenizer.h:113)
string Tokenizer::m_content: Content of cluster definition file. (Tokenizer.h:115)
const char *Tokenizer::m_next: Pointer to beginning of next token to read. (Tokenizer.h:117)
size_t Tokenizer::m_line: Line number of end of last token read. (Tokenizer.h:119)
Token: Cluster definition file token. (Token.h:43)
void Token::clear(): Clears token state. (Token.h:60)
void Token::create_translator(): Creates a translator for the token. (Token.cc:42)
size_t Token::line: Starting line number of token. (Token.h:79)
string Token::fname: Pathname of file from which token was extracted. (Token.h:81)
bool TokenizerTools::find_end_char(const char *base, const char **endp, size_t *linep): Skips to end of block or quoted string in code.
size_t TokenizerTools::count_newlines(const char *base, const char *end): Counts number of newlines in text.
bool TokenizerTools::skip_to_newline(const char **endp): Skips to next newline character in text.
bool TokenizerTools::skip_control_flow_statement(const char **basep): Skips over bash control flow statement.
bool TokenizerTools::is_identifier_start_character(char c): Checks if character is valid bash identifier start character.
bool TokenizerTools::is_identifier_character(char c): Checks if character is valid bash identifier character.
static bool FileUtils::read(const String &fname, String &contents): Reads a whole file into a String. (FileUtils.cc:59)
#define HT_THROWF(_code_, _fmt_, ...) (Error.h:490)
#define HT_FATALF(msg, ...) (Logger.h:343)
namespace Hypertable: Hypertable definitions.
namespace Hypertable::ClusterDefinitionFile: Cluster definition file translation definitions. (Compiler.h:35)
namespace std: STL namespace.
Tokenizer.h: Declarations for Tokenizer.
TokenizerTools.h: Declarations for TokenizerTools.
Common/FileUtils.h: File system utility functions.
Common/Logger.h: Logging routines and macros.
Common/Error.h: Error codes, exception handling, error logging.
Common/Compat.h: Compatibility macros for C/C++.