0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ApacheLogParser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 3 of the
9  * License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
22 #include <Common/Compat.h>
23 
24 #include "ApacheLogParser.h"
25 
26 #include <boost/algorithm/string/predicate.hpp>
27 #include <boost/iostreams/filter/gzip.hpp>
28 
29 #include <ctime>
30 
31 namespace Io = boost::iostreams;
32 namespace Fn = boost::algorithm;
33 
34 using namespace Hypertable;
35 using namespace std;
36 
37 void ApacheLogParser::load(std::string filename) {
38  if (Fn::ends_with(filename, ".gz"))
39  m_fin.push(Io::gzip_decompressor());
40  m_fin.push(Io::file_source(filename));
41 }
42 
44  char *base;
45 
46  while (true) {
47 
48  memset(&entry, 0, sizeof(entry));
49 
50  if (!getline(m_fin, m_line))
51  return false;
52 
53  base = (char *)m_line.c_str();
54 
55  // IP address
56  if ((base = extract_field(base, &entry.ip_address)) == 0)
57  continue;
58 
59  // skip identd
60  if ((base = extract_field(base, 0)) == 0)
61  continue;
62 
63  // userid
64  if ((base = extract_field(base, &entry.userid)) == 0)
65  continue;
66 
67  // timestamp
68  if ((base = extract_timestamp(base, &entry.tm)) == 0)
69  continue;
70 
71  // request
72  if ((base = extract_field(base, &entry.request)) == 0)
73  continue;
74 
75  // response_code
76  if ((base = extract_field(base, &entry.response_code)) == 0)
77  continue;
78 
79  // object_size
80  if ((base = extract_field(base, &entry.object_size)) == 0)
81  continue;
82 
83  // referer
84  if ((base = extract_field(base, &entry.referer)) == 0)
85  return true;
86 
87  // user_agent
88  if ((base = extract_field(base, &entry.user_agent)) == 0)
89  return true;
90 
91  return true;
92  }
93 
94 }
95 
96 
97 
98 char *ApacheLogParser::extract_field(char *base, char **field_ptr) {
99  char *ptr;
100  while (isspace(*base))
101  base++;
102  if (*base == '"') {
103  base++;
104  if ((ptr = strchr(base, '"')) == 0)
105  return 0;
106  *ptr++ = 0;
107  }
108  else if ((ptr = strchr(base, ' ')) != 0)
109  *ptr++ = 0;
110  else
111  ptr += strlen(base);
112  if (field_ptr) {
113  if (*base == 0 || !strcmp(base, "-"))
114  *field_ptr = 0;
115  else
116  *field_ptr = base;
117  }
118  return ptr;
119 }
120 
121 char *ApacheLogParser::extract_timestamp(char *base, struct tm *tmp) {
122  char *end_ptr;
123  char *ptr;
124 
125  memset(tmp, 0, sizeof(tm));
126 
127  while (isspace(*base))
128  base++;
129  if (*base != '[')
130  return 0;
131  base++;
132  if ((ptr = strchr(base, ']')) != 0)
133  *ptr++ = 0;
134 
135  if ((tmp->tm_mday = strtol(base, &end_ptr, 10)) == 0)
136  return 0;
137  if (*end_ptr != '/')
138  return 0;
139  base = end_ptr+1;
140 
141  end_ptr = (char *)base;
142  while (isalpha(*end_ptr))
143  end_ptr++;
144  if (*end_ptr != '/' || (end_ptr-base) != 3)
145  return 0;
146  *end_ptr++ = 0;
147  if (!strcasecmp(base, "Jan"))
148  tmp->tm_mon = 0;
149  else if (!strcasecmp(base, "Feb"))
150  tmp->tm_mon = 1;
151  else if (!strcasecmp(base, "Mar"))
152  tmp->tm_mon = 2;
153  else if (!strcasecmp(base, "Apr"))
154  tmp->tm_mon = 3;
155  else if (!strcasecmp(base, "May"))
156  tmp->tm_mon = 4;
157  else if (!strcasecmp(base, "Jun"))
158  tmp->tm_mon = 5;
159  else if (!strcasecmp(base, "Jul"))
160  tmp->tm_mon = 6;
161  else if (!strcasecmp(base, "Aug"))
162  tmp->tm_mon = 7;
163  else if (!strcasecmp(base, "Sep"))
164  tmp->tm_mon = 8;
165  else if (!strcasecmp(base, "Oct"))
166  tmp->tm_mon = 9;
167  else if (!strcasecmp(base, "Nov"))
168  tmp->tm_mon = 10;
169  else if (!strcasecmp(base, "Dec"))
170  tmp->tm_mon = 11;
171  else
172  return 0;
173 
174  base = end_ptr;
175  if ((tmp->tm_year = strtol(base, &end_ptr, 10)) == 0)
176  return 0;
177  if (*end_ptr != ':')
178  return 0;
179  tmp->tm_year -= 1900;
180 
181  base = end_ptr+1;
182  tmp->tm_hour = strtol(base, &end_ptr, 10);
183  if (*end_ptr != ':')
184  return 0;
185 
186  base = end_ptr+1;
187  tmp->tm_min = strtol(base, &end_ptr, 10);
188  if (*end_ptr != ':')
189  return 0;
190 
191  base = end_ptr+1;
192  tmp->tm_sec = strtol(base, &end_ptr, 10);
193  base = end_ptr;
194 
195  while (isspace(*base))
196  base++;
197 
198  long offset = 0;
199  bool positive = true;
200  if (*base) {
201  if (*base == '+')
202  positive = true;
203  else if (*base == '-')
204  positive = false;
205  else
206  return 0;
207  base++;
208 
209  if (!isdigit(*base))
210  return 0;
211  offset += 360000 * (*base-'0');
212  if (*++base == 0)
213  return 0;
214 
215  if (!isdigit(*base))
216  return 0;
217  offset += 36000 * (*base-'0');
218  if (*++base == 0)
219  return 0;
220 
221  if (!isdigit(*base))
222  return 0;
223  offset += 600 * (*base-'0');
224  if (*++base == 0)
225  return 0;
226 
227  if (!isdigit(*base))
228  return 0;
229  offset += 60 * (*base-'0');
230  }
231 
232  if (!positive)
233  offset *= -1;
234 
235 #if !defined(__sun__)
236  tmp->tm_gmtoff = offset;
237 #endif
238 
239  return ptr;
240 }
241 
static String filename
Definition: Config.cc:48
bool next(ApacheLogEntry &entry)
STL namespace.
char * user_agent
char * userid
struct tm tm
Compatibility Macros for C/C++.
char * request
char * object_size
char * extract_timestamp(char *base, struct tm *tmp)
char * extract_field(char *base, char **field_ptr)
char * response_code
Hypertable definitions
void load(std::string filename)
char * referer
char * ip_address