0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
freebase_parser.cc
Go to the documentation of this file.
1 
22 #include "Common/Compat.h"
23 
24 #include <cstring>
25 
26 #include "freebase_parser.h"
27 
28 using namespace std;
29 
30 
32 }
33 
37 bool freebase_parser::load(const std::string fname) {
38  ColumnInfo cinfo;
39 
40  m_fin.open(fname.c_str());
41 
42  m_fname = fname;
43 
44  if (!m_fin.is_open()) {
45  cerr << "error: problem opening file '" << m_fname << "' for reading"
46  << endl;
47  return false;
48  }
49 
50  getline (m_fin, m_header_line);
51  m_lineno++;
52 
53  m_column_name = -1;
54  m_column_id = -1;
55 
56  char *base = (char *)m_header_line.c_str();
57  char *ptr = base;
58  size_t i=0;
59 
60  ptr = strchr(base, '\t');
61  while (ptr) {
62  *ptr = 0;
63  if (!strcmp(base, "name"))
64  m_column_name = i;
65  else if (!strcmp(base, "id"))
66  m_column_id = i;
67  cinfo.name = base;
68  cinfo.name_len = ptr-base;
69  cinfo.value = 0;
70  cinfo.value_len = 0;
71  m_column_info.push_back(cinfo);
72  i++;
73  base = ptr+1;
74  ptr = strchr(base, '\t');
75  }
76 
77  if (!strcmp(base, "name"))
78  m_column_name = i;
79  else if (!strcmp(base, "id"))
80  m_column_id = i;
81  cinfo.name = base;
82  cinfo.name_len = strlen(base);
83  cinfo.value = 0;
84  cinfo.value_len = 0;
85  m_column_info.push_back(cinfo);
86 
87  m_inserts = new InsertRec [ m_column_info.size() ];
88 
89  if (m_column_name != 0) {
90  cerr << "error: 'name' not found in column position 0 in file '"
91  << m_fname << "'" << endl;
92  return false;
93  }
94 
95  if (m_column_id != 1) {
96  cerr << "error: 'id' not found in column position 1 in file '"
97  << m_fname << "'" << endl;
98  return false;
99  }
100 
101  if ((base = (char*)strrchr(fname.c_str(), '/')) == 0)
102  base = (char *)fname.c_str();
103  else
104  base++;
105  if ((ptr = strchr(base, '.')) != 0)
106  *ptr = 0;
107  m_category = base;
108 
109  return true;
110 }
111 
112 
113 
114 
// Body of the row reader.  The signature line is not part of this listing;
// `countp` is presumably the function's parameter (it is only written to,
// at the end of the loop body).
//
// Reads lines from m_fin until one produces at least two fields with a
// non-empty second field, splits that line on tabs in place, fills
// m_inserts with one record per emitted cell, stores the number of records
// in *countp and returns m_inserts.  Returns 0 once the read loop ends
// without producing a row.
116  char *base, *ptr;
// NOTE(review): nfields is initialised once, outside the loop, and is not
// reset on either `continue` path below, so after a skipped line the next
// line's fields start at a stale index -- confirm and reset per line.
117  size_t nfields=0;
118 
// NOTE(review): the loop tests eof() and never checks getline's result; a
// stream that has failed without reaching EOF would not end this loop.
119  while (!m_fin.eof()) {
120 
121  getline(m_fin, m_line);
122  m_lineno++;
123 
// The line is tokenised in place: each tab in m_line's buffer is
// overwritten with NUL and the column values point into that buffer.
124  base = (char *)m_line.c_str();
// This assignment is overwritten by the next statement.
125  ptr = base;
126  ptr = strchr(base, '\t');
127 
128  while (ptr) {
129  *ptr = 0;
// Stop once every known column has a value; extra fields are ignored.
130  if (nfields == m_column_info.size())
131  break;
132  m_column_info[nfields].value = base;
133  m_column_info[nfields].value_len = ptr - base;
134  nfields++;
135  base = ++ptr;
136  ptr = strchr(base, '\t');
137  }
138 
// The text after the last tab (or the whole line if it has no tab) is the
// final field, provided a column slot is still free.
139  if (nfields < m_column_info.size()) {
140  m_column_info[nfields].value = base;
141  m_column_info[nfields].value_len = strlen(base);
142  nfields++;
143  }
144 
// Lines with fewer than two fields are skipped silently; a line whose
// second field is empty is reported on cerr and skipped.
145  if (nfields < 2)
146  continue;
147  else if (*m_column_info[1].value == 0) {
148  cerr << "error: 'id' field not found on line " << m_lineno
149  << " of file '" << m_fname << "'" << endl;
150  continue;
151  }
152 
// The second field is used as the row key of every record emitted below.
153  const char *row_key = m_column_info[1].value;
154  size_t row_key_len = m_column_info[1].value_len;
155 
156  // set 'name' column
157 
// j counts the records written to m_inserts for this line.
158  size_t j=0;
// A "name" record is emitted only when the first field is non-empty.
159  if (*m_column_info[0].value != 0) {
160  m_inserts[j].key.row = row_key;
161  m_inserts[j].key.row_len = row_key_len;
162  m_inserts[j].key.column_family = "name";
// NOTE(review): the listing jumps from 162 to 165 here; the intervening
// lines are not shown.
165  m_inserts[j].value = m_column_info[0].value;
166  m_inserts[j].value_len = m_column_info[0].value_len;
167  j++;
168  }
169 
170  // set 'category' column
// A "category" record carrying m_category is emitted for every row.
171  m_inserts[j].key.row = row_key;
172  m_inserts[j].key.row_len = row_key_len;
173  m_inserts[j].key.column_family = "category";
// NOTE(review): the listing jumps from 173 to 176 here; the intervening
// lines are not shown.
176  m_inserts[j].value = m_category.c_str();
177  m_inserts[j].value_len = m_category.length();
178  j++;
179 
// Each remaining non-empty field becomes a "property" record.
180  for (size_t i=2; i<nfields; i++) {
181  if (*m_column_info[i].value) {
182  m_inserts[j].key.row = row_key;
183  m_inserts[j].key.row_len = row_key_len;
184  m_inserts[j].key.column_family = "property";
// NOTE(review): the listing jumps from 184 to 187 here; the intervening
// lines are not shown.
187  m_inserts[j].value = m_column_info[i].value;
188  m_inserts[j].value_len = m_column_info[i].value_len;
189  j++;
190  }
191  }
192 
// Report how many records were filled in and hand back the shared array.
193  *countp = j;
194  return m_inserts;
195  }
196 
// The read loop ended without producing a row.
197  return 0;
198 }
size_t name_len
Hypertable::KeySpec key
const char * name
const char * column_qualifier
Definition: KeySpec.h:128
std::string m_category
std::vector< ColumnInfo > m_column_info
STL namespace.
bool load(const std::string fname)
size_t column_qualifier_len
Definition: KeySpec.h:129
const void * row
Definition: KeySpec.h:125
const void * value
std::string m_header_line
size_t value_len
InsertRec * m_inserts
Compatibility Macros for C/C++.
uint32_t value_len
std::ifstream m_fin
InsertRec * next(int *countp)
std::string m_fname
const char * value
const char * column_family
Definition: KeySpec.h:127
std::string m_line