0.9.8.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
WordStream.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007-2015 Hypertable, Inc.
3  *
4  * This file is part of Hypertable.
5  *
6  * Hypertable is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 3
9  * of the License, or any later version.
10  *
11  * Hypertable is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21 
27 #ifndef Common_WordStream_h
28 #define Common_WordStream_h
29 
30 #include <Common/String.h>
31 
32 #include <memory>
33 #include <random>
34 #include <vector>
35 
36 namespace Hypertable {
37 
46  class WordStream {
47  public:
57  WordStream(const String &word_file, unsigned seed, size_t words_per_record,
58  bool random = false, const char *separator = " ");
59 
63  virtual ~WordStream();
64 
66  const char *next();
67 
68  private:
70  struct word_info {
71  const char *word;
72  size_t len;
73  };
74 
76  std::mt19937 ms_rng;
77 
79  char *m_base;
80 
82  const char *m_end;
83 
85  const char *m_separator;
86 
89 
91  off_t m_len;
92 
94  std::vector<struct word_info> m_words;
95 
97  std::vector<size_t> m_offset;
98 
101 
103  bool m_random;
104  };
105 
106  typedef std::shared_ptr<WordStream> WordStreamPtr;
107 
110 }
111 
112 #endif // Common_WordStream_h
std::string String
A String is simply a typedef to std::string.
Definition: String.h:44
const char * m_separator
The separator, as specified by the user.
Definition: WordStream.h:85
const char * m_end
End pointer for the memory mapped file.
Definition: WordStream.h:82
std::mt19937 ms_rng
Random number generator.
Definition: WordStream.h:76
size_t m_words_per_record
Words per record, as specified by the user.
Definition: WordStream.h:88
off_t m_len
Length of the memory mapped file.
Definition: WordStream.h:91
std::vector< struct word_info > m_words
All words from the mapped file.
Definition: WordStream.h:94
std::shared_ptr< WordStream > WordStreamPtr
Definition: WordStream.h:106
String m_record
The current string.
Definition: WordStream.h:100
char * m_base
Base pointer for the memory mapped file.
Definition: WordStream.h:79
struct WordStream::word_info
Internal structure for a single word.
Definition: WordStream.h:70
std::vector< size_t > m_offset
Helper for parsing the words.
Definition: WordStream.h:97
virtual ~WordStream()
Releases internal resources.
Definition: WordStream.cc:83
Hypertable definitions
class Hypertable::WordStream
A class generating a stream of words; the words are retrieved from a file and can be randomized.
Definition: WordStream.h:46
const char * next()
Retrieves the next word, or an empty string if EOF is reached.
Definition: WordStream.cc:87
A String class based on std::string.
WordStream(const String &word_file, unsigned seed, size_t words_per_record, bool random=false, const char *separator=" ")
Constructor.
Definition: WordStream.cc:46
bool m_random
Whether to return random strings or not.
Definition: WordStream.h:103