OpenTREP Logo  0.07.18
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
opentrep-indexer.cpp
Go to the documentation of this file.
1// STL
2#include <cassert>
3#include <iostream>
4#include <sstream>
5#include <fstream>
6#include <vector>
7#include <string>
8// Boost (Extended STL)
9#include <boost/date_time/posix_time/posix_time.hpp>
10#include <boost/date_time/gregorian/gregorian.hpp>
11#include <boost/program_options.hpp>
12// OpenTREP
14#include <opentrep/Location.hpp>
16#include <opentrep/DBType.hpp>
19#include <opentrep/config/opentrep-paths.hpp>
20
21
22// //////// Type definitions ///////
/** List of words (plain strings). */
using WordList_T = std::vector<std::string>;
24
25
26// //////// Constants //////
/** Default file-path of the log file, used when the "log" option is not given. */
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME = "opentrep-indexer.log";
31
45
46
47// ///////// Parsing of Options & Configuration /////////
50
52int readConfiguration (int argc, char* argv[],
53 std::string& ioPORFilepath,
54 std::string& ioXapianDBFilepath,
55 std::string& ioSQLDBTypeString,
56 std::string& ioSQLDBConnectionString,
57 unsigned short& ioDeploymentNumber,
58 bool& ioIncludeNonIATAPOR,
59 bool& ioIndexPORInXapian,
60 bool& ioAddPORInDB,
61 std::string& ioLogFilename,
62 std::ostringstream& oStr) {
63
64 // Declare a group of options that will be allowed only on command line
65 boost::program_options::options_description generic ("Generic options");
66 generic.add_options()
67 ("prefix", "print installation prefix")
68 ("version,v", "print version string")
69 ("help,h", "produce help message");
70
71 // Declare a group of options that will be allowed both on command
72 // line and in config file
73 boost::program_options::options_description config ("Configuration");
74 config.add_options()
75 ("porfile,p",
76 boost::program_options::value< std::string >(&ioPORFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_POR_FILEPATH),
77 "POR file-path (e.g., optd_por_public.csv)")
78 ("xapiandb,d",
79 boost::program_options::value< std::string >(&ioXapianDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH),
80 "Xapian database filepath (e.g., /tmp/opentrep/xapian_traveldb)")
81 ("sqldbtype,t",
82 boost::program_options::value< std::string >(&ioSQLDBTypeString)->default_value(OPENTREP::DEFAULT_OPENTREP_SQL_DB_TYPE),
83 "SQL database type (e.g., nodb for no SQL database, sqlite for SQLite, mysql for MariaDB/MySQL)")
84 ("sqldbconx,s",
85 boost::program_options::value< std::string >(&ioSQLDBConnectionString),
86 "SQL database connection string (e.g., ~/tmp/opentrep/sqlite_travel.db for SQLite, \"db=trep_trep user=trep password=trep\" for MariaDB/MySQL)")
87 ("deploymentnb,m",
88 boost::program_options::value<unsigned short>(&ioDeploymentNumber)->default_value(OPENTREP::DEFAULT_OPENTREP_DEPLOYMENT_NUMBER),
89 "Deployment number (from to N, where N=1 normally)")
90 ("noniata,n",
91 boost::program_options::value<bool>(&ioIncludeNonIATAPOR)->default_value(K_OPENTREP_DEFAULT_POR_INCLUDING),
92 "Whether or not to include POR not referenced by IATA (0 = only IATA-referenced POR, 1 = all POR are included)")
93 ("xapianindex,x",
94 boost::program_options::value<bool>(&ioIndexPORInXapian)->default_value(OPENTREP::DEFAULT_OPENTREP_INDEX_IN_XAPIAN),
95 "Whether or not to index the POR in Xapian (0 = do not touch the Xapian index, 1 = re-index all the POR in Xapian)")
96 ("dbadd,a",
97 boost::program_options::value<bool>(&ioAddPORInDB)->default_value(OPENTREP::DEFAULT_OPENTREP_ADD_IN_DB),
98 "Whether or not to add and index the POR in the SQL-based database (0 = do not touch the SQL-based database, 1 = add and re-index all the POR in the SQL-based database)")
99 ("log,l",
100 boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME),
101 "Filepath for the logs")
102 ;
103
104 // Hidden options, will be allowed both on command line and
105 // in config file, but will not be shown to the user.
106 boost::program_options::options_description hidden ("Hidden options");
107 hidden.add_options()
108 ("copyright",
109 boost::program_options::value< std::vector<std::string> >(),
110 "Show the copyright (license)");
111
112 boost::program_options::options_description cmdline_options;
113 cmdline_options.add(generic).add(config).add(hidden);
114
115 boost::program_options::options_description config_file_options;
116 config_file_options.add(config).add(hidden);
117
118 boost::program_options::options_description visible ("Allowed options");
119 visible.add(generic).add(config);
120
121 boost::program_options::positional_options_description p;
122 p.add ("copyright", -1);
123
124 boost::program_options::variables_map vm;
125 boost::program_options::
126 store (boost::program_options::command_line_parser (argc, argv).
127 options (cmdline_options).positional(p).run(), vm);
128
129 std::ifstream ifs ("opentrep-indexer.cfg");
130 boost::program_options::store (parse_config_file (ifs, config_file_options),
131 vm);
132 boost::program_options::notify (vm);
133
134 if (vm.count ("help")) {
135 std::cout << visible << std::endl;
137 }
138
139 if (vm.count ("version")) {
140 std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl;
142 }
143
144 if (vm.count ("prefix")) {
145 std::cout << "Installation prefix: " << PREFIXDIR << std::endl;
147 }
148
149 if (vm.count ("porfile")) {
150 ioPORFilepath = vm["porfile"].as< std::string >();
151 oStr << "POR file-path is: " << ioPORFilepath << std::endl;
152 }
153
154 if (vm.count ("deploymentnb")) {
155 ioDeploymentNumber = vm["deploymentnb"].as< unsigned short >();
156 oStr << "Deployment number: " << ioDeploymentNumber << std::endl;
157 }
158
159 if (vm.count ("xapiandb")) {
160 ioXapianDBFilepath = vm["xapiandb"].as< std::string >();
161 oStr << "Xapian index/database filepath is: " << ioXapianDBFilepath
162 << ioDeploymentNumber << std::endl;
163 }
164
165 // Parse the SQL database type, if any is given
166 if (vm.count ("sqldbtype")) {
167 ioSQLDBTypeString = vm["sqldbtype"].as< std::string >();
168 oStr << "SQL database type is: " << ioSQLDBTypeString << std::endl;
169 }
170
182 const OPENTREP::DBType lDBType (ioSQLDBTypeString);
183 if (lDBType == OPENTREP::DBType::NODB) {
184 ioAddPORInDB = false;
185 ioSQLDBConnectionString = "";
186
187 } else if (lDBType == OPENTREP::DBType::SQLITE3) {
188 ioAddPORInDB = true;
189 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH;
190
191 } else if (lDBType == OPENTREP::DBType::MYSQL) {
192 ioAddPORInDB = true;
193 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_MYSQL_CONN_STRING;
194 }
195
196 // Set the SQL database connection string, if any is given
197 if (vm.count ("sqldbconx")) {
198 ioSQLDBConnectionString = vm["sqldbconx"].as< std::string >();
199 }
200
201 // Reporting of the SQL database connection string
202 if (lDBType == OPENTREP::DBType::SQLITE3
203 || lDBType == OPENTREP::DBType::MYSQL) {
204 const std::string& lSQLDBConnString =
206 ioSQLDBConnectionString,
207 ioDeploymentNumber);
208 //
209 oStr << "SQL database connection string is: " << lSQLDBConnString
210 << std::endl;
211 }
212
213 oStr << "Are non-IATA-referenced POR included? " << ioIncludeNonIATAPOR
214 << std::endl;
215
216 oStr << "Index the POR in Xapian? " << ioIndexPORInXapian << std::endl;
217
218 oStr << "Add and re-index the POR in the SQL-based database? " << ioAddPORInDB
219 << std::endl;
220
221 if (vm.count ("log")) {
222 ioLogFilename = vm["log"].as< std::string >();
223 oStr << "Log filename is: " << ioLogFilename << std::endl;
224 }
225
226 return 0;
227}
228
229
230// /////////////// M A I N /////////////////
231int main (int argc, char* argv[]) {
232
233 // Output log File
234 std::string lLogFilename;
235
236 // File-path of POR (points of reference)
237 std::string lPORFilepathStr;
238
239 // Xapian database name (directory of the index)
240 std::string lXapianDBNameStr;
241
242 // SQL database type
243 std::string lSQLDBTypeStr;
244
245 // SQL database connection string
246 std::string lSQLDBConnectionStr;
247
248 // Deployment number/version
249 OPENTREP::DeploymentNumber_T lDeploymentNumber;
250
251 // Whether or not to include non-IATA-referenced POR
252 OPENTREP::shouldIndexNonIATAPOR_T lIncludeNonIATAPOR;
253
254 // Whether or not to index the POR in Xapian
255 OPENTREP::shouldIndexPORInXapian_T lShouldIndexPORInXapian;
256
257 // Whether or not to insert the POR in the SQL database
258 OPENTREP::shouldAddPORInSQLDB_T lShouldAddPORInSQLDB;
259
260 // Log stream for the introduction part
261 std::ostringstream oIntroStr;
262
263 // Call the command-line option parser
264 const int lOptionParserStatus =
265 readConfiguration (argc, argv, lPORFilepathStr, lXapianDBNameStr,
266 lSQLDBTypeStr, lSQLDBConnectionStr, lDeploymentNumber,
267 lIncludeNonIATAPOR, lShouldIndexPORInXapian,
268 lShouldAddPORInSQLDB, lLogFilename, oIntroStr);
269
270 if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) {
271 return 0;
272 }
273
274 // Set the log parameters
275 std::ofstream logOutputFile;
276 // open and clean the log outputfile
277 logOutputFile.open (lLogFilename.c_str());
278 logOutputFile.clear();
279
280 //
281 oIntroStr << "Parsing and indexing the OpenTravelData POR data file (into "
282 << "Xapian and/or SQL databases) may take a few tens of minutes "
283 << "on some architectures (and a few minutes on fastest ones)..."
284 << std::endl;
285 std::cout << oIntroStr.str();
286
287 // DEBUG
288 // Get the current time in UTC Timezone
289 boost::posix_time::ptime lTimeUTC =
290 boost::posix_time::second_clock::universal_time();
291 logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
292 << __LINE__ << "]:Parameters:" << std::endl
293 << oIntroStr.str() << std::endl;
294
295 // Initialise the context
296 const OPENTREP::PORFilePath_T lPORFilepath (lPORFilepathStr);
297 const OPENTREP::TravelDBFilePath_T lXapianDBName (lXapianDBNameStr);
298 const OPENTREP::DBType lDBType (lSQLDBTypeStr);
299 const OPENTREP::SQLDBConnectionString_T lSQLDBConnStr (lSQLDBConnectionStr);
300 OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lPORFilepath,
301 lXapianDBName, lDBType,
302 lSQLDBConnStr, lDeploymentNumber,
303 lIncludeNonIATAPOR,
304 lShouldIndexPORInXapian,
305 lShouldAddPORInSQLDB);
306
307 // Launch the indexation
308 const OPENTREP::NbOfDBEntries_T lNbOfEntries =
309 opentrepService.insertIntoDBAndXapian();
310
311 //
312 std::ostringstream oStr;
313 oStr << lNbOfEntries << " entries have been processed" << std::endl;
314 std::cout << oStr.str();
315
316 // Get the current time in UTC Timezone
317 lTimeUTC = boost::posix_time::second_clock::universal_time();
318 logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
319 << __LINE__ << "]:" << oStr.str() << std::endl;
320
321 // Close the Log outputFile
322 logOutputFile.close();
323
324 return 0;
325}
Interface for the OPENTREP Services.
NbOfDBEntries_T insertIntoDBAndXapian()
const std::string DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
bool shouldAddPORInSQLDB_T
unsigned int NbOfDBEntries_T
const std::string DEFAULT_OPENTREP_SQL_DB_TYPE
std::string parseAndDisplayConnectionString(const DBType &iDBType, const std::string &iSQLDBConnStr, const DeploymentNumber_T &iDeploymentNumber)
const bool DEFAULT_OPENTREP_INDEX_IN_XAPIAN
const unsigned short DEFAULT_OPENTREP_DEPLOYMENT_NUMBER
bool shouldIndexPORInXapian_T
const std::string DEFAULT_OPENTREP_MYSQL_CONN_STRING
unsigned short DeploymentNumber_T
const std::string DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
const bool DEFAULT_OPENTREP_ADD_IN_DB
const std::string DEFAULT_OPENTREP_POR_FILEPATH
bool shouldIndexNonIATAPOR_T
const int K_OPENTREP_EARLY_RETURN_STATUS
const bool K_OPENTREP_DEFAULT_POR_INCLUDING
int main(int argc, char *argv[])
int readConfiguration(int argc, char *argv[], std::string &ioPORFilepath, std::string &ioXapianDBFilepath, std::string &ioSQLDBTypeString, std::string &ioSQLDBConnectionString, unsigned short &ioDeploymentNumber, bool &ioIncludeNonIATAPOR, bool &ioIndexPORInXapian, bool &ioAddPORInDB, std::string &ioLogFilename, std::ostringstream &oStr)
std::vector< std::string > WordList_T
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-indexer.log")
Enumeration of database types.
Definition DBType.hpp:17