OpenTREP Logo  0.07.18
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
opentrep-searcher.cpp
Go to the documentation of this file.
1// STL
2#include <cassert>
3#include <iostream>
4#include <sstream>
5#include <fstream>
6#include <vector>
7#include <string>
8// Boost (Extended STL)
9#include <boost/date_time/posix_time/posix_time.hpp>
10#include <boost/date_time/gregorian/gregorian.hpp>
11#include <boost/tokenizer.hpp>
12#include <boost/program_options.hpp>
13// OpenTREP
15#include <opentrep/DBType.hpp>
18#include <opentrep/Location.hpp>
20#include <opentrep/config/opentrep-paths.hpp>
21
22
// //////// Type definitions ///////
/**
 * List of words, each word being a plain STL string.
 */
typedef std::vector<std::string> WordList_T;


// //////// Constants //////
/**
 * Default file-path of the log file.
 */
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME = "opentrep-searcher.log";

/**
 * Default travel query, used when no query is given by the caller.
 * NOTE(review): the words look misspelled on purpose (to exercise the
 * spelling correction) -- confirm.
 */
const std::string K_OPENTREP_DEFAULT_QUERY_STRING =
  "sna francisco rio de janero los angeles reykyavki";

/**
 * Default type of search request (0 = full text, 1 = coordinates).
 */
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE (0);
47
52
53
54// //////////////////////////////////////////////////////////////////////
55void tokeniseStringIntoWordList (const std::string& iPhrase,
56 WordList_T& ioWordList) {
57 // Empty the word list
58 ioWordList.clear();
59
60 // Boost Tokeniser
61 typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T;
62
63 // Define the separators
64 const boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\"");
65
66 // Initialise the phrase to be tokenised
67 Tokeniser_T lTokens (iPhrase, lSepatorList);
68 for (Tokeniser_T::const_iterator tok_iter = lTokens.begin();
69 tok_iter != lTokens.end(); ++tok_iter) {
70 const std::string& lTerm = *tok_iter;
71 ioWordList.push_back (lTerm);
72 }
73}
74
75// //////////////////////////////////////////////////////////////////////
/**
 * Join a list of words into a single string.
 *
 * The words are written in list order, separated by exactly one space,
 * without any leading or trailing space.
 *
 * @param iWordList  Words to be joined.
 * @return The joined string (empty when the list is empty).
 */
std::string createStringFromWordList (const WordList_T& iWordList) {
  std::ostringstream oStr;

  for (WordList_T::const_iterator itWord = iWordList.begin();
       itWord != iWordList.end(); ++itWord) {
    // The separator is derived from the position of the iterator rather
    // than from a 16-bit countdown: a counter of type unsigned short
    // wraps around for lists of more than 65535 words, which makes some
    // separators disappear.
    if (itWord != iWordList.begin()) {
      oStr << " ";
    }
    oStr << *itWord;
  }

  return oStr.str();
}
91
92
93// ///////// Parsing of Options & Configuration /////////
94// A helper function to simplify the main part.
/**
 * Dump a vector into an output stream: every element is written in
 * order, each one followed by a single space (including the last one).
 *
 * @param os  Stream to write into.
 * @param v   Vector to be dumped.
 * @return The stream given as parameter.
 */
template<class T> std::ostream& operator<< (std::ostream& os,
                                            const std::vector<T>& v) {
  for (typename std::vector<T>::const_iterator itElem = v.begin();
       itElem != v.end(); ++itElem) {
    os << *itElem << " ";
  }
  return os;
}
100
103
105int readConfiguration (int argc, char* argv[],
106 unsigned short& ioSpellingErrorDistance,
107 std::string& ioQueryString,
108 std::string& ioXapianDBFilepath,
109 std::string& ioSQLDBTypeString,
110 std::string& ioSQLDBConnectionString,
111 unsigned short& ioDeploymentNumber,
112 std::string& ioLogFilename,
113 unsigned short& ioSearchType,
114 std::ostringstream& oStr) {
115
116 // Initialise the travel query string, if that one is empty
117 if (ioQueryString.empty() == true) {
118 ioQueryString = K_OPENTREP_DEFAULT_QUERY_STRING;
119 }
120
121 // Transform the query string into a list of words (STL strings)
122 WordList_T lWordList;
123 tokeniseStringIntoWordList (ioQueryString, lWordList);
124
125 // Declare a group of options that will be allowed only on command line
126 boost::program_options::options_description generic ("Generic options");
127 generic.add_options()
128 ("prefix", "print installation prefix")
129 ("version,v", "print version string")
130 ("help,h", "produce help message");
131
132 // Declare a group of options that will be allowed both on command
133 // line and in config file
134 boost::program_options::options_description config ("Configuration");
135 config.add_options()
136 ("error,e",
137 boost::program_options::value< unsigned short >(&ioSpellingErrorDistance)->default_value(K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE),
138 "Spelling error distance (e.g., 3)")
139 ("xapiandb,d",
140 boost::program_options::value< std::string >(&ioXapianDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH),
141 "Xapian database filepath (e.g., /tmp/opentrep/xapian_traveldb)")
142 ("sqldbtype,t",
143 boost::program_options::value< std::string >(&ioSQLDBTypeString)->default_value(OPENTREP::DEFAULT_OPENTREP_SQL_DB_TYPE),
144 "SQL database type (e.g., nodb for no SQL database, sqlite for SQLite, mysql for MariaDB/MySQL)")
145 ("sqldbconx,s",
146 boost::program_options::value< std::string >(&ioSQLDBConnectionString),
147 "SQL database connection string (e.g., ~/tmp/opentrep/sqlite_travel.db for SQLite, "
148 "\"db=trep_trep user=trep password=trep\" for MariaDB/MySQL)")
149 ("deploymentnb,m",
150 boost::program_options::value<unsigned short>(&ioDeploymentNumber)->default_value(OPENTREP::DEFAULT_OPENTREP_DEPLOYMENT_NUMBER),
151 "Deployment number (from to N, where N=1 normally)")
152 ("log,l",
153 boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME),
154 "Filepath for the logs")
155 ("type,y",
156 boost::program_options::value<unsigned short>(&ioSearchType)->default_value(K_OPENTREP_DEFAULT_SEARCH_TYPE),
157 "Type of search request (0 = full text, 1 = coordinates)")
158 ("query,q",
159 boost::program_options::value< WordList_T >(&lWordList)->multitoken(),
160 "Travel query word list (e.g. sna francisco rio de janero los angeles reykyavki), "
161 "which sould be located at the end of the command line (otherwise, "
162 "the other options would be interpreted as part of that travel query word list)")
163 ;
164
165 // Hidden options, will be allowed both on command line and
166 // in config file, but will not be shown to the user.
167 boost::program_options::options_description hidden ("Hidden options");
168 hidden.add_options()
169 ("copyright",
170 boost::program_options::value< std::vector<std::string> >(),
171 "Show the copyright (license)");
172
173 boost::program_options::options_description cmdline_options;
174 cmdline_options.add(generic).add(config).add(hidden);
175
176 boost::program_options::options_description config_file_options;
177 config_file_options.add(config).add(hidden);
178
179 boost::program_options::options_description visible ("Allowed options");
180 visible.add(generic).add(config);
181
182 boost::program_options::positional_options_description p;
183 p.add ("copyright", -1);
184
185 boost::program_options::variables_map vm;
186 boost::program_options::
187 store (boost::program_options::command_line_parser (argc, argv).
188 options (cmdline_options).positional(p).run(), vm);
189
190 std::ifstream ifs ("opentrep-searcher.cfg");
191 boost::program_options::store (parse_config_file (ifs, config_file_options),
192 vm);
193 boost::program_options::notify (vm);
194
195 if (vm.count ("help")) {
196 std::cout << visible << std::endl;
198 }
199
200 if (vm.count ("version")) {
201 std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl;
203 }
204
205 if (vm.count ("prefix")) {
206 std::cout << "Installation prefix: " << PREFIXDIR << std::endl;
208 }
209
210 if (vm.count ("deploymentnb")) {
211 ioDeploymentNumber = vm["deploymentnb"].as< unsigned short >();
212 oStr << "Deployment number: " << ioDeploymentNumber << std::endl;
213 }
214
215 if (vm.count ("xapiandb")) {
216 ioXapianDBFilepath = vm["xapiandb"].as< std::string >();
217 oStr << "Xapian database filepath is: " << ioXapianDBFilepath
218 << ioDeploymentNumber << std::endl;
219 }
220
221 if (vm.count ("sqldbtype")) {
222 ioSQLDBTypeString = vm["sqldbtype"].as< std::string >();
223 oStr << "SQL database type is: " << ioSQLDBTypeString << std::endl;
224 }
225
226 // Derive the detault connection string depending on the SQL database type
227 const OPENTREP::DBType lDBType (ioSQLDBTypeString);
228 if (lDBType == OPENTREP::DBType::NODB) {
229 ioSQLDBConnectionString = "";
230
231 } else if (lDBType == OPENTREP::DBType::SQLITE3) {
232 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH;
233
234 } else if (lDBType == OPENTREP::DBType::MYSQL) {
235 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_MYSQL_CONN_STRING;
236 }
237
238 // Set the SQL database connection string, if any is given
239 if (vm.count ("sqldbconx")) {
240 ioSQLDBConnectionString = vm["sqldbconx"].as< std::string >();
241 }
242
243 // Reporting of the SQL database connection string
244 if (lDBType == OPENTREP::DBType::SQLITE3
245 || lDBType == OPENTREP::DBType::MYSQL) {
246 const std::string& lSQLDBConnString =
248 ioSQLDBConnectionString,
249 ioDeploymentNumber);
250 //
251 oStr << "SQL database connection string is: " << lSQLDBConnString
252 << std::endl;
253 }
254
255 if (vm.count ("log")) {
256 ioLogFilename = vm["log"].as< std::string >();
257 oStr << "Log filename is: " << ioLogFilename << std::endl;
258 }
259
260 oStr << "The type of search is: " << ioSearchType << std::endl;
261
262 oStr << "The spelling error distance is: " << ioSpellingErrorDistance
263 << std::endl;
264
265 ioQueryString = createStringFromWordList (lWordList);
266 oStr << "The travel query string is: " << ioQueryString << std::endl;
267
268 return 0;
269}
270
274std::string parseQuery (OPENTREP::OPENTREP_Service& ioOpentrepService,
275 const OPENTREP::TravelQuery_T& iTravelQuery) {
276 std::ostringstream oStr;
277
278 // Query the Xapian database (index)
279 OPENTREP::WordList_T lNonMatchedWordList;
280 OPENTREP::LocationList_T lLocationList;
281 const OPENTREP::NbOfMatches_T nbOfMatches =
282 ioOpentrepService.interpretTravelRequest (iTravelQuery, lLocationList,
283 lNonMatchedWordList);
284
285 oStr << nbOfMatches << " (geographical) location(s) have been found "
286 << "matching your query (`" << iTravelQuery << "'). "
287 << lNonMatchedWordList.size() << " word(s) was/were left unmatched."
288 << std::endl;
289
290 if (nbOfMatches != 0) {
292 for (OPENTREP::LocationList_T::const_iterator itLocation =
293 lLocationList.begin();
294 itLocation != lLocationList.end(); ++itLocation, ++idx) {
295 const OPENTREP::Location& lLocation = *itLocation;
296 oStr << " [" << idx << "]: " << lLocation << std::endl;
297 }
298 }
299
300 if (lNonMatchedWordList.empty() == false) {
301 oStr << "List of unmatched words:" << std::endl;
302
304 for (OPENTREP::WordList_T::const_iterator itWord =
305 lNonMatchedWordList.begin();
306 itWord != lNonMatchedWordList.end(); ++itWord, ++idx) {
307 const OPENTREP::Word_T& lWord = *itWord;
308 oStr << " [" << idx << "]: " << lWord << std::endl;
309 }
310 }
311
312 return oStr.str();
313}
314
315// /////////////// M A I N /////////////////
316int main (int argc, char* argv[]) {
317
318 // Travel query
319 OPENTREP::TravelQuery_T lTravelQuery;
320
321 // Output log File
322 std::string lLogFilename;
323
324 // Xapian database name (directory of the index)
325 std::string lXapianDBNameStr;
326
327 // Type of search
328 unsigned short lSearchType;
329
330 // Xapian spelling error distance
331 unsigned short lSpellingErrorDistance;
332
333 // SQL database type
334 std::string lSQLDBTypeStr;
335
336 // SQL database connection string
337 std::string lSQLDBConnectionStr;
338
339 // Deployment number/version
340 OPENTREP::DeploymentNumber_T lDeploymentNumber;
341
342 // Log stream for the introduction part
343 std::ostringstream oIntroStr;
344
345 // Call the command-line option parser
346 const int lOptionParserStatus =
347 readConfiguration (argc, argv, lSpellingErrorDistance, lTravelQuery,
348 lXapianDBNameStr, lSQLDBTypeStr, lSQLDBConnectionStr,
349 lDeploymentNumber, lLogFilename, lSearchType, oIntroStr);
350
351 if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) {
352 return 0;
353 }
354
355 // Set the log parameters
356 std::ofstream logOutputFile;
357 // open and clean the log outputfile
358 logOutputFile.open (lLogFilename.c_str());
359 logOutputFile.clear();
360
361 // Report the parameters
362 std::cout << oIntroStr.str();
363
364 // DEBUG
365 // Get the current time in UTC Timezone
366 boost::posix_time::ptime lTimeUTC =
367 boost::posix_time::second_clock::universal_time();
368 logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
369 << __LINE__ << "]:Parameters:" << std::endl
370 << oIntroStr.str() << std::endl;
371
372 //
373 std::ostringstream oStr;
374 if (lSearchType == 0) {
375 // Initialise the context
376 const OPENTREP::TravelDBFilePath_T lXapianDBName (lXapianDBNameStr);
377 const OPENTREP::DBType lDBType (lSQLDBTypeStr);
378 const OPENTREP::SQLDBConnectionString_T lSQLDBConnStr (lSQLDBConnectionStr);
379 OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lXapianDBName,
380 lDBType, lSQLDBConnStr,
381 lDeploymentNumber);
382
383 // Check the directory of the Xapian database/index exists and is accessible
385 opentrepService.getFilePaths();
386 const OPENTREP::TravelDBFilePath_T& lActualXapianDBDir= lFPSet.second.first;
387 const bool lExistXapianDBDir =
388 opentrepService.checkXapianDBOnFileSystem (lActualXapianDBDir);
389 if (lExistXapianDBDir == false) {
390 std::ostringstream errorStr;
391 errorStr << "Error - The file-path to the Xapian database/index ('"
392 << lActualXapianDBDir
393 << "') does not exist or is not a directory." << std::endl;
394 errorStr << "\tThat usually means that the OpenTREP indexer "
395 << "(opentrep-indexer) has not been launched yet, "
396 << "or that it has operated on a different Xapian "
397 << "database/index file-path." << std::endl;
398 errorStr << "\tFor instance the Xapian database/index may have been "
399 << "created with a different deployment number ("
400 << lDeploymentNumber << " being the current deployment number)";
401 std::cerr << errorStr.str() << std::endl;
402 return -1;
403 }
404
405 // Parse the query and retrieve the places from Xapian only
406 const std::string& lOutput = parseQuery (opentrepService, lTravelQuery);
407 oStr << lOutput;
408
409 } else {
410 oStr << "Finding the airports closest to: " << lTravelQuery << std::endl;
411 }
412
413 //
414 std::cout << oStr.str();
415
416 // Get the current time in UTC Timezone
417 lTimeUTC = boost::posix_time::second_clock::universal_time();
418 logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
419 << __LINE__ << "]:Results:" << std::endl
420 << oStr.str() << std::endl;
421
422 // Close the Log outputFile
423 logOutputFile.close();
424
425 return 0;
426}
Interface for the OPENTREP Services.
bool checkXapianDBOnFileSystem(const TravelDBFilePath_T &) const
std::pair< const PORFilePath_T, const DBFilePathPair_T > FilePathSet_T
FilePathSet_T getFilePaths() const
NbOfMatches_T interpretTravelRequest(const std::string &iTravelQuery, LocationList_T &, WordList_T &)
const std::string DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
std::list< Word_T > WordList_T
std::string Word_T
std::string TravelQuery_T
const std::string DEFAULT_OPENTREP_SQL_DB_TYPE
std::string parseAndDisplayConnectionString(const DBType &iDBType, const std::string &iSQLDBConnStr, const DeploymentNumber_T &iDeploymentNumber)
std::list< Location > LocationList_T
const unsigned short DEFAULT_OPENTREP_DEPLOYMENT_NUMBER
const std::string DEFAULT_OPENTREP_MYSQL_CONN_STRING
unsigned short DeploymentNumber_T
const std::string DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
unsigned short NbOfMatches_T
const int K_OPENTREP_EARLY_RETURN_STATUS
std::vector< std::string > WordList_T
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-indexer.log")
std::string createStringFromWordList(const WordList_T &iWordList)
int main(int argc, char *argv[])
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE
int readConfiguration(int argc, char *argv[], unsigned short &ioSpellingErrorDistance, std::string &ioQueryString, std::string &ioXapianDBFilepath, std::string &ioSQLDBTypeString, std::string &ioSQLDBConnectionString, unsigned short &ioDeploymentNumber, std::string &ioLogFilename, unsigned short &ioSearchType, std::ostringstream &oStr)
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-searcher.log")
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
std::string parseQuery(OPENTREP::OPENTREP_Service &ioOpentrepService, const OPENTREP::TravelQuery_T &iTravelQuery)
std::ostream & operator<<(std::ostream &os, const std::vector< T > &v)
const unsigned short K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE
const std::string K_OPENTREP_DEFAULT_QUERY_STRING("sna francisco rio de janero los angeles reykyavki")
Enumeration of database types.
Definition DBType.hpp:17
Structure modelling a (geographical) location.
Definition Location.hpp:25