OpenTREP Logo  0.07.18
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
RequestInterpreter.cpp
Go to the documentation of this file.
1// //////////////////////////////////////////////////////////////////////
2// Import section
3// //////////////////////////////////////////////////////////////////////
4// STL
5#include <cassert>
6#include <sstream>
7#include <string>
8#include <vector>
9#include <exception>
10// Boost
11#include <boost/filesystem.hpp>
12#include <boost/regex.hpp>
13// SOCI
14#include <soci/soci.h>
15// OpenTrep
16#include <opentrep/DBType.hpp>
35
36namespace OPENTREP {
37
49 // //////////////////////////////////////////////////////////////////////
50 void addUnmatchedWord (const TravelQuery_T& iQueryString,
51 WordList_T& ioWordList, WordSet_T& ioWordSet) {
52 // Token-ise the given string
53 WordList_T lQueryStringWordList;
55 lQueryStringWordList);
56 if (lQueryStringWordList.size() == 1) {
57 // Add the unmatched/unknown word, only when that latter has not
58 // already been stored, and when it is not black-listed.
59 const bool shouldBeKept = Filter::shouldKeep ("", iQueryString);
60 //const bool shouldBeKept = true;
61
62 WordSet_T::const_iterator itWord = ioWordSet.find (iQueryString);
63 if (shouldBeKept == true && itWord == ioWordSet.end()) {
64 ioWordSet.insert (iQueryString);
65 ioWordList.push_back (iQueryString);
66 }
67 }
68 }
69
70 // //////////////////////////////////////////////////////////////////////
71 void createPlaces (const ResultCombination& iResultCombination,
72 PlaceHolder& ioPlaceHolder) {
73
74 // Retrieve the best matching ResultHolder object.
75 const ResultHolder& lResultHolder =
76 iResultCombination.getBestMatchingResultHolder();
77
78 // Browse the list of result objects
79 const ResultList_T& lResultList = lResultHolder.getResultList();
80 for (ResultList_T::const_iterator itResult = lResultList.begin();
81 itResult != lResultList.end(); ++itResult) {
82 // Retrieve the result object
83 const Result* lResult_ptr = *itResult;
84 assert (lResult_ptr != NULL);
85
90 const bool hasFullTextMatched = lResult_ptr->hasFullTextMatched();
91 if (hasFullTextMatched == false) {
92 continue;
93 }
94 assert (hasFullTextMatched == true);
95
96 // Retrieve the Xapian document data (string)
97 const std::string& lDocDataStr = lResult_ptr->getBestDocData();
98 const RawDataString_T& lDocData = RawDataString_T (lDocDataStr);
99
100 // Parse the POR details and create the corresponding Location structure
101 const Location& lLocation = Result::retrieveLocation (lDocData);
102
103 // Instanciate an empty place object, which will be filled from the
104 // rows retrieved from the database.
105 Place& lPlace = FacPlace::instance().create (lLocation);
106
107 // Insert the Place object within the PlaceHolder object
108 FacPlaceHolder::initLinkWithPlace (ioPlaceHolder, lPlace);
109
110 // Fill the place with the remaining of the Result details.
111 lResult_ptr->fillPlace (lPlace);
112
113 // DEBUG
114 OPENTREP_LOG_DEBUG ("Retrieved Document: " << lPlace.toString());
115 }
116 }
117
130 // //////////////////////////////////////////////////////////////////////
131 void searchString (const StringPartition& iStringPartition,
132 const Xapian::Database& iDatabase,
133 ResultCombination& ioResultCombination,
134 WordList_T& ioWordList) {
135
136 // Catch any thrown Xapian::Error exceptions
137 try {
138
139 // Set of unknown words (just to eliminate the duplicates)
140 WordSet_T lWordSet;
141
142 // Browse the partitions
143 for (StringPartition::StringPartition_T::const_iterator itSet =
144 iStringPartition._partition.begin();
145 itSet != iStringPartition._partition.end(); ++itSet) {
146 const StringSet& lStringSet = *itSet;
147
148 // DEBUG
149 OPENTREP_LOG_DEBUG (" ==========");
150 OPENTREP_LOG_DEBUG (" String set: " << lStringSet);
151
152 // Create a ResultHolder object.
153 ResultHolder& lResultHolder =
154 FacResultHolder::instance().create (lStringSet.describe(), iDatabase);
155
156 // Add the ResultHolder object to the dedicated list.
158 lResultHolder);
159
160 // Browse through all the word combinations of the partition
161 for (StringSet::StringSet_T::const_iterator itString =
162 lStringSet._set.begin();
163 itString != lStringSet._set.end(); ++itString) {
164 //
165 const std::string lQueryString (*itString);
166
167 // DEBUG
168 OPENTREP_LOG_DEBUG (" --------");
169 OPENTREP_LOG_DEBUG (" Query string: '" << lQueryString << "'");
170
171 // Create an empty Result object
172 Result& lResult = FacResult::instance().create (lQueryString,
173 iDatabase);
174
175 // Add the Result object to the dedicated list.
176 FacResultHolder::initLinkWithResult (lResultHolder, lResult);
177
178 // Perform the Xapian-based full-text match: the set of
179 // matching documents is filled.
180 const std::string& lMatchedString =
181 lResult.fullTextMatch (iDatabase, lQueryString);
182
183 // When a single-word string is unmatched/unknown by/from Xapian,
184 // add it to the dedicated list (i.e., ioWordList).
185 if (lMatchedString.empty() == true) {
186 OPENTREP::addUnmatchedWord (lQueryString, ioWordList, lWordSet);
187 }
188 }
189
190 // DEBUG
191 OPENTREP_LOG_DEBUG (std::endl
192 << "========================================="
193 << std::endl << "Result holder: "
194 << lResultHolder.toString() << std::endl
195 << "========================================="
196 << std::endl << std::endl);
197 }
198
199 // DEBUG
200 OPENTREP_LOG_DEBUG ("*********************");
201
202 } catch (const Xapian::Error& error) {
203 // Error
204 OPENTREP_LOG_ERROR ("Exception: " << error.get_msg());
205 throw XapianException (error.get_msg());
206 }
207 }
208
225 // //////////////////////////////////////////////////////////////////////
227
228 // Calculate the weights for the full-text matches
229 const bool doesBestMatchingResultHolderExist =
230 ioResultCombination.chooseBestMatchingResultHolder();
231
232 if (doesBestMatchingResultHolderExist == true) {
233 const ResultHolder& lBestMatchingResultHolder =
234 ioResultCombination.getBestMatchingResultHolder();
235
236 // DEBUG
237 const StringSet& lCorrectedStringSet =
238 ioResultCombination.getCorrectedStringSet();
239 OPENTREP_LOG_DEBUG ("The best matching string partition for '"
240 << ioResultCombination.describeShortKey() << "' is "
241 << lBestMatchingResultHolder.describeShortKey()
242 << ", and has got a weight of "
243 << ioResultCombination.getBestMatchingWeight()
244 << "%. The corrected string set is: "
245 << lCorrectedStringSet);
246
247 } else {
248 // DEBUG
249 OPENTREP_LOG_DEBUG ("There is no match for '"
250 << ioResultCombination.describeShortKey() << "'");
251 }
252 }
253
254 // //////////////////////////////////////////////////////////////////////
255 bool RequestInterpreter::areAllCodeOrGeoID (const TravelQuery_T& iQueryString,
256 WordList_T& ioWordList) {
257 bool areAllWordsCodes = true;
258
259 // Token-ise the given string
260 WordHolder::tokeniseStringIntoWordList (iQueryString, ioWordList);
261 for (WordList_T::const_iterator itWord = ioWordList.begin();
262 itWord != ioWordList.end(); ++itWord) {
263 const std::string& lWord = *itWord;
264
265 // IATA code: alpha{3}
266 const boost::regex lIATACodeExp ("^[[:alpha:]]{3}$");
267 const bool lMatchesWithIATACode = regex_match (lWord, lIATACodeExp);
268
269 // ICAO code: (alpha|digit){4}
270 const boost::regex lICAOCodeExp ("^([[:alpha:]]|[[:digit:]]){4}$");
271 const bool lMatchesWithICAOCode = regex_match (lWord, lICAOCodeExp);
272
273 // UN/LOCODE code: alpha{2}(alpha|digit){3}
274 const boost::regex
275 lUNLOCodeExp ("^[[:alpha:]]{2}([[:alpha:]]|[[:digit:]]){3}$");
276 const bool lMatchesWithUNLOCode = regex_match (lWord, lUNLOCodeExp);
277
278 // Geonames ID: digit{1,12}
279 const boost::regex lGeoIDCodeExp ("^[[:digit:]]{1,12}$");
280 const bool lMatchesWithGeoID = regex_match (lWord, lGeoIDCodeExp);
281
282 // If the word is neither a IATA/ICAO code or a Geonames ID,
283 // there is nothing more to be done at that stage. The query string
284 // will have to be fully analysed.
285 // Otherwise, we go on analysing the other words.
286 if (lMatchesWithIATACode == false && lMatchesWithICAOCode == false
287 && lMatchesWithUNLOCode == false && lMatchesWithGeoID == false) {
288 areAllWordsCodes = false;
289 break;
290 }
291 }
292
293 return areAllWordsCodes;
294 }
295
302 // //////////////////////////////////////////////////////////////////////
304 Location oLocation;
305 PageRank_T lMaxPageRank = 0.0;
306
307 for (LocationList_T::const_iterator itLocation = iLocationList.begin();
308 itLocation != iLocationList.end(); ++itLocation) {
309 const Location& lLocation = *itLocation;
310
311 // Get the PageRank value
312 const PageRank_T& lPageRank = lLocation.getPageRank();
313 if (lPageRank > lMaxPageRank) {
314 lMaxPageRank = lPageRank;
315 oLocation = lLocation;
316 }
317 }
318
319 return oLocation;
320 }
321
335 // //////////////////////////////////////////////////////////////////////
337 const SQLDBConnectionString_T& iSQLDBConnStr,
338 const WordList_T& iCodeList,
339 LocationList_T& ioLocationList,
340 WordList_T& ioWordList) {
341 NbOfMatches_T oNbOfMatches = 0;
342
343 // Connect to the SQL database/file
344 soci::session* lSociSession_ptr =
345 DBManager::initSQLDBSession (iSQLDBType, iSQLDBConnStr);
346 if (lSociSession_ptr == NULL) {
347 std::ostringstream oStr;
348 oStr << "The " << iSQLDBType.describe()
349 << " database is not accessible. Connection string: "
350 << iSQLDBConnStr << std::endl
351 << "Hint: launch the 'opentrep-dbmgr' program and "
352 << "see the 'tutorial' command.";
353 OPENTREP_LOG_ERROR (oStr.str());
355 }
356 assert (lSociSession_ptr != NULL);
357
358 // Browse the list of words/items
359 for (WordList_T::const_iterator itWord = iCodeList.begin();
360 itWord != iCodeList.end(); ++itWord) {
361 const std::string& lWord = *itWord;
362
363 // Check for IATA code: alpha{3}
364 const boost::regex lIATACodeExp ("^[[:alpha:]]{3}$");
365 const bool lMatchesWithIATACode = regex_match (lWord, lIATACodeExp);
366 if (lMatchesWithIATACode == true) {
367 // Perform the select statement on the underlying SQL database
368 const IATACode_T lIATACode (lWord);
369 const bool lUniqueEntry = true;
370 const NbOfDBEntries_T& lNbOfEntries =
371 DBManager::getPORByIATACode (*lSociSession_ptr, lIATACode,
372 ioLocationList, lUniqueEntry);
373 oNbOfMatches += lNbOfEntries;
374 continue;
375 }
376
377 // Check for ICAO code: (alpha|digit){4}
378 const boost::regex lICAOCodeExp ("^([[:alpha:]]|[[:digit:]]){4}$");
379 const bool lMatchesWithICAOCode = regex_match (lWord, lICAOCodeExp);
380 if (lMatchesWithICAOCode == true) {
381 // Perform the select statement on the underlying SQL database
382 const ICAOCode_T lICAOCode (lWord);
383 const NbOfDBEntries_T& lNbOfEntries =
384 DBManager::getPORByICAOCode (*lSociSession_ptr, lICAOCode,
385 ioLocationList);
386 oNbOfMatches += lNbOfEntries;
387 continue;
388 }
389
390 // Check for UN/LOCODE code: alpha{2}(alpha|digit){3}
391 const boost::regex
392 lUNLOCodeExp ("^[[:alpha:]]{2}([[:alpha:]]|[[:digit:]]){3}$");
393 const bool lMatchesWithUNLOCode = regex_match (lWord, lUNLOCodeExp);
394 if (lMatchesWithUNLOCode == true) {
395 // Perform the select statement on the underlying SQL database
396 const UNLOCode_T lUNLOCode (lWord);
397 const bool lUniqueEntry = true;
398 const NbOfDBEntries_T& lNbOfEntries =
399 DBManager::getPORByUNLOCode (*lSociSession_ptr, lUNLOCode,
400 ioLocationList, lUniqueEntry);
401 oNbOfMatches += lNbOfEntries;
402 continue;
403 }
404
405 // Check for Geonames ID: digit{1,12}
406 const boost::regex lGeoIDCodeExp ("^[[:digit:]]{1,12}$");
407 const bool lMatchesWithGeoID = regex_match (lWord, lGeoIDCodeExp);
408 if (lMatchesWithGeoID == true) {
409 try {
410 // Convert the character string into a number
411 const GeonamesID_T lGeonamesID =
412 boost::lexical_cast<GeonamesID_T> (lWord);
413
414 // Perform the select statement on the underlying SQL database
415 const NbOfDBEntries_T& lNbOfEntries =
416 DBManager::getPORByGeonameID (*lSociSession_ptr, lGeonamesID,
417 ioLocationList);
418 oNbOfMatches += lNbOfEntries;
419
420 } catch (boost::bad_lexical_cast& eCast) {
421 OPENTREP_LOG_ERROR ("The Geoname ID ('" << lWord
422 << "') cannot be understood.");
423 }
424 }
425 }
426
427 return oNbOfMatches;
428 }
429
430 // //////////////////////////////////////////////////////////////////////
431 NbOfMatches_T RequestInterpreter::
432 interpretTravelRequest (const TravelDBFilePath_T& iTravelDBFilePath,
433 const DBType& iSQLDBType,
434 const SQLDBConnectionString_T& iSQLDBConnStr,
435 const TravelQuery_T& iTravelQuery,
436 LocationList_T& ioLocationList,
437 WordList_T& ioWordList,
438 const OTransliterator& iTransliterator) {
439 NbOfMatches_T oNbOfMatches = 0;
440
441 // Sanity check
442 assert (iTravelQuery.empty() == false);
443
444 // Check whether the file-path to the Xapian database/index exists
445 // and is a directory.
446 boost::filesystem::path lTravelDBFilePath (iTravelDBFilePath.begin(),
447 iTravelDBFilePath.end());
448 if (!(boost::filesystem::exists (lTravelDBFilePath)
449 && boost::filesystem::is_directory (lTravelDBFilePath))) {
450 std::ostringstream oStr;
451 oStr << "The file-path to the Xapian database/index ('"
452 << iTravelDBFilePath << "') does not exist or is not a directory. ";
453 oStr << "That usually means that the OpenTREP indexer (opentrep-indexer) "
454 << "has not been launched yet, or that it has operated "
455 << "on a different Xapian database/index file-path.";
456 OPENTREP_LOG_ERROR (oStr.str());
457 throw FileNotFoundException (oStr.str());
458 }
459
460 // Open the Xapian database
461 Xapian::Database lXapianDatabase (iTravelDBFilePath);
462
463 // DEBUG
464 OPENTREP_LOG_DEBUG (std::endl
465 << "=========================================");
466
467 // First, cut the travel query in slices and calculate all the partitions
468 // for each of those query slices
469 QuerySlices lQuerySlices (lXapianDatabase, iTravelQuery, iTransliterator);
470
471 // DEBUG
472 OPENTREP_LOG_DEBUG ("+=+=+=+=+=+=+=+=+=+=+=+=+=+=+");
473 OPENTREP_LOG_DEBUG ("Travel query: `" << iTravelQuery << "'");
474 const TravelQuery_T& lNormalisedQueryString = lQuerySlices.getQueryString();
475 if (!(iTravelQuery == lNormalisedQueryString)) {
476 OPENTREP_LOG_DEBUG ("Normalised travel query: `" << lNormalisedQueryString
477 << "'");
478 }
479 OPENTREP_LOG_DEBUG ("Query slices: `" << lQuerySlices << "'");
480
481 // Browse the travel query slices
482 const StringPartitionList_T& lStringPartitionList =
483 lQuerySlices.getStringPartitionList();
484 for (StringPartitionList_T::const_iterator itSlice =
485 lStringPartitionList.begin();
486 itSlice != lStringPartitionList.end(); ++itSlice) {
487 StringPartition lStringPartition = *itSlice;
488 const std::string& lTravelQuerySlice = lStringPartition.getInitialString();
489
495 ResultCombination& lResultCombination =
496 FacResultCombination::instance().create (lTravelQuerySlice);
497
498 // DEBUG
499 OPENTREP_LOG_DEBUG ("+++++++++++++++++++++");
500 OPENTREP_LOG_DEBUG ("Travel query slice: `" << lTravelQuerySlice << "'");
501 OPENTREP_LOG_DEBUG ("Partitions: " << lStringPartition);
502
503
508 WordList_T lCodeList;
509 const bool areAllWordsCodes =
510 areAllCodeOrGeoID (lTravelQuerySlice, lCodeList);
511
512 NbOfMatches_T lNbOfMatches = 0;
513 if (areAllWordsCodes == true && !(iSQLDBType == DBType::NODB)) {
520 // DEBUG
521 OPENTREP_LOG_DEBUG ("The travel query string (" << lTravelQuerySlice
522 << ") is made only of IATA/ICAO/UNLOCODE codes "
523 << "or Geonames ID. The " << iSQLDBType.describe()
524 << " SQL database (" << iSQLDBConnStr
525 << ") will be used. "
526 << "The Xapian database/index will not be used");
527
528 lNbOfMatches = getLocationList (iSQLDBType, iSQLDBConnStr, lCodeList,
529 ioLocationList, ioWordList);
530 }
531
532 if (lNbOfMatches == 0) {
543 // DEBUG
544 if (iSQLDBType == DBType::NODB) {
545 OPENTREP_LOG_DEBUG ("No SQL database may be used. "
546 << "The Xapian database will be used instead");
547 } else {
548 OPENTREP_LOG_DEBUG ("The travel query string (" << lTravelQuerySlice
549 << ") has got items/words, which are neither "
550 << "IATA/ICAO codes nor Geonames ID. "
551 << "The Xapian database/index will be used");
552 }
553
558 OPENTREP::searchString (lTravelQuerySlice, lXapianDatabase,
559 lResultCombination, ioWordList);
560
564 lResultCombination.calculateAllWeights();
565
569 OPENTREP::chooseBestMatchingResultHolder (lResultCombination);
570
576 // Create a PlaceHolder object, to collect the matching Place objects
577 PlaceHolder& lPlaceHolder = FacPlaceHolder::instance().create();
578 createPlaces (lResultCombination, lPlaceHolder);
579
580 // DEBUG
581 OPENTREP_LOG_DEBUG (std::endl
582 << "========================================="
583 << std::endl << "Summary:" << std::endl
584 << lPlaceHolder.toShortString() << std::endl
585 << "========================================="
586 << std::endl);
587
592 lPlaceHolder.createLocations (ioLocationList);
593 }
594 }
595
596 oNbOfMatches = ioLocationList.size();
597 return oNbOfMatches;
598 }
599
600}
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition Logger.hpp:24
#define OPENTREP_LOG_DEBUG(iToBeLogged)
Definition Logger.hpp:33
static NbOfDBEntries_T getPORByICAOCode(soci::session &, const ICAOCode_T &, LocationList_T &)
static soci::session * initSQLDBSession(const DBType &, const SQLDBConnectionString_T &)
static NbOfDBEntries_T getPORByUNLOCode(soci::session &, const UNLOCode_T &, LocationList_T &, const bool iUniqueEntry)
static NbOfDBEntries_T getPORByGeonameID(soci::session &, const GeonamesID_T &, LocationList_T &)
static NbOfDBEntries_T getPORByIATACode(soci::session &, const IATACode_T &, LocationList_T &, const bool iUniqueEntry)
static FacPlaceHolder & instance()
static void initLinkWithPlace(PlaceHolder &, Place &)
static FacPlace & instance()
Definition FacPlace.cpp:29
static void initLinkWithResultHolder(ResultCombination &, ResultHolder &)
static FacResultCombination & instance()
ResultCombination & create(const TravelQuery_T &iQueryString)
static void initLinkWithResult(ResultHolder &, Result &)
static FacResultHolder & instance()
ResultHolder & create(const TravelQuery_T &iQueryString, const Xapian::Database &iDatabase)
Result & create(const TravelQuery_T &, const Xapian::Database &)
Definition FacResult.cpp:41
static FacResult & instance()
Definition FacResult.cpp:29
Class modelling a place/POR (point of reference).
Definition Place.hpp:29
std::string toString() const
Definition Place.cpp:85
Class wrapping functions on a list of ResultHolder objects.
const ResultHolder & getBestMatchingResultHolder() const
StringSet getCorrectedStringSet() const
const Percentage_T & getBestMatchingWeight() const
std::string describeShortKey() const
Class wrapping functions on a list of Result objects.
std::string describeShortKey() const
const ResultList_T & getResultList() const
std::string toString() const
Class wrapping a set of Xapian documents having matched a given query string.
Definition Result.hpp:48
std::string fullTextMatch(const Xapian::Database &, const TravelQuery_T &)
Definition Result.cpp:521
bool hasFullTextMatched() const
Definition Result.hpp:71
const RawDataString_T & getBestDocData() const
Definition Result.hpp:132
void fillPlace(Place &) const
Definition Result.cpp:211
static Location retrieveLocation(const Xapian::Document &)
Definition Result.cpp:272
static void tokeniseStringIntoWordList(const TravelQuery_T &, WordList_T &)
std::list< Word_T > WordList_T
void addUnmatchedWord(const TravelQuery_T &iQueryString, WordList_T &ioWordList, WordSet_T &ioWordSet)
void chooseBestMatchingResultHolder(ResultCombination &ioResultCombination)
unsigned int NbOfDBEntries_T
std::string TravelQuery_T
void searchString(const StringPartition &iStringPartition, const Xapian::Database &iDatabase, ResultCombination &ioResultCombination, WordList_T &ioWordList)
double PageRank_T
std::list< Location > LocationList_T
void createPlaces(const ResultCombination &iResultCombination, PlaceHolder &ioPlaceHolder)
NbOfMatches_T getLocationList(const DBType &iSQLDBType, const SQLDBConnectionString_T &iSQLDBConnStr, const WordList_T &iCodeList, LocationList_T &ioLocationList, WordList_T &ioWordList)
Location getBestMatchingLocation(const LocationList_T &iLocationList)
std::set< std::string > WordSet_T
unsigned short NbOfMatches_T
unsigned int GeonamesID_T
std::list< Result * > ResultList_T
std::list< StringPartition > StringPartitionList_T
std::vector< std::string > WordList_T
Enumeration of database types.
Definition DBType.hpp:17
const std::string describe() const
Definition DBType.cpp:131
static bool shouldKeep(const std::string &iPhrase, const std::string &iWord)
Definition Filter.cpp:144
Structure modelling a (geographical) location.
Definition Location.hpp:25
const PageRank_T & getPageRank() const
Definition Location.hpp:354
Class holding a set of strings, e.g., {"rio", "de", "janeiro"}.
Definition StringSet.hpp:19
std::string describe() const
Definition StringSet.cpp:88