OpenTREP Logo  0.07.18
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
Result.cpp
Go to the documentation of this file.
1// //////////////////////////////////////////////////////////////////////
2// Import section
3// //////////////////////////////////////////////////////////////////////
4// STL
5#include <cassert>
6#include <sstream>
7#include <algorithm>
8// Boost
9#include <boost/tokenizer.hpp>
10// OpenTREP
21
22namespace OPENTREP {
23
24 // //////////////////////////////////////////////////////////////////////
25 Result::Result (const TravelQuery_T& iQueryString,
26 const Xapian::Database& iDatabase)
27 : _resultHolder (NULL), _database (iDatabase),
28 _queryString (iQueryString), _hasFullTextMatched (false),
29 _bestDocData (RawDataString_T ("")) {
30 init();
31 }
32
33 // //////////////////////////////////////////////////////////////////////
34 Result::~Result() {
35 }
36
37 // //////////////////////////////////////////////////////////////////////
38 void Result::init() {
39 }
40
41 // //////////////////////////////////////////////////////////////////////
42 std::string Result::describeShortKey() const {
43 std::ostringstream oStr;
44 oStr << _queryString;
45 return oStr.str();
46 }
47
48 // //////////////////////////////////////////////////////////////////////
49 std::string Result::describeKey() const {
50 std::ostringstream oStr;
51 oStr << "'" << describeShortKey() << "' ";
52 if (_correctedQueryString.empty() == false
53 && _correctedQueryString != _queryString) {
54 oStr << "(corrected into '" << _correctedQueryString
55 << "' with an edit distance/error of " << _editDistance
56 << " over an allowable distance of " << _allowableEditDistance
57 << ") - ";
58 } else {
59 oStr << "- ";
60 }
61 return oStr.str();
62 }
63
64 // //////////////////////////////////////////////////////////////////////
65 std::string Result::toString() const {
66 std::ostringstream oStr;
67 oStr << describeKey();
68
69 if (_documentList.empty() == true) {
70 oStr << "No match" << std::endl;
71 return oStr.str();
72 }
73 assert (_hasFullTextMatched == true);
74
75 unsigned short idx = 0;
76 for (DocumentList_T::const_iterator itDoc = _documentList.begin();
77 itDoc != _documentList.end(); ++itDoc, ++idx) {
78 const XapianDocumentPair_T& lDocumentPair = *itDoc;
79
80 const Xapian::Document& lXapianDoc = lDocumentPair.first;
81 const Xapian::docid& lDocID = lXapianDoc.get_docid();
82
83 const ScoreBoard& lScoreBoard = lDocumentPair.second;
84
85 if (idx != 0) {
86 oStr << ", ";
87 }
88 oStr << "Doc ID: " << lDocID << ", matching with ("
89 << lScoreBoard.describe() << "), containing: '"
90 << lXapianDoc.get_data() << "'";
91 }
92
93 return oStr.str();
94 }
95
96 // //////////////////////////////////////////////////////////////////////
97 void Result::toStream (std::ostream& ioOut) const {
98 ioOut << toString();
99 }
100
101 // //////////////////////////////////////////////////////////////////////
102 void Result::fromStream (std::istream& ioIn) {
103 }
104
105 // //////////////////////////////////////////////////////////////////////
106 const XapianDocumentPair_T& Result::
107 getDocumentPair (const Xapian::docid& iDocID) const {
108 // Retrieve the Xapian document and associated ScoreBoard structure
109 // corresponding to the doc ID of the best matching document
110 DocumentMap_T::const_iterator itDoc = _documentMap.find (iDocID);
111
112 if (itDoc == _documentMap.end()) {
113 OPENTREP_LOG_ERROR ("The Xapian document (ID = " << iDocID
114 << ") can not be found in the Result object "
115 << describeKey());
116 }
117 assert (itDoc != _documentMap.end());
118
119 //
120 const XapianDocumentPair_T& oDocumentPair = itDoc->second;
121
122 //
123 return oDocumentPair;
124 }
125
126 // //////////////////////////////////////////////////////////////////////
127 const Xapian::Document& Result::
128 getDocument (const Xapian::docid& iDocID) const {
129 // First, retrieve the pair made of Xapian document and associated
130 // ScoreBoard structure
131 const XapianDocumentPair_T& lDocumentPair = getDocumentPair (iDocID);
132
133 // Then, take the Xapian document (and leave the ScoreBoard out)
134 const Xapian::Document& oXapianDocument = lDocumentPair.first;
135
136 //
137 return oXapianDocument;
138 }
139
140 // //////////////////////////////////////////////////////////////////////
141 void Result::addDocument (const Xapian::Document& iDocument,
142 const Score_T& iScore) {
151 Score_T lCorrectedScore = iScore;
152 if (_editDistance > 0) {
153 lCorrectedScore = iScore / (_editDistance * _editDistance * _editDistance);
154 }
155
156 // The document is created at the time of (Xapian-based) full-text matching
157 const ScoreType lXapianScoreType (ScoreType::XAPIAN_PCT);
158
159 // Create a ScoreBoard structure
160 const ScoreBoard lScoreBoard (_queryString,
161 lXapianScoreType, lCorrectedScore);
162
163 // Retrieve the ID of the Xapian document
164 const Xapian::docid& lDocID = iDocument.get_docid();
165
174
175 // Create a (Xapian document, score board) pair, so as to store
176 // the document along with its corresponding score board
177 const XapianDocumentPair_T lDocumentPair (iDocument, lScoreBoard);
178
179 // Insert the just created pair into the dedicated (STL) list
180 _documentList.push_back (lDocumentPair);
181
182 // Insert the just created pair into the dedicated (STL) map
183 const bool hasInsertBeenSuccessful =
184 _documentMap.insert (DocumentMap_T::value_type (lDocID,
185 lDocumentPair)).second;
186 // Sanity check
187 if (hasInsertBeenSuccessful == false) {
188 std::ostringstream errorStr;
189 errorStr << "Error while inserting the Xapian Document pair into "
190 << "the internal STL map";
191 OPENTREP_LOG_DEBUG (errorStr.str());
192 }
193 assert (hasInsertBeenSuccessful == true);
194 }
195
196 // //////////////////////////////////////////////////////////////////////
197 void Result::fillResult (const Xapian::MSet& iMatchingSet) {
202 for (Xapian::MSetIterator itDoc = iMatchingSet.begin();
203 itDoc != iMatchingSet.end(); ++itDoc) {
204 const int& lXapianPercentage = itDoc.get_percent();
205 const Xapian::Document& lDocument = itDoc.get_document();
206 addDocument (lDocument, lXapianPercentage);
207 }
208 }
209
210 // //////////////////////////////////////////////////////////////////////
211 void Result::fillPlace (Place& ioPlace) const {
212 // Set the original and corrected/suggested keywords
213 ioPlace.setOriginalKeywords (_queryString);
214 ioPlace.setCorrectedKeywords (_correctedQueryString);
215
216 // Set the effective (Levenshtein) edit distance/error, as
217 // well as the allowable edit distance/error
218 ioPlace.setEditDistance (_editDistance);
219 ioPlace.setAllowableEditDistance (_allowableEditDistance);
220
221 // Set the Xapian document ID
222 ioPlace.setDocID (_bestDocID);
223
224 // Set the matching percentage
225 ioPlace.setPercentage (_bestCombinedWeight);
226
227 // Retrieve the parameters of the best matching document
228 const LocationKey& lKey = ioPlace.getKey();
229
230 // DEBUG
231 OPENTREP_LOG_DEBUG ("Place key: " << lKey << " - Xapian ID " << _bestDocID
232 << ", " << _bestCombinedWeight << "% [" << _bestDocData
233 << "]");
234 }
235
247 // //////////////////////////////////////////////////////////////////////
248 static unsigned int calculateEditDistance (const TravelQuery_T& iPhrase) {
249 NbOfErrors_T oEditDistance = 2;
250
251 const NbOfErrors_T lQueryStringSize = iPhrase.size();
252
253 oEditDistance = lQueryStringSize / K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT;
254 return oEditDistance;
255 }
256
257 // //////////////////////////////////////////////////////////////////////
259 // Initialise the POR (point of reference) parser
260 PORStringParser lStringParser (iRawDataString);
261
262 // Parse the raw data
263 const Location& oLocation = lStringParser.generateLocation();
264
265 // DEBUG
266 //OPENTREP_LOG_DEBUG ("Location: " << oLocation);
267
268 return oLocation;
269 }
270
271 // //////////////////////////////////////////////////////////////////////
272 Location Result::retrieveLocation (const Xapian::Document& iDocument) {
273 // Retrieve the Xapian document data
274 const std::string& lDocumentDataStr = iDocument.get_data();
275 const RawDataString_T& lDocumentData = RawDataString_T (lDocumentDataStr);
276
277 // Parse the POR details and create the corresponding Location structure
278 const Location& oLocation = retrieveLocation (lDocumentData);
279
280 return oLocation;
281 }
282
283 // //////////////////////////////////////////////////////////////////////
284 LocationKey Result::getPrimaryKey (const Xapian::Document& iDocument) {
285 // Parse the POR (point of reference) details held by the Xapian document
286 const Location& lLocation = retrieveLocation (iDocument);
287
288 // Get the key (IATA and ICAO codes, GeonamesID)
289 const LocationKey& oLocationKey = lLocation.getKey();
290
291 return oLocationKey;
292 }
293
294 // //////////////////////////////////////////////////////////////////////
295 Score_T Result::getEnvelopeID (const Xapian::Document& iDocument) {
296 // Parse the POR (point of reference) details held by the Xapian document
297 const Location& lLocation = retrieveLocation (iDocument);
298
299 // Get the envelope ID (it is an integer value in the Location structure)
300 const EnvelopeID_T& lEnvelopeIDInt = lLocation.getEnvelopeID();
301
302 // Convert the envelope ID value, from an integer to a floating point one
303 const Score_T oEnvelopeID = static_cast<const Score_T> (lEnvelopeIDInt);
304
305 return oEnvelopeID;
306 }
307
308 // //////////////////////////////////////////////////////////////////////
309 PageRank_T Result::getPageRank (const Xapian::Document& iDocument) {
310 // Parse the POR (point of reference) details held by the Xapian document
311 const Location& lLocation = retrieveLocation (iDocument);
312
313 // Get the PageRank value
314 const PageRank_T& oPageRank = lLocation.getPageRank();
315
316 return oPageRank;
317 }
318
319 // //////////////////////////////////////////////////////////////////////
320 std::string Result::fullTextMatch (const Xapian::Database& iDatabase,
321 const TravelQuery_T& iQueryString,
322 Xapian::MSet& ioMatchingSet) {
323 std::string oMatchedString;
324
325 // Catch any Xapian::Error exceptions thrown
326 try {
327
328 // Build the query object
329 Xapian::QueryParser lQueryParser;
330 lQueryParser.set_database (iDatabase);
331
337 // lQueryParser.set_default_op (Xapian::Query::OP_ADJ);
338 lQueryParser.set_default_op (Xapian::Query::OP_PHRASE);
339
340 // DEBUG
341 /*
342 OPENTREP_LOG_DEBUG ("Query parser `" << lQueryParser.get_description()
343 << "'");
344 */
345
346 // DEBUG
347 OPENTREP_LOG_DEBUG (" --------");
348
349 // Start an enquire session
350 Xapian::Enquire enquire (iDatabase);
351
358 const Xapian::Query& lXapianQuery =
359 lQueryParser.parse_query (iQueryString,
360 Xapian::QueryParser::FLAG_BOOLEAN
361 | Xapian::QueryParser::FLAG_PHRASE
362 | Xapian::QueryParser::FLAG_LOVEHATE);
363
364 // Give the query object to the enquire session
365 enquire.set_query (lXapianQuery);
366
367 // Get the top K_DEFAULT_XAPIAN_MATCHING_SET_SIZE (normally, 30)
368 // results of the query
369 ioMatchingSet = enquire.get_mset (0, K_DEFAULT_XAPIAN_MATCHING_SET_SIZE);
370
371 // Display the results
372 int nbMatches = ioMatchingSet.size();
373
374 // DEBUG
375 OPENTREP_LOG_DEBUG (" Query string: `" << iQueryString
376 << "', i.e.: `" << lXapianQuery.get_description()
377 << "' => " << nbMatches << " result(s) found");
378
379 if (nbMatches != 0) {
380 // Store the effective (Levenshtein) edit distance/error
381 const NbOfErrors_T lEditDistance = 0;
382 setEditDistance (lEditDistance);
383
384 // Store the allowable edit distance/error
385 setAllowableEditDistance (lEditDistance);
386
387 //
388 oMatchedString = iQueryString;
389
390 // Store the fact that there has been a full-text match
391 setHasFullTextMatched (true);
392
393 // Store the corrected string (the same as the given string, here,
394 // as that latter directly gave full-text matches).
395 setCorrectedQueryString (oMatchedString);
396
397 // DEBUG
398 OPENTREP_LOG_DEBUG (" Query string: `" << iQueryString
399 << "' provides " << nbMatches << " exact matches.");
400
401 return oMatchedString;
402 }
403 assert (ioMatchingSet.empty() == true);
404
410 const NbOfErrors_T& lAllowableEditDistance =
411 calculateEditDistance (iQueryString);
412
413 // Let Xapian find a spelling correction (if any)
414 const std::string& lCorrectedString =
415 iDatabase.get_spelling_suggestion (iQueryString, lAllowableEditDistance);
416
417 // If the correction is no better than the original string, there is
418 // no need to go further: there is no match.
419 if (lCorrectedString.empty() == true || lCorrectedString == iQueryString) {
420 // DEBUG
421 OPENTREP_LOG_DEBUG (" Query string: `"
422 << iQueryString << "' provides no match, "
423 << "and there is no spelling suggestion, "
424 << "even with an edit distance of "
425 << lAllowableEditDistance);
426
427 // Store the fact that there has not been any full-text match
428 setHasFullTextMatched (false);
429
430 // Leave the string empty
431 return oMatchedString;
432 }
433 assert (lCorrectedString.empty() == false
434 && lCorrectedString != iQueryString);
435
436 // Calculate the effective (Levenshtein) edit distance/error
437 const NbOfErrors_T& lEditDistance =
438 Levenshtein::getDistance (iQueryString, lCorrectedString);
439
447 const Xapian::Query& lCorrectedXapianQuery =
448 lQueryParser.parse_query (lCorrectedString,
449 Xapian::QueryParser::FLAG_BOOLEAN
450 | Xapian::QueryParser::FLAG_PHRASE
451 | Xapian::QueryParser::FLAG_LOVEHATE);
452
453 // Retrieve a maximum of K_DEFAULT_XAPIAN_MATCHING_SET_SIZE (normally,
454 // 30) entries
455 enquire.set_query (lCorrectedXapianQuery);
456 ioMatchingSet = enquire.get_mset (0, K_DEFAULT_XAPIAN_MATCHING_SET_SIZE);
457
458 // Display the results
459 nbMatches = ioMatchingSet.size();
460
461 // DEBUG
462 OPENTREP_LOG_DEBUG (" Corrected query string: `" << lCorrectedString
463 << "', i.e.: `"
464 << lCorrectedXapianQuery.get_description()
465 << "' => " << nbMatches << " result(s) found");
466
467 if (nbMatches != 0) {
468 // Store the effective (Levenshtein) edit distance/error
469 setEditDistance (lEditDistance);
470
471 // Store the allowable edit distance/error
472 setAllowableEditDistance (lAllowableEditDistance);
473
474 //
475 oMatchedString = lCorrectedString;
476
477 // Store the fact that there has been a full-text match
478 setHasFullTextMatched (true);
479
480 // Store the corrected string
481 setCorrectedQueryString (oMatchedString);
482
483 // DEBUG
484 OPENTREP_LOG_DEBUG (" Query string: `"
485 << iQueryString << "', spelling suggestion: `"
486 << lCorrectedString
487 << "', with a Levenshtein edit distance of "
488 << lEditDistance
489 << " over an allowable edit distance of "
490 << lAllowableEditDistance << ", provides "
491 << nbMatches << " matches.");
492
493 //
494 return oMatchedString;
495 }
496
497 // Error
498 OPENTREP_LOG_ERROR (" Query string: `"
499 << iQueryString << "', spelling suggestion: `"
500 << lCorrectedString
501 << "', with a Levenshtein edit distance of "
502 << lEditDistance
503 << " over an allowable edit distance of "
504 << lAllowableEditDistance << ", provides no match, "
505 << "which is not consistent with the existence of "
506 << "the spelling correction.");
507 assert (false);
508
509 } catch (const Xapian::Error& error) {
510 OPENTREP_LOG_ERROR ("Exception: " << error.get_msg());
511 throw XapianException (error.get_msg());
512 }
513
514 // Store the fact that there has not been any full-text match
515 setHasFullTextMatched (false);
516
517 return oMatchedString;
518 }
519
520 // //////////////////////////////////////////////////////////////////////
521 std::string Result::fullTextMatch (const Xapian::Database& iDatabase,
522 const TravelQuery_T& iQueryString) {
523 std::string oMatchedString;
524
525 // Catch any Xapian::Error exceptions thrown
526 try {
527
528 // DEBUG
529 OPENTREP_LOG_DEBUG (" ----------------");
530 OPENTREP_LOG_DEBUG (" Current query string: '"<< iQueryString << "'");
531
532 // Check whether the string should be filtered out
533 const bool isToBeAdded = Filter::shouldKeep ("", iQueryString);
534 //const bool isToBeAdded = true;
535
536 Xapian::MSet lMatchingSet;
537 if (isToBeAdded == true) {
538 oMatchedString = fullTextMatch (iDatabase, iQueryString, lMatchingSet);
539 }
540
541 // Create the corresponding documents (from the Xapian MSet object)
542 fillResult (lMatchingSet);
543
544 // DEBUG
545 if (isToBeAdded == false) {
546 OPENTREP_LOG_DEBUG (" No full text search performed as '"
547 << iQueryString
548 << "' is not made of searchable words");
549 }
550 OPENTREP_LOG_DEBUG (" ==> " << toString());
551 OPENTREP_LOG_DEBUG (" ----------------");
552
553 } catch (const Xapian::Error& error) {
554 OPENTREP_LOG_ERROR ("Xapian-related error: " << error.get_msg());
555 throw XapianException (error.get_msg());
556 }
557
558 return oMatchedString;
559 }
560
561 // //////////////////////////////////////////////////////////////////////
563 // Browse the list of Xapian documents
564 for (DocumentList_T::const_iterator itDoc = _documentList.begin();
565 itDoc != _documentList.end(); ++itDoc) {
566 const XapianDocumentPair_T& lDocumentPair = *itDoc;
567
568 // Retrieve the Xapian document
569 const Xapian::Document& lXapianDoc = lDocumentPair.first;
570
571 // Extract the Xapian document ID
572 const Xapian::docid& lDocID = lXapianDoc.get_docid();
573
574 // Extract the envelope ID from the document data
575 const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
576
577 // Retrieve the score board for that Xapian document
578 const ScoreBoard& lScoreBoard = lDocumentPair.second;
579
580 // Extract the Xapian matching percentage
581 const Score_T& lXapianPct = lScoreBoard.getScore (ScoreType::XAPIAN_PCT);
582
583 // DEBUG
585 << "' with (" << lLocationKey << ", doc ID = "
586 << lDocID << ") matches at " << lXapianPct
587 << "%");
588 }
589 }
590
591 // //////////////////////////////////////////////////////////////////////
592 void Result::setScoreOnDocMap (const Xapian::docid& iDocID,
593 const ScoreType& iType, const Score_T& iScore) {
594 // Retrieve the Xapian document and associated ScoreBoard structure
595 // corresponding to the given doc ID
596 DocumentMap_T::iterator itDoc = _documentMap.find (iDocID);
597
598 if (itDoc == _documentMap.end()) {
599 OPENTREP_LOG_ERROR ("The Xapian document (ID = " << iDocID
600 << ") can not be found in the Result object "
601 << describeKey());
602 }
603 assert (itDoc != _documentMap.end());
604
605 // Retrieve the associated ScoreBoard structure
606 XapianDocumentPair_T& lXapianDocPair = itDoc->second;
607 ScoreBoard& lScoreBoard = lXapianDocPair.second;
608
609 // Update the score/weight
610 lScoreBoard.setScore (iType, iScore);
611 }
612
613 // //////////////////////////////////////////////////////////////////////
615 // Browse the list of Xapian documents
616 for (DocumentList_T::iterator itDoc = _documentList.begin();
617 itDoc != _documentList.end(); ++itDoc) {
618 XapianDocumentPair_T& lDocumentPair = *itDoc;
619
620 // Retrieve the Xapian document
621 const Xapian::Document& lXapianDoc = lDocumentPair.first;
622
623 // Extract the Xapian document ID
624 const Xapian::docid& lDocID = lXapianDoc.get_docid();
625
626 // Extract the envelope ID from the document data
627 const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
628
629 // Extract the envelope ID from the document data
630 const EnvelopeID_T& lEnvelopeIDInt = getEnvelopeID (lXapianDoc);
631
632 // DEBUG
633 if (lEnvelopeIDInt != 0) {
635 << "] (" << lLocationKey << ", doc ID = "
636 << lDocID << ") has a non-null envelope ID ("
637 << lEnvelopeIDInt << ") => match of 0.10%");
638 }
639
640 // Convert the envelope ID value, from an integer to a floating point one
641 const Score_T lEnvelopeID = static_cast<const Score_T> (lEnvelopeIDInt);
642
643 // Retrieve the score board for that Xapian document
644 ScoreBoard& lScoreBoard = lDocumentPair.second;
645
646 // Store the envelope-related weight
647 lScoreBoard.setScore (ScoreType::ENV_ID, lEnvelopeID);
648 setScoreOnDocMap (lDocID, ScoreType::ENV_ID, lEnvelopeID);
649 }
650 }
651
652 // //////////////////////////////////////////////////////////////////////
654 // Browse the list of Xapian documents
655 for (DocumentList_T::iterator itDoc = _documentList.begin();
656 itDoc != _documentList.end(); ++itDoc) {
657 XapianDocumentPair_T& lDocumentPair = *itDoc;
658
659 // Retrieve the Xapian document
660 const Xapian::Document& lXapianDoc = lDocumentPair.first;
661
662 // Extract the Xapian document ID
663 const Xapian::docid& lDocID = lXapianDoc.get_docid();
664
665 // Extract the envelope ID from the document data
666 const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
667
668 // Initialisation of the IATA/ICAO code full matching percentage
669 Score_T lCodeMatchPct = 0.0;
670 bool hasCodeFullyMatched = false;
671
672 // Filter out "standard" words such as "airport", "international",
673 // "city", as well as words having a length strictly less than
674 // 3 letters.
675 std::string lFilteredString (_queryString);
676 const NbOfLetters_T kMinWordLength = 3;
677 Filter::trim (lFilteredString, kMinWordLength);
678
679 // Check whether or not the filtered query string is made of
680 // a single word
681 WordList_T lFilteredQueryWordList;
683 lFilteredQueryWordList);
684 const NbOfWords_T nbOfFilteredQueryWords = lFilteredQueryWordList.size();
685
686 //
687 if (_hasFullTextMatched == true) {
694 const size_t lNbOfLetters = lFilteredString.size();
695 if (nbOfFilteredQueryWords == 1
696 && lNbOfLetters >= 3 && lNbOfLetters <= 4
697 && _correctedQueryString == _queryString) {
698 // Convert the query string (made of one word of 3 or 4 letters)
699 // to uppercase letters
700 std::string lUpperQueryWord;
701 lUpperQueryWord.resize (lNbOfLetters);
702 std::transform (lFilteredString.begin(), lFilteredString.end(),
703 lUpperQueryWord.begin(), ::toupper);
704
705 // Retrieve with the IATA code
706 const IATACode_T& lIataCode = lLocationKey.getIataCode();
707
708 // Compare the 3/4-letter-word query string with the IATA
709 // and ICAO codes
710 if (lUpperQueryWord == lIataCode) {
714 lCodeMatchPct = 1.0;
715 hasCodeFullyMatched = true;
716 }
717 }
718
719 if (hasCodeFullyMatched == true) {
720 // DEBUG
722 << "' matches the IATA/ICAO code ("
723 << lLocationKey << ", doc ID = "
724 << lDocID << ") => match of "
726 } else {
727 // DEBUG
729 << "' does not match with the IATA/ICAO "
730 << "code (" << lLocationKey << ", doc ID = "
731 << lDocID << ") => match of "
733 }
734 }
735
736 // Retrieve the score board for that Xapian document
737 ScoreBoard& lScoreBoard = lDocumentPair.second;
738
739 // Store the IATA/ICAO code match percentage/weight
740 lScoreBoard.setScore (ScoreType::CODE_FULL_MATCH, lCodeMatchPct);
741 setScoreOnDocMap (lDocID, ScoreType::CODE_FULL_MATCH, lCodeMatchPct);
742 }
743 }
744
745 // //////////////////////////////////////////////////////////////////////
747 // Browse the list of Xapian documents
748 for (DocumentList_T::iterator itDoc = _documentList.begin();
749 itDoc != _documentList.end(); ++itDoc) {
750 XapianDocumentPair_T& lDocumentPair = *itDoc;
751
752 // Retrieve the Xapian document
753 const Xapian::Document& lXapianDoc = lDocumentPair.first;
754
755 // Extract the Xapian document ID
756 const Xapian::docid& lDocID = lXapianDoc.get_docid();
757
758 // Extract the envelope ID from the document data
759 const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
760
761 // Extract the PageRank from the document data
762 const Score_T& lPageRank = getPageRank (lXapianDoc);
763
764 // DEBUG
766 << "] (" << lLocationKey << ", doc ID = "
767 << lDocID << ") has a PageRank of "
768 << lPageRank << "%");
769
770 // Retrieve the score board for that Xapian document
771 ScoreBoard& lScoreBoard = lDocumentPair.second;
772
773 // Store the PageRank weight
774 lScoreBoard.setScore (ScoreType::PAGE_RANK, lPageRank);
775 setScoreOnDocMap (lDocID, ScoreType::PAGE_RANK, lPageRank);
776 }
777 }
778
779 // //////////////////////////////////////////////////////////////////////
787
788 // //////////////////////////////////////////////////////////////////////
790 Percentage_T lMaxPercentage = 0.0;
791 std::string lBestDocData;
792
793 // Browse the list of Xapian documents
794 Xapian::docid lBestDocID = 0;
795 for (DocumentList_T::iterator itDoc = _documentList.begin();
796 itDoc != _documentList.end(); ++itDoc) {
797 XapianDocumentPair_T& lDocumentPair = *itDoc;
798
799 // Retrieve the Xapian document ID
800 const Xapian::Document& lXapianDoc = lDocumentPair.first;
801 const Xapian::docid& lDocID = lXapianDoc.get_docid();
802 const std::string& lDocData = lXapianDoc.get_data();
803
808 ScoreBoard& lScoreBoard = lDocumentPair.second;
809 const Percentage_T& lPercentage = lScoreBoard.calculateCombinedWeight();
810
819
820 // Register the document, if it is the best matching until now
821 if (lPercentage > lMaxPercentage) {
822 lMaxPercentage = lPercentage;
823 lBestDocID = lDocID;
824 lBestDocData = lDocData;
825 }
826 }
827
828 // Check whether or not the (original) query string is made of a single word
829 WordList_T lOriginalQueryWordList;
831 lOriginalQueryWordList);
832 const NbOfWords_T nbOfOriginalQueryWords = lOriginalQueryWordList.size();
833
834 //
835 if (_hasFullTextMatched == true) {
836 // Retrieve the primary key (IATA, location type, Geonames ID) of
837 // the place corresponding to the document
838 const XapianDocumentPair_T& lXapianDocPair = getDocumentPair (lBestDocID);
839 const Xapian::Document& lXapianDoc = lXapianDocPair.first;
840 const ScoreBoard& lScoreBoard = lXapianDocPair.second;
841 const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
842
843 // DEBUG
844 OPENTREP_LOG_DEBUG (" [pct] '" << describeShortKey()
845 << "' matches at " << lMaxPercentage
846 << "% for " << lLocationKey << " (doc ID = "
847 << lBestDocID << "). Score calculation: "
848 << lScoreBoard.describe());
849
850 } else {
855 const bool shouldBeKept = Filter::shouldKeep ("", _queryString);
856
857 if (nbOfOriginalQueryWords == 1 && shouldBeKept == true) {
863 lMaxPercentage = 100.0;
864
865 // DEBUG
866 OPENTREP_LOG_DEBUG (" [pct] '" << describeShortKey()
867 << "' does not match, but it is a non black-listed "
868 << "single-word string; hence, the weight is "
869 << lMaxPercentage << "%");
870
871 } else {
880 lMaxPercentage = std::pow (10.0, -3*nbOfOriginalQueryWords);
881
882 // DEBUG
884 << "' does not match, and is either a multiple-word "
885 << "string or black-listed; hence, the weight is "
886 << lMaxPercentage << "%");
887 }
888 }
889
890 // Store the doc ID of the best matching document
891 setBestDocID (lBestDocID);
892
893 // Store the best weight
894 setBestCombinedWeight (lMaxPercentage);
895
896 // Store all the details of the Xapian document
897 setBestDocData (lBestDocData);
898 }
899
900}
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition Logger.hpp:24
#define OPENTREP_LOG_DEBUG(iToBeLogged)
Definition Logger.hpp:33
#define OPENTREP_LOG_NOTIFICATION(iToBeLogged)
Definition Logger.hpp:27
const Location & generateLocation()
Class modelling a place/POR (point of reference).
Definition Place.hpp:29
void setEditDistance(const NbOfErrors_T &iEditDistance)
Definition Place.hpp:898
void setDocID(const XapianDocID_T &iDocID)
Definition Place.hpp:920
void setAllowableEditDistance(const NbOfErrors_T &iAllowableEditDistance)
Definition Place.hpp:906
void setPercentage(const MatchingPercentage_T &iPercentage)
Definition Place.hpp:891
void setOriginalKeywords(const std::string &iOriginalKeywords)
Definition Place.hpp:877
const LocationKey & getKey() const
Definition Place.hpp:59
void setCorrectedKeywords(const std::string &iCorrectedKeywords)
Definition Place.hpp:884
std::string fullTextMatch(const Xapian::Database &, const TravelQuery_T &)
Definition Result.cpp:521
void setScoreOnDocMap(const Xapian::docid &, const ScoreType &, const Score_T &)
Definition Result.cpp:592
void addDocument(const Xapian::Document &, const Score_T &)
Definition Result.cpp:141
void setBestCombinedWeight(const Percentage_T &iPercentage)
Definition Result.hpp:215
void displayXapianPercentages() const
Definition Result.cpp:562
void fillResult(const Xapian::MSet &iMatchingSet)
Definition Result.cpp:197
const XapianDocumentPair_T & getDocumentPair(const Xapian::docid &) const
Definition Result.cpp:107
void setBestDocData(const std::string &iDocData)
Definition Result.hpp:222
void fillPlace(Place &) const
Definition Result.cpp:211
void toStream(std::ostream &ioOut) const
Definition Result.cpp:97
void calculatePageRanks()
Definition Result.cpp:746
void setBestDocID(const Xapian::docid &iDocID)
Definition Result.hpp:207
static Score_T getEnvelopeID(const Xapian::Document &)
Definition Result.cpp:295
static PageRank_T getPageRank(const Xapian::Document &)
Definition Result.cpp:309
std::string toString() const
Definition Result.cpp:65
void fromStream(std::istream &ioIn)
Definition Result.cpp:102
std::string describeShortKey() const
Definition Result.cpp:42
static Location retrieveLocation(const Xapian::Document &)
Definition Result.cpp:272
void calculateHeuristicWeights()
Definition Result.cpp:780
void calculateEnvelopeWeights()
Definition Result.cpp:614
void calculateCodeMatches()
Definition Result.cpp:653
static LocationKey getPrimaryKey(const Xapian::Document &)
Definition Result.cpp:284
void calculateCombinedWeights()
Definition Result.cpp:789
std::string describeKey() const
Definition Result.cpp:49
static void tokeniseStringIntoWordList(const TravelQuery_T &, WordList_T &)
std::list< Word_T > WordList_T
unsigned int NbOfLetters_T
unsigned short NbOfErrors_T
const Percentage_T K_DEFAULT_MODIFIED_MATCHING_PCT
std::string TravelQuery_T
double Percentage_T
static unsigned int calculateEditDistance(const TravelQuery_T &iPhrase)
Helper function.
double PageRank_T
unsigned int EnvelopeID_T
const Percentage_T K_DEFAULT_FULL_CODE_MATCH_PCT
std::pair< Xapian::Document, ScoreBoard > XapianDocumentPair_T
Definition Result.hpp:30
const NbOfMatches_T K_DEFAULT_XAPIAN_MATCHING_SET_SIZE
const NbOfErrors_T K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT
unsigned short NbOfWords_T
std::string toString(const TokenList_T &iTokenList)
static void trim(std::string &ioPhrase, const NbOfLetters_T &iMinWordLength=4)
Definition Filter.cpp:131
static bool shouldKeep(const std::string &iPhrase, const std::string &iWord)
Definition Filter.cpp:144
Class modelling the primary key of a location/POR (point of reference).
const IATACode_T & getIataCode() const
Structure modelling a (geographical) location.
Definition Location.hpp:25
const LocationKey & getKey() const
Definition Location.hpp:31
const EnvelopeID_T & getEnvelopeID() const
Definition Location.hpp:123
const PageRank_T & getPageRank() const
Definition Location.hpp:354
Structure holding a board for all the types of score/matching having been performed.
Percentage_T calculateCombinedWeight()
std::string describe() const
void setScore(const ScoreType &, const Score_T &)
Score_T getScore(const ScoreType &) const
Enumeration of score types.
Definition ScoreType.hpp:25