OpenTREP Logo  0.07.18
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
XapianIndexManager.cpp
Go to the documentation of this file.
1// //////////////////////////////////////////////////////////////////////
2// Import section
3// //////////////////////////////////////////////////////////////////////
4// STL
5#include <cassert>
6#include <sstream>
7#include <string>
8#include <vector>
9#include <exception>
10// Boost
11#include <boost/filesystem.hpp>
12#include <boost/random/random_device.hpp>
13#include <boost/random/uniform_int_distribution.hpp>
14// Xapian
15#include <xapian.h>
16// OpenTrep
17#include <opentrep/Location.hpp>
22
23namespace OPENTREP {
24
25 // //////////////////////////////////////////////////////////////////////
26 boost::filesystem::path
27 checkTravelDBFilePath (const TravelDBFilePath_T& iTravelDBFilePath) {
28 boost::filesystem::path oTravelDBFilePath (iTravelDBFilePath.begin(),
29 iTravelDBFilePath.end());
30 if (!(boost::filesystem::exists (oTravelDBFilePath)
31 && boost::filesystem::is_directory (oTravelDBFilePath))) {
32 std::ostringstream oStr;
33 oStr << "The file-path to the Xapian database/index ('"
34 << iTravelDBFilePath << "') does not exist or is not a directory.";
35 OPENTREP_LOG_ERROR (oStr.str());
36 throw FileNotFoundException (oStr.str());
37 }
38
39 return oTravelDBFilePath;
40 }
41
42 // //////////////////////////////////////////////////////////////////////
43 NbOfDBEntries_T XapianIndexManager::
44 getSize (const TravelDBFilePath_T& iTravelDBFilePath) {
45 NbOfDBEntries_T oNbOfDBEntries = 0;
46
47 // Check whether the file-path to the Xapian database/index exists
48 // and is a directory.
49 checkTravelDBFilePath (iTravelDBFilePath);
50
51 // Open the Xapian database
52 Xapian::Database lXapianDatabase (iTravelDBFilePath);
53
54 // Retrieve the actual number of documents indexed by the Xapian database
55 const Xapian::doccount& lDocCount = lXapianDatabase.get_doccount();
56
57 //
58 oNbOfDBEntries = static_cast<const NbOfDBEntries_T> (lDocCount);
59
60 return oNbOfDBEntries;
61 }
62
63 // //////////////////////////////////////////////////////////////////////
64 NbOfMatches_T XapianIndexManager::
65 drawRandomLocations (const TravelDBFilePath_T& iTravelDBFilePath,
66 const NbOfMatches_T& iNbOfDraws,
67 LocationList_T& ioLocationList) {
68 NbOfMatches_T oNbOfMatches = 0;
69
70 // Check whether the file-path to the Xapian database/index exists
71 // and is a directory.
72 checkTravelDBFilePath (iTravelDBFilePath);
73
74 // Open the Xapian database
75 Xapian::Database lXapianDatabase (iTravelDBFilePath);
76
77 // Retrieve the number of documents indexed by the database
78 const NbOfDBEntries_T& lTotalNbOfDocs = getSize (iTravelDBFilePath);
79
80 // No need to go further when the Xapian database (index) is empty
81 if (lTotalNbOfDocs == 0) {
82 //
83 OPENTREP_LOG_NOTIFICATION ("The Xapian database is empty");
84 return oNbOfMatches;
85 }
86
87 // random_device is used as a source of entropy, since the generated
88 // locations are expected not to be reproducible.
89 boost::random::random_device lRandomDevice;
90 boost::random::uniform_int_distribution<> uniformDistrib (1, lTotalNbOfDocs);
91
92 // Randomly generate document IDs. If the corresponding documents
93 // do not exist in the Xapian database, generate another one.
94 for (NbOfMatches_T idx = 1; idx <= iNbOfDraws; ++idx) {
95 unsigned int lRandomNbInt = uniformDistrib (lRandomDevice);
96 Xapian::docid lDocID = static_cast<Xapian::docid> (lRandomNbInt);
97
98 // Retrieve the document from the Xapian database/index
99 Xapian::termcount lDocLength = lXapianDatabase.get_doclength (lDocID);
100
101 unsigned short currentNbOfIterations = 0;
102 while (lDocLength == 0 && currentNbOfIterations <= 100) {
103 // DEBUG
104 OPENTREP_LOG_DEBUG ("[" << idx << "] The " << lDocID
105 << " document ID does not exist in the Xapian "
106 << "database. Another ID will be generated.");
107
108 // Re-draw another random document ID
109 lRandomNbInt = uniformDistrib (lRandomDevice);
110 lDocID = static_cast<Xapian::docid> (lRandomNbInt);
111
112 // Retrieve the document from the Xapian database/index
113 lDocLength = lXapianDatabase.get_doclength (lDocID);
114 }
115
116 // Bad luck: no document ID can be generated so that it corresponds to
117 // an actual document in the Xapian database/index
118 if (lDocLength == 0) {
119 //
120 OPENTREP_LOG_NOTIFICATION ("[" << idx << "] No document ID can be "
121 << "generated so that it corresponds to "
122 << "a document in the Xapian database.");
123
124 } else {
125 // Retrieve the actual document.
126 const Xapian::Document lDoc = lXapianDatabase.get_document (lDocID);
127 const std::string& lDocDataStr = lDoc.get_data();
128 const RawDataString_T& lDocData = RawDataString_T (lDocDataStr);
129
130 // Parse the POR details and create the corresponding Location structure
131 const Location& lLocation = Result::retrieveLocation (lDocData);
132
133 // Add the Location structure to the dedicated list
134 ioLocationList.push_back (lLocation);
135 }
136 }
137
138 // Consistency check
139 oNbOfMatches = ioLocationList.size();
140 if (oNbOfMatches != iNbOfDraws) {
141 //
142 OPENTREP_LOG_NOTIFICATION (iNbOfDraws << " random draws were expected, "
143 << "but " << oNbOfMatches
144 << " have been generated.");
145 }
146
147 //
148 return oNbOfMatches;
149 }
150
151}
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition Logger.hpp:24
#define OPENTREP_LOG_DEBUG(iToBeLogged)
Definition Logger.hpp:33
#define OPENTREP_LOG_NOTIFICATION(iToBeLogged)
Definition Logger.hpp:27
static Location retrieveLocation(const Xapian::Document &)
Definition Result.cpp:272
unsigned int NbOfDBEntries_T
std::list< Location > LocationList_T
boost::filesystem::path checkTravelDBFilePath(const TravelDBFilePath_T &iTravelDBFilePath)
unsigned short NbOfMatches_T