OpenTREP Logo  0.07.18
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
QuerySlices.cpp
Go to the documentation of this file.
1// //////////////////////////////////////////////////////////////////////
2// Import section
3// //////////////////////////////////////////////////////////////////////
4// STL
5#include <cassert>
6#include <sstream>
7#include <set>
8// OpenTrep
16
17namespace OPENTREP {
18
19 // //////////////////////////////////////////////////////////////////////
20 QuerySlices::QuerySlices (const Xapian::Database& iDatabase,
21 const TravelQuery_T& iQueryString,
22 const OTransliterator& iTransliterator)
23 : _database (iDatabase), _queryString (iQueryString) {
24 init (iTransliterator);
25 }
26
27 // //////////////////////////////////////////////////////////////////////
30
31 // //////////////////////////////////////////////////////////////////////
32 void QuerySlices::push_back (const StringPartition& iStringPartition) {
33 if (iStringPartition.empty() == false) {
34 _slices.push_back (iStringPartition);
35 }
36 }
37
38 // //////////////////////////////////////////////////////////////////////
39 size_t QuerySlices::size() const {
40 return _slices.size();
41 }
42
43 // //////////////////////////////////////////////////////////////////////
44 bool QuerySlices::empty() const {
45 return _slices.empty();
46 }
47
48 // //////////////////////////////////////////////////////////////////////
50 _slices.clear();
51 }
52
53 // //////////////////////////////////////////////////////////////////////
54 std::string QuerySlices::describeKey() const {
55 std::ostringstream oStr;
56 oStr << "";
57 return oStr.str();
58 }
59
60 // //////////////////////////////////////////////////////////////////////
61 std::string QuerySlices::describe() const {
62 std::ostringstream oStr;
63 oStr << describeKey();
64
65 //
66 oStr << "[ ";
67
68 short idx_sublist = 0;
69 for (StringPartitionList_T::const_iterator itSlice = _slices.begin();
70 itSlice != _slices.end(); ++itSlice, ++idx_sublist) {
71 //
72 if (idx_sublist != 0) {
73 oStr << "; ";
74 }
75
76 //
77 const StringPartition& lStringPartition = *itSlice;
78
79 //
80 oStr << idx_sublist << ". " << lStringPartition;
81 }
82
83 //
84 oStr << " ]";
85
86 return oStr.str();
87 }
88
89 // //////////////////////////////////////////////////////////////////////
90 void QuerySlices::toStream (std::ostream& ioOut) const {
91 ioOut << describe();
92 }
93
94 // //////////////////////////////////////////////////////////////////////
95 void QuerySlices::fromStream (std::istream& ioIn) {
96 }
97
109 // //////////////////////////////////////////////////////////////////////
110 static unsigned int calculateEditDistance (const TravelQuery_T& iPhrase) {
111 NbOfErrors_T oEditDistance = 2;
112
113 const NbOfErrors_T lQueryStringSize = iPhrase.size();
114
115 oEditDistance = lQueryStringSize / K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT;
116 return oEditDistance;
117 }
118
122 // //////////////////////////////////////////////////////////////////////
123 bool doesMatch (const Xapian::Database& iDatabase,
124 const std::string& iWord1, const std::string& iWord2) {
125 bool oDoesMatch = false;
126
127 //
128 std::ostringstream oStr;
129 oStr << iWord1 << " " << iWord2;
130 const std::string lQueryString (oStr.str());
131
132 // Catch any Xapian::Error exceptions thrown
133 Xapian::MSet lMatchingSet;
134 try {
135
136 // Build the query object
137 Xapian::QueryParser lQueryParser;
138 lQueryParser.set_database (iDatabase);
139
145 // lQueryParser.set_default_op (Xapian::Query::OP_ADJ);
146 lQueryParser.set_default_op (Xapian::Query::OP_PHRASE);
147
148 // DEBUG
149 /*
150 OPENTREP_LOG_DEBUG ("Query parser `" << lQueryParser.get_description()
151 << "'");
152 */
153
154 // DEBUG
155 // OPENTREP_LOG_DEBUG (" --------");
156
157 // Start an enquire session
158 Xapian::Enquire enquire (iDatabase);
159
166 const Xapian::Query& lXapianQuery =
167 lQueryParser.parse_query (lQueryString,
168 Xapian::QueryParser::FLAG_BOOLEAN
169 | Xapian::QueryParser::FLAG_PHRASE
170 | Xapian::QueryParser::FLAG_LOVEHATE);
171
172 // Give the query object to the enquire session
173 enquire.set_query (lXapianQuery);
174
175 // Get the top 20 results of the query
176 lMatchingSet = enquire.get_mset (0, 20);
177
178 // Display the results
179 int nbMatches = lMatchingSet.size();
180
181 // DEBUG
182 /*
183 OPENTREP_LOG_DEBUG (" Query string: `" << lQueryString
184 << "', i.e.: `" << lXapianQuery.get_description()
185 << "' => " << nbMatches << " result(s) found");
186 */
187
188 if (nbMatches != 0) {
189 // There has been a matching
190 oDoesMatch = true;
191
192 // DEBUG
193 /*
194 OPENTREP_LOG_DEBUG (" Query string: `" << lQueryString
195 << "' provides " << nbMatches << " exact matches.");
196 */
197
198 return oDoesMatch;
199 }
200 assert (lMatchingSet.empty() == true);
201
207 const NbOfErrors_T& lAllowableEditDistance =
208 calculateEditDistance (lQueryString);
209
210 // Let Xapian find a spelling correction (if any)
211 const std::string& lCorrectedString =
212 iDatabase.get_spelling_suggestion (lQueryString, lAllowableEditDistance);
213
214 // If the correction is no better than the original string, there is
215 // no need to go further: there is no match.
216 if (lCorrectedString.empty() == true || lCorrectedString == lQueryString) {
217 // DEBUG
218 /*
219 OPENTREP_LOG_DEBUG (" Query string: `"
220 << lQueryString << "' provides no match, "
221 << "and there is no spelling suggestion, "
222 << "even with an edit distance of "
223 << lAllowableEditDistance);
224 */
225
226 // No match
227 return oDoesMatch;
228 }
229 assert (lCorrectedString.empty() == false
230 && lCorrectedString != lQueryString);
231
232 // Calculate the effective (Levenshtein) edit distance/error
233 const NbOfErrors_T& lEditDistance =
234 Levenshtein::getDistance (lQueryString, lCorrectedString);
235
243 const Xapian::Query& lCorrectedXapianQuery =
244 lQueryParser.parse_query (lCorrectedString,
245 Xapian::QueryParser::FLAG_BOOLEAN
246 | Xapian::QueryParser::FLAG_PHRASE
247 | Xapian::QueryParser::FLAG_LOVEHATE);
248
249 enquire.set_query (lCorrectedXapianQuery);
250 lMatchingSet = enquire.get_mset (0, 20);
251
252 // Display the results
253 nbMatches = lMatchingSet.size();
254
255 // DEBUG
256 /*
257 OPENTREP_LOG_DEBUG (" Corrected query string: `" << lCorrectedString
258 << "', i.e.: `"
259 << lCorrectedXapianQuery.get_description()
260 << "' => " << nbMatches << " result(s) found");
261 */
262
263 if (nbMatches != 0) {
264 // DEBUG
265 /*
266 OPENTREP_LOG_DEBUG (" Query string: `"
267 << lQueryString << "', spelling suggestion: `"
268 << lCorrectedString
269 << "', with a Levenshtein edit distance of "
270 << lEditDistance
271 << " over an allowable edit distance of "
272 << lAllowableEditDistance << ", provides "
273 << nbMatches << " matches.");
274 */
275
276 //
277 oDoesMatch = true;
278 return oDoesMatch;
279 }
280
281 // Error
282 OPENTREP_LOG_ERROR (" Query string: `"
283 << lQueryString << "', spelling suggestion: `"
284 << lCorrectedString
285 << "', with a Levenshtein edit distance of "
286 << lEditDistance
287 << " over an allowable edit distance of "
288 << lAllowableEditDistance << ", provides no match, "
289 << "which is not consistent with the existence of "
290 << "the spelling correction.");
291 assert (false);
292
293 } catch (const Xapian::Error& error) {
294 // Error
295 OPENTREP_LOG_ERROR ("Exception: " << error.get_msg());
296 throw XapianException (error.get_msg());
297 }
298
299 return oDoesMatch;
300 }
301
302 // //////////////////////////////////////////////////////////////////////
303 void QuerySlices::init (const OTransliterator& iTransliterator) {
304 // 0. Initialisation
305 // 0.1. Stripping of the punctuation and quotation characters
306 _queryString = iTransliterator.unpunctuate (_queryString);
307 _queryString = iTransliterator.unquote (_queryString);
308
309 // 0.2. Initialisation of the tokenizer
310 WordList_T lWordList;
312 const unsigned short nbOfWords = lWordList.size();
313
314 // When the query has a single word, stop here, as there is a single slice
315 if (nbOfWords <= 1) {
316 _slices.push_back (_queryString);
317 return;
318 }
319
320 // 0.3. Re-create the initial phrase, without any (potential) seperator
321 const std::string lPhrase = createStringFromWordList (lWordList);
322
323 // 1. Browse the words, two by two, and check whether their association
324 // matches with the Xapian index
325 WordList_T::const_iterator itWord = lWordList.begin();
326 WordList_T::const_iterator itNextWord = lWordList.begin(); ++itNextWord;
327 for (unsigned short idx = 1, idx_rel = 1; itNextWord != lWordList.end();
328 ++itWord, ++itNextWord, ++idx, ++idx_rel) {
329 const std::string& leftWord = *itWord;
330 const std::string& rightWord = *itNextWord;
331
332 // Store the left word in the staging string
333 if (idx_rel >= 2) {
334 _itLeftWords += " ";
335 }
336 _itLeftWords += leftWord;
337
338 // Check whether the juxtaposition of the two contiguous words matches
339 const bool lDoesMatch =
340 OPENTREP::doesMatch (_database, leftWord, rightWord);
341
342 if (lDoesMatch == true) {
343 // When the two words give a match, do nothing now, as at the next turn,
344 // the right word will become the left word and thus be added to the
345 // staging string
346
347 // DEBUG
348 /*
349 OPENTREP_LOG_DEBUG ("[" << idx << "][" << idx_rel
350 << "] Match - staging string: '"
351 << _itLeftWords << "'");
352 */
353
354 } else {
355 // DEBUG
356 /*
357 OPENTREP_LOG_DEBUG ("[" << idx << "][" << idx_rel
358 << "] No match - staging string: '"
359 << _itLeftWords << "'");
360 */
361
362 // When the two words give no match, add the content of the staging
363 // list to the list of slices. Then, empty the staging string.
364 _slices.push_back (_itLeftWords);
365 _itLeftWords = "";
366 idx_rel = 0;
367 }
368 }
369
370 // 2.
371 const std::string& leftWord = *itWord;
372 if (_itLeftWords.empty() == false) {
373 _itLeftWords += " ";
374 }
375 _itLeftWords += leftWord;
376 _slices.push_back (_itLeftWords);
377
378 // DEBUG
379 // OPENTREP_LOG_DEBUG ("Last staging string: '" << _itLeftWords << "'");
380 // OPENTREP_LOG_DEBUG ("Slices: " << *this);
381 }
382
383}
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition Logger.hpp:24
static int getDistance(const std::string &iSource, const std::string &iTarget)
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
Definition Utilities.cpp:19
unsigned short NbOfErrors_T
std::string createStringFromWordList(const WordList_T &iWordList, const NbOfWords_T iSplitIdx, const bool iFromBeginningFlag)
Definition Utilities.cpp:43
std::string TravelQuery_T
static unsigned int calculateEditDistance(const TravelQuery_T &iPhrase)
Helper function.
bool doesMatch(const Xapian::Database &iDatabase, const std::string &iWord1, const std::string &iWord2)
Helper function to query for a Xapian-based full text match.
const NbOfErrors_T K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT
std::vector< std::string > WordList_T
void push_back(const StringPartition &iStringPartition)
TravelQuery_T _queryString
void toStream(std::ostream &ioOut) const
QuerySlices(const Xapian::Database &, const TravelQuery_T &, const OTransliterator &)
const Xapian::Database & _database
StringPartitionList_T _slices
void fromStream(std::istream &ioIn)
std::string describe() const
std::string describeKey() const
size_t size() const