OpenTREP Logo  0.07.18
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
WordCombinationHolder.cpp
Go to the documentation of this file.
1// //////////////////////////////////////////////////////////////////////
2// Import section
3// //////////////////////////////////////////////////////////////////////
4// STL
5#include <cassert>
6#include <sstream>
7#include <set>
8// OpenTrep
14
15namespace OPENTREP {
16
17 // //////////////////////////////////////////////////////////////////////
18 WordCombinationHolder::WordCombinationHolder (const std::string& iString) {
19 init (iString);
20 }
21
22 // //////////////////////////////////////////////////////////////////////
25
26 // //////////////////////////////////////////////////////////////////////
27 void WordCombinationHolder::push_back (const std::string& iString) {
28 _list.push_back (iString);
29 }
30
31 // //////////////////////////////////////////////////////////////////////
33 return _list.size();
34 }
35
36 // //////////////////////////////////////////////////////////////////////
38 _list.clear();
39 }
40
41 // //////////////////////////////////////////////////////////////////////
43 std::ostringstream oStr;
44 oStr << "";
45 return oStr.str();
46 }
47
48 // //////////////////////////////////////////////////////////////////////
49 std::string WordCombinationHolder::describe() const {
50 std::ostringstream oStr;
51 oStr << describeKey();
52
53 //
54 oStr << "{";
55
56 short idx_sublist = 0;
57 for (StringList_T::const_iterator itWordCombination = _list.begin();
58 itWordCombination != _list.end(); ++itWordCombination, ++idx_sublist) {
59 //
60 if (idx_sublist != 0) {
61 oStr << ", ";
62 }
63
64 //
65 const StringSet& lStringSet = *itWordCombination;
66
67 //
68 oStr << lStringSet;
69 }
70
71 //
72 oStr << " }";
73
74 return oStr.str();
75 }
76
77 // //////////////////////////////////////////////////////////////////////
78 void WordCombinationHolder::toStream (std::ostream& ioOut) const {
79 ioOut << describe();
80 }
81
82 // //////////////////////////////////////////////////////////////////////
83 void WordCombinationHolder::fromStream (std::istream& ioIn) {
84 }
85
86 // //////////////////////////////////////////////////////////////////////
87 void WordCombinationHolder::init (const std::string& iPhrase) {
88 // Set of unique strings
89 typedef std::set<std::string> StringSet_T;
90 StringSet_T lStringSet;
91
92 // 1. Derive all the partitions of the initial (full) string
93 const StringPartition lStringPartitionHolder (iPhrase);
94 const StringPartition::StringPartition_T& lStringPartition =
95 lStringPartitionHolder._partition;
96
97 // 2.1. For every word combination, add it if not already in the
98 // list (STL set) of strings.
99 for (StringPartition::StringPartition_T::const_iterator itSet =
100 lStringPartition.begin(); itSet != lStringPartition.end(); ++itSet) {
101 const StringSet& itStringList = *itSet;
102
103 const StringList_T& lStringList = itStringList._set;
104 for (StringList_T::const_iterator itWordCombination = lStringList.begin();
105 itWordCombination != lStringList.end(); ++itWordCombination) {
106 const std::string& lWordCombination = *itWordCombination;
107
108 // Check whether the (remaining) word combination should be filtered out
109 //const bool isToBeAdded= Filter::shouldKeep (iPhrase, lWordCombination);
110 const bool isToBeAdded = true;
111 if (isToBeAdded == true) {
112 lStringSet.insert (lWordCombination);
113 }
114 }
115 }
116
117 // 2.2. Convert the STL set into a STL list
118 for (StringSet_T::const_iterator itWordCombination = lStringSet.begin();
119 itWordCombination != lStringSet.end(); ++itWordCombination) {
120 const std::string& lWordCombination = *itWordCombination;
121
122 // Add that word combination in the list for indexation by Xapian.
123 // Note that if that word combination is already present in the list,
124 // it will not be added a second time (thanks to the STL list design).
125 _list.push_back (lWordCombination);
126 }
127
128 // 3. Add the word combinations, made by removing all the possible groups
129 // of continuous words inbetween the two extreme words (from left- and
130 // right-hand sides).
131 // 3.0. Initialisation of the list of words, made of all the words of the
132 // given string.
133 WordList_T lWordList;
134 tokeniseStringIntoWordList (iPhrase, lWordList);
135 const short nbOfWords = lWordList.size();
136
137 // 3.1. If the string contains no more than two words, the job is finished.
138 if (nbOfWords <= 2) {
139 return;
140 }
141
142 // 3.2. Iteration on the number of words to remove in the middle of the
143 // string, from 1 to (nbOfWords - 2)
144 for (short mdl_string_len = 1; mdl_string_len != nbOfWords-1;
145 ++mdl_string_len) {
146
147 // 3.2. Iteration on all the middle words of the given string,
148 // from 1 to (nbOfWords - mdl_string_len)
149 for (short idx_word=1; idx_word != nbOfWords-mdl_string_len; ++idx_word) {
150 // 3.2.1. Copy the first idx_word word(s)
151 const std::string& lLeftHandString =
152 createStringFromWordList (lWordList, idx_word);
153
154 // 3.2.2. Copy the last (nbOfWords - (idx_word + mdl_string_len)) words
155 const std::string& lRightHandString =
156 createStringFromWordList (lWordList,
157 idx_word + mdl_string_len,
158 false);
159
160 // 3.2.3. Concatenate both sub-strings
161 std::ostringstream lConcatenatedStr;
162 lConcatenatedStr << lLeftHandString << " " << lRightHandString;
163 const std::string& lConcatenatedString = lConcatenatedStr.str();
164
165 // 3.2.4. Add the concatenated string into the list, if not filtered out
166 // const bool isToBeAdded =
167 // Filter::shouldKeep (iPhrase, lConcatenatedString);
168 const bool isToBeAdded = true;
169 if (isToBeAdded == true) {
170 _list.push_back (lConcatenatedString);
171 }
172 }
173 }
174 }
175
176}
std::list< Word_T > WordList_T
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
Definition Utilities.cpp:19
std::string createStringFromWordList(const WordList_T &iWordList, const NbOfWords_T iSplitIdx, const bool iFromBeginningFlag)
Definition Utilities.cpp:43
std::list< StringSet > StringPartition_T
Class holding a set of strings, e.g., {"rio", "de", "janeiro"}.
Definition StringSet.hpp:19
void toStream(std::ostream &) const