OpenTREP Logo  0.07.18
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
OTransliterator.cpp
Go to the documentation of this file.
1// //////////////////////////////////////////////////////////////////////
2// Import section
3// //////////////////////////////////////////////////////////////////////
4// STL
5#include <cassert>
6#include <sstream>
7// OpenTrep
13
14namespace OPENTREP {
15
16 // //////////////////////////////////////////////////////////////////////
18 : _punctuationRemover (NULL), _quoteRemover (NULL), _accentRemover (NULL),
19 _tranlist (NULL) {
20 init();
21 }
22
23 // //////////////////////////////////////////////////////////////////////
25 : _punctuationRemover (NULL), _quoteRemover (NULL), _accentRemover (NULL),
26 _tranlist (NULL) {
27 assert (iTransliterator._punctuationRemover != NULL);
28 _punctuationRemover = iTransliterator._punctuationRemover->clone();
29
30 assert (iTransliterator._quoteRemover != NULL);
31 _quoteRemover = iTransliterator._quoteRemover->clone();
32
33 assert (iTransliterator._accentRemover != NULL);
34 _accentRemover = iTransliterator._accentRemover->clone();
35
36 assert (iTransliterator._tranlist != NULL);
37 _tranlist = iTransliterator._tranlist->clone();
38
39 }
40
41 // //////////////////////////////////////////////////////////////////////
43 finalise();
44 }
45
46 // //////////////////////////////////////////////////////////////////////
47 void OTransliterator::initPunctuationRemover() {
48 // Create a remover of punctuation
49 UErrorCode lStatus = U_ZERO_ERROR;
50 _punctuationRemover =
51 icu::Transliterator::createInstance (K_ICU_PUNCTUATION_REMOVAL_RULE,
52 UTRANS_FORWARD, lStatus);
53
54 if (_punctuationRemover == NULL || U_FAILURE (lStatus)) {
55 std::ostringstream oStr;
56 oStr << "Unicode error: no Transliterator can be created for the '"
57 << K_ICU_PUNCTUATION_REMOVAL_RULE << "' rule.";
58 OPENTREP_LOG_ERROR (oStr.str());
60 }
61 assert (_punctuationRemover != NULL);
62
63 // Register the Unicode Transliterator
64 icu::Transliterator::registerInstance (_punctuationRemover);
65 }
66
67 // //////////////////////////////////////////////////////////////////////
68 void OTransliterator::initQuoteRemover() {
69 // Create a remover of quotation
70 UErrorCode lStatus = U_ZERO_ERROR;
71 UParseError pError;
72 icu::UnicodeString lUnquotedRules (K_ICU_QUOTATION_REMOVAL_RULE);
73 _quoteRemover =
74 icu::Transliterator::createFromRules ("RBTUnaccent", lUnquotedRules,
75 UTRANS_FORWARD, pError, lStatus);
76
77 if (_quoteRemover == NULL || U_FAILURE (lStatus)) {
78 std::ostringstream oStr;
79 oStr << "Unicode error: no Transliterator can be created for the '"
80 << K_ICU_QUOTATION_REMOVAL_RULE << "' rule.";
81 OPENTREP_LOG_ERROR (oStr.str());
82 throw UnicodeTransliteratorCreationException (oStr.str());
83 }
84 assert (_quoteRemover != NULL);
85
86 // Register the Unicode Transliterator
87 icu::Transliterator::registerInstance (_quoteRemover);
88 }
89
90 // //////////////////////////////////////////////////////////////////////
91 void OTransliterator::initAccentRemover() {
92 // Create a remover of accents
93 UErrorCode lStatus = U_ZERO_ERROR;
94 _accentRemover =
95 icu::Transliterator::createInstance (K_ICU_ACCENT_REMOVAL_RULE, UTRANS_FORWARD,
96 lStatus);
97
98 if (_accentRemover == NULL || U_FAILURE (lStatus)) {
99 std::ostringstream oStr;
100 oStr << "Unicode error: no Transliterator can be created for the '"
101 << K_ICU_ACCENT_REMOVAL_RULE << "' rule.";
102 OPENTREP_LOG_ERROR (oStr.str());
103 throw UnicodeTransliteratorCreationException (oStr.str());
104 }
105 assert (_accentRemover != NULL);
106
107 // Register the Unicode Transliterator
108 icu::Transliterator::registerInstance (_accentRemover);
109 }
110
111 // //////////////////////////////////////////////////////////////////////
112 void OTransliterator::initTranlisterator() {
113 // Create a generic transliterator
114 UErrorCode lStatus = U_ZERO_ERROR;
115 _tranlist =
116 icu::Transliterator::createInstance (K_ICU_GENERIC_TRANSLITERATOR_RULE,
117 UTRANS_FORWARD, lStatus);
118
119 if (_tranlist == NULL || U_FAILURE (lStatus)) {
120 std::ostringstream oStr;
121 oStr << "Unicode error: no Transliterator can be created for the '"
122 << K_ICU_GENERIC_TRANSLITERATOR_RULE << "' rule.";
123 OPENTREP_LOG_ERROR (oStr.str());
124 throw UnicodeTransliteratorCreationException (oStr.str());
125 }
126 assert (_tranlist != NULL);
127
128 // Register the Unicode Transliterator
129 icu::Transliterator::registerInstance (_tranlist);
130 }
131
132 // //////////////////////////////////////////////////////////////////////
133 void OTransliterator::init() {
134 initPunctuationRemover();
135 initQuoteRemover();
136 initAccentRemover();
137 initTranlisterator();
138 }
139
140 // //////////////////////////////////////////////////////////////////////
141 void OTransliterator::finalise() {
142 delete _punctuationRemover; _punctuationRemover = NULL;
143 delete _quoteRemover; _quoteRemover = NULL;
144 delete _accentRemover; _accentRemover = NULL;
145 delete _tranlist; _tranlist = NULL;
146 }
147
148 // //////////////////////////////////////////////////////////////////////
149 void OTransliterator::unpunctuate (icu::UnicodeString& ioString) const {
150 // Apply the punctuation removal scheme
151 assert (_punctuationRemover != NULL);
152 _punctuationRemover->transliterate (ioString);
153 }
154
155 // //////////////////////////////////////////////////////////////////////
156 std::string OTransliterator::unpunctuate (const std::string& iString) const {
157 // Build a UnicodeString from the STL string
158 icu::UnicodeString lString (iString.c_str());
159
160 // Apply the punctuation removal scheme
161 unpunctuate (lString);
162
163 // Convert back from UnicodeString to UTF8-encoded STL string
164 const std::string& lPunctuatedString = getUTF8 (lString);
165
166 return lPunctuatedString;
167 }
168
169 // //////////////////////////////////////////////////////////////////////
170 void OTransliterator::unquote (icu::UnicodeString& ioString) const {
171 // Apply the quotation removal scheme
172 assert (_quoteRemover != NULL);
173 _quoteRemover->transliterate (ioString);
174 }
175
176 // //////////////////////////////////////////////////////////////////////
177 std::string OTransliterator::unquote (const std::string& iString) const {
178 // Build a UnicodeString from the STL string
179 icu::UnicodeString lString (iString.c_str());
180
181 // Apply the quotation removal scheme
182 unquote (lString);
183
184 // Convert back from UnicodeString to UTF8-encoded STL string
185 const std::string& lUnquotedString = getUTF8 (lString);
186
187 return lUnquotedString;
188 }
189
190 // //////////////////////////////////////////////////////////////////////
191 void OTransliterator::unaccent (icu::UnicodeString& ioString) const {
192 // Apply the accent removal scheme
193 assert (_accentRemover != NULL);
194 _accentRemover->transliterate (ioString);
195 }
196
197 // //////////////////////////////////////////////////////////////////////
198 std::string OTransliterator::unaccent (const std::string& iString) const {
199 // Build a UnicodeString from the STL string
200 icu::UnicodeString lString (iString.c_str());
201
202 // Apply the accent removal scheme
203 unaccent (lString);
204
205 // Convert back from UnicodeString to UTF8-encoded STL string
206 const std::string& lUnaccentuatedString = getUTF8 (lString);
207
208 return lUnaccentuatedString;
209 }
210
211 // //////////////////////////////////////////////////////////////////////
212 void OTransliterator::transliterate (icu::UnicodeString& ioString) const {
213 // Apply the transliteration scheme
214 assert (_tranlist != NULL);
215 _tranlist->transliterate (ioString);
216 }
217
218 // //////////////////////////////////////////////////////////////////////
219 std::string OTransliterator::transliterate (const std::string& iString) const {
220 // Build a UnicodeString from the STL string
221 icu::UnicodeString lString (iString.c_str());
222
223 // Apply the transliteration scheme
224 transliterate (lString);
225
226 // Convert back from UnicodeString to UTF8-encoded STL string
227 const std::string& lTransliteratedString = getUTF8 (lString);
228
229 return lTransliteratedString;
230 }
231
232 // //////////////////////////////////////////////////////////////////////
233 std::string OTransliterator::normalise (const std::string& iString) const {
234 // Build a UnicodeString from the STL string
235 icu::UnicodeString lString (iString.c_str());
236
237 // Apply the whole sery of transformators
238 unaccent (lString);
239 unquote (lString);
240 unpunctuate (lString);
241 transliterate (lString);
242
243 // Convert back from UnicodeString to UTF8-encoded STL string
244 const std::string& lNormalisedString = getUTF8 (lString);
245
246 return lNormalisedString;
247 }
248
249}
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition Logger.hpp:24
std::string unquote(const std::string &iString) const
std::string normalise(const std::string &iString) const
std::string transliterate(const std::string &iString) const
std::string unpunctuate(const std::string &iString) const
std::string unaccent(const std::string &iString) const
std::string getUTF8(const icu::UnicodeString &iString)
Definition icu_util.cpp:65
const char * K_ICU_ACCENT_REMOVAL_RULE
Definition BasConst.cpp:110
const char * K_ICU_GENERIC_TRANSLITERATOR_RULE
Definition BasConst.cpp:129
const char * K_ICU_QUOTATION_REMOVAL_RULE
Definition BasConst.cpp:116
const char * K_ICU_PUNCTUATION_REMOVAL_RULE
Definition BasConst.cpp:123