// Constructor member-initializer list: the three remover pointers start out
// as NULL. The list continues past this line (trailing comma); the
// constructor signature and body are not visible in this excerpt.
18 : _punctuationRemover (NULL), _quoteRemover (NULL), _accentRemover (NULL),
// Second constructor, building this object from another one named
// iTransliterator (presumably the copy constructor -- the signature is not
// visible in this excerpt). Pointers start out as NULL, then each of the four
// transliterators of the source object is cloned, so this object does not
// share the source object's pointers.
25 : _punctuationRemover (NULL), _quoteRemover (NULL), _accentRemover (NULL),
// Each clone is guarded by an assert only: when NDEBUG is defined the assert
// compiles to nothing and a NULL source pointer would be dereferenced.
27 assert (iTransliterator._punctuationRemover != NULL);
28 _punctuationRemover = iTransliterator._punctuationRemover->clone();
30 assert (iTransliterator._quoteRemover != NULL);
31 _quoteRemover = iTransliterator._quoteRemover->clone();
33 assert (iTransliterator._accentRemover != NULL);
34 _accentRemover = iTransliterator._accentRemover->clone();
36 assert (iTransliterator._tranlist != NULL);
37 _tranlist = iTransliterator._tranlist->clone();
// Sets up _punctuationRemover and registers it with ICU.
// The statement that actually creates the transliterator is only partly
// visible here (its trailing arguments UTRANS_FORWARD and lStatus are).
47 void OTransliterator::initPunctuationRemover() {
// ICU status code, passed to the creation call and tested below.
49 UErrorCode lStatus = U_ZERO_ERROR;
52 UTRANS_FORWARD, lStatus);
// Failure path: no object was returned or ICU reported an error. An error
// message is built; the sibling init*() methods throw
// UnicodeTransliteratorCreationException at this point -- presumably this one
// does too, but that line is not visible in this excerpt.
54 if (_punctuationRemover == NULL || U_FAILURE (lStatus)) {
55 std::ostringstream oStr;
56 oStr <<
"Unicode error: no Transliterator can be created for the '"
61 assert (_punctuationRemover != NULL);
// NOTE(review): ICU documents registerInstance() as adopting the object it is
// given, while finalise() also deletes _punctuationRemover -- confirm the
// ownership to rule out a double delete.
64 icu::Transliterator::registerInstance (_punctuationRemover);
// Sets up _quoteRemover from a rule set (lUnquotedRules, declared on a line
// not visible here) and registers it with ICU.
// Throws UnicodeTransliteratorCreationException when the transliterator
// cannot be created.
68 void OTransliterator::initQuoteRemover() {
// ICU status code, passed to the creation call and tested below.
70 UErrorCode lStatus = U_ZERO_ERROR;
// NOTE(review): the transliterator ID is "RBTUnaccent" although this is the
// quote remover -- looks like a copy-paste leftover; confirm it cannot clash
// with the ID of another registered transliterator. The assignment target of
// this call and the declaration of pError are not visible in this excerpt.
74 icu::Transliterator::createFromRules (
"RBTUnaccent", lUnquotedRules,
75 UTRANS_FORWARD, pError, lStatus);
// Failure path: no object was returned or ICU reported an error.
77 if (_quoteRemover == NULL || U_FAILURE (lStatus)) {
78 std::ostringstream oStr;
79 oStr <<
"Unicode error: no Transliterator can be created for the '"
82 throw UnicodeTransliteratorCreationException (oStr.str());
84 assert (_quoteRemover != NULL);
// NOTE(review): ICU documents registerInstance() as adopting the object it is
// given, while finalise() also deletes _quoteRemover -- confirm the ownership
// to rule out a double delete.
87 icu::Transliterator::registerInstance (_quoteRemover);
// Sets up _accentRemover and registers it with ICU. The statement that
// creates the transliterator is not visible in this excerpt.
// Throws UnicodeTransliteratorCreationException when the transliterator
// cannot be created.
91 void OTransliterator::initAccentRemover() {
// ICU status code, tested below (the call that fills it in is not shown).
93 UErrorCode lStatus = U_ZERO_ERROR;
// Failure path: no object was returned or ICU reported an error.
98 if (_accentRemover == NULL || U_FAILURE (lStatus)) {
99 std::ostringstream oStr;
100 oStr <<
"Unicode error: no Transliterator can be created for the '"
103 throw UnicodeTransliteratorCreationException (oStr.str());
105 assert (_accentRemover != NULL);
// NOTE(review): ICU documents registerInstance() as adopting the object it is
// given, while finalise() also deletes _accentRemover -- confirm the
// ownership to rule out a double delete.
108 icu::Transliterator::registerInstance (_accentRemover);
// Sets up _tranlist (the general-purpose transliterator used by
// transliterate()) and registers it with ICU. The creation call is only
// partly visible here (its trailing arguments UTRANS_FORWARD and lStatus are).
// Throws UnicodeTransliteratorCreationException when the transliterator
// cannot be created.
// The method name is spelled "Tranlisterator"; init() calls it under that
// exact name.
112 void OTransliterator::initTranlisterator() {
// ICU status code, passed to the creation call and tested below.
114 UErrorCode lStatus = U_ZERO_ERROR;
117 UTRANS_FORWARD, lStatus);
// Failure path: no object was returned or ICU reported an error.
119 if (_tranlist == NULL || U_FAILURE (lStatus)) {
120 std::ostringstream oStr;
121 oStr <<
"Unicode error: no Transliterator can be created for the '"
124 throw UnicodeTransliteratorCreationException (oStr.str());
126 assert (_tranlist != NULL);
// NOTE(review): ICU documents registerInstance() as adopting the object it is
// given, while finalise() also deletes _tranlist -- confirm the ownership to
// rule out a double delete.
129 icu::Transliterator::registerInstance (_tranlist);
// Runs the individual init*() steps. Only the first and last calls are
// visible in this excerpt; the two lines in between presumably initialise the
// quote and accent removers -- confirm against the full file.
133 void OTransliterator::init() {
134 initPunctuationRemover();
137 initTranlisterator();
// Releases the four transliterators. Each pointer is deleted and then reset
// to NULL, so a repeated call only deletes null pointers, which is a no-op.
// NOTE(review): the init*() methods hand these same pointers to
// icu::Transliterator::registerInstance(), which ICU documents as adopting
// its argument -- confirm that deleting them here cannot double-free.
141 void OTransliterator::finalise() {
142 delete _punctuationRemover; _punctuationRemover = NULL;
143 delete _quoteRemover; _quoteRemover = NULL;
144 delete _accentRemover; _accentRemover = NULL;
145 delete _tranlist; _tranlist = NULL;
// Bodies of what are presumably the two unpunctuate() overloads (signatures
// are not visible in this excerpt).
// First body: applies _punctuationRemover to ioString in place. The NULL
// check is an assert only, so it disappears when NDEBUG is defined.
151 assert (_punctuationRemover != NULL);
152 _punctuationRemover->transliterate (ioString);
// Second body: std::string variant. The input is converted to an
// icu::UnicodeString, processed on lines not shown here, and converted back
// with getUTF8().
// NOTE(review): UnicodeString(const char*) converts with ICU's default
// codepage -- confirm that matches the encoding of iString (if the input is
// UTF-8, UnicodeString::fromUTF8 states that explicitly).
158 icu::UnicodeString lString (iString.c_str());
// The const reference binds to the value returned by getUTF8(); the return
// statement then copies it out.
164 const std::string& lPunctuatedString =
getUTF8 (lString);
166 return lPunctuatedString;
// Bodies of what are presumably the two unquote() overloads (signatures are
// not visible in this excerpt).
// First body: applies _quoteRemover to ioString in place. The NULL check is
// an assert only, so it disappears when NDEBUG is defined.
172 assert (_quoteRemover != NULL);
173 _quoteRemover->transliterate (ioString);
// Second body: std::string variant. The input is converted to an
// icu::UnicodeString, processed on lines not shown here, and converted back
// with getUTF8().
// NOTE(review): UnicodeString(const char*) converts with ICU's default
// codepage -- confirm that matches the encoding of iString.
179 icu::UnicodeString lString (iString.c_str());
// The const reference binds to the value returned by getUTF8(); the return
// statement then copies it out.
185 const std::string& lUnquotedString =
getUTF8 (lString);
187 return lUnquotedString;
// Bodies of what are presumably the two unaccent() overloads (signatures are
// not visible in this excerpt).
// First body: applies _accentRemover to ioString in place. The NULL check is
// an assert only, so it disappears when NDEBUG is defined.
193 assert (_accentRemover != NULL);
194 _accentRemover->transliterate (ioString);
// Second body: std::string variant. The input is converted to an
// icu::UnicodeString, processed on lines not shown here, and converted back
// with getUTF8().
// NOTE(review): UnicodeString(const char*) converts with ICU's default
// codepage -- confirm that matches the encoding of iString.
200 icu::UnicodeString lString (iString.c_str());
// The const reference binds to the value returned by getUTF8(); the return
// statement then copies it out.
206 const std::string& lUnaccentuatedString =
getUTF8 (lString);
208 return lUnaccentuatedString;
// Bodies of what are presumably the two transliterate() overloads (signatures
// are not visible in this excerpt).
// First body: applies _tranlist to ioString in place. The NULL check is an
// assert only, so it disappears when NDEBUG is defined.
214 assert (_tranlist != NULL);
215 _tranlist->transliterate (ioString);
// Second body: std::string variant. The input is converted to an
// icu::UnicodeString, processed on lines not shown here, and converted back
// with getUTF8().
// NOTE(review): UnicodeString(const char*) converts with ICU's default
// codepage -- confirm that matches the encoding of iString.
221 icu::UnicodeString lString (iString.c_str());
// The const reference binds to the value returned by getUTF8(); the return
// statement then copies it out.
227 const std::string& lTransliteratedString =
getUTF8 (lString);
229 return lTransliteratedString;
// Body of what is presumably normalise() (signature not visible in this
// excerpt). The input is converted to an icu::UnicodeString; the processing
// steps between that conversion and the getUTF8() call are on lines not shown
// here.
// NOTE(review): UnicodeString(const char*) converts with ICU's default
// codepage -- confirm that matches the encoding of iString.
235 icu::UnicodeString lString (iString.c_str());
// The const reference binds to the value returned by getUTF8(); the return
// statement then copies it out.
244 const std::string& lNormalisedString =
getUTF8 (lString);
246 return lNormalisedString;
// NOTE(review): the lines below look like a cross-reference index rather than
// real definitions: a macro name with no visible replacement text, member
// function signatures without bodies, and rule-constant names without values.
// Confirm against the actual headers before relying on any of them.
#define OPENTREP_LOG_ERROR(iToBeLogged)
// std::string convenience methods of the transliterator (signatures only).
std::string unquote(const std::string &iString) const
std::string normalise(const std::string &iString) const
std::string transliterate(const std::string &iString) const
std::string unpunctuate(const std::string &iString) const
std::string unaccent(const std::string &iString) const
// Helper called by the std::string methods to convert the ICU string back
// into a std::string; it returns by value.
std::string getUTF8(const icu::UnicodeString &iString)
// Names of rule constants; their values are not visible in this excerpt.
const char * K_ICU_ACCENT_REMOVAL_RULE
const char * K_ICU_GENERIC_TRANSLITERATOR_RULE
const char * K_ICU_QUOTATION_REMOVAL_RULE
const char * K_ICU_PUNCTUATION_REMOVAL_RULE