OpenTREP Logo  0.07.18
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
UTF8Handler.cpp
Go to the documentation of this file.
1// //////////////////////////////////////////////////////////////////////
2// Import section
3// //////////////////////////////////////////////////////////////////////
4// STL
5#include <cassert>
6#include <sstream>
7#include <string>
8// OpenTrep
10
11namespace OPENTREP {
12
// Unsigned integer type holding the UTF-8 decoding offsets.
// NOTE(review): despite its name, `unsigned long` is only guaranteed to be
// at least 32 bits wide; it is 64 bits wide on LP64 platforms.
typedef unsigned long u_int32_t;

// //////////////////////////////////////////////////////////////////////
// Constants indexed by the number of trailing bytes of a UTF-8 sequence.
// Each entry is the sum of the marker bits (lead-byte prefix and the 0x80
// prefix of every trailing byte) once all the bytes of the sequence have
// been accumulated six bits at a time; subtracting it from that accumulated
// value leaves the bare code point.
static const u_int32_t offsetsFromUTF8[6] = {
  0x00000000UL,  // 0 trailing bytes: nothing to remove
  0x00003080UL,  // 1: (0xC0 << 6) + 0x80
  0x000E2080UL,  // 2: (0xE0 << 12) + (0x80 << 6) + 0x80
  0x03C82080UL,  // 3: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
  0xFA082080UL,  // 4: (0xF8 << 24) + (0x80 << 18) + ... + 0x80
  0x82082080UL   // 5: (0xFC << 30) + (0x80 << 24) + ... + 0x80, modulo 2^32
};
23
24 // //////////////////////////////////////////////////////////////////////
// Number of trailing bytes announced by each possible lead byte, indexed by
// the (unsigned) byte value. Bytes 0x00-0xBF (ASCII and continuation bytes)
// map to 0, i.e. they are handled as stand-alone one-byte characters.
static const char trailingBytesForUTF8[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x00 - 0x0F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x10 - 0x1F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x20 - 0x2F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x30 - 0x3F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x40 - 0x4F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x50 - 0x5F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x60 - 0x6F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x70 - 0x7F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x80 - 0x8F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x90 - 0x9F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xA0 - 0xAF
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xB0 - 0xBF
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0xC0 - 0xCF
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0xD0 - 0xDF
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  // 0xE0 - 0xEF
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5   // 0xF0 - 0xFF
};
35
36 // //////////////////////////////////////////////////////////////////////
37 std::wstring UTF8Handler::toWideString (const std::string& iSrc) {
38 std::basic_ostringstream<wchar_t> oStr;
39
40 // Length of the source string
41 const size_t lStringSize = iSrc.size();
42
43 // Transform the source string in a regular C-string (char*)
44 const char* src = iSrc.c_str();
45
46 //
47 typedef unsigned char uchar_t;
48
49 size_t idx = 0;
50 while (idx != lStringSize) {
51
52 uchar_t lCurrentChar = static_cast<uchar_t> (src[idx]);
53
62 if (lCurrentChar == '\0') {
63 break;
64 }
65
66 const int nb = trailingBytesForUTF8[lCurrentChar];
67
68 wchar_t tmpChar = 0;
69 switch (nb) {
70 // These fall through deliberately
71 case 3: {
72 lCurrentChar = static_cast<uchar_t> (src[idx]); ++idx;
73 tmpChar += lCurrentChar; tmpChar <<= 6;
74 }
75 case 2: {
76 lCurrentChar = static_cast<uchar_t> (src[idx]); ++idx;
77 tmpChar += lCurrentChar; tmpChar <<= 6;
78 }
79 case 1: {
80 lCurrentChar = static_cast<uchar_t> (src[idx]); ++idx;
81 tmpChar += lCurrentChar; tmpChar <<= 6;
82 }
83 case 0: {
84 lCurrentChar = static_cast<uchar_t> (src[idx]); ++idx;
85 tmpChar += lCurrentChar;
86 }
87 }
88
89 tmpChar -= offsetsFromUTF8[nb];
90 oStr << tmpChar;
91 }
92
93 oStr << '\0';
94 return oStr.str();
95 }
96
97 // //////////////////////////////////////////////////////////////////////
98 std::string UTF8Handler::toSimpleString (const std::wstring& iStr) {
99 std::ostringstream oStr;
100
101 const wchar_t* src = iStr.c_str();
102 size_t idx = 0;
103 size_t i = 0;
104
105 while (src[i] != 0) {
106 wchar_t ch = src[i];
107
108 if (ch < 0x80) {
109 const char tmpChar = static_cast<const char> (ch);
110 oStr << tmpChar; ++idx;
111
112 } else if (ch < 0x800) {
113 char tmpChar = static_cast<const char> ((ch >> 6) | 0xC0);
114 oStr << tmpChar; ++idx;
115
116 tmpChar = static_cast<const char> ((ch & 0x3F) | 0x80);
117 oStr << tmpChar; ++idx;
118
119 } else if (ch < 0x10000) {
120 char tmpChar = static_cast<const char> ((ch>>12) | 0xE0);
121 oStr << tmpChar; ++idx;
122
123 tmpChar = static_cast<const char> (((ch>>6) & 0x3F) | 0x80);
124 oStr << tmpChar; ++idx;
125
126 tmpChar = static_cast<const char> ((ch & 0x3F) | 0x80);
127 oStr << tmpChar; ++idx;
128
129 } else if (ch < 0x110000) {
130 char tmpChar = static_cast<const char> ((ch>>18) | 0xF0);
131 oStr << tmpChar; ++idx;
132
133 tmpChar = static_cast<const char> (((ch>>12) & 0x3F) | 0x80);
134 oStr << tmpChar; ++idx;
135
136 tmpChar = static_cast<const char> (((ch>>6) & 0x3F) | 0x80);
137 oStr << tmpChar; ++idx;
138
139 tmpChar = static_cast<const char> ((ch & 0x3F) | 0x80);
140 oStr << tmpChar; ++idx;
141 }
142 i++;
143 }
144
145 oStr << '\0';
146
147 return oStr.str();
148 }
149
150 // //////////////////////////////////////////////////////////////////////
151 std::string UTF8Handler::displayCharString (const char* iString) {
152 std::ostringstream oStr;
153
154 bool hasReachedEnd = false;
155 for (size_t idx = 0; hasReachedEnd == false; ++idx) {
156 if (idx != 0) {
157 oStr << "; ";
158 }
159 const unsigned char lChar = iString[idx];
160 // const wchar_t lChar = iString[idx];
161 if (lChar == '\0') {
162 hasReachedEnd = true;
163 }
164 oStr << "[" << idx << "]: " << std::hex << lChar;
165 }
166 oStr << std::endl;
167
168 return oStr.str();
169 }
170
171 // //////////////////////////////////////////////////////////////////////
172 std::string UTF8Handler::displaySTLWString (const std::wstring& iString) {
173 std::ostringstream oStr;
174
175 size_t idx = 0;
176 for (std::wstring::const_iterator itChar = iString.begin();
177 itChar != iString.end(); ++itChar, ++idx) {
178 if (idx != 0) {
179 oStr << "; ";
180 }
181 const wchar_t lChar = *itChar;
182 oStr << "[" << idx << "]: " << std::hex << lChar;
183 }
184 oStr << std::endl;
185
186 return oStr.str();
187 }
188
189}
190
static std::string displayCharString(const char *iString)
static std::wstring toWideString(const std::string &iSrc)
static std::string displaySTLWString(const std::wstring &iString)
static std::string toSimpleString(const std::wstring &iStr)
static const u_int32_t offsetsFromUTF8[6]
static const char trailingBytesForUTF8[256]
long unsigned int u_int32_t