// Preamble of utf8_codecvt.hpp: include guard, Boost includes, a compile-time size check,
// a compiler-compatibility macro and the parameter list of the primary template.
// NOTE(review): this listing carries the original file's line numbers inline and omits some
// original lines, so braces and parts of several bodies are not visible here.
// The static assertion requires std::mbstate_t to be at least 2 bytes wide.
8 #ifndef BOOST_NOWIDE_UTF8_CODECVT_HPP 9 #define BOOST_NOWIDE_UTF8_CODECVT_HPP 11 #include <boost/locale/utf.hpp> 12 #include <boost/cstdint.hpp> 13 #include <boost/static_assert.hpp> 22 BOOST_STATIC_ASSERT(
sizeof(std::mbstate_t)>=2);
// BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST is defined for MSVC with _MSC_VER below 1700.
// NOTE(review): presumably that runtime declares do_length with a const mbstate_t — confirm.
// The template parameter list that follows defaults CharSize to sizeof(CharType).
24 #if defined _MSC_VER && _MSC_VER < 1700 26 #define BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 29 template<
typename CharType,
int CharSize=sizeof(CharType)>
// Partial specialization for CharSize == 2. It derives from
// std::codecvt<CharType,char,std::mbstate_t> and its constructor forwards `refs`
// (default 0) to that base. `uchar` is an alias for CharType.
32 template<
typename CharType>
33 class utf8_codecvt<CharType,2> :
public std::codecvt<CharType,char,std::mbstate_t>
36 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType,char,std::mbstate_t>(refs)
41 typedef CharType uchar;
// do_unshift for the 2-byte specialization. It views the start of the mbstate_t as a
// boost::uint16_t named `state`. Both an `error` and an `ok` return are visible; the
// condition that selects between them is on lines not shown in this excerpt.
43 virtual std::codecvt_base::result do_unshift(std::mbstate_t &s,
char *from,
char * ,
char *&next)
const 45 boost::uint16_t &state = *
reinterpret_cast<boost::uint16_t *
>(&s);
// Trace of the current state value in hexadecimal.
// NOTE(review): presumably compiled only under a debug-only guard that is not visible here — confirm.
47 std::cout <<
"Entering unshift " << std::hex << state << std::dec << std::endl;
50 return std::codecvt_base::error;
52 return std::codecvt_base::ok;
// do_encoding for the 2-byte specialization, declared with an empty throw() specification.
// The body is not visible in this excerpt.
// NOTE(review): throw() is deprecated since C++11 and removed in C++20 — confirm the targeted standard.
54 virtual int do_encoding()
const throw()
// do_max_length for the 2-byte specialization, declared with an empty throw() specification.
// The body is not visible in this excerpt.
// NOTE(review): throw() is deprecated since C++11 and removed in C++20 — confirm the targeted standard.
58 virtual int do_max_length()
const throw()
// do_always_noconv for the 2-byte specialization, declared with an empty throw() specification.
// The body is not visible in this excerpt.
// NOTE(review): throw() is deprecated since C++11 and removed in C++20 — confirm the targeted standard.
62 virtual bool do_always_noconv()
const throw()
// do_length for the 2-byte specialization. The return type and most parameters are on
// lines not shown in this excerpt. BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST selects between
// two variants of the setup and of the returned value.
68 do_length( std::mbstate_t
69 #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
// Macro not defined: remember the starting input position and bind `state` as a
// mutable 16-bit view of the mbstate_t.
77 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 78 char const *save_from = from;
79 boost::uint16_t &state = *
reinterpret_cast<boost::uint16_t *
>(&std_state);
// Other visible setup (presumably the #else branch — confirm): remember the initial
// `max` and copy the 16-bit state by value through a pointer-to-const.
81 size_t save_max = max;
82 boost::uint16_t state = *
reinterpret_cast<boost::uint16_t
const *
>(&std_state);
// Decode one UTF-8 sequence per iteration while `max` is positive and input remains.
84 while(max > 0 && from < from_end){
// Input position before this sequence is decoded.
85 char const *prev_from = from;
86 boost::uint32_t ch=boost::locale::utf::utf_traits<char>::decode(from,from_end);
// The decoder reported a truncated or invalid sequence; the handling is on lines not shown.
87 if(ch==boost::locale::utf::incomplete || ch==boost::locale::utf::illegal) {
// Macro not defined: return the distance advanced in the input.
102 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 103 return from - save_from;
// Other visible return: difference between the initial and the current `max`.
105 return save_max - max;
// do_in for the 2-byte specialization: decodes UTF-8 from the `char` input range and
// derives 16-bit units from each decoded value. Some parameters and most statements that
// write to the output are on lines not shown in this excerpt.
110 virtual std::codecvt_base::result
111 do_in( std::mbstate_t &std_state,
113 char const *from_end,
114 char const *&from_next,
117 uchar *&to_next)
const 119 std::codecvt_base::result r=std::codecvt_base::ok;
// `state` is a mutable 16-bit view of the caller's mbstate_t.
126 boost::uint16_t &state = *
reinterpret_cast<boost::uint16_t *
>(&std_state);
// Continue while there is both output room and unread input.
127 while(to < to_end && from < from_end)
// Trace: current state plus remaining input and output counts.
// NOTE(review): presumably compiled only under a debug-only guard that is not visible here — confirm.
130 std::cout <<
"Entering IN--------------" << std::endl;
131 std::cout <<
"State " << std::hex << state <<std::endl;
132 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end -to << std::endl;
// Input position before decoding; its later use is on lines not shown.
134 char const *from_saved = from;
// NOTE(review): `uint32_t` is unqualified here while do_length uses boost::uint32_t — confirm it resolves.
136 uint32_t ch=boost::locale::utf::utf_traits<char>::decode(from,from_end);
// An invalid sequence yields `error`.
138 if(ch==boost::locale::utf::illegal) {
140 r=std::codecvt_base::error;
// A truncated sequence yields `partial`.
143 if(ch==boost::locale::utf::incomplete) {
145 r=std::codecvt_base::partial;
// Split `ch` at bit 10 into two halves and offset them by 0xD800 and 0xDC00.
// NOTE(review): presumably 0x10000 was subtracted from `ch` on a line not shown (do_out adds it back) — confirm.
163 boost::uint16_t vh = ch >> 10;
164 boost::uint16_t vl = ch & 0x3FF;
165 boost::uint16_t w1 = vh + 0xD800;
166 boost::uint16_t w2 = vl + 0xDC00;
// Downgrade `ok` to `partial` when input is left over or `state` is non-zero.
180 if(r == std::codecvt_base::ok && (from!=from_end || state!=0))
181 r = std::codecvt_base::partial;
// Trace of the result code (the switch header is on a line not shown), then the final
// state and remaining counts.
183 std::cout <<
"Returning ";
185 case std::codecvt_base::ok:
186 std::cout <<
"ok" << std::endl;
188 case std::codecvt_base::partial:
189 std::cout <<
"partial" << std::endl;
191 case std::codecvt_base::error:
192 std::cout <<
"error" << std::endl;
195 std::cout <<
"other" << std::endl;
198 std::cout <<
"State " << std::hex << state <<std::endl;
199 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end -to << std::endl;
// do_out for the 2-byte specialization: reads 16-bit units from the `uchar` input range,
// combines pairs into one code point where needed and encodes the result as UTF-8.
// Some parameters and the statements that update `state` are on lines not shown in this excerpt.
204 virtual std::codecvt_base::result
205 do_out( std::mbstate_t &std_state,
207 uchar
const *from_end,
208 uchar
const *&from_next,
211 char *&to_next)
const 213 std::codecvt_base::result r=std::codecvt_base::ok;
// `state` is a mutable 16-bit view of the caller's mbstate_t.
221 boost::uint16_t &state = *
reinterpret_cast<boost::uint16_t *
>(&std_state);
// Continue while there is both output room and unread input.
222 while(to < to_end && from < from_end)
// Trace: current state plus remaining input and output counts.
// NOTE(review): presumably compiled only under a debug-only guard that is not visible here — confirm.
225 std::cout <<
"Entering OUT --------------" << std::endl;
226 std::cout <<
"State " << std::hex << state <<std::endl;
227 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end -to << std::endl;
229 boost::uint32_t ch=0;
// Pair the unit held in `state` (w1) with the current input unit (w2).
// NOTE(review): presumably reached only when `state` is non-zero; that condition is not visible — confirm.
234 boost::uint16_t w1 = state;
235 boost::uint16_t w2 = *from;
// w2 lies in 0xDC00..0xDFFF: rebuild the code point from the two 10-bit halves and add 0x10000.
238 if(0xDC00 <= w2 && w2<=0xDFFF) {
239 boost::uint16_t vh = w1 - 0xD800;
240 boost::uint16_t vl = w2 - 0xDC00;
241 ch=((uint32_t(vh) << 10) | vl) + 0x10000;
// NOTE(review): presumably the else branch for a w2 outside that range — confirm.
245 r=std::codecvt_base::error;
// `ch` lies in 0xD800..0xDBFF; the handling is on lines not shown.
251 if(0xD800 <= ch && ch<=0xDBFF) {
// `ch` lies in 0xDC00..0xDFFF without a stored first half: `error`.
260 else if(0xDC00 <= ch && ch<=0xDFFF) {
264 r=std::codecvt_base::error;
// Values that are not valid code points yield `error`.
268 if(!boost::locale::utf::is_valid_codepoint(ch)) {
269 r=std::codecvt_base::error;
// Report `partial` when the output has fewer than `len` chars left.
272 int len = boost::locale::utf::utf_traits<char>::width(ch);
273 if(to_end - to < len) {
274 r=std::codecvt_base::partial;
// Write the UTF-8 encoding; `to` takes the value returned by encode().
277 to = boost::locale::utf::utf_traits<char>::encode(ch,to);
// Downgrade `ok` to `partial` when input is left over.
283 if(r==std::codecvt_base::ok && from!=from_end)
284 r = std::codecvt_base::partial;
// Trace of the result code (the switch header is on a line not shown), then the final
// state and remaining counts.
286 std::cout <<
"Returning ";
288 case std::codecvt_base::ok:
289 std::cout <<
"ok" << std::endl;
291 case std::codecvt_base::partial:
292 std::cout <<
"partial" << std::endl;
294 case std::codecvt_base::error:
295 std::cout <<
"error" << std::endl;
298 std::cout <<
"other" << std::endl;
301 std::cout <<
"State " << std::hex << state <<std::endl;
302 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end -to << std::endl;
// Partial specialization for CharSize == 4. It derives from
// std::codecvt<CharType,char,std::mbstate_t> and its constructor forwards `refs`
// (default 0) to that base. `uchar` is an alias for CharType.
309 template<
typename CharType>
310 class utf8_codecvt<CharType,4> :
public std::codecvt<CharType,char,std::mbstate_t>
313 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType,char,std::mbstate_t>(refs)
318 typedef CharType uchar;
// do_unshift for the 4-byte specialization. The mbstate_t parameter is unnamed, and the
// only visible return is `ok`. Any statement between the signature and that return is on
// lines not shown in this excerpt.
320 virtual std::codecvt_base::result do_unshift(std::mbstate_t &,
char *from,
char * ,
char *&next)
const 323 return std::codecvt_base::ok;
// do_encoding for the 4-byte specialization, declared with an empty throw() specification.
// The body is not visible in this excerpt.
// NOTE(review): throw() is deprecated since C++11 and removed in C++20 — confirm the targeted standard.
325 virtual int do_encoding()
const throw()
// do_max_length for the 4-byte specialization, declared with an empty throw() specification.
// The body is not visible in this excerpt.
// NOTE(review): throw() is deprecated since C++11 and removed in C++20 — confirm the targeted standard.
329 virtual int do_max_length()
const throw()
// do_always_noconv for the 4-byte specialization, declared with an empty throw() specification.
// The body is not visible in this excerpt.
// NOTE(review): throw() is deprecated since C++11 and removed in C++20 — confirm the targeted standard.
333 virtual bool do_always_noconv()
const throw()
// do_length for the 4-byte specialization. The return type and some parameters are on
// lines not shown in this excerpt. BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST selects between
// two variants of the bookkeeping and of the returned value.
339 do_length( std::mbstate_t
340 #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
345 char const *from_end,
// Macro not defined: remember the starting input position.
348 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 349 char const *start_from = from;
// Other visible setup (presumably the #else branch — confirm): remember the initial `max`.
351 size_t save_max = max;
// Decode one UTF-8 sequence per iteration while `max` is positive and input remains.
354 while(max > 0 && from < from_end){
// Input position before this sequence is decoded.
355 char const *save_from = from;
356 boost::uint32_t ch=boost::locale::utf::utf_traits<char>::decode(from,from_end);
// The decoder reported a truncated or invalid sequence; the handling is on lines not shown.
357 if(ch==boost::locale::utf::incomplete || ch==boost::locale::utf::illegal) {
// Macro not defined: return the distance advanced in the input.
363 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 364 return from - start_from;
// Other visible return: difference between the initial and the current `max`.
366 return save_max - max;
// do_in for the 4-byte specialization: decodes UTF-8 from the `char` input range one code
// point per iteration. The mbstate_t parameter is unnamed. Some parameters and the
// statement that stores the decoded value are on lines not shown in this excerpt.
371 virtual std::codecvt_base::result
372 do_in( std::mbstate_t &,
374 char const *from_end,
375 char const *&from_next,
378 uchar *&to_next)
const 380 std::codecvt_base::result r=std::codecvt_base::ok;
// Continue while there is both output room and unread input.
387 while(to < to_end && from < from_end)
// Trace output.
// NOTE(review): `state` is streamed below, but no declaration of it is visible in this
// function and the mbstate_t parameter is unnamed. If this trace is ever compiled it
// presumably fails to build — confirm against the full file.
390 std::cout <<
"Entering IN--------------" << std::endl;
391 std::cout <<
"State " << std::hex << state <<std::endl;
392 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end -to << std::endl;
// Input position before decoding; its later use is on lines not shown.
394 char const *from_saved = from;
// NOTE(review): `uint32_t` is unqualified here while do_length uses boost::uint32_t — confirm it resolves.
396 uint32_t ch=boost::locale::utf::utf_traits<char>::decode(from,from_end);
// An invalid sequence yields `error`.
398 if(ch==boost::locale::utf::illegal) {
399 r=std::codecvt_base::error;
// A truncated sequence yields `partial`.
403 if(ch==boost::locale::utf::incomplete) {
404 r=std::codecvt_base::partial;
// Downgrade `ok` to `partial` when input is left over.
412 if(r == std::codecvt_base::ok && from!=from_end)
413 r = std::codecvt_base::partial;
// Trace of the result code (the switch header is on a line not shown), then the state
// and remaining counts.
415 std::cout <<
"Returning ";
417 case std::codecvt_base::ok:
418 std::cout <<
"ok" << std::endl;
420 case std::codecvt_base::partial:
421 std::cout <<
"partial" << std::endl;
423 case std::codecvt_base::error:
424 std::cout <<
"error" << std::endl;
427 std::cout <<
"other" << std::endl;
430 std::cout <<
"State " << std::hex << state <<std::endl;
431 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end -to << std::endl;
// do_out for the 4-byte specialization: validates each input value as a code point and
// encodes it as UTF-8. Some parameters and the statement that loads `ch` from the input
// are on lines not shown in this excerpt.
436 virtual std::codecvt_base::result
437 do_out( std::mbstate_t &std_state,
439 uchar
const *from_end,
440 uchar
const *&from_next,
443 char *&to_next)
const 445 std::codecvt_base::result r=std::codecvt_base::ok;
// Continue while there is both output room and unread input.
446 while(to < to_end && from < from_end)
// Trace output.
// NOTE(review): `state` is streamed below, but only `std_state` is visibly declared in
// this function. If this trace is ever compiled it presumably fails to build — confirm
// against the full file.
449 std::cout <<
"Entering OUT --------------" << std::endl;
450 std::cout <<
"State " << std::hex << state <<std::endl;
451 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end -to << std::endl;
// NOTE(review): presumably `ch` is assigned from the input on a line not shown between
// this initialisation and the validity check — confirm.
453 boost::uint32_t ch=0;
// Values that are not valid code points yield `error`.
455 if(!boost::locale::utf::is_valid_codepoint(ch)) {
456 r=std::codecvt_base::error;
// Report `partial` when the output has fewer than `len` chars left.
459 int len = boost::locale::utf::utf_traits<char>::width(ch);
460 if(to_end - to < len) {
461 r=std::codecvt_base::partial;
// Write the UTF-8 encoding; `to` takes the value returned by encode().
464 to = boost::locale::utf::utf_traits<char>::encode(ch,to);
// Downgrade `ok` to `partial` when input is left over.
469 if(r==std::codecvt_base::ok && from!=from_end)
470 r = std::codecvt_base::partial;
// Trace of the result code (the switch header is on a line not shown), then the state
// and remaining counts.
472 std::cout <<
"Returning ";
474 case std::codecvt_base::ok:
475 std::cout <<
"ok" << std::endl;
477 case std::codecvt_base::partial:
478 std::cout <<
"partial" << std::endl;
480 case std::codecvt_base::error:
481 std::cout <<
"error" << std::endl;
484 std::cout <<
"other" << std::endl;
487 std::cout <<
"State " << std::hex << state <<std::endl;
488 std::cout <<
"Left in " << std::dec << from_end - from <<
" out " << to_end -to << std::endl;
Definition: utf8_codecvt.hpp:30