// Include guard, Boost dependencies, and a compile-time requirement that
// std::mbstate_t is at least 2 bytes wide.
// NOTE(review): the bare numbers embedded in each line look like line-number
// residue from a source-listing export rather than code — confirm against the
// real header. std::codecvt and std::cout are used further down, but no
// <locale> or <iostream> include is visible here — confirm they are included.
8 #ifndef BOOST_NOWIDE_UTF8_CODECVT_HPP     9 #define BOOST_NOWIDE_UTF8_CODECVT_HPP    11 #include <boost/locale/utf.hpp>    12 #include <boost/cstdint.hpp>    13 #include <boost/static_assert.hpp>    22 BOOST_STATIC_ASSERT(
sizeof(std::mbstate_t)>=2);
    // Defines BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST for MSVC with _MSC_VER < 1700
    // (the do_length overrides below test this macro), then declares the
    // primary template, whose CharSize parameter defaults to sizeof(CharType).
    24 #if defined _MSC_VER && _MSC_VER < 1700    26 #define BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST    29 template<
typename CharType,
int CharSize=sizeof(CharType)>
    // Partial specialization for CharSize == 2, derived from
    // std::codecvt<CharType,char,std::mbstate_t>.
    32 template<
typename CharType>
    33 class utf8_codecvt<CharType,2> : 
public std::codecvt<CharType,char,std::mbstate_t>
    // Constructor: forwards `refs` (default 0) to the std::codecvt base.
    36     utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType,char,std::mbstate_t>(refs)
    // Alias for the wide character type used in the do_in/do_out signatures.
    41     typedef CharType uchar;
    // do_unshift override for the 2-byte specialization.
    // Views the first 16 bits of the mbstate_t as a uint16_t state word.
    // Two return paths are visible: codecvt_base::error and codecvt_base::ok.
    // NOTE(review): the condition selecting the error return is not visible
    // in this listing — presumably a non-zero state; confirm.
    // NOTE(review): accessing mbstate_t through a uint16_t reference via
    // reinterpret_cast relies on the compiler tolerating this aliasing — confirm.
    43     virtual std::codecvt_base::result do_unshift(std::mbstate_t &s,
char *from,
char * ,
char *&next)
 const    45         boost::uint16_t &state = *
reinterpret_cast<boost::uint16_t *
>(&s);
    // Trace of the state word in hex.
    // NOTE(review): looks like debug output; presumably wrapped in a
    // preprocessor conditional on lines missing from this listing — confirm.
    47         std::cout << 
"Entering unshift " << std::hex << state << std::dec << std::endl;
    50             return std::codecvt_base::error;
    52         return std::codecvt_base::ok;
    // Overrides of the codecvt property queries do_encoding, do_max_length and
    // do_always_noconv. All three are const and declared non-throwing with the
    // dynamic exception specification throw().
    // NOTE(review): only the signatures are visible in this listing; the
    // returned values are on lines not shown.
    // NOTE(review): throw() is deprecated since C++11 and removed in C++20;
    // noexcept is the modern spelling — confirm the supported standard range.
    54     virtual int do_encoding() 
const throw()
    58     virtual int do_max_length() 
const throw()
    62     virtual bool do_always_noconv() 
const throw()
    // do_length override for the 2-byte specialization. The signature and body
    // vary with BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST.
    // NOTE(review): the #else/#endif lines and most of the parameter list are
    // not visible in this listing; the branch assignments below are inferred
    // from the visible #ifdef/#ifndef lines — confirm.
    68     do_length(  std::mbstate_t 
    69     #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
    // Macro not defined: remember the start pointer and bind `state` as a
    // mutable 16-bit view of std_state.
    77         #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST    78         char const *save_from = from;
    79         boost::uint16_t &state = *
reinterpret_cast<boost::uint16_t *
>(&std_state);
    // Presumably the macro-defined branch: remember the initial max and take
    // `state` as a by-value copy read through a pointer-to-const.
    81         size_t save_max = max;
    82         boost::uint16_t state = *
reinterpret_cast<boost::uint16_t 
const *
>(&std_state);
    // Decode one UTF-8 code point per iteration while max > 0 and input remains.
    84         while(max > 0 && from < from_end){
    85             char const *prev_from = from;
    86             boost::uint32_t ch=boost::locale::utf::utf_traits<char>::decode(from,from_end);
    // Incomplete or illegal input is special-cased; the handling itself is
    // not visible (prev_from suggests the read position is restored — confirm).
    87             if(ch==boost::locale::utf::incomplete || ch==boost::locale::utf::illegal) {
    // Result: distance advanced in the input when the macro is not defined,
    // otherwise save_max - max.
   102         #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST   103         return from - save_from;
   105         return save_max - max;
   // do_in override for the 2-byte specialization: decodes UTF-8 from the
   // char input range into uchar output, using a 16-bit state word stored in
   // the mbstate_t. The result starts as codecvt_base::ok.
   // NOTE(review): several parameters and many body lines (including where
   // from_next/to_next are assigned) are not visible in this listing.
   110     virtual std::codecvt_base::result 
   111     do_in(  std::mbstate_t &std_state,
   113             char const *from_end,
   114             char const *&from_next,
   117             uchar *&to_next)
 const   119         std::codecvt_base::result r=std::codecvt_base::ok;
   126         boost::uint16_t &state = *
reinterpret_cast<boost::uint16_t *
>(&std_state);
   // Loop while both output space and input remain.
   127         while(to < to_end && from < from_end)
   // Trace output (state in hex, remaining input/output counts).
   // NOTE(review): looks like debug output; presumably guarded by a
   // preprocessor conditional on lines missing here — confirm.
   130             std::cout << 
"Entering IN--------------" << std::endl;
   131             std::cout << 
"State " << std::hex << state <<std::endl;
   132             std::cout << 
"Left in " << std::dec << from_end - from << 
" out " << to_end -to << std::endl;
   // Remember the read position before decoding one code point.
   134             char const *from_saved = from;
   // NOTE(review): unqualified uint32_t here; boost::uint32_t is used elsewhere
   // in this file — confirm which one is intended to be in scope.
   136             uint32_t ch=boost::locale::utf::utf_traits<char>::decode(from,from_end);
   // Illegal sequence -> error; incomplete sequence -> partial.
   138             if(ch==boost::locale::utf::illegal) {
   140                 r=std::codecvt_base::error;
   143             if(ch==boost::locale::utf::incomplete) {
   145                 r=std::codecvt_base::partial;
   // Build two 16-bit words from ch: the bits above the low 10 offset by
   // 0xD800 and the low 10 bits offset by 0xDC00 (UTF-16 surrogate ranges).
   // NOTE(review): a standard surrogate split subtracts 0x10000 from ch first;
   // that step is not visible in this listing — confirm.
   163                 boost::uint16_t vh = ch >> 10;
   164                 boost::uint16_t vl = ch & 0x3FF;
   165                 boost::uint16_t w1 = vh + 0xD800;
   166                 boost::uint16_t w2 = vl + 0xDC00;
   // An ok result is downgraded to partial when input remains unconsumed or
   // the state word is non-zero.
   180         if(r == std::codecvt_base::ok && (from!=from_end || state!=0))
   181             r = std::codecvt_base::partial;
   // Trace of the result name, state and remaining counts (the switch line
   // itself is not visible in this listing).
   183         std::cout << 
"Returning ";
   185         case std::codecvt_base::ok:
   186             std::cout << 
"ok" << std::endl;
   188         case std::codecvt_base::partial:
   189             std::cout << 
"partial" << std::endl;
   191         case std::codecvt_base::error:
   192             std::cout << 
"error" << std::endl;
   195             std::cout << 
"other" << std::endl;
   198         std::cout << 
"State " << std::hex << state <<std::endl;
   199         std::cout << 
"Left in " << std::dec << from_end - from << 
" out " << to_end -to << std::endl;
   // do_out override for the 2-byte specialization: encodes uchar input as
   // UTF-8 into the char output range, combining surrogate pairs with the help
   // of a 16-bit state word stored in the mbstate_t. The result starts as ok.
   // NOTE(review): several parameters and many body lines (including where
   // from_next/to_next are assigned) are not visible in this listing.
   204     virtual std::codecvt_base::result 
   205     do_out( std::mbstate_t &std_state,
   207             uchar 
const *from_end,
   208             uchar 
const *&from_next,
   211             char *&to_next)
 const   213         std::codecvt_base::result r=std::codecvt_base::ok;
   221         boost::uint16_t &state = *
reinterpret_cast<boost::uint16_t *
>(&std_state);
   // Loop while both output space and input remain.
   222         while(to < to_end && from < from_end)
   // Trace output (state in hex, remaining input/output counts).
   // NOTE(review): looks like debug output; presumably guarded by a
   // preprocessor conditional on lines missing here — confirm.
   225         std::cout << 
"Entering OUT --------------" << std::endl;
   226         std::cout << 
"State " << std::hex << state <<std::endl;
   227         std::cout << 
"Left in " << std::dec << from_end - from << 
" out " << to_end -to << std::endl;
   229             boost::uint32_t ch=0;
   // Pair the saved state word (w1) with the current input unit (w2).
   // NOTE(review): presumably only reached when state != 0; the guarding
   // condition is not visible — confirm.
   234                 boost::uint16_t w1 = state;
   235                 boost::uint16_t w2 = *from; 
   // w2 in [0xDC00, 0xDFFF]: combine both words into one code point at or
   // above 0x10000. An error result is also visible, presumably for a w2
   // outside that range.
   238                 if(0xDC00 <= w2 && w2<=0xDFFF) {
   239                     boost::uint16_t vh = w1 - 0xD800;
   240                     boost::uint16_t vl = w2 - 0xDC00;
   241                     ch=((uint32_t(vh) << 10)  | vl) + 0x10000;
   245                     r=std::codecvt_base::error;
   // ch in [0xD800, 0xDBFF] is special-cased (handling not visible —
   // presumably it is saved in state); ch in [0xDC00, 0xDFFF] is followed by
   // an error result.
   251                 if(0xD800 <= ch && ch<=0xDBFF) {
   260                 else if(0xDC00 <= ch && ch<=0xDFFF) {
   264                     r=std::codecvt_base::error;
   // Invalid code points are an error.
   268             if(!boost::locale::utf::is_valid_codepoint(ch)) {
   269                 r=std::codecvt_base::error;
   // Not enough output room for the encoded width -> partial; otherwise encode.
   272             int len = boost::locale::utf::utf_traits<char>::width(ch);
   273             if(to_end - to < len) {
   274                 r=std::codecvt_base::partial;
   277             to = boost::locale::utf::utf_traits<char>::encode(ch,to);
   // An ok result is downgraded to partial when input remains unconsumed.
   283         if(r==std::codecvt_base::ok && from!=from_end)
   284             r = std::codecvt_base::partial;
   // Trace of the result name, state and remaining counts (the switch line
   // itself is not visible in this listing).
   286         std::cout << 
"Returning ";
   288         case std::codecvt_base::ok:
   289             std::cout << 
"ok" << std::endl;
   291         case std::codecvt_base::partial:
   292             std::cout << 
"partial" << std::endl;
   294         case std::codecvt_base::error:
   295             std::cout << 
"error" << std::endl;
   298             std::cout << 
"other" << std::endl;
   301         std::cout << 
"State " << std::hex << state <<std::endl;
   302         std::cout << 
"Left in " << std::dec << from_end - from << 
" out " << to_end -to << std::endl;
   // Partial specialization for CharSize == 4, derived from
   // std::codecvt<CharType,char,std::mbstate_t>.
   309 template<
typename CharType>
   310 class utf8_codecvt<CharType,4> : 
public std::codecvt<CharType,char,std::mbstate_t>
   // Constructor: forwards `refs` (default 0) to the std::codecvt base.
   313     utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType,char,std::mbstate_t>(refs)
   // Alias for the wide character type used in the do_in/do_out signatures.
   318     typedef CharType uchar;
   // do_unshift override for the 4-byte specialization. The mbstate_t
   // parameter is unnamed, so it is not used; the only visible return is
   // codecvt_base::ok.
   // NOTE(review): any assignment to `next` is on lines not shown — confirm.
   320     virtual std::codecvt_base::result do_unshift(std::mbstate_t &,
char *from,
char * ,
char *&next)
 const   323         return std::codecvt_base::ok;
   // Overrides of the codecvt property queries do_encoding, do_max_length and
   // do_always_noconv. All three are const and declared non-throwing with the
   // dynamic exception specification throw().
   // NOTE(review): only the signatures are visible in this listing; the
   // returned values are on lines not shown.
   // NOTE(review): throw() is deprecated since C++11 and removed in C++20;
   // noexcept is the modern spelling — confirm the supported standard range.
   325     virtual int do_encoding() 
const throw()
   329     virtual int do_max_length() 
const throw()
   333     virtual bool do_always_noconv() 
const throw()
   // do_length override for the 4-byte specialization. The signature and the
   // returned quantity vary with BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST.
   // NOTE(review): the #else/#endif lines and most of the parameter list are
   // not visible in this listing.
   339     do_length(  std::mbstate_t 
   340     #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
   345             char const *from_end,
   // Macro not defined: remember the start pointer. save_max presumably
   // belongs to the macro-defined branch — confirm.
   348         #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST    349         char const *start_from = from;
   351         size_t save_max = max;
   // Decode one UTF-8 code point per iteration while max > 0 and input remains.
   354         while(max > 0 && from < from_end){
   355             char const *save_from = from;
   356             boost::uint32_t ch=boost::locale::utf::utf_traits<char>::decode(from,from_end);
   // Incomplete or illegal input is special-cased; the handling itself is
   // not visible (save_from suggests the read position is restored — confirm).
   357             if(ch==boost::locale::utf::incomplete || ch==boost::locale::utf::illegal) {
   // Result: distance advanced in the input when the macro is not defined,
   // otherwise save_max - max.
   363         #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST    364         return from - start_from;
   366         return save_max - max;
   // do_in override for the 4-byte specialization: decodes UTF-8 from the char
   // input range into uchar output. The mbstate_t parameter is unnamed. The
   // result starts as codecvt_base::ok.
   // NOTE(review): several parameters and many body lines (including the store
   // to the output and the from_next/to_next assignments) are not visible.
   371     virtual std::codecvt_base::result 
   372     do_in(  std::mbstate_t &,
   374             char const *from_end,
   375             char const *&from_next,
   378             uchar *&to_next)
 const   380         std::codecvt_base::result r=std::codecvt_base::ok;
   // Loop while both output space and input remain.
   387         while(to < to_end && from < from_end)
   // Trace output.
   // NOTE(review): `state` is streamed here, but the mbstate_t parameter is
   // unnamed and no `state` declaration is visible in this function; if this
   // trace is compiled in it would not build unless `state` is declared on a
   // line missing from this listing — confirm.
   390             std::cout << 
"Entering IN--------------" << std::endl;
   391             std::cout << 
"State " << std::hex << state <<std::endl;
   392             std::cout << 
"Left in " << std::dec << from_end - from << 
" out " << to_end -to << std::endl;
   // Remember the read position before decoding one code point.
   394             char const *from_saved = from;
   // NOTE(review): unqualified uint32_t here; boost::uint32_t is used elsewhere
   // in this file — confirm which one is intended to be in scope.
   396             uint32_t ch=boost::locale::utf::utf_traits<char>::decode(from,from_end);
   // Illegal sequence -> error; incomplete sequence -> partial.
   398             if(ch==boost::locale::utf::illegal) {
   399                 r=std::codecvt_base::error;
   403             if(ch==boost::locale::utf::incomplete) {
   404                 r=std::codecvt_base::partial;
   // An ok result is downgraded to partial when input remains unconsumed.
   412         if(r == std::codecvt_base::ok && from!=from_end)
   413             r = std::codecvt_base::partial;
   // Trace of the result name, state and remaining counts (the switch line
   // itself is not visible; the same `state` concern applies).
   415         std::cout << 
"Returning ";
   417         case std::codecvt_base::ok:
   418             std::cout << 
"ok" << std::endl;
   420         case std::codecvt_base::partial:
   421             std::cout << 
"partial" << std::endl;
   423         case std::codecvt_base::error:
   424             std::cout << 
"error" << std::endl;
   427             std::cout << 
"other" << std::endl;
   430         std::cout << 
"State " << std::hex << state <<std::endl;
   431         std::cout << 
"Left in " << std::dec << from_end - from << 
" out " << to_end -to << std::endl;
   // do_out override for the 4-byte specialization: encodes uchar input as
   // UTF-8 into the char output range. The result starts as codecvt_base::ok.
   // NOTE(review): several parameters and many body lines (including the
   // from_next/to_next assignments) are not visible in this listing.
   436     virtual std::codecvt_base::result 
   437     do_out( std::mbstate_t &std_state,
   439             uchar 
const *from_end,
   440             uchar 
const *&from_next,
   443             char *&to_next)
 const   445         std::codecvt_base::result r=std::codecvt_base::ok;
   // Loop while both output space and input remain.
   446         while(to < to_end && from < from_end)
   // Trace output.
   // NOTE(review): `state` is streamed here, but only `std_state` is declared
   // in the visible lines; if this trace is compiled in it would not build
   // unless `state` is declared on a line missing from this listing — confirm.
   449         std::cout << 
"Entering OUT --------------" << std::endl;
   450         std::cout << 
"State " << std::hex << state <<std::endl;
   451         std::cout << 
"Left in " << std::dec << from_end - from << 
" out " << to_end -to << std::endl;
   // NOTE(review): ch is initialised to 0; the line that loads it from the
   // input is not visible in this listing — confirm.
   453             boost::uint32_t ch=0;
   // Invalid code points are an error.
   455             if(!boost::locale::utf::is_valid_codepoint(ch)) {
   456                 r=std::codecvt_base::error;
   // Not enough output room for the encoded width -> partial; otherwise encode.
   459             int len = boost::locale::utf::utf_traits<char>::width(ch);
   460             if(to_end - to < len) {
   461                 r=std::codecvt_base::partial;
   464             to = boost::locale::utf::utf_traits<char>::encode(ch,to);
   // An ok result is downgraded to partial when input remains unconsumed.
   469         if(r==std::codecvt_base::ok && from!=from_end)
   470             r = std::codecvt_base::partial;
   // Trace of the result name, state and remaining counts (the switch line
   // itself is not visible; the same `state` concern applies).
   472         std::cout << 
"Returning ";
   474         case std::codecvt_base::ok:
   475             std::cout << 
"ok" << std::endl;
   477         case std::codecvt_base::partial:
   478             std::cout << 
"partial" << std::endl;
   480         case std::codecvt_base::error:
   481             std::cout << 
"error" << std::endl;
   484             std::cout << 
"other" << std::endl;
   487         std::cout << 
"State " << std::hex << state <<std::endl;
   488         std::cout << 
"Left in " << std::dec << from_end - from << 
" out " << to_end -to << std::endl;
 
Definition: utf8_codecvt.hpp:30