Generic codecvt facet that converts between various stateless encodings and UTF-16/UTF-32 using wchar_t, char32_t and char16_t. More...

#include <booster/booster/locale/generic_codecvt.h>

Detailed Description

template<typename CharType, typename CodecvtImpl, int CharSize = sizeof(CharType)>
class booster::locale::generic_codecvt< CharType, CodecvtImpl, CharSize >

Generic codecvt facet that converts between various stateless encodings and UTF-16/UTF-32 using wchar_t, char32_t and char16_t.

Implementations should derive from this class, passing themselves as CodecvtImpl, and provide the following members:

state_type - the type of a special object that can store intermediate cached data, for example an iconv_t descriptor
state_type initial_state(generic_codecvt_base::initial_convertion_state direction) const - member function that creates initial state
int max_encoding_length() const - the maximal length of the sequence that represents one Unicode code point; for example, it is 4 for UTF-8 and 1 for ISO-8859-1
utf::code_point to_unicode(state_type &state,char const *&begin,char const *end) - extract the first code point from the text in the range [begin,end). On success, begin is advanced to point to the next character sequence to be decoded. If the sequence is incomplete, utf::incomplete shall be returned; if the input sequence is invalid, utf::illegal shall be returned and begin shall remain unmodified
utf::code_point from_unicode(state_type &state,utf::code_point u,char *begin,char const *end) - convert a Unicode code point u into a character sequence at [begin,end). Returns the length of the sequence on success, utf::incomplete if there is not enough room to encode the code point, or utf::illegal if the conversion cannot be performed

For example, an implementation of codecvt for the latin1/ISO-8859-1 character set:

/*
 * Example codecvt facet for the latin1/ISO-8859-1 character set.
 *
 * Every latin1 byte maps to the Unicode code point with the same numeric
 * value (U+0000..U+00FF), so the conversion needs no state and every code
 * point is encoded as exactly one char.
 *
 * The class derives *publicly* from generic_codecvt (CRTP, passing itself as
 * CodecvtImpl): with the default private inheritance of `class` the object
 * would not be convertible to its facet base and could not be installed into
 * a std::locale.
 *
 * NOTE(review): this page documents booster::locale::generic_codecvt while the
 * example spells the namespace boost::locale - confirm which spelling applies
 * to your build.
 */
template<typename CharType>
class latin1_codecvt : public boost::locale::generic_codecvt<CharType,latin1_codecvt<CharType> >
{
public:

    /* Standard codecvt constructor, refs is forwarded to the base facet */
    latin1_codecvt(size_t refs = 0) : boost::locale::generic_codecvt<CharType,latin1_codecvt<CharType> >(refs)
    {
    }

    /* State is unused but required by generic_codecvt */
    struct state_type {};

    /* Creates the (empty) initial state; the conversion direction is irrelevant here */
    state_type initial_state(boost::locale::generic_codecvt_base::initial_convertion_state /*unused*/) const
    {
        return state_type();
    }

    /* One Unicode code point never takes more than one char in latin1 */
    int max_encoding_length() const
    {
        return 1;
    }

    /*
     * Decodes one code point from [begin,end) and advances begin past it.
     * Returns utf::incomplete when the range is empty.
     */
    boost::locale::utf::code_point to_unicode(state_type &,char const *&begin,char const *end) const
    {
        if(begin == end)
            return boost::locale::utf::incomplete;
        /* Go through unsigned char: where plain char is signed, bytes >= 0x80
           would otherwise be sign-extended into values outside 0x80..0xFF */
        return static_cast<unsigned char>(*begin++);
    }

    /*
     * Encodes code point u into [begin,end).
     * Returns 1 (the number of chars written) on success, utf::illegal when u
     * is not representable in latin1, and utf::incomplete when there is no
     * room in the output range.
     */
    boost::locale::utf::code_point from_unicode(state_type &,boost::locale::utf::code_point u,char *begin,char const *end) const
    {
        if(u >= 256)
            return boost::locale::utf::illegal;
        if(begin == end)
            return boost::locale::utf::incomplete;
        /* u < 256 is guaranteed above, so the narrowing cast is lossless */
        *begin = static_cast<char>(u);
        return 1;
    }
};

When external tools are used for encoding conversion, the state_type is useful for storing the objects used by the conversion. For example, an ICU UConverter can be saved in such a state for efficient use:

/*
 * Example sketch of a codecvt facet that delegates the conversion to ICU.
 *
 * The per-conversion state owns a private clone of the facet's converter
 * (converter_, declared in the part of the class elided from this example),
 * so the expensive converter object is created once per conversion rather
 * than once per character.
 *
 * The class derives publicly from generic_codecvt and passes *itself* as
 * CodecvtImpl (CRTP).
 */
template<typename CharType>
class icu_codecvt : public boost::locale::generic_codecvt<CharType,icu_codecvt<CharType> >
{
public:

    /* Standard codecvt constructor; name selects the encoding, refs is forwarded to the base facet */
    icu_codecvt(std::string const &name,size_t refs = 0) :
        boost::locale::generic_codecvt<CharType,icu_codecvt<CharType> >(refs)
    { /* ... body elided in this example ... */ }

    /* The state owns a cloned UConverter and releases it with ucnv_close */
    typedef std::unique_ptr<UConverter,void (*)(UConverter*)> state_type;

    /*
     * Creates the initial state: a clone of converter_ wrapped in a unique_ptr.
     * Returned by value - returning a reference to the local would dangle.
     */
    state_type initial_state(boost::locale::generic_codecvt_base::initial_convertion_state /*unused*/) const
    {
        UErrorCode err = U_ZERO_ERROR;
        /* ucnv_close is the unique_ptr deleter, not an argument of ucnv_safeClone */
        state_type ptr(ucnv_safeClone(converter_,0,0,&err),ucnv_close);
        return ptr;
    }

    /* Decodes the next code point from [begin,end) using the converter held in the state */
    boost::locale::utf::code_point to_unicode(state_type &ptr,char const *&begin,char const *end) const
    {
        UErrorCode err = U_ZERO_ERROR;
        boost::locale::utf::code_point cp = ucnv_getNextUChar(ptr.get(),&begin,end,&err);
        /* ... rest of the function elided in this example ... */
    }
    /* ... remaining members elided in this example ... */
};

The documentation for this class was generated from the following file:

booster/locale/generic_codecvt.h

Detailed Description

template<typename CharType, typename CodecvtImpl, int CharSize = sizeof(CharType)> class booster::locale::generic_codecvt< CharType, CodecvtImpl, CharSize >

template<typename CharType, typename CodecvtImpl, int CharSize = sizeof(CharType)>
class booster::locale::generic_codecvt< CharType, CodecvtImpl, CharSize >