CppCMS
|
This namespace holds various functions for dealing with encoding. More...
Functions | |
bool CPPCMS_API | valid (std::locale const &loc, char const *begin, char const *end, size_t &count) |
bool CPPCMS_API | valid_utf8 (char const *begin, char const *end, size_t &count) |
bool CPPCMS_API | valid (char const *encoding, char const *begin, char const *end, size_t &count) |
bool CPPCMS_API | valid (std::string const &encoding, char const *begin, char const *end, size_t &count) |
bool CPPCMS_API | is_ascii_compatible (std::string const &encoding) |
bool CPPCMS_API | validate_or_filter (std::string const &encoding, char const *begin, char const *end, std::string &output, char replace=0) |
std::string CPPCMS_API | to_utf8 (std::locale const &loc, char const *begin, char const *end) |
std::string CPPCMS_API | to_utf8 (char const *encoding, char const *begin, char const *end) |
std::string CPPCMS_API | to_utf8 (std::locale const &loc, std::string const &str) |
std::string CPPCMS_API | to_utf8 (char const *encoding, std::string const &str) |
std::string CPPCMS_API | from_utf8 (std::locale const &loc, char const *begin, char const *end) |
std::string CPPCMS_API | from_utf8 (char const *encoding, char const *begin, char const *end) |
std::string CPPCMS_API | from_utf8 (std::locale const &loc, std::string const &str) |
std::string CPPCMS_API | from_utf8 (char const *encoding, std::string const &str) |
This namespace holds various functions for dealing with encoding.
std::string CPPCMS_API cppcms::encoding::from_utf8 | ( | std::locale const & | loc, |
char const * | begin, | ||
char const * | end | ||
) |
Convert the UTF-8 string in range [begin,end) to the local 8-bit encoding according to locale loc. If non-convertible characters are found, the conversion is aborted and only the successfully converted part is returned.
std::string CPPCMS_API cppcms::encoding::from_utf8 | ( | char const * | encoding, |
char const * | begin, | ||
char const * | end | ||
) |
Convert the UTF-8 string in range [begin,end) to the local 8-bit encoding given by encoding. If non-convertible characters are found, the conversion is aborted and only the successfully converted part is returned.
std::string CPPCMS_API cppcms::encoding::from_utf8 | ( | std::locale const & | loc, |
std::string const & | str | ||
) |
Convert the UTF-8 string str to the local 8-bit encoding according to locale loc. If non-convertible characters are found, the conversion is aborted and only the successfully converted part is returned.
std::string CPPCMS_API cppcms::encoding::from_utf8 | ( | char const * | encoding, |
std::string const & | str | ||
) |
Convert the UTF-8 string str to the local 8-bit encoding given by encoding. If non-convertible characters are found, the conversion is aborted and only the successfully converted part is returned.
bool CPPCMS_API cppcms::encoding::is_ascii_compatible | ( | std::string const & | encoding | ) |
Returns true if ASCII is a strict subset of the encoding, i.e. all non-ASCII characters are encoded using bytes >= 0x80.
This is very important for XML or HTML parsing, to prevent invalid detection of HTML-specific characters. So filters that work with encodings that are not ASCII compatible should convert the text to UTF-8 and then convert it back.
The ASCII-compatible encodings are UTF-8 and the ISO-8859-*, windows-12* and koi encoding families.
std::string CPPCMS_API cppcms::encoding::to_utf8 | ( | std::locale const & | loc, |
char const * | begin, | ||
char const * | end | ||
) |
Convert the string in range [begin,end) from the local 8-bit encoding according to locale loc to UTF-8. If illegal characters are found, the conversion is aborted and only the successfully converted part is returned.
std::string CPPCMS_API cppcms::encoding::to_utf8 | ( | char const * | encoding, |
char const * | begin, | ||
char const * | end | ||
) |
Convert the string in range [begin,end) from the local 8-bit encoding given by encoding to UTF-8. If illegal characters are found, the conversion is aborted and only the successfully converted part is returned.
std::string CPPCMS_API cppcms::encoding::to_utf8 | ( | std::locale const & | loc, |
std::string const & | str | ||
) |
Convert the string str from the local 8-bit encoding according to locale loc to UTF-8. If illegal characters are found, the conversion is aborted and only the successfully converted part is returned.
std::string CPPCMS_API cppcms::encoding::to_utf8 | ( | char const * | encoding, |
std::string const & | str | ||
) |
Convert the string str from the local 8-bit encoding given by encoding to UTF-8. If illegal characters are found, the conversion is aborted and only the successfully converted part is returned.
bool CPPCMS_API cppcms::encoding::valid | ( | std::locale const & | loc, |
char const * | begin, | ||
char const * | end, | ||
size_t & | count | ||
) |
Note: all these functions assume that control characters that are invalid in HTML are illegal. For example, NUL is a legal UTF-8 code, but it is illegal in terms of HTML validity; thus, valid_utf8 would return false. Check if the string in range [begin,end) is valid in the locale loc and does not include HTML-illegal characters. The number of codepoints is stored in count.
bool CPPCMS_API cppcms::encoding::valid | ( | char const * | encoding, |
char const * | begin, | ||
char const * | end, | ||
size_t & | count | ||
) |
Check if the string in range [begin,end) is valid in the encoding given by encoding and does not include HTML-illegal characters. The number of codepoints is stored in count.
bool CPPCMS_API cppcms::encoding::valid | ( | std::string const & | encoding, |
char const * | begin, | ||
char const * | end, | ||
size_t & | count | ||
) |
Check if the string in range [begin,end) is valid in the encoding given by encoding and does not include HTML-illegal characters. The number of codepoints is stored in count.
bool CPPCMS_API cppcms::encoding::valid_utf8 | ( | char const * | begin, |
char const * | end, | ||
size_t & | count | ||
) |
Check if the string in range [begin,end) is valid UTF-8 and does not include HTML-illegal characters. The number of codepoints is stored in count.
bool CPPCMS_API cppcms::encoding::validate_or_filter | ( | std::string const & | encoding, |
char const * | begin, | ||
char const * | end, | ||
std::string & | output, | ||
char | replace = 0 |
||
) |
Check whether the text in range [begin,end) is valid for the encoding given by encoding. If it is valid, returns true; otherwise removes all invalid characters (if replace == 0) or replaces them with replace, saves the result to output and returns false.