11 #ifndef __TEXTWOLF_CHARSET_UTF16_HPP__
12 #define __TEXTWOLF_CHARSET_UTF16_HPP__
31 template <
int encoding=ByteOrder::BE>
51 template <
class Iterator>
52 static inline void fetchbytes(
char* buf,
unsigned int& bufpos, Iterator& itr)
72 template <
class Iterator>
73 static inline unsigned int size(
char* buf,
unsigned int& bufpos, Iterator& itr)
77 UChar rt = (
unsigned char)buf[ MSB];
78 if ((rt - 0xD8) > 0x03)
89 template <
class Iterator>
90 static inline void skip(
char* buf,
unsigned int& bufpos, Iterator& itr)
92 unsigned int bufsize =
size( buf, bufpos, itr);
93 for (;bufpos < bufsize; ++bufpos)
100 template <
class Iterator>
101 static inline signed char asciichar(
char* buf,
unsigned int& bufpos, Iterator& itr)
104 return (ch > 127)?-1:(char)ch;
108 template <
class Iterator>
111 unsigned int bufsize =
size( buf, bufpos, itr);
113 UChar rt = (
unsigned char)buf[ MSB];
114 rt = (rt << 8) + (
unsigned char)buf[ LSB];
119 while (bufpos < bufsize)
127 unsigned short lo = (
unsigned char)buf[ 2+MSB];
128 if ((lo - 0xDC) > 0x03)
return 0xFFFF;
129 lo = (lo << 8) + (
unsigned char)buf[ 2+LSB];
130 return rt + lo - 0xDC00 + 0x010000;
135 template <
class Iterator>
136 inline UChar value(
char* buf,
unsigned int& bufpos, Iterator& itr)
const
142 template <
class Buffer_>
147 if ((ch - 0xD800) < 0x400)
153 buf.push_back( (
char)(
unsigned char)((ch >> Print1shift) & 0xFF));
154 buf.push_back( (
char)(
unsigned char)((ch >> Print2shift) & 0xFF));
158 else if (ch <= 0x10FFFF)
161 unsigned short hi = (
unsigned short )((ch / 0x400) + 0xD800);
162 unsigned short lo = (
unsigned short )((ch % 0x400) + 0xDC00);
163 buf.push_back( (
char)(
unsigned char)((hi >> Print1shift) & 0xFF));
164 buf.push_back( (
char)(
unsigned char)((hi >> Print2shift) & 0xFF));
165 buf.push_back( (
char)(
unsigned char)((lo >> Print1shift) & 0xFF));
166 buf.push_back( (
char)(
unsigned char)((lo >> Print2shift) & 0xFF));
174 buf.push_back( (
char)(
unsigned char)(((
UChar)*cc >> Print1shift) & 0xFF));
175 buf.push_back( (
char)(
unsigned char)(((
UChar)*cc >> Print2shift) & 0xFF));
UTF-16 little endian character set encoding.
Definition: charset_utf16.hpp:189
static void fetchbytes(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::fetchbytes(char*,unsigned int&,Iterator&)
Definition: charset_utf16.hpp:52
static UChar value_impl(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::value(char*,unsigned int&,Iterator&)
Definition: charset_utf16.hpp:109
static bool encode(UChar chr, char *bufptr, std::size_t bufsize)
Write the character 'chr' in encoded form as a nul-terminated string to a buffer.
Definition: charset_interface.hpp:29
Definition: charset_interface.hpp:119
static void skip(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::skip(char*,unsigned int&,Iterator&)
Definition: charset_utf16.hpp:90
Definition: charset_interface.hpp:120
uint32_t UChar
Unicode character type.
Definition: char.hpp:37
Character set UTF16 (little/big endian)
Definition: charset_utf16.hpp:32
Definition of Unicode characters.
static unsigned int size(char *buf, unsigned int &bufpos, Iterator &itr)
Get the size of the current character in bytes (variable length encoding)
Definition: charset_utf16.hpp:73
Interface that describes what a character set encoding implementation has to define to be used as a character set encoding.
static bool is_equal(const UTF16 &, const UTF16 &)
See Interface::is_equal( const Interface&, const Interface&) ...
Definition: charset_utf16.hpp:181
UChar value(char *buf, unsigned int &bufpos, Iterator &itr) const
Definition: charset_utf16.hpp:136
Definition of exceptions containing error codes thrown by textwolf.
Definition: charset_utf16.hpp:46
void print(UChar ch, Buffer_ &buf) const
See template<class Buffer>Interface::print(UChar,Buffer&)
Definition: charset_utf16.hpp:143
UTF-16 big endian character set encoding.
Definition: charset_utf16.hpp:192
static signed char asciichar(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::asciichar(char*,unsigned int&,Iterator&)
Definition: charset_utf16.hpp:101