11 #ifndef __TEXTWOLF_CHARSET_UTF8_HPP__
12 #define __TEXTWOLF_CHARSET_UTF8_HPP__
74 template <
class Iterator>
75 static inline unsigned int size(
char* buf,
unsigned int& bufpos, Iterator& itr)
84 return charLengthTab[ (
unsigned char)buf[ 0]];
88 template <
class Iterator>
89 static inline void skip(
char* buf,
unsigned int& bufpos, Iterator& itr)
91 unsigned int bufsize =
size( buf, bufpos, itr);
92 for (;bufpos < bufsize; ++bufpos)
99 template <
class Iterator>
100 static inline signed char asciichar(
char* buf,
unsigned int& bufpos, Iterator& itr)
108 return ((
unsigned char)(buf[0])>127)?-1:buf[0];
112 template <
class Iterator>
113 static inline void fetchbytes(
char* buf,
unsigned int& bufpos, Iterator& itr)
121 unsigned int bufsize =
size( buf, bufpos, itr);
122 for (;bufpos < bufsize; ++bufpos)
130 template <
class Iterator>
131 UChar value(
char* buf,
unsigned int& bufpos, Iterator& itr)
const
135 UChar res = (
unsigned char)buf[0];
141 res = ((
unsigned char)buf[0])&(
B00011111>>gg);
142 for (
int ii=0; ii<=gg; ii++)
144 unsigned char xx = (
unsigned char)buf[ii+1];
156 template <
class Buffer_>
162 buf.push_back( (
char)(
unsigned char)chr);
166 for (pp=1,sf=5; pp<5; pp++,sf+=5)
168 if (chr < (
unsigned int)((1<<6)<<sf))
break;
171 unsigned char HB = (
unsigned char)(
B11111111 << (8-rt));
172 unsigned char shf = (
unsigned char)(pp*6);
174 buf.push_back( (
char)(((
unsigned char)(chr >> shf) & (~HB >> 1)) | HB));
175 for (ii=1,shf-=6; ii<=pp; shf-=6,ii++)
Definition: charset_utf8.hpp:38
Definition: charset_utf8.hpp:45
Definition: charset_utf8.hpp:26
static void skip(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::skip(char*,unsigned int&,Iterator&)
Definition: charset_utf8.hpp:89
Definition: charset_utf8.hpp:46
static void fetchbytes(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::fetchbytes(char*,unsigned int&,Iterator&)
Definition: charset_utf8.hpp:113
Definition: charset_utf8.hpp:33
Definition: charset_utf8.hpp:48
Definition: charset_utf8.hpp:40
Table that maps the first UTF-8 character byte to the length of the character in bytes.
Definition: charset_utf8.hpp:54
uint32_t UChar
Unicode character type.
Definition: char.hpp:37
Definition: charset_utf8.hpp:47
Definition: charset_utf8.hpp:34
Definition: charset_utf8.hpp:28
Definition of Unicode characters.
UChar value(char *buf, unsigned int &bufpos, Iterator &itr) const
See template<class Iterator>Interface::value(char*,unsigned int&,Iterator&)
Definition: charset_utf8.hpp:131
Definition: charset_utf8.hpp:32
static bool is_equal(const UTF8 &, const UTF8 &)
See template<class Buffer>Interface::is_equal( const Interface&, const Interface&) ...
Definition: charset_utf8.hpp:182
Interface that describes what a character set encoding implementation has to define to be used as cha...
Definition: charset_utf8.hpp:42
Definition: charset_utf8.hpp:43
Definition: charset_utf8.hpp:29
void print(UChar chr, Buffer_ &buf) const
See template<class Buffer>Interface::print(UChar,Buffer&)
Definition: charset_utf8.hpp:157
Definition: charset_utf8.hpp:39
Definition of exceptions containing error codes thrown by textwolf.
Definition: charset_utf8.hpp:41
static unsigned int size(char *buf, unsigned int &bufpos, Iterator &itr)
Get the size of the current character in bytes (variable length encoding)
Definition: charset_utf8.hpp:75
Character map for fast typing of a character byte.
Definition: char.hpp:50
Definition: charset_utf8.hpp:49
Definition: charset_utf8.hpp:31
static signed char asciichar(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::asciichar(char*,unsigned int&,Iterator&)
Definition: charset_utf8.hpp:100
Definition: charset_utf8.hpp:35
CharLengthTab()
Definition: charset_utf8.hpp:56
Definition: charset_utf8.hpp:37
Definition: charset_utf8.hpp:30
character set encoding UTF-8
Definition: charset_utf8.hpp:23
Definition: charset_utf8.hpp:36