textwolf  0.2
Classes | Public Types | Public Member Functions | Static Public Member Functions | List of all members
textwolf::charset::UTF8 Class Reference

character set encoding UTF-8 More...

#include <charset_utf8.hpp>

Classes

class  CharLengthTab
 Table that maps the first UTF-8 character byte to the length of the character in bytes. More...
 

Public Types

enum  { MaxChar =0x7FFFFFFFU }
 Maximum character that can be represented by this encoding implementation. More...
 
enum  {
  B11111111 =0xFF, B01111111 =0x7F, B00111111 =0x3F, B00011111 =0x1F,
  B00001111 =0x0F, B00000111 =0x07, B00000011 =0x03, B00000001 =0x01,
  B00000000 =0x00, B10000000 =0x80, B11000000 =0xC0, B11100000 =0xE0,
  B11110000 =0xF0, B11111000 =0xF8, B11111100 =0xFC, B11111110 =0xFE,
  B11011111 =B11000000|B00011111, B11101111 =B11100000|B00001111, B11110111 =B11110000|B00000111, B11111011 =B11111000|B00000011,
  B11111101 =B11111100|B00000001
}
 

Public Member Functions

template<class Iterator >
UChar value (char *buf, unsigned int &bufpos, Iterator &itr) const
 See template<class Iterator>Interface::value(char*,unsigned int&,Iterator&) More...
 
template<class Buffer_ >
void print (UChar chr, Buffer_ &buf) const
 See template<class Buffer>Interface::print(UChar,Buffer&) More...
 

Static Public Member Functions

template<class Iterator >
static unsigned int size (char *buf, unsigned int &bufpos, Iterator &itr)
 Get the size of the current character in bytes (variable length encoding) More...
 
template<class Iterator >
static void skip (char *buf, unsigned int &bufpos, Iterator &itr)
 See template<class Iterator>Interface::skip(char*,unsigned int&,Iterator&) More...
 
template<class Iterator >
static signed char asciichar (char *buf, unsigned int &bufpos, Iterator &itr)
 See template<class Iterator>Interface::asciichar(char*,unsigned int&,Iterator&) More...
 
template<class Iterator >
static void fetchbytes (char *buf, unsigned int &bufpos, Iterator &itr)
 See template<class Iterator>Interface::fetch(char*,unsigned int&,Iterator&) More...
 
static bool is_equal (const UTF8 &, const UTF8 &)
 See template<class Buffer>Interface::is_equal( const Interface&, const Interface&) More...
 

Detailed Description

character set encoding UTF-8

Member Enumeration Documentation

anonymous enum

Maximum character that can be represented by this encoding implementation.

Enumerator
MaxChar 
anonymous enum
Enumerator
B11111111 
B01111111 
B00111111 
B00011111 
B00001111 
B00000111 
B00000011 
B00000001 
B00000000 
B10000000 
B11000000 
B11100000 
B11110000 
B11111000 
B11111100 
B11111110 
B11011111 
B11101111 
B11110111 
B11111011 
B11111101 

Member Function Documentation

template<class Iterator >
static signed char textwolf::charset::UTF8::asciichar ( char *  buf,
unsigned int &  bufpos,
Iterator &  itr 
)
inline static
template<class Iterator >
static void textwolf::charset::UTF8::fetchbytes ( char *  buf,
unsigned int &  bufpos,
Iterator &  itr 
)
inline static

See template<class Iterator>Interface::fetch(char*,unsigned int&,Iterator&)

static bool textwolf::charset::UTF8::is_equal ( const UTF8 & ,
const UTF8 &  
)
inline static
template<class Buffer_ >
void textwolf::charset::UTF8::print ( UChar  chr,
Buffer_ &  buf 
) const
inline

See template<class Buffer>Interface::print(UChar,Buffer&)

template<class Iterator >
static unsigned int textwolf::charset::UTF8::size ( char *  buf,
unsigned int &  bufpos,
Iterator &  itr 
)
inline static

Get the size of the current character in bytes (variable length encoding)

Parameters
[in] buf: buffer for the character data
[in,out] bufpos: position in 'buf'
[in,out] itr: iterator to skip
template<class Iterator >
static void textwolf::charset::UTF8::skip ( char *  buf,
unsigned int &  bufpos,
Iterator &  itr 
)
inline static
template<class Iterator >
UChar textwolf::charset::UTF8::value ( char *  buf,
unsigned int &  bufpos,
Iterator &  itr 
) const
inline

See template<class Iterator>Interface::value(char*,unsigned int&,Iterator&)


The documentation for this class was generated from the following file: