textwolf  0.2
charset_ucs.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014 Patrick P. Frey
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7  */
10 
11 #ifndef __TEXTWOLF_CHARSET_UCS_HPP__
12 #define __TEXTWOLF_CHARSET_UCS_HPP__
13 #include "textwolf/char.hpp"
15 #include "textwolf/exception.hpp"
16 #include <cstddef>
17 
18 namespace textwolf {
19 namespace charset {
20 
27 template <int byteorder=ByteOrder::BE>
28 struct UCS2
29 {
30  enum
31  {
32  LSB=(byteorder==ByteOrder::BE), //< least significant byte index (0 or 1)
33  MSB=(byteorder==ByteOrder::LE), //< most significant byte index (0 or 1)
34  Print1shift=(byteorder==ByteOrder::BE)?8:0, //< value to shift with to get the 1st character to print
35  Print2shift=(byteorder==ByteOrder::LE)?8:0, //< value to shift with to get the 2nd character to print
36  MaxChar=0xFFFFU
37  };
38 
40  template <class Iterator>
41  static inline void skip( char*, unsigned int& bufpos, Iterator& itr)
42  {
43  for (;bufpos < 2; ++bufpos)
44  {
45  ++itr;
46  }
47  }
48 
50  template <class Iterator>
51  static inline void fetchbytes( char* buf, unsigned int& bufpos, Iterator& itr)
52  {
53  if (bufpos<2)
54  {
55  if (bufpos<1)
56  {
57  buf[0] = *itr;
58  ++itr;
59  ++bufpos;
60  }
61  buf[1] = *itr;
62  ++itr;
63  ++bufpos;
64  }
65  }
66 
67  template <class Iterator>
68  static inline UChar value_impl( char* buf, unsigned int& bufpos, Iterator& itr)
69  {
70  fetchbytes( buf, bufpos, itr);
71  UChar res = (unsigned char)buf[MSB];
72  return (res << 8) + (unsigned char)buf[LSB];
73  }
74 
76  template <class Iterator>
77  inline UChar value( char* buf, unsigned int& bufpos, Iterator& itr) const
78  {
79  return value_impl( buf, bufpos, itr);
80  }
81 
83  template <class Iterator>
84  static inline signed char asciichar( char* buf, unsigned int& bufpos, Iterator& itr)
85  {
86  UChar ch = value_impl( buf, bufpos, itr);
87  return (ch > 127)?-1:(char)ch;
88  }
89 
91  template <class Buffer_>
92  inline void print( UChar chr, Buffer_& buf) const
93  {
94  if (chr>MaxChar)
95  {
96  char tb[ 32];
97  char* cc = tb;
98  Encoder::encode( chr, tb, sizeof(tb));
99  while (*cc)
100  {
101  buf.push_back( (UChar)*cc >> Print1shift);
102  buf.push_back( (UChar)*cc >> Print2shift);
103  ++cc;
104  }
105  }
106  else
107  {
108  buf.push_back( (unsigned char)(chr >> Print1shift));
109  buf.push_back( (unsigned char)(chr >> Print2shift));
110  }
111  }
112 
114  static inline bool is_equal( const UCS2&, const UCS2&)
115  {
116  return true;
117  }
118 };
119 
123 template <int byteorder>
124 struct UCS4
125 {
126  enum
127  {
128  B0=(byteorder==ByteOrder::BE)?3:0,
129  B1=(byteorder==ByteOrder::BE)?2:1,
130  B2=(byteorder==ByteOrder::BE)?1:2,
131  B3=(byteorder==ByteOrder::BE)?0:3,
132  Print1shift=(byteorder==ByteOrder::BE)?24:0, //< value to shift with to get the 1st character to print
133  Print2shift=(byteorder==ByteOrder::BE)?16:8, //< value to shift with to get the 2nd character to print
134  Print3shift=(byteorder==ByteOrder::BE)?8:16, //< value to shift with to get the 3rd character to print
135  Print4shift=(byteorder==ByteOrder::BE)?0:24, //< value to shift with to get the 4th character to print
136  MaxChar=0xFFFFFFFFU
137  };
138 
140  template <class Iterator>
141  static inline void fetchbytes( char* buf, unsigned int& bufpos, Iterator& itr)
142  {
143  for (;bufpos < 4; ++bufpos)
144  {
145  buf[ bufpos] = *itr;
146  ++itr;
147  }
148  }
149 
151  template <class Iterator>
152  static inline UChar value( char* buf, unsigned int& bufpos, Iterator& itr)
153  {
154  fetchbytes( buf, bufpos, itr);
155  UChar res = (unsigned char)buf[B3];
156  res = (res << 8) + (unsigned char)buf[B2];
157  res = (res << 8) + (unsigned char)buf[B1];
158  return (res << 8) + (unsigned char)buf[B0];
159  }
160 
162  template <class Iterator>
163  static inline void skip( char*, unsigned int& bufpos, Iterator& itr)
164  {
165  for (;bufpos < 4; ++bufpos)
166  {
167  ++itr;
168  }
169  }
170 
172  template <class Iterator>
173  static inline signed char asciichar( char* buf, unsigned int& bufpos, Iterator& itr)
174  {
175  UChar ch = value( buf, bufpos, itr);
176  return (ch > 127)?-1:(char)ch;
177  }
178 
180  template <class Buffer_>
181  static void print( UChar chr, Buffer_& buf)
182  {
183  buf.push_back( (unsigned char)((chr >> Print1shift) & 0xFF));
184  buf.push_back( (unsigned char)((chr >> Print2shift) & 0xFF));
185  buf.push_back( (unsigned char)((chr >> Print3shift) & 0xFF));
186  buf.push_back( (unsigned char)((chr >> Print4shift) & 0xFF));
187  }
188 
190  static inline bool is_equal( const UCS4&, const UCS4&)
191  {
192  return true;
193  }
194 };
195 
198 struct UCS2LE :public UCS2<ByteOrder::LE> {};
201 struct UCS2BE :public UCS2<ByteOrder::BE> {};
204 struct UCS4LE :public UCS4<ByteOrder::LE> {};
207 struct UCS4BE :public UCS4<ByteOrder::BE> {};
208 
209 }//namespace
210 }//namespace
211 #endif
Definition: charset_ucs.hpp:32
static void skip(char *, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::skip(char*,unsigned int&,Iterator&)
Definition: charset_ucs.hpp:163
Definition: charset_ucs.hpp:136
Character set UCS-2 (little/big endian)
Definition: charset_ucs.hpp:28
UCS-4 big endian character set encoding.
Definition: charset_ucs.hpp:207
Definition: charset_ucs.hpp:34
UCS-2 big endian character set encoding.
Definition: charset_ucs.hpp:201
static bool is_equal(const UCS2 &, const UCS2 &)
See template<class Buffer>Interface::is_equal( const Interface&, const Interface&) ...
Definition: charset_ucs.hpp:114
static bool is_equal(const UCS4 &, const UCS4 &)
See template<class Buffer>Interface::is_equal( const Interface&, const Interface&) ...
Definition: charset_ucs.hpp:190
Definition: charset_ucs.hpp:130
static bool encode(UChar chr, char *bufptr, std::size_t bufsize)
Write the character 'chr' in encoded form as nul-terminated string to a buffer.
Definition: charset_interface.hpp:29
Definition: charset_interface.hpp:119
Definition: charset_ucs.hpp:135
Definition: charset_ucs.hpp:128
UCS-4 little endian character set encoding.
Definition: charset_ucs.hpp:204
static void fetchbytes(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::fetchbytes(char*,unsigned int&,Iterator&)
Definition: charset_ucs.hpp:51
UCS-2 little endian character set encoding.
Definition: charset_ucs.hpp:198
static UChar value(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::value(char*,unsigned int&,Iterator&)
Definition: charset_ucs.hpp:152
Character set UCS-4 (little/big endian)
Definition: charset_ucs.hpp:124
Definition: charset_ucs.hpp:132
Definition: charset_ucs.hpp:131
Definition: charset_interface.hpp:120
static signed char asciichar(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::asciichar(char*,unsigned int&,Iterator&)
Definition: charset_ucs.hpp:173
void print(UChar chr, Buffer_ &buf) const
See template<class Buffer>Interface::print(UChar,Buffer&)
Definition: charset_ucs.hpp:92
uint32_t UChar
Unicode character type.
Definition: char.hpp:37
Definition of unicode characters.
Definition: charset_ucs.hpp:36
Definition: charset_ucs.hpp:133
UChar value(char *buf, unsigned int &bufpos, Iterator &itr) const
See template<class Iterator>Interface::value(char*,unsigned int&,Iterator&)
Definition: charset_ucs.hpp:77
Interface that describes what a character set encoding implementation has to define to be used as cha...
Definition: charset_ucs.hpp:129
static void fetchbytes(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::fetchbytes(char*,unsigned int&,Iterator&)
Definition: charset_ucs.hpp:141
Definition: charset_ucs.hpp:35
Definition: charset_ucs.hpp:33
Definition of exceptions with containing error codes thrown by textwolf.
static UChar value_impl(char *buf, unsigned int &bufpos, Iterator &itr)
Definition: charset_ucs.hpp:68
Definition: charset_ucs.hpp:134
static void print(UChar chr, Buffer_ &buf)
See template<class Buffer>Interface::print(UChar,Buffer&)
Definition: charset_ucs.hpp:181
static void skip(char *, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::skip(char*,unsigned int&,Iterator&)
Definition: charset_ucs.hpp:41
static signed char asciichar(char *buf, unsigned int &bufpos, Iterator &itr)
See template<class Iterator>Interface::asciichar(char*,unsigned int&,Iterator&)
Definition: charset_ucs.hpp:84