strusBase  0.17
utf8.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014 Patrick P. Frey
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7  */
10 #ifndef _STRUS_BASE_UTF8_ENCODING_DECODING_HPP_INCLUDED
11 #define _STRUS_BASE_UTF8_ENCODING_DECODING_HPP_INCLUDED
13 #include "strus/base/stdint.h"
14 
15 namespace strus
16 {
/// \brief Bit-mask constants used by the UTF-8 helpers in this header.
/// \note Each enumerator is named after the binary pattern of its value
///       (e.g. B11000000 == 0xC0), so masks read as bit patterns at the call site.
/// \note NOTE(review): this listing jumps from source line 34 to line 40, so
///       enumerators on the skipped lines are presumably not shown here —
///       confirm against the original utf8.hpp before editing this enum.
17 enum {
// Masks with a run of low-order bits set (8 bits down to 1 bit), then zero.
18  B11111111=0xFF,
19  B01111111=0x7F,
20  B00111111=0x3F,
21  B00011111=0x1F,
22  B00001111=0x0F,
23  B00000111=0x07,
24  B00000011=0x03,
25  B00000001=0x01,
26  B00000000=0x00,
// Masks with a run of high-order bits set (1 bit up to 7 bits).
27  B10000000=0x80,
28  B11000000=0xC0,
29  B11100000=0xE0,
30  B11110000=0xF0,
31  B11111000=0xF8,
32  B11111100=0xFC,
33  B11111110=0xFE,
34 
40 };
41 
43 static inline bool utf8midchr( unsigned char ch)
44 {
45  return (ch & B11000000) == B10000000;
46 }
47 
51 static inline const char* utf8prev( char const* end)
52 {
53  for (--end; ((unsigned char)*end & B11000000) == B10000000; --end){}
54  return end;
55 }
56 
60 static inline unsigned char utf8charlen( unsigned char ch)
61 {
62  unsigned char cl = 9-BitOperations::bitScanReverse( (uint8_t)(ch^0xFF));
63  return cl>2?(cl-1):1;
64 }
65 
/// \brief Decoding of a single UTF-8 character in a string.
/// \param[in] itr pointer to the encoded character
/// \param[in] charsize size of the encoded character (presumably its length in
///            bytes as returned by utf8charlen — confirm against the implementation)
/// \return the decoded character value
67 int32_t utf8decode( const char* itr, unsigned int charsize);
68 
/// \brief Encoding of a single UTF-8 character into a string buffer.
/// \param[out] buf buffer receiving the encoded character (NOTE(review): the
///             required capacity is not visible here — confirm against the implementation)
/// \param[in] chr character value to encode
/// \return presumably the number of bytes written to buf — confirm against the implementation
70 std::size_t utf8encode( char* buf, int32_t chr);
71 
72 } //namespace
73 #endif
74 
75 
Definition: utf8.hpp:23
Definition: utf8.hpp:24
Definition: utf8.hpp:37
Definition: utf8.hpp:18
Definition: utf8.hpp:31
static unsigned int bitScanReverse(const uint32_t &idx)
Definition: bitOperations.hpp:52
static unsigned char utf8charlen(unsigned char ch)
Get the length of a UTF-8 encoded character from its first byte.
Definition: utf8.hpp:60
static bool utf8midchr(unsigned char ch)
Return true if the character passed as argument is a non-start character of a multi-byte encoded Unicode character.
Definition: utf8.hpp:43
Definition: utf8.hpp:25
Definition: utf8.hpp:39
Definition: utf8.hpp:19
Definition: utf8.hpp:21
Definition: utf8.hpp:20
Definition: utf8.hpp:22
static const char * utf8prev(char const *end)
Skip to the beginning of a UTF-8 encoded character from a pointer into it.
Definition: utf8.hpp:51
Definition: utf8.hpp:33
Definition: utf8.hpp:32
Definition: utf8.hpp:27
Definition: utf8.hpp:38
Definition: utf8.hpp:29
Definition: utf8.hpp:26
Definition: utf8.hpp:36
Definition: utf8.hpp:35
Definition: utf8.hpp:28
std::size_t utf8encode(char *buf, int32_t chr)
Encoding of a single UTF-8 character into a string buffer.
Definition: utf8.hpp:30
int32_t utf8decode(const char *itr, unsigned int charsize)
Decoding of a single UTF-8 character in a string.