8 #ifndef __TEXTWOLF_TEXT_SCANNER_HPP__
9 #define __TEXTWOLF_TEXT_SCANNER_HPP__
23 template <
typename Iterator>
29 static inline std::size_t
getPosition(
const char* start,
char const* itr)
67 template <
typename Iterator,
class CharSet>
113 :val(0),cur(0),state(0),charset(charset_)
115 for (
unsigned int ii=0; ii<
sizeof(buf); ii++) buf[ii] = 0;
119 :start(p_iterator),input(p_iterator),val(0),cur(0),state(0),charset(charset_)
121 for (
unsigned int ii=0; ii<
sizeof(buf); ii++) buf[ii] = 0;
125 :start(p_iterator),input(p_iterator),val(0),cur(0),state(0),charset(CharSet())
127 for (
unsigned int ii=0; ii<
sizeof(buf); ii++) buf[ii] = 0;
138 ,charset(orig.charset)
140 for (
unsigned int ii=0; ii<
sizeof(buf); ii++) buf[ii]=orig.buf[ii];
145 template <
class IteratorAssignment>
165 val = charset.value( buf, state, input);
173 cur = CharSet::asciichar( buf, state, input);
191 template <
class Buffer>
192 inline void copychar( CharSet& output_, Buffer& buf_)
195 if (CharSet::is_equal( charset, output_))
199 charset.fetchbytes( buf, state, input);
201 #if (__GNUC__ >= 5 && __GNUC_MINOR__ >= 0)
202 for (
unsigned int ii=0; ii<8 && ii<state; ++ii) buf_.push_back(buf[ii]);
204 for (
unsigned int ii=0; ii<state; ++ii) buf_.push_back(buf[ii]);
207 for (
unsigned int ii=0; ii<state; ++ii) buf_.push_back(buf[ii]);
212 output_.print(
chr(), buf_);
220 static ControlCharMap controlCharMap;
222 return controlCharMap[ (
unsigned char)cur];
230 return cur>=0?(
unsigned char)cur:0;
237 CharSet::skip( buf, state, input);
static std::size_t getPosition(const char *start, char const *itr)
Definition: textscanner.hpp:29
TextScanner(const CharSet &charset_)
Constructor.
Definition: textscanner.hpp:112
TextScanner & skip()
Skip to the next character of the source.
Definition: textscanner.hpp:235
void setSource(const IteratorAssignment &a)
Assign something to the iterator while keeping the state.
Definition: textscanner.hpp:146
PositionIndex position() const
Definition: istreamiterator.hpp:151
Input iterator as source for the XML scanner with the possibility of being fed chunk by chunk...
Definition: sourceiterator.hpp:25
ControlCharMap()
Definition: textscanner.hpp:84
PositionIndex position() const
Definition: sourceiterator.hpp:112
ControlCharacter control()
Get the control character representation of the current character.
Definition: textscanner.hpp:218
Definition of iterators for textwolf on an input stream class.
Map of ASCII characters to control character identifiers used in the XML scanner automaton.
Definition: textscanner.hpp:82
const Iterator & getIterator() const
Get the iterator pointing to the current source position.
Definition: textscanner.hpp:177
uint32_t UChar
Unicode character type.
Definition: char.hpp:37
TextScanner(const Iterator &p_iterator)
Definition: textscanner.hpp:124
UChar operator*()
see TextScanner::chr()
Definition: textscanner.hpp:245
Definition of Unicode characters.
void getcur()
Fill the internal buffer with as many bytes of the current character as are needed for reading its ASCII representation.
Definition: textscanner.hpp:171
textwolf byte source iterator template
static std::size_t getPosition(const IStreamIterator &, const IStreamIterator &itr)
Definition: textscanner.hpp:47
unsigned char ascii()
Get the ASCII character representation of the current character.
Definition: textscanner.hpp:227
Interface that describes what a character set encoding implementation has to define to be used as cha...
Definition: textscanner.hpp:24
std::size_t getPosition() const
Get the current source iterator position.
Definition: textscanner.hpp:154
Definition of exceptions containing error codes thrown by textwolf.
ControlCharacter
Enumeration of control characters needed as events for the XML scanner state machine.
Definition: char.hpp:78
UChar chr()
Get the Unicode representation of the current character.
Definition: textscanner.hpp:161
Reader for scanning the input character by character.
Definition: textscanner.hpp:68
Input iterator on a constant string that returns null characters after EOF, as required by the textwolf scanner.
Definition: cstringiterator.hpp:23
Input iterator on an STL input stream.
Definition: istreamiterator.hpp:95
TextScanner(const CharSet &charset_, const Iterator &p_iterator)
Definition: textscanner.hpp:118
textwolf iterator on strings
TextScanner & operator++()
Preincrement: Skip to the next character of the source.
Definition: textscanner.hpp:252
TextScanner(const TextScanner &orig)
Copy constructor.
Definition: textscanner.hpp:132
unsigned int pos() const
Return current char position.
Definition: cstringiterator.hpp:76
Character map for fast typing of a character byte.
Definition: char.hpp:50
static std::size_t getPosition(const SrcIterator &, const SrcIterator &itr)
Definition: textscanner.hpp:38
Iterator & getIterator()
Get the iterator pointing to the current source position.
Definition: textscanner.hpp:183
static std::size_t getPosition(const CStringIterator &, const CStringIterator &itr)
Definition: textscanner.hpp:56
void copychar(CharSet &output_, Buffer &buf_)
Definition: textscanner.hpp:192