11 #ifndef __TEXTWOLF_XML_SCANNER_HPP__
12 #define __TEXTWOLF_XML_SCANNER_HPP__
64 return tab + stateIdx;
73 void newState(
int stateIdx)
throw(
exception)
82 void addOtherTransition(
int nextState)
throw(exception)
88 if (tab[ size-1].next[ inputchr] == -1) tab[ size-1].
next[ inputchr] = (
unsigned char)nextState;
96 void addTransition(
ControlCharacter inputchr,
int nextState)
throw(exception)
99 if ((
int)inputchr >= (
int)NofControlCharacter)
throw exception(
InvalidParamChar);
100 if (nextState < 0 || nextState > MaxNofStates)
throw exception(
InvalidParamState);
102 tab[ size-1].
next[ inputchr] = (
unsigned char)nextState;
110 addTransition( inputchr, size-1);
116 void addAction(
int action_op,
int action_arg=0) throw(exception)
126 void addMiss(
int error)
throw(exception)
129 if (tab[ size-1].missError != -1)
throw exception(
InvalidState);
135 void addFallback(
int stateIdx)
throw(exception)
138 if (tab[ size-1].fallbackState != -1)
throw exception(
InvalidState);
139 if (stateIdx < 0 || stateIdx > MaxNofStates)
throw exception(
InvalidParamState);
201 static const char* names[
NofElementTypes] = {
"None",
"ErrorOccurred",
"HeaderStart",
"HeaderAttribName",
"HeaderAttribValue",
"HeaderEnd",
"DocAttribValue",
"DocAttribEnd",
"TagAttribName",
"TagAttribValue",
"OpenTag",
"CloseTag",
"CloseTagIm",
"Content",
"Exit"};
202 return names[ (
unsigned int)ee];
233 static const char* sError[NofErrors]
234 = {0,
"illegal document attribute definition",
237 "unexpected end of text",
239 "string not terminated",
240 "undefined character entity",
243 "expected tag attribute",
244 "expected CDATA tag",
245 "internal (illegal state)",
246 "unexpected end of input",
247 "expected end of line",
248 "expected 2nd '-' to complete marker for start of comment '<!--'"
250 return sError[(
unsigned int)ee];
257 START,
STARTTAG,
XTAG,
PITAG,
PITAGEND,
XTAGEND,
XTAGDONE,
XTAGAISK,
XTAGANAM,
XTAGAESK,
XTAGAVSK,
XTAGAVID,
XTAGAVSQ,
XTAGAVDQ,
XTAGAVQE,
258 DOCSTART,
CONTENT,
TOKEN,
SEEKTOK,
XMLTAG,
OPENTAG,
CLOSETAG,
TAGCLSK,
TAGAISK,
TAGANAM,
TAGAESK,
TAGAVSK,
TAGAVID,
TAGAVSQ,
TAGAVDQ,
TAGAVQE,
268 enum Constant {NofStates=48};
269 static const char* sState[NofStates]
271 "START",
"STARTTAG",
"XTAG",
"PITAG",
"PITAGEND",
272 "XTAGEND",
"XTAGDONE",
"XTAGAISK",
"XTAGANAM",
273 "XTAGAESK",
"XTAGAVSK",
"XTAGAVID",
"XTAGAVSQ",
"XTAGAVDQ",
274 "XTAGAVQE",
"DOCSTART",
"CONTENT",
"TOKEN",
"SEEKTOK",
"XMLTAG",
275 "OPENTAG",
"CLOSETAG",
"TAGCLSK",
"TAGAISK",
"TAGANAM",
276 "TAGAESK",
"TAGAVSK",
"TAGAVID",
"TAGAVSQ",
"TAGAVDQ",
277 "TAGAVQE",
"TAGCLIM",
"ENTITYSL",
"ENTITY",
"ENTITYE",
278 "ENTITYID",
"ENTITYSQ",
"ENTITYDQ",
"ENTITYLC",
279 "COMDASH2",
"COMSEEKE",
"COMENDD2",
"COMENDCL",
280 "CDATA",
"CDATA1",
"CDATA2",
"CDATA3",
"EXIT"
282 return sState[(
unsigned int)s];
298 static const char* name[
NofSTMActions] = {
"Return",
"ReturnWord",
"ReturnContent",
"ReturnIdentifier",
"ReturnSQString",
"ReturnDQString",
"ExpectIdentifierXML",
"ExpectIdentifierCDATA",
"ReturnEOF"};
299 return name[ (
unsigned int)a];
382 (*this)(
Undef,
true)(
Equal,
true)(
Gt,
true)(
Slash,
true)(
Dash,
true)(
Exclam,
true)(
Questm,
true)(
Sq,
true)(
Dq,
true)(
Osb,
true)(
Csb,
true)(
Any,
true);
392 (*this)(
Cntrl,
true)(
Space,
true)(
EndOfLine,
true)(
Undef,
true)(
Equal,
true)(
Gt,
true)(
Slash,
true)(
Dash,
true)(
Exclam,
true)(
Questm,
true)(
Sq,
true)(
Dq,
true)(
Osb,
true)(
Csb,
true)(
Any,
true);
428 class OutputCharSet_,
447 ParsingNumericEntity,
448 ParsingNumericBaseEntity,
467 TokState() :id(Start),eolnState(SRC),pos(0),base(0),value(0),curchr_saved(0) {}
472 void init(Id id_=Start, EolnState eolnState_=SRC)
474 id=id_;eolnState=eolnState_;pos=0;base=0;value=0;curchr_saved=0;
495 m_output.print( ch, m_outputBuf);
500 m_src.
copychar( m_output, m_outputBuf);
503 void copychar_impl(
const traits::TypeCheck::NO&)
510 copychar_impl( traits::TypeCheck::is_same<InputCharSet,OutputCharSet>::type());
515 static unsigned char HEX(
unsigned char ch)
517 struct HexCharMap :
public CharMap<unsigned char, 0xFF>
522 (
'0',0) (
'1', 1)(
'2', 2)(
'3', 3)(
'4', 4)(
'5', 5)(
'6', 6)(
'7', 7)(
'8', 8)(
'9', 9)
523 (
'A',10)(
'B',11)(
'C',12)(
'D',13)(
'E',14)(
'F',15)(
'a',10)(
'b',11)(
'c',12)(
'd',13)(
'e',14)(
'f',15);
526 static HexCharMap hexCharMap;
527 return hexCharMap[ch];
536 unsigned char ch = ir.ascii();
538 if (ch !=
'#')
return 0;
553 unsigned char chval = HEX(ch);
554 if (value >= base)
return 0;
555 value = value * base + chval;
556 if (value >= 0xFFFFFFFF)
return 0;
565 void fallbackEntity()
569 case TokState::Start:
570 case TokState::ParsingDone:
571 case TokState::ParsingKey:
572 case TokState::ParsingToken:
574 case TokState::ParsingEntity:
577 case TokState::ParsingNumericEntity:
581 case TokState::ParsingNumericBaseEntity:
584 for (
unsigned int ii=0; ii<tokstate.pos; ii++) push( tokstate.buf[ii]);
586 case TokState::ParsingNamedEntity:
588 for (
unsigned int ii=0; ii<tokstate.pos; ii++) push( tokstate.buf[ii]);
598 tokstate.id = TokState::ParsingEntity;
603 return parseNumericEntity();
607 return parseNamedEntity();
613 bool parseNumericEntity()
616 tokstate.id = TokState::ParsingNumericEntity;
622 return parseNumericBaseEntity();
627 return parseNumericBaseEntity();
633 bool parseNumericBaseEntity()
636 tokstate.id = TokState::ParsingNumericBaseEntity;
638 while (tokstate.pos <
sizeof(tokstate.buf))
643 if (tokstate.value > 0xFFFFFFFF)
645 tokstate.buf[ tokstate.pos++] = ch;
649 push( (
UChar)tokstate.value);
650 tokstate.init( TokState::ParsingToken);
656 unsigned char chval = HEX(ch);
657 if (chval >= tokstate.base)
662 tokstate.buf[ tokstate.pos++] = ch;
663 tokstate.value = tokstate.value * tokstate.base + chval;
673 bool parseNamedEntity()
676 tokstate.id = TokState::ParsingNamedEntity;
678 while (tokstate.pos <
sizeof(tokstate.buf)-1 && ch !=
';' && m_src.
control() ==
Any)
680 tokstate.buf[ tokstate.pos] = ch;
687 tokstate.buf[ tokstate.pos] =
'\0';
688 if (!pushEntity( tokstate.buf))
return false;
689 tokstate.init( TokState::ParsingToken);
702 bool parseTokenRecover()
705 if (tokstate.curchr_saved)
707 push( tokstate.curchr_saved);
708 tokstate.curchr_saved = 0;
712 case TokState::Start:
713 case TokState::ParsingDone:
714 case TokState::ParsingKey:
715 case TokState::ParsingToken:
718 case TokState::ParsingEntity: rt = parseEntity();
break;
719 case TokState::ParsingNumericEntity: rt = parseNumericEntity();
break;
720 case TokState::ParsingNumericBaseEntity: rt = parseNumericBaseEntity();
break;
721 case TokState::ParsingNamedEntity: rt = parseNamedEntity();
break;
723 tokstate.init( TokState::ParsingToken);
732 if (tokstate.id == TokState::Start)
735 tokstate.id = TokState::ParsingToken;
738 else if (tokstate.id != TokState::ParsingToken)
740 if (!parseTokenRecover())
752 while (isTok[ (
unsigned char)(ch=m_src.
control())])
754 unsigned char aa = m_src.
ascii();
760 push( (
unsigned char)
'\n');
761 tokstate.eolnState = TokState::CR;
765 if (tokstate.eolnState != TokState::CR)
767 push( (
unsigned char)
'\n');
769 tokstate.eolnState = TokState::SRC;
774 tokstate.eolnState = TokState::SRC;
780 tokstate.eolnState = TokState::SRC;
787 if (!parseEntity())
break;
788 tokstate.init( TokState::ParsingToken);
793 tokstate.init( TokState::ParsingDone);
808 template <
class OutputBufferType>
819 if (isTok[ (
unsigned char)(ch=ir.
control())])
825 pc = parseStaticNumericEntityValue( ir);
831 output.print( pc, buf);
846 while (isTok[ (
unsigned char)(ch=m_src.
control())] || ch ==
Amp)
858 bool expectStr(
const char* str)
861 tokstate.id = TokState::ParsingKey;
862 for (; str[tokstate.pos] !=
'\0'; m_src.
skip(),tokstate.pos++)
864 if (m_src.
ascii() == str[ tokstate.pos])
continue;
877 tokstate.init( TokState::ParsingDone);
884 bool pushPredefinedEntity(
const char* str)
889 if (str[1] ==
'u' && str[2] ==
'o' && str[3] ==
't' && str[4] ==
'\0')
899 if (str[2] ==
'p' && str[3] ==
'\0')
905 else if (str[1] ==
'p')
907 if (str[2] ==
'o' && str[3] ==
's' && str[4] ==
'\0')
916 if (str[1] ==
't' && str[2] ==
'\0')
924 if (str[1] ==
't' && str[2] ==
'\0')
932 if (str[1] ==
'b' && str[2] ==
's' && str[3] ==
'p' && str[4] ==
'\0')
945 bool pushEntity(
const char* str)
947 if (pushPredefinedEntity( str))
951 else if (m_entityMap)
953 EntityMap::const_iterator itr = m_entityMap->find( str);
954 if (itr == m_entityMap->end())
961 UChar ch = itr->second;
980 std::size_t m_tokenpos;
999 :state(
START),error(
Ok),m_src(p_charset,p_src),m_entityMap(&p_entityMap),m_output(
OutputCharSet()),m_tokenpos(0)
1005 :state(
START),error(
Ok),m_src(p_charset,p_src),m_entityMap(0),m_output(
OutputCharSet()),m_tokenpos(0)
1010 :state(
START),error(
Ok),m_src(p_charset),m_entityMap(0),m_tokenpos(0)
1023 ,m_entityMap(o.m_entityMap)
1024 ,m_outputBuf(o.m_outputBuf)
1025 ,m_tokenpos(o.m_tokenpos)
1030 template <
class IteratorAssignment>
1050 const char*
getItemPtr()
const {
return m_outputBuf.size()?&m_outputBuf.at(0):
"\0\0\0\0";}
1068 return stm.
get( state);
1105 static const char* stringDefs[
NofSTMActions] = {0,0,0,0,0,0,
"xml",
"CDATA",0};
1116 if (tokstate.id != TokState::ParsingDone)
1129 else if (stringDefs[sd->
action.
op])
1131 if (tokstate.id != TokState::ParsingDone)
1138 push(
'?'); push(
'x'); push(
'm'); push(
'l');
1152 m_outputBuf.clear();
1165 tokstate.id = TokState::Start;
1167 if (sd->
next[ ch] != -1)
1212 const char* m_content;
1232 std::size_t
size()
const {
return m_size;}
1239 Element(
const Element& orig) :m_type(orig.m_type),m_content(orig.m_content),m_size(orig.m_size) {}
1256 iterator& skip(
unsigned short mask=0xFFFF)
1260 element.m_type = input->
nextItem(mask);
1270 bool compare(
const iterator& iter)
const
1272 if (element.
type() == iter.element.
type())
1284 element = orig.element;
1300 element.m_type = input->
nextItem();
1346 return iterator( *
this, doSkipToFirst);
uint64_t EChar
Definition: char.hpp:38
ScannerStatemachine & operator()(ControlCharacter i1, ControlCharacter i2, int ns)
See ScannerStatemachine::addTransition(ControlCharacter,int)
Definition: xmlscanner.hpp:151
end of input tag
Definition: xmlscanner.hpp:1198
iterator & operator++()
Preincrement.
Definition: xmlscanner.hpp:1328
ScannerStatemachine & fallback(int stateIdx)
See ScannerStatemachine::addFallback(int)
Definition: xmlscanner.hpp:161
Definition: xmlscanner.hpp:257
XMLScanner(const InputIterator &p_src, const EntityMap &p_entityMap)
Constructor.
Definition: xmlscanner.hpp:986
const char * getItemPtr() const
Get the current parsed XML element pointer, if it was not masked out, see nextItem(unsigned short) ...
Definition: xmlscanner.hpp:1050
expected equal in tag attribute definition
Definition: xmlscanner.hpp:218
STMState
Enumeration of states of the XML scanner state machine.
Definition: xmlscanner.hpp:255
Element * get(int stateIdx)
Get state addressed by its index.
Definition: xmlscanner.hpp:61
Definition: xmlscanner.hpp:258
Definition: xmlscanner.hpp:258
duplicate transition definition in automaton. Internal textwolf error
Definition: exception.hpp:31
static bool parseStaticToken(const IsTokenCharMap &isTok, InputReader ir, OutputBufferType &buf)
Static version of parse a token for parsing table definition elements.
Definition: xmlscanner.hpp:809
XMLScanner(const InputCharSet &p_charset)
Constructor.
Definition: xmlscanner.hpp:1009
[0] empty (NULL)
Definition: xmlscanner.hpp:175
expected end of tag
Definition: xmlscanner.hpp:217
std::size_t difference_type
Definition: xmlscanner.hpp:1243
expected an open tag in this state
Definition: xmlscanner.hpp:211
Error getError(const char **str=0)
Get the last error.
Definition: xmlscanner.hpp:1074
Element()
Constructor.
Definition: xmlscanner.hpp:1234
std::size_t size_type
Definition: xmlscanner.hpp:1244
Definition: xmlscanner.hpp:259
TextScanner & skip()
Skip to the next character of the source.
Definition: textscanner.hpp:235
Definition: xmlscanner.hpp:289
void setSource(const IteratorAssignment &a)
Assign something to the iterator while keeping the state.
Definition: textscanner.hpp:146
Definition: xmlscanner.hpp:260
Definition: xmlscanner.hpp:260
InputIterator & getIterator()
Get the iterator pointing to the current source position.
Definition: xmlscanner.hpp:1089
void setSource(const IteratorAssignment &a)
Assign something to the source iterator while keeping the state.
Definition: xmlscanner.hpp:1031
[4] tag attribute value in the XML header
Definition: xmlscanner.hpp:179
Definition: xmlscanner.hpp:258
named entity is not defined in the entity map
Definition: xmlscanner.hpp:216
ElementType
Enumeration of XML element types returned by an XML scanner.
Definition: xmlscanner.hpp:173
Definition: xmlscanner.hpp:260
iterator & operator=(const iterator &orig)
Assignment operator.
Definition: xmlscanner.hpp:1311
Base class for structures that can throw exceptions for non recoverable errors.
Definition: exception.hpp:20
Definition: xmlscanner.hpp:258
One state in the state machine.
Definition: xmlscanner.hpp:34
bool operator!=(const iterator &iter) const
Compare to check for inequality.
Definition: xmlscanner.hpp:1338
Definition: xmlscanner.hpp:260
ControlCharacter control()
Get the control character representation of the current character.
Definition: textscanner.hpp:218
ScannerStatemachine & other(int stateIdx)
See ScannerStatemachine::addOtherTransition(int)
Definition: xmlscanner.hpp:163
std::size_t getTokenPosition() const
Get the current token position.
Definition: xmlscanner.hpp:1043
bool operator==(const iterator &iter) const
Compare to check for equality.
Definition: xmlscanner.hpp:1335
invalid state definition in automaton. Internal textwolf error
Definition: exception.hpp:32
IsDQStringCharMap()
Definition: xmlscanner.hpp:410
ScannerStatemachine & action(int aa, int arg=0)
See ScannerStatemachine::addAction(int,int)
Definition: xmlscanner.hpp:157
XMLScanner(const InputCharSet &p_charset, const InputIterator &p_src)
Constructor.
Definition: xmlscanner.hpp:1004
Definition: xmlscanner.hpp:258
Definition: xmlscanner.hpp:259
expected mandatory end of line (after XML header)
Definition: xmlscanner.hpp:223
const Iterator & getIterator() const
Get the iterator pointing to the current source position.
Definition: textscanner.hpp:177
ScannerStatemachine & operator()(ControlCharacter inputchr)
See ScannerStatemachine::addTransition(ControlCharacter)
Definition: xmlscanner.hpp:155
IsContentCharMap()
Definition: xmlscanner.hpp:390
Definition: xmlscanner.hpp:258
IsWordCharMap()
Definition: xmlscanner.hpp:380
Definition: xmlscanner.hpp:259
Definition: xmlscanner.hpp:258
Definition: xmlscanner.hpp:258
std::input_iterator_tag iterator_category
Definition: xmlscanner.hpp:1247
static const char * getElementTypeName(ElementType ee)
Get the XML element type as string.
Definition: xmlscanner.hpp:199
Definition: xmlscanner.hpp:259
Definition: xmlscanner.hpp:259
[3] tag attribute name in the XML header
Definition: xmlscanner.hpp:178
int arg
action argument
Definition: xmlscanner.hpp:44
ScannerStatemachine::Element * getState()
Get the current XML scanner state machine state.
Definition: xmlscanner.hpp:1065
Direct copy of a character from input to output without encoding/decoding it.
no error, everything is OK
Definition: xmlscanner.hpp:209
[10] open tag (e.g. "bla" for "<bla...")
Definition: xmlscanner.hpp:185
[13] content element string (separated by spaces or end of line)
Definition: xmlscanner.hpp:188
Definition: xmlscanner.hpp:257
void assign(const iterator &orig)
Assign an iterator to another.
Definition: xmlscanner.hpp:1281
Definition: xmlscanner.hpp:289
iterator(ThisXMLScanner &p_input, bool doSkipToFirst=true)
Constructor.
Definition: xmlscanner.hpp:1295
uint32_t UChar
Unicode character type.
Definition: char.hpp:37
unexpected end of input stream
Definition: xmlscanner.hpp:222
XMLScanner()
Default constructor.
Definition: xmlscanner.hpp:1013
Element value_type
Definition: xmlscanner.hpp:1242
STMAction
Enumeration of actions in the XML scanner state machine.
Definition: xmlscanner.hpp:287
Element(const Element &orig)
Copy constructor.
Definition: xmlscanner.hpp:1239
Error
Enumeration of XML scanner error codes.
Definition: xmlscanner.hpp:207
int missError
error code in case of an event that does not match and there is no fallback
Definition: xmlscanner.hpp:37
XMLScanner(const InputCharSet &p_charset, const InputIterator &p_src, const EntityMap &p_entityMap)
Constructor.
Definition: xmlscanner.hpp:998
Defines the set of characters belonging to a double-quoted string.
Definition: xmlscanner.hpp:408
InputCharSet_ InputCharSet
Definition: xmlscanner.hpp:480
Definition: xmlscanner.hpp:289
Definition: xmlscanner.hpp:257
Definition: xmlscanner.hpp:257
Definition: xmlscanner.hpp:257
Definition: xmlscanner.hpp:257
ScannerStatemachine & operator()(ControlCharacter i1, ControlCharacter i2, ControlCharacter i3, int ns)
See ScannerStatemachine::addTransition(ControlCharacter,int)
Definition: xmlscanner.hpp:153
Element(const End &)
Constructor.
Definition: xmlscanner.hpp:1236
Element()
Constructor.
Definition: xmlscanner.hpp:51
Definition: xmlscanner.hpp:289
Iterator element visited.
Definition: xmlscanner.hpp:1207
Definition of unicode characters.
int fallbackState
state transition if the event does not match (it belongs to the next state = fallbackState) ...
Definition: xmlscanner.hpp:36
textwolf exception class
Definition: exception.hpp:48
IsTagCharMap()
Definition: xmlscanner.hpp:369
iterator()
Constructor.
Definition: xmlscanner.hpp:1308
error in document attribute or entity definition
Definition: xmlscanner.hpp:210
Defines the set of characters belonging to a single-quoted string.
Definition: xmlscanner.hpp:398
XML scanner base class for things common for all XML scanners.
Definition: xmlscanner.hpp:168
[9] tag attribute value (e.g. "5" in <person id='5'>)
Definition: xmlscanner.hpp:184
const InputIterator & getIterator() const
Get the iterator pointing to the current source position.
Definition: xmlscanner.hpp:1083
Action action
action executed after entering this state
Definition: xmlscanner.hpp:46
iterator(const End &et)
Constructor.
Definition: xmlscanner.hpp:1306
static const char * getErrorString(Error ee)
Get the error code as string.
Definition: xmlscanner.hpp:230
Definition: xmlscanner.hpp:260
Definition: xmlscanner.hpp:258
Element & reference
Definition: xmlscanner.hpp:1246
Defines the set of tag characters.
Definition: xmlscanner.hpp:367
Definition: xmlscanner.hpp:257
unsigned char ascii()
Get the ASCII character representation of the current character.
Definition: textscanner.hpp:227
static const char * getActionString(STMAction a)
Get the scanner state machine action as string.
Definition: xmlscanner.hpp:296
ElementType type() const
Type of the current element.
Definition: xmlscanner.hpp:1226
Interface that describes what a character set encoding implementation has to define to be used as cha...
expected CDATA tag definition
Definition: xmlscanner.hpp:220
ElementType nextItem(unsigned short mask=0xFFFF)
Scan the next XML element.
Definition: xmlscanner.hpp:1097
[6] document attribute value in a DOCTYPE or ENTITY definition
Definition: xmlscanner.hpp:181
ScannerStatemachine & operator()(ControlCharacter inputchr, int ns)
See ScannerStatemachine::addTransition(ControlCharacter,int)
Definition: xmlscanner.hpp:149
[11] close tag (e.g. "bla" for "</bla>")
Definition: xmlscanner.hpp:186
[1] XML scanning error reported
Definition: xmlscanner.hpp:176
Definition: xmlscanner.hpp:259
attribute string in XML not terminated on the same line
Definition: xmlscanner.hpp:215
Definition: xmlscanner.hpp:260
expected second '-' after '<!-' to start an XML comment as '<!--'
Definition: xmlscanner.hpp:224
[5] end of XML header event (after parsing '?>')
Definition: xmlscanner.hpp:180
XML scanner automaton definition check failed. Labels of states must be equal to their indices...
Definition: exception.hpp:28
Definition: xmlscanner.hpp:258
ScannerStatemachine & miss(int ee)
See ScannerStatemachine::addMiss(int)
Definition: xmlscanner.hpp:159
std::size_t getPosition() const
Get the current source iterator position.
Definition: textscanner.hpp:154
Definition: xmlscanner.hpp:289
iterator begin(bool doSkipToFirst=true)
Get begin iterator.
Definition: xmlscanner.hpp:1344
const char * error() const
Return the current error.
Definition: xmlscanner.hpp:1220
Definition: xmlscanner.hpp:289
int op
action operand
Definition: xmlscanner.hpp:43
bool valid() const
Check if the element neither marks the end of the document nor reports an error.
Definition: xmlscanner.hpp:1217
std::map< const char *, UChar > EntityMap
Definition: xmlscanner.hpp:487
const Element & operator*() const
Element dereference operator.
Definition: xmlscanner.hpp:1317
XML scanner template that adds the functionality to the statemachine base definition.
Definition: xmlscanner.hpp:431
Definition: xmlscanner.hpp:258
CharMap< bool, false, NofControlCharacter > IsTokenCharMap
Forms a set of characters by assigning (true/false) to the whole domain.
Definition: xmlscanner.hpp:363
std::size_t getPosition() const
Get the current source iterator position.
Definition: xmlscanner.hpp:1038
iterator end()
Get the pointer to the end of content.
Definition: xmlscanner.hpp:1350
Defines the set of content word characters (for tokenization)
Definition: xmlscanner.hpp:378
iterator operator++(int)
Postincrement.
Definition: xmlscanner.hpp:1331
std::size_t getItemSize() const
Get the size of the current parsed XML element in bytes.
Definition: xmlscanner.hpp:1054
maximum number of states (fixed allocated array for state machine)
Definition: xmlscanner.hpp:30
Definition: xmlscanner.hpp:257
XMLScanner(const XMLScanner &o)
Copy constructor.
Definition: xmlscanner.hpp:1019
memory reserved for statically allocated table or memory block is too small. Increase the size of mem...
Definition: exception.hpp:27
Definition of exceptions with containing error codes thrown by textwolf.
const char * content() const
Value of the current element.
Definition: xmlscanner.hpp:1229
[14] end of document
Definition: xmlscanner.hpp:189
std::size_t size() const
Size of the value of the current element in bytes.
Definition: xmlscanner.hpp:1232
Definition: xmlscanner.hpp:257
ControlCharacter
Enumeration of control characters needed as events for XML scanner statemachine.
Definition: char.hpp:78
UChar chr()
Get the unicode representation of the current character.
Definition: textscanner.hpp:161
Definition: xmlscanner.hpp:289
Definition: xmlscanner.hpp:257
Definition: xmlscanner.hpp:258
internal error (textwolf implementation error)
Definition: xmlscanner.hpp:221
Definition: xmlscanner.hpp:289
Definition: xmlscanner.hpp:260
expected tag attribute
Definition: xmlscanner.hpp:219
Definition: xmlscanner.hpp:258
const OutputBuffer & getItem() const
Get the current parsed XML element, if it was not masked out, see nextItem(unsigned short) ...
Definition: xmlscanner.hpp:1058
XMLScanner(const InputIterator &p_src)
Constructor.
Definition: xmlscanner.hpp:991
Definition: xmlscanner.hpp:260
Definition: xmlscanner.hpp:257
Definition: xmlscanner.hpp:259
const char * name() const
Type of the current element as string.
Definition: xmlscanner.hpp:1223
OutputBuffer_ OutputBuffer
Definition: xmlscanner.hpp:488
Statemachine()
Constructor (defines the state machine completely)
Definition: xmlscanner.hpp:307
Definition: xmlscanner.hpp:258
Definition: xmlscanner.hpp:289
Definition: xmlscanner.hpp:290
number of XML element types defined
Definition: xmlscanner.hpp:193
Character map for fast typing of a character byte.
Definition: char.hpp:50
Element * pointer
Definition: xmlscanner.hpp:1245
[2] open XML header tag
Definition: xmlscanner.hpp:177
parameter check (for control character) in automaton definition failed. Internal textwolf error ...
Definition: exception.hpp:30
Class to build up the XML element scanner state machine in a descriptive way.
Definition: xmlscanner.hpp:25
parameter check (for state) in automaton definition failed. Internal textwolf error ...
Definition: exception.hpp:29
Definition: xmlscanner.hpp:258
ScannerStatemachine()
Constructor.
Definition: xmlscanner.hpp:144
const Element * operator->() const
Element dereference operator.
Definition: xmlscanner.hpp:1322
Definition: xmlscanner.hpp:257
Definition of action fired by the state machine.
Definition: xmlscanner.hpp:41
static const char * getStateString(STMState s)
Get the scanner state machine state as string.
Definition: xmlscanner.hpp:266
unexpected end of text in the middle of the XML definition
Definition: xmlscanner.hpp:213
Definition: traits.hpp:21
XML scanner state machine implementation.
Definition: xmlscanner.hpp:304
XMLScanner< InputIterator, InputCharSet_, OutputCharSet_, OutputBuffer_ > ThisXMLScanner
Definition: xmlscanner.hpp:486
OutputCharSet_ OutputCharSet
Definition: xmlscanner.hpp:481
iterator(const iterator &orig)
Copy constructor.
Definition: xmlscanner.hpp:1288
signed char next[NofControlCharacter]
follow state fired by an event (control character type parsed)
Definition: xmlscanner.hpp:48
TextScanner< InputIterator, InputCharSet_ > InputReader
Definition: xmlscanner.hpp:482
unsigned char nofnext
number of follow states defined
Definition: xmlscanner.hpp:47
Definition: xmlscanner.hpp:257
Defines the set of content token characters.
Definition: xmlscanner.hpp:388
Definition: xmlscanner.hpp:257
Implementation of iterator for character-wise parsing of input.
Definition: xmlscanner.hpp:260
input iterator for iterating on the output of an XML scanner
Definition: xmlscanner.hpp:1202
Definition: xmlscanner.hpp:257
a specific string expected as token in XML but does not match
Definition: xmlscanner.hpp:214
expected an <?xml tag in this state
Definition: xmlscanner.hpp:212
ScannerStatemachine & operator[](int stateIdx)
See ScannerStatemachine::newState(int)
Definition: xmlscanner.hpp:147
Definition: xmlscanner.hpp:258
void copychar(CharSet &output_, Buffer &buf_)
Definition: textscanner.hpp:192
IsSQStringCharMap()
Definition: xmlscanner.hpp:400
Definition: xmlscanner.hpp:259
[8] tag attribute name (e.g. "id" in <person id='5'>)
Definition: xmlscanner.hpp:183
[7] end of a document attribute definition <! .. !>
Definition: xmlscanner.hpp:182
[12] immediate close tag (e.g. "bla" for "<bla />")
Definition: xmlscanner.hpp:187