11 #ifndef __TEXTWOLF_XML_HEADER_PARSER_HPP__
12 #define __TEXTWOLF_XML_HEADER_PARSER_HPP__
31 ,m_attributetype(Encoding)
40 ,m_attributetype(o.m_attributetype)
42 ,m_charsConsumed(o.m_charsConsumed)
43 ,m_zeroCount(o.m_zeroCount)
51 void putInput(
const char* src_, std::size_t srcsize_)
53 m_src.append( src_, srcsize_);
67 unsigned char ch = nextChar();
68 for (;ch != 0; ch = nextChar())
75 m_state = ParseXmlOpen;
83 setError(
"expected open tag angle bracket '<'"); // the opening angle bracket of a tag is '<'; '>' is the closing one
91 m_state = ParseXmlHdr;
97 else if (((ch|32) >=
'a' && (ch|32) <=
'z') || ch ==
'_')
103 setError(
"expected xml header question mark '?' after open tag angle bracket '<'");
109 if (ch <= 32 || ch ==
'?')
113 setError(
"expected '<?xml' as xml header start");
117 if (ch ==
'?')
return true;
119 m_state = FindAttributeName;
121 else if (((ch|32) >=
'a' && (ch|32) <=
'z') || ch ==
'_')
123 m_item.push_back(ch);
128 setError(
"unexpected close angle bracket '>' in xml header after '<?xml'");
133 setError(
"expected '<?xml' as xml header start (invalid character)");
138 case FindAttributeName:
143 else if (ch ==
'>' || ch ==
'?')
147 setError(
"unexpected close angle bracket '>' in xml header (missing '?')");
152 else if (((ch|32) >=
'a' && (ch|32) <=
'z') || ch ==
'_')
154 m_item.push_back(ch);
155 m_state = ParseAttributeName;
159 setError(
"invalid character in xml header attribute name");
163 case ParseAttributeName:
164 if (ch <= 32 || ch ==
'=')
166 if (m_item ==
"encoding")
168 m_attributetype = Encoding;
170 else if (m_item ==
"version")
172 m_attributetype = Version;
174 else if (m_item ==
"standalone")
176 m_attributetype = Standalone;
180 setError(
"unknown xml header attribute name");
186 m_state = FindAttributeValue;
189 m_state = FindAttributeAssign;
191 else if (((ch|32) >=
'a' && (ch|32) <=
'z') || ch ==
'_')
193 m_item.push_back(ch);
198 setError(
"invalid character in xml header attribute name");
202 case FindAttributeAssign:
205 m_state = FindAttributeValue;
213 setError(
"expected '=' after xml header attribute name");
217 case FindAttributeValue:
220 m_state = ParseAttributeValueDq;
225 m_state = ParseAttributeValueSq;
234 setError(
"expected single or double quote string as xml header attribute value");
238 case ParseAttributeValueSq:
241 switch (m_attributetype)
251 m_state = FindAttributeName;
256 m_item.push_back( ch);
259 case ParseAttributeValueDq:
262 switch (m_attributetype)
272 m_state = FindAttributeName;
277 m_item.push_back( ch);
289 return m_lasterror.empty()?0:m_lasterror.c_str();
296 return m_encoding.empty()?0:m_encoding.c_str();
303 return m_charsConsumed;
310 m_attributetype = Encoding;
321 void setError(
const std::string& m)
326 unsigned char nextChar()
328 for (; m_zeroCount<4; m_zeroCount++)
330 if (m_idx >= m_src.size())
return 0;
331 unsigned char ch = m_src[m_idx];
355 ParseAttributeValueSq,
356 ParseAttributeValueDq
366 static const char* stateName( State i)
368 static const char* ar[] = {
"Init",
"ParseXmlOpen",
"ParseXmlHdr",
"FindAttributeName",
"ParseAttributeName",
"FindAttributeAssign",
"FindAttributeValue",
"ParseAttributeValueSq",
"ParseAttributeValueDq"};
374 AttributeType m_attributetype;
376 std::size_t m_charsConsumed;
377 std::size_t m_zeroCount;
380 std::string m_encoding;
381 std::string m_lasterror;
std::size_t charsConsumed() const
Get the number of ASCII characters consumed.
Definition: xmlhdrparser.hpp:301
const char * encoding() const
Get the encoding specified as attribute in the header.
Definition: xmlhdrparser.hpp:294
Base class for structures that can throw exceptions for non recoverable errors.
Definition: exception.hpp:20
illegal XML header (more than 4 null bytes in a row). Usage error
Definition: exception.hpp:39
Class for parsing the header to get the character set encoding.
Definition: xmlhdrparser.hpp:24
textwolf byte source iterator template
const char * lasterror() const
Get the last error that occurred.
Definition: xmlhdrparser.hpp:287
bool parse()
Call the first/next iteration of parsing the header.
Definition: xmlhdrparser.hpp:65
XmlHdrParser()
Constructor.
Definition: xmlhdrparser.hpp:29
Definition of the exceptions (with their error codes) thrown by textwolf.
XmlHdrParser(const XmlHdrParser &o)
Copy constructor.
Definition: xmlhdrparser.hpp:38
void putInput(const char *src_, std::size_t srcsize_)
Add another input chunk to process.
Definition: xmlhdrparser.hpp:51
const std::string & consumedData() const
Get the whole original data added by successive calls of putInput(const char*,std::size_t) ...
Definition: xmlhdrparser.hpp:58
void clear()
Clear the data, reset the state.
Definition: xmlhdrparser.hpp:307