11 #ifndef __TEXTWOLF_XML_PATH_AUTOMATON_PARSE_HPP__
12 #define __TEXTWOLF_XML_PATH_AUTOMATON_PARSE_HPP__
29 template <
class SrcCharSet=
charset::UTF8,
class AtmCharSet=
charset::UTF8>
45 std::string idstrings;
48 std::vector<std::size_t> idref;
51 char const* xx = (
char const*)std::memchr( esrc,
':', esrcsize);
54 std::size_t xpos = xx - esrc;
55 if (xpos + 1 < esrcsize && xx[1] ==
':')
59 xx = (
char const*)std::memchr( xx+1,
':', esrcsize - (xpos+1));
61 for (; *pp; skipSpaces( pp))
77 id = parseValue( pp, idstrings);
81 while (*pp != 0 && *pp !=
')') pp++;
88 id = parseIdentifier( pp, idstrings);
97 typename std::vector<std::size_t>::const_iterator di = idref.begin(), de = idref.end();
102 for (; *src; skipSpaces( src))
110 skipIdentifier( src);
111 expr.selectAttribute( getIdentifier( *di++, idstrings));
127 expr.selectAttribute( 0);
132 skipIdentifier( src);
133 expr.selectAttribute( getIdentifier( *di++, idstrings));
136 else if (*src ==
'(')
150 skipIdentifier( src);
151 expr.selectTag( getIdentifier( *di++, idstrings));
163 expr.selectAttribute( 0);
168 skipIdentifier( src);
169 expr.selectAttribute( getIdentifier( *di++, idstrings));
172 else if (*src ==
'(')
186 skipIdentifier( src);
187 expr.selectTag( getIdentifier( *di++, idstrings));
196 expr.selectCloseTag();
201 ++src; skipSpaces( src);
204 ++src; skipSpaces( src);
206 skipIdentifier( src);
209 const char* attrname = getIdentifier( *di++, idstrings);
211 ++src; skipSpaces( src);
214 const char* attrval = getIdentifier( *di++, idstrings);
216 expr.ifAttribute( attrname, attrval);
222 skipIdentifier( src);
225 const char* range_start_str = getIdentifier( *di++, idstrings);
226 int range_start = parseNum( range_start_str);
227 if (range_start < 0 || range_start_str[0])
return src.
getPosition()+1;
231 ++src; skipSpaces( src);
234 expr.FROM( range_start);
239 skipIdentifier( src);
242 const char* range_end_str = getIdentifier( *di++, idstrings);
243 int range_end = parseNum( range_end_str);
244 if (range_end < 0 || range_end_str[0])
return src.
getPosition()+1;
245 ++src; skipSpaces( src);
247 expr.RANGE( range_start, range_end);
251 else if (*src ==
']')
253 expr.INDEX( range_start);
268 expr.selectContent();
277 expr.assignType( typeidx);
284 for (; src.control() ==
Space; ++src);
287 static int parseNum(
char const*& src)
290 for (; *src>=
'0' && *src<=
'9';++src) num.push_back( *src);
291 if (num.size() == 0 || num.size() > 8)
return -1;
292 return std::atoi( num.c_str());
295 static bool isIdentifierChar(
SrcScanner& src)
297 if (src.control() ==
Undef || src.control() ==
Any)
299 if (*src == (
unsigned char)
'*')
return false;
300 if (*src == (
unsigned char)
'~')
return false;
301 if (*src == (
unsigned char)
'/')
return false;
302 if (*src == (
unsigned char)
'(')
return false;
303 if (*src == (
unsigned char)
')')
return false;
304 if (*src == (
unsigned char)
'@')
return false;
310 std::size_t parseIdentifier(
SrcScanner& src, std::string& idstrings)
312 std::size_t rt = idstrings.size();
313 for (; isIdentifierChar(src); ++src)
315 m_atmcharset.print( *src, idstrings);
317 m_atmcharset.print( 0, idstrings);
321 std::size_t parseValue(
SrcScanner& src, std::string& idstrings)
323 std::size_t rt = idstrings.size();
324 if (*src ==
'"' || *src ==
'\'')
326 unsigned char eb = *src;
327 for (++src; *src && *src != eb; ++src)
329 m_atmcharset.print( *src, idstrings);
335 for (; isIdentifierChar(src); ++src)
337 m_atmcharset.print( *src, idstrings);
340 m_atmcharset.print( 0, idstrings);
346 for (; isIdentifierChar(src); ++src){}
351 if (*src ==
'"' || *src ==
'\'')
353 unsigned char eb = *src;
354 for (++src; *src && *src != eb; ++src){}
359 for (; isIdentifierChar(src); ++src){}
363 const char* getIdentifier( std::size_t idx,
const std::string& idstrings)
const
365 return idstrings.c_str() + idx;
369 AtmCharSet m_atmcharset;
370 SrcCharSet m_srccharset;
XMLPathSelectAutomaton< AtmCharSet > ThisAutomaton
Definition: xmlpathautomatonparse.hpp:33
ControlCharacter control()
Get the control character representation of the current character.
Definition: textscanner.hpp:218
virtual ~XMLPathSelectAutomatonParser()
Definition: xmlpathautomatonparse.hpp:41
std::size_t getPosition() const
Get the current source iterator position.
Definition: textscanner.hpp:154
Reader for scanning the input character by character.
Definition: textscanner.hpp:68
Input iterator on a constant string returning null characters after EOF as required by textwolf scanners.
Definition: cstringiterator.hpp:23
XMLPathSelectAutomatonParser()
Constructor.
Definition: xmlpathautomatonparse.hpp:40
textwolf iterator on strings
Automaton to select path expressions from an XML iterator.
ThisAutomaton::PathElement PathElement
Definition: xmlpathautomatonparse.hpp:34
Automaton to define XML path expressions and assign types (int values) to them.
Definition: xmlpathautomatonparse.hpp:30
XMLPathSelectAutomatonParser This
Definition: xmlpathautomatonparse.hpp:35
int addExpression(int typeidx, const char *esrc, std::size_t esrcsize)
Definition: xmlpathautomatonparse.hpp:43
TextScanner< CStringIterator, SrcCharSet > SrcScanner
Definition: xmlpathautomatonparse.hpp:36
Automaton to define XML path expressions and assign types (int values) to them.
Definition: xmlpathautomaton.hpp:32
Character set encodings already implemented in textwolf.