textwolf  0.2
xmlpathautomaton.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014 Patrick P. Frey
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7  */
10 
11 #ifndef __TEXTWOLF_XML_PATH_AUTOMATON_HPP__
12 #define __TEXTWOLF_XML_PATH_AUTOMATON_HPP__
13 #include "textwolf/char.hpp"
14 #include "textwolf/charset.hpp"
15 #include "textwolf/exception.hpp"
16 #include "textwolf/xmlscanner.hpp"
18 #include <limits>
19 #include <sstream>
20 #include <string>
21 #include <vector>
22 #include <map>
23 #include <cstddef>
24 #include <stdexcept>
25 
26 namespace textwolf {
27 
31 template <class CharSet_=charset::UTF8>
33 {
34 public:
37  {}
38 
39  typedef CharSet_ CharSet;
40  typedef int Hash;
42 
44 
45 public:
/// \brief Kinds of seek operations that can be attached to a state of the automaton
/// \remark The numeric values are used as index into the name table of operationName
enum Operation
{
    Content            = 0, ///< searching content token
    OpenTag            = 1, ///< searching an open tag
    CloseTag           = 2, ///< searching a close tag
    Attribute          = 3, ///< searching an attribute
    ThisAttributeValue = 4, ///< checking the value of the attribute just parsed (not an arbitrary but this one)
    AttributeValue     = 5, ///< searching a value of an attribute
    ContentStart       = 6  ///< looking for the start of content (to signal the end of the XML header)
};
58 
61  static const char* operationName( Operation op)
62  {
63  static const char* name[ 7] = {"Content", "OpenTag", "CloseTag", "Attribute", "ThisAttributeValue", "AttributeValue", "ContentStart"};
64  return name[ (unsigned int)op];
65  }
66 
69  struct Mask
70  {
71  unsigned short pos; //< positively selected elements bitmask
72  unsigned short neg; //< negatively selected elements bitmask that determines when a search pattern is given up copletely
73 
76  bool empty() const {return (pos==0);}
77 
81  Mask( unsigned short p_pos=0, unsigned short p_neg=0):pos(p_pos),neg(p_neg) {}
82 
85  Mask( const Mask& orig) :pos(orig.pos),neg(orig.neg) {}
86 
88  void reset() {pos=0; neg=0;}
89 
91  void reject( XMLScannerBase::ElementType e) {neg |= (1<<(unsigned short)e);}
92  bool hasReject( XMLScannerBase::ElementType e) const {return (neg & (1<<(unsigned short)e)) != 0;}
93 
95  void match( XMLScannerBase::ElementType e) {pos |= (1<<(unsigned short)e);}
96  bool hasMatch( XMLScannerBase::ElementType e) const {return (pos & (1<<(unsigned short)e)) != 0;}
97 
98  bool operator==( const Mask& o)
99  {
100  return (o.pos == pos) && (o.neg == neg);
101  }
102 
104  void seekop( Operation op)
105  {
106  switch (op)
107  {
108  case OpenTag:
111  break;
112  case CloseTag:
116  break;
117  case Attribute:
121  break;
122  case ThisAttributeValue:
129  break;
130  case AttributeValue:
134  break;
135  case Content:
137  break;
138  case ContentStart:
140  break;
141  }
142  }
143 
145  const char* seekopName() const
146  {
149  return "OpenTag";
150 
154  return "CloseTag";
155 
159  return "Attribute";
160 
164  return "AttributeValue";
165 
172  return "ThisAttributeValue";
173 
174  if (this->hasMatch( XMLScannerBase::Content))
175  return "Content";
176 
178  return "ContentStart";
179 
180  if (pos == 0 && neg == 0)
181  return "None";
182 
183  return "";
184  }
185 
188  void join( const Mask& mask) {pos |= mask.pos; neg |= mask.neg;}
189 
192  bool matches( XMLScannerBase::ElementType e) const {return (0 != (pos & (1<<(unsigned short)e)));}
193 
196  bool rejects( XMLScannerBase::ElementType e) const {return (0 != (neg & (1<<(unsigned short)e)));}
197  };
198 
201  struct Core
202  {
203  Mask mask; //< mask definiting what tokens are matching this state
204  bool follow; //< true, if the state is seeking tokens in all follow scopes in the XML tree
205  int typeidx; //< type of the element emitted by this state on a match
206  int cnt_start; //< lower bound of the element index matching (for index ranges)
207  int cnt_end; //< upper bound of the element index matching (for index ranges)
208 
210  Core() :follow(false),typeidx(0),cnt_start(0),cnt_end(-1) {}
214  };
215 
218  struct State
219  {
220  Core core; //< core of the state (the part used in processing)
221  unsigned int keysize; //< key size of the element
222  char* key; //< key of the element
223  char* srckey; //< key of the element as in source (for debugging or reporting, etc.)
224  int next; //< follow state
225  int link; //< alternative state to check
226 
229  :keysize(0),key(0),srckey(0),next(-1),link(-1) {}
230 
233  State( const State& orig)
234  :core(orig.core),keysize(orig.keysize),key(0),srckey(0),next(orig.next),link(orig.link)
235  {
236  defineKey( orig.keysize, orig.key, orig.srckey);
237  }
238 
241  {
242  if (key) delete [] key;
243  if (srckey) delete [] srckey;
244  }
245 
248  bool isempty() {return key==0&&core.typeidx==0&&next==0&&link==0&&core.mask.empty();}
249 
254  void defineKey( unsigned int p_keysize, const char* p_key, const char* p_srckey)
255  {
256  unsigned int ii;
257  if (key)
258  {
259  delete [] key;
260  key = 0;
261  }
262  if (srckey)
263  {
264  delete [] srckey;
265  srckey = 0;
266  }
267  if (p_key)
268  {
269  key = new char[ keysize=p_keysize];
270  for (ii=0; ii<keysize; ii++) key[ii]=p_key[ii];
271  }
272  if (p_srckey)
273  {
274  for (ii=0; p_srckey[ii]!=0; ii++);
275  srckey = new char[ ii+1];
276  for (ii=0; p_srckey[ii]!=0; ii++) srckey[ii]=p_srckey[ii];
277  srckey[ ii] = 0;
278  }
279  }
280 
288  void defineNext( Operation op, unsigned int p_keysize, const char* p_key, const char* p_srckey, int p_next, bool p_follow=false)
289  {
290  core.mask.seekop( op);
291  defineKey( p_keysize, p_key, p_srckey);
292  next = p_next;
293  core.follow = p_follow;
294  }
295 
302  void defineOutput( const Mask& mask, int p_typeidx, bool p_follow, int p_start, int p_end)
303  {
304  core.mask = mask;
305  core.typeidx = p_typeidx;
306  core.cnt_end = p_end;
307  core.cnt_start = p_start;
308  core.follow = p_follow;
309  }
310 
313  void defLink( int p_link)
314  {
315  link = p_link;
316  }
317 
318  std::string tostring() const
319  {
320  std::ostringstream rt;
321  if (next >= 0) rt << " ->" << next;
322  if (link >= 0) rt << " ~" << link;
323  rt << ' ';
324  if (core.follow)
325  {
326  rt << '/';
327  }
328  rt << '/';
329  rt << core.mask.seekopName();
330  if (srckey)
331  {
332  rt << " '" << srckey << "'";
333  }
334  else
335  {
336  rt << " (null)";
337  }
338  if (core.cnt_end > 0)
339  {
340  rt << '[' << core.cnt_start << ',' << rt << core.cnt_end << ']';
341  }
342  if (core.typeidx)
343  {
344  rt << " =>" << core.typeidx;
345  }
346  return rt.str();
347  }
348  };
349  std::vector<State> states; //< the states of the statemachine
350 
352  std::string tostring() const
353  {
354  std::ostringstream rt;
355  typename std::vector<State>::const_iterator ii=states.begin(), ee=states.end();
356  for (; ii != ee; ++ii)
357  {
358  rt << (int)(ii-states.begin()) << ": " << ii->tostring() << std::endl;
359  }
360  return rt.str();
361  }
362 
369  template <class Buffer>
370  void getEmmitedTokens( unsigned int stateidx, XMLScannerBase::ElementType e, Buffer& buf) const
371  {
372  int si = states[ stateidx].next;
373  while (si >= 0)
374  {
375  if (states[ si].core.typeidx && states[ si].core.mask.matches( e))
376  {
377  buf.push_back( states[ si].core.typeidx);
378  }
379  si = states[ si].link;
380  }
381  }
382 
385  struct Token
386  {
387  Core core; //< core of the state
388  int stateidx; //< index into the automaton, poiting to the state
389 
391  Token() :stateidx(-1) {}
393  Token( const Token& orig) :core(orig.core),stateidx(orig.stateidx) {}
397  Token( const State& state, int p_stateidx) :core(state.core),stateidx(p_stateidx) {}
398  };
399 
402  struct Scope
403  {
404  Mask mask; //< joined mask of all tokens active in this scope
405  Mask followMask; //< joined mask of all tokens active in this and all sub scopes of this scope
406 
409  struct Range
410  {
411  unsigned int tokenidx_from; //< lower bound token index
412  unsigned int tokenidx_to; //< upper bound token index
413  unsigned int followidx; //< pointer to follow token stack with tokens active in this and all sub scopes of this scope
414 
420  };
421  Range range; //< valid (active) token range of this scope (on the token stacks)
422 
425  Scope( const Scope& orig) :mask(orig.mask),followMask(orig.followMask),range(orig.range) {}
428  Scope& operator =( const Scope& orig) {mask=orig.mask; followMask=orig.followMask; range=orig.range; return *this;}
430  Scope() {}
431  };
432 
433 private:
442  int defineNext( int stateidx, Operation op, unsigned int keysize, const char* key, const char* srckey, bool follow=false) throw(exception,std::bad_alloc)
443  {
444  try
445  {
446  State state;
447  if (states.size() == 0)
448  {
449  stateidx = states.size();
450  states.push_back( state);
451  }
452  Mask mask;
453  mask.seekop( op);
454 
455  for (int ee=stateidx; ee != -1; stateidx=ee,ee=states[ee].link)
456  {
457  if ((states[ee].key != 0) && (keysize == states[ee].keysize) && (states[ee].core.follow == follow) && (mask == states[ee].core.mask))
458  {
459  unsigned int ii;
460  for (ii=0; ii<keysize && states[ee].key[ii]==key[ii]; ii++);
461  if (ii == keysize) return states[ee].next;
462  }
463  }
464  if (!states[ stateidx].isempty())
465  {
466  while (states[ stateidx].link >= 0)
467  {
468  stateidx = states[ stateidx].link;
469  }
470  states[ stateidx].link = states.size();
471  stateidx = states.size();
472  states.push_back( state);
473  }
474  states.push_back( state);
475  unsigned int lastidx = states.size()-1;
476  states[ stateidx].defineNext( op, keysize, key, srckey, lastidx, follow);
477  return stateidx=lastidx;
478  }
479  catch (std::bad_alloc)
480  {
481  throw exception( OutOfMem);
482  }
483  catch (...)
484  {
485  throw exception( Unknown);
486  }
487  }
488 
497  int defineOutput( int stateidx, const Mask& printOpMask, int typeidx, bool follow, int start, int end) throw(exception,std::bad_alloc)
498  {
499  try
500  {
501  State state;
502  if (states.size() == 0)
503  {
504  stateidx = states.size();
505  states.push_back( state);
506  }
507  if ((unsigned int)stateidx >= states.size()) throw exception( IllegalParam);
508 
509  if (!states[stateidx].isempty())
510  {
511  while (states[ stateidx].link >= 0)
512  {
513  stateidx = states[stateidx].link;
514  }
515  states[ stateidx].link = states.size();
516  stateidx = states.size();
517  states.push_back( state);
518 
519  }
520  states[ stateidx].defineOutput( printOpMask, typeidx, follow, start, end);
521  return stateidx;
522  }
523  catch (std::bad_alloc)
524  {
525  throw exception( OutOfMem);
526  }
527  catch (...)
528  {
529  throw exception( Unknown);
530  }
531  }
532 
533 public:
538  {
539  private:
540  XMLPathSelectAutomaton* xs; //< XML Path select automaton where this node is an element of
541  int stateidx; //< state of this element in the automaton
542 
/// \brief Element index range of an XML path element
struct Range
{
    int start;  ///< index of starting element starting with 0
    int end;    ///< index of upper boundary element (not belonging to range anymore). -1 if undefined (unlimited)

    /// \brief Constructor (unlimited range starting with the first element)
    Range()
        :start(0)
        ,end(-1)
    {}

    /// \brief Constructor by number of elements
    /// \param [in] count upper boundary of the range that starts with element 0
    Range( int count)
        :start(0)
        ,end(count)
    {}

    /// \brief Constructor by boundaries
    /// \param [in] p_start index of the starting element
    /// \param [in] p_end index of the upper boundary element
    Range( int p_start, int p_end)
        :start(p_start)
        ,end(p_end)
    {}

    /// \brief Copy constructor
    Range( const Range& o)
        :start(o.start)
        ,end(o.end)
    {}
};
563  Range range; //< Index range of this XML path element
564  bool follow; //< true, if this element is active (firing) for all sub scopes of the activation scope
565  Mask pushOpMask; //< mask for firing element actions
566  Mask printOpMask; //< mask for printing element actions
567 
568  private:
572  PathElement& defineOutput( Operation op)
573  {
574  printOpMask.reset();
575  printOpMask.seekop( op);
576  return *this;
577  }
578 
583  PathElement& doSelect( Operation op, const char* value) throw(exception,std::bad_alloc)
584  {
585  static XMLScannerBase::IsTagCharMap isTagCharMap;
586  if (xs != 0)
587  {
588  if (value)
589  {
590  char buf[ 1024];
591  StaticBuffer pb( buf, sizeof(buf));
592  char* itr = const_cast<char*>(value);
593  typedef XMLScanner<char*,CharSet,CharSet,StaticBuffer> StaticXMLScanner;
594  if (!StaticXMLScanner::parseStaticToken( isTagCharMap, itr, pb))
595  {
596  throw exception( IllegalAttributeName);
597  }
598  stateidx = xs->defineNext( stateidx, op, pb.size(), pb.ptr(), value, follow);
599  }
600  else
601  {
602  stateidx = xs->defineNext( stateidx, op, 0, 0, 0, follow);
603  }
604  }
605  return *this;
606  }
607 
610  PathElement& doFollow()
611  {
612  follow = true;
613  return *this;
614  }
615 
620  PathElement& doRange( int p_start, int p_end)
621  {
622  if (range.end == -1)
623  {
624  range = Range( p_start, p_end);
625  }
626  else if (p_end < range.end)
627  {
628  range.end = p_end;
629  }
630  else if (p_start > range.start)
631  {
632  range.start = p_start;
633  }
634  return *this;
635  }
636 
640  PathElement& doCount( int p_count)
641  {
642  return doRange( 0, p_count);
643  }
644 
648  PathElement& doStart( int p_start)
649  {
650  return doRange( p_start, std::numeric_limits<int>::max());
651  }
652 
656  PathElement& push( int typeidx) throw(exception,std::bad_alloc)
657  {
658  if (xs != 0) stateidx = xs->defineOutput( stateidx, printOpMask, typeidx, follow, range.start, range.end);
659  return *this;
660  }
661 
662  public:
664  PathElement() :xs(0),stateidx(0),follow(false),pushOpMask(0),printOpMask(0){}
668  PathElement( XMLPathSelectAutomaton* p_xs, int p_si=0) :xs(p_xs),stateidx(p_si),follow(false),pushOpMask(0),printOpMask(0){}
671  PathElement( const PathElement& orig) :xs(orig.xs),stateidx(orig.stateidx),range(orig.range),follow(orig.follow),pushOpMask(orig.pushOpMask),printOpMask(orig.printOpMask) {}
672 
675  PathElement& operator --(int) {return doFollow();}
680  PathElement& operator []( const char* name) throw(exception,std::bad_alloc) {return doSelect( OpenTag, name);}
684  PathElement& selectTag( const char* name) throw(exception,std::bad_alloc) {return doSelect( OpenTag, name);}
687  PathElement& selectCloseTag() throw(exception,std::bad_alloc) {return doSelect( CloseTag, 0);}
688 
693  PathElement& operator ()( const char* name) throw(exception,std::bad_alloc) {return doSelect( Attribute, name).defineOutput( ThisAttributeValue);}
697  PathElement& selectAttribute( const char* name) throw(exception,std::bad_alloc) {return doSelect( Attribute, name).defineOutput( ThisAttributeValue);}
698 
704  PathElement& operator ()( const char* name, const char* value) throw(exception,std::bad_alloc) {return doSelect( Attribute, name).doSelect( ThisAttributeValue, value);}
705 
710  PathElement& ifAttribute( const char* name, const char* value) throw(exception,std::bad_alloc) {return doSelect( Attribute, name).doSelect( ThisAttributeValue, value);}
711 
715  PathElement& TO(int idx) throw(exception,std::bad_alloc) {return doCount((idx>=0)?(idx+1):-1);}
719  PathElement& FROM(int idx) throw(exception,std::bad_alloc) {return doStart(idx); return *this;}
724  PathElement& RANGE(int idx1, int idx2) throw(exception,std::bad_alloc) {return doRange(idx1,(idx2>=0)?(idx2+1):-1); return *this;}
728  PathElement& INDEX(int idx) throw(exception,std::bad_alloc) {return doRange(idx,idx+1); return *this;}
729 
734  PathElement& operator =(int type) throw(exception,std::bad_alloc) {return push( type);}
738  PathElement& assignType(int type) throw(exception,std::bad_alloc) {return push( type);}
739 
743  PathElement& operator ()() throw(exception,std::bad_alloc) {return defineOutput(Content);}
746  PathElement& selectContent() throw(exception,std::bad_alloc) {return defineOutput(Content);}
747  };
748 
751  PathElement operator*()
752  {
753  return PathElement( this);
754  }
755 };
756 
757 } //namespace
758 #endif
std::vector< State > states
Definition: xmlpathautomaton.hpp:349
PathElement & FROM(int idx)
Define minimum element index to push.
Definition: xmlpathautomaton.hpp:719
unsigned short neg
Definition: xmlpathautomaton.hpp:72
PathElement & selectTag(const char *name)
Find tag by name.
Definition: xmlpathautomaton.hpp:684
bool hasReject(XMLScannerBase::ElementType e) const
Definition: xmlpathautomaton.hpp:92
Scope(const Scope &orig)
Copy constructor.
Definition: xmlpathautomaton.hpp:425
~State()
Destructor.
Definition: xmlpathautomaton.hpp:240
bool operator==(const Mask &o)
Definition: xmlpathautomaton.hpp:98
Mask followMask
Definition: xmlpathautomaton.hpp:405
std::size_t size() const
Return the number of characters in the buffer.
Definition: staticbuffer.hpp:99
State()
Constructor.
Definition: xmlpathautomaton.hpp:228
Definition: xmlpathautomaton.hpp:50
unknown error
Definition: exception.hpp:26
PathElement & selectContent()
Define grab content.
Definition: xmlpathautomaton.hpp:746
Definition: xmlpathautomaton.hpp:53
[4] tag attribute value in the XML header
Definition: xmlscanner.hpp:179
PathElement(const PathElement &orig)
Copy constructor.
Definition: xmlpathautomaton.hpp:671
invalid string for a tag or attribute in the automaton definition. Usage error
Definition: exception.hpp:34
PathElement & operator[](const char *name)
Find tag by name.
Definition: xmlpathautomaton.hpp:680
ElementType
Enumeration of XML element types returned by an XML scanner.
Definition: xmlscanner.hpp:173
Base class for structures that can throw exceptions for non recoverable errors.
Definition: exception.hpp:20
Fixed size buffer fulfilling the requirement of a back insertion sequence needed for textwolf output...
Token(const Token &orig)
Copy constructor.
Definition: xmlpathautomaton.hpp:393
PathElement & operator--(int)
Corresponds to "//" in abbreviated syntax of XPath.
Definition: xmlpathautomaton.hpp:675
Tag scope definition.
Definition: xmlpathautomaton.hpp:402
std::string tostring() const
Definition: xmlpathautomaton.hpp:318
Mask mask
Definition: xmlpathautomaton.hpp:203
Core()
Constructor.
Definition: xmlpathautomaton.hpp:210
PathElement operator*()
Get automaton root element to start an XML path definition.
Definition: xmlpathautomaton.hpp:751
bool hasMatch(XMLScannerBase::ElementType e) const
Definition: xmlpathautomaton.hpp:96
PathElement & TO(int idx)
Define maximum element index to push.
Definition: xmlpathautomaton.hpp:715
void join(const Mask &mask)
Join two mask definitions.
Definition: xmlpathautomaton.hpp:188
int link
Definition: xmlpathautomaton.hpp:225
Token(const State &state, int p_stateidx)
Constructor by value.
Definition: xmlpathautomaton.hpp:397
void defineNext(Operation op, unsigned int p_keysize, const char *p_key, const char *p_srckey, int p_next, bool p_follow=false)
Define a state transition by key and operation.
Definition: xmlpathautomaton.hpp:288
void reject(XMLScannerBase::ElementType e)
Deactivate operation for a certain element type.
Definition: xmlpathautomaton.hpp:91
const char * ptr() const
Return the buffer content as 0-terminated string.
Definition: staticbuffer.hpp:103
PathElement & assignType(int type)
Define element type to push.
Definition: xmlpathautomaton.hpp:738
Simple back insertion sequence for storing the outputs of textwolf in a constant size buffer...
Definition: staticbuffer.hpp:24
unsigned int tokenidx_from
Definition: xmlpathautomaton.hpp:411
static const char * operationName(Operation op)
Get the name of the operation as string.
Definition: xmlpathautomaton.hpp:61
[3] tag attribute name in the XML header
Definition: xmlscanner.hpp:178
[10] open tag (e.g. "bla" for "<bla...")
Definition: xmlscanner.hpp:185
[13] content element string (separated by spaces or end of line)
Definition: xmlscanner.hpp:188
void defineKey(unsigned int p_keysize, const char *p_key, const char *p_srckey)
Define the matching key of this state.
Definition: xmlpathautomaton.hpp:254
int cnt_end
Definition: xmlpathautomaton.hpp:207
void match(XMLScannerBase::ElementType e)
Declare an operation to match on an element type.
Definition: xmlpathautomaton.hpp:95
void getEmmitedTokens(unsigned int stateidx, XMLScannerBase::ElementType e, Buffer &buf) const
Get the emitted results for a successor state that match an element of a given type.
Definition: xmlpathautomaton.hpp:370
XMLPathSelectAutomaton()
Constructor.
Definition: xmlpathautomaton.hpp:36
parameter check in automaton definition failed. Internal textwolf error
Definition: exception.hpp:33
int next
Definition: xmlpathautomaton.hpp:224
Definition of unicode characters.
out of memory in the automaton definition. System error (std::bad_alloc)
Definition: exception.hpp:35
textwolf exception class
Definition: exception.hpp:48
int Hash
Definition: xmlpathautomaton.hpp:40
void seekop(Operation op)
Declare an operation as seek operation.
Definition: xmlpathautomaton.hpp:104
[9] tag attribute value (e.g. "5" in <person id='5'>)
Definition: xmlscanner.hpp:184
Range(const Scope &orig)
Copy constructor.
Definition: xmlpathautomaton.hpp:419
Mask to query for element types, if they match or not.
Definition: xmlpathautomaton.hpp:69
char * srckey
Definition: xmlpathautomaton.hpp:223
unsigned int tokenidx_to
Definition: xmlpathautomaton.hpp:412
Defines the set of tag characters.
Definition: xmlscanner.hpp:367
Mask(unsigned short p_pos=0, unsigned short p_neg=0)
Constructor by values.
Definition: xmlpathautomaton.hpp:81
void defineOutput(const Mask &mask, int p_typeidx, bool p_follow, int p_start, int p_end)
Define an element output operation.
Definition: xmlpathautomaton.hpp:302
Operation
Definition: xmlpathautomaton.hpp:48
[11] close tag (e.g. "bla" for "</bla>")
Definition: xmlscanner.hpp:186
void defLink(int p_link)
Link another state to check to the current state.
Definition: xmlpathautomaton.hpp:313
Core(const Core &o)
Copy constructor.
Definition: xmlpathautomaton.hpp:213
Definition: xmlpathautomaton.hpp:54
PathElement & RANGE(int idx1, int idx2)
Define minimum and maximum element index to push.
Definition: xmlpathautomaton.hpp:724
[5] end of XML header event (after parsing '?>')
Definition: xmlscanner.hpp:180
Range on the token stack with all tokens that belong to this scope.
Definition: xmlpathautomaton.hpp:409
XMLPathSelectAutomaton< CharSet > ThisXMLPathSelectAutomaton
Definition: xmlpathautomaton.hpp:41
Defines one node in the XML Path element tree in the construction phase.
Definition: xmlpathautomaton.hpp:537
int typeidx
Definition: xmlpathautomaton.hpp:205
Scope()
Constructor.
Definition: xmlpathautomaton.hpp:430
XML scanner template that adds the functionality to the statemachine base definition.
Definition: xmlscanner.hpp:431
Core core
Definition: xmlpathautomaton.hpp:387
Mask(const Mask &orig)
Copy constructor.
Definition: xmlpathautomaton.hpp:85
PathElement & selectAttribute(const char *name)
Find tag with one attribute.
Definition: xmlpathautomaton.hpp:697
Definition of exceptions with containing error codes thrown by textwolf.
virtual ~XMLPathSelectAutomaton()
Definition: xmlpathautomaton.hpp:43
Mask mask
Definition: xmlpathautomaton.hpp:404
PathElement()
Constructor.
Definition: xmlpathautomaton.hpp:664
PathElement & operator()()
Define grab content.
Definition: xmlpathautomaton.hpp:743
PathElement(XMLPathSelectAutomaton *p_xs, int p_si=0)
Constructor by values.
Definition: xmlpathautomaton.hpp:668
unsigned int keysize
Definition: xmlpathautomaton.hpp:221
unsigned short pos
Definition: xmlpathautomaton.hpp:71
Active or passive but still valid token of the XML processing (this is a trigger waiting to match) ...
Definition: xmlpathautomaton.hpp:385
State(const State &orig)
Copy constructor.
Definition: xmlpathautomaton.hpp:233
char * key
Definition: xmlpathautomaton.hpp:222
bool empty() const
Tells if mask does not select anything anymore.
Definition: xmlpathautomaton.hpp:76
XML parser iterator interface for processing the XML elements one by one.
bool isempty()
Check if the state definition is empty.
Definition: xmlpathautomaton.hpp:248
bool rejects(XMLScannerBase::ElementType e) const
Check if an element type should reset a mask.
Definition: xmlpathautomaton.hpp:196
int cnt_start
Definition: xmlpathautomaton.hpp:206
const char * seekopName() const
Get the name of a seek operation.
Definition: xmlpathautomaton.hpp:145
Definition: xmlpathautomaton.hpp:55
CharSet_ CharSet
Definition: xmlpathautomaton.hpp:39
Scope & operator=(const Scope &orig)
Assignment operator.
Definition: xmlpathautomaton.hpp:428
Range range
Definition: xmlpathautomaton.hpp:421
[2] open XML header tag
Definition: xmlscanner.hpp:177
Range()
Constructor.
Definition: xmlpathautomaton.hpp:416
Definition: xmlpathautomaton.hpp:51
Core of an automaton state definition that is used during XML processing.
Definition: xmlpathautomaton.hpp:201
Automaton to define XML path expressions and assign types (int values) to them.
Definition: xmlpathautomaton.hpp:32
PathElement & ifAttribute(const char *name, const char *value)
Find tag with one attribute,value condition.
Definition: xmlpathautomaton.hpp:710
PathElement & INDEX(int idx)
Define index of the element index to push.
Definition: xmlpathautomaton.hpp:728
PathElement & selectCloseTag()
Find close tag of current tag selected.
Definition: xmlpathautomaton.hpp:687
State of an automaton in its definition.
Definition: xmlpathautomaton.hpp:218
int stateidx
Definition: xmlpathautomaton.hpp:388
unsigned int followidx
Definition: xmlpathautomaton.hpp:413
PathElement & operator=(int type)
Define element type to push.
Definition: xmlpathautomaton.hpp:734
bool matches(XMLScannerBase::ElementType e) const
Check if an element type matches the mask.
Definition: xmlpathautomaton.hpp:192
Token()
Constructor.
Definition: xmlpathautomaton.hpp:391
Core core
Definition: xmlpathautomaton.hpp:220
std::string tostring() const
Returns the content of the automaton as pretty printed string for debug output.
Definition: xmlpathautomaton.hpp:352
Definition: xmlpathautomaton.hpp:56
[8] tag attribute name (e.g. "id" in <person id='5'>)
Definition: xmlscanner.hpp:183
void reset()
Reset operation (deactivate)
Definition: xmlpathautomaton.hpp:88
bool follow
Definition: xmlpathautomaton.hpp:204
Definition: xmlpathautomaton.hpp:52
[12] immediate close tag (e.g. "bla" for "<bla />")
Definition: xmlscanner.hpp:187
Character set encodings already implemented in textwolf.