11 #ifndef __TEXTWOLF_XML_PATH_SELECT_HPP__
12 #define __TEXTWOLF_XML_PATH_SELECT_HPP__
27 template <
typename Element>
29 :
public std::vector<Element>
34 :std::vector<Element>(o){}
40 template <
class CharSet_,
template <
typename>
class StackType_=DefaultStackType>
63 unsigned int scope_iter;
77 scope_iter = scope.range.tokenidx_from;
81 StackType_<Scope> scopestk;
82 StackType_<unsigned int> follows;
83 StackType_<int> triggers;
84 StackType_<Token> tokens;
89 void expand(
int stateidx)
93 const State& st = atm->
states[ stateidx];
94 context.scope.mask.join( st.core.mask);
95 if (st.core.mask.empty() && st.core.typeidx != 0)
97 triggers.push_back( st.core.typeidx);
103 context.scope.followMask.join( st.core.mask);
104 follows.push_back( tokens.size());
106 tokens.push_back( Token( st, stateidx));
122 context.scope.range.tokenidx_from = context.scope.range.tokenidx_to;
124 context.scope.range.tokenidx_to = tokens.size();
125 context.scope.range.followidx = follows.size();
126 context.init( type, key, keysize);
130 scopestk.push_back( context.scope);
131 context.scope.mask = context.scope.followMask;
137 void closeProcessElement()
141 if (!scopestk.empty())
143 context.scope = scopestk.back();
145 follows.resize( context.scope.range.followidx);
146 tokens.resize( context.scope.range.tokenidx_to);
154 void produce(
unsigned int tokenidx,
const State& st)
156 const Token& tk = tokens[ tokenidx];
157 if (tk.core.cnt_end == -1)
163 if (tk.core.cnt_end > 0)
165 if (--tokens[ tokenidx].core.cnt_end == 0)
167 tokens[ tokenidx].core.mask.reset();
169 if (tk.core.cnt_start <= 0)
175 --tokens[ tokenidx].core.cnt_start;
184 int match(
unsigned int tokenidx)
187 if (context.key != 0)
189 if (tokenidx >= context.scope.range.tokenidx_to)
return 0;
191 Token* tk = &tokens[ tokenidx];
192 if (tk->core.mask.matches( context.type))
194 const State& st = atm->
states[ tk->stateidx];
197 if (st.keysize == context.keysize)
200 for (ii=0; ii<context.keysize && st.key[ii] == context.key[ii]; ii++);
201 if (ii==context.keysize)
203 produce( tokenidx, st);
204 tk = &tokens[ tokenidx];
210 produce( tokenidx, st);
211 tk = &tokens[ tokenidx];
213 if (tk->core.typeidx != 0)
215 if (tk->core.cnt_end == -1)
217 rt = tk->core.typeidx;
219 else if (tk->core.cnt_end > 0)
221 if (--tk->core.cnt_end == 0)
223 tk->core.mask.reset();
225 if (tk->core.cnt_start <= 0)
227 rt = tk->core.typeidx;
231 --tk->core.cnt_start;
236 if (tk->core.mask.rejects( context.type))
239 tk->core.mask.reset();
251 if (context.scope.mask.matches( context.type))
255 if (context.scope_iter < context.scope.range.tokenidx_to)
257 type = match( context.scope_iter);
258 ++context.scope_iter;
262 unsigned int ii = context.scope_iter - context.scope.range.tokenidx_to;
264 if (ii < context.scope.range.followidx && context.scope.range.tokenidx_from > follows[ ii])
266 type = match( follows[ ii]);
267 ++context.scope_iter;
269 else if (!triggers.empty())
271 type = triggers.back();
298 template <
class Buffer>
301 unsigned int ti = context.scope.range.tokenidx_to, te = tokens.size();
304 const Token& tk = tokens[ (std::size_t)ti];
307 buf.push_back( tokens[ti].stateidx);
312 ti=0; te = context.scope.range.followidx;
315 if (tokens[ follows[ ti]].core.mask.matches( type))
317 buf.push_back( tokens[ follows[ ti]].stateidx);
327 :atm(p_atm),scopestk(),follows(),triggers(),tokens()
329 if (atm->
states.size() > 0) expand(0);
335 :atm(o.atm),scopestk(o.scopestk),follows(o.follows),triggers(o.triggers),tokens(o.tokens){}
358 element = input->fetch();
370 bool compare(
const iterator& iter)
const
372 return (element == iter.element);
381 element = orig.element;
399 input->initProcessElement( p_type, p_key, p_keysize);
405 if (input) input->closeProcessElement();
410 :element(0),input(0) {}
456 return iterator( *
this, type, key, keysize);
463 return iterator( *
this, type, key.c_str(), key.size());
Definition: xmlpathselect.hpp:28
std::vector< State > states
Definition: xmlpathautomaton.hpp:349
DefaultStackType()
Definition: xmlpathselect.hpp:32
ElementType
Enumeration of XML element types returned by an XML scanner.
Definition: xmlscanner.hpp:173
int & reference
Definition: xmlpathselect.hpp:345
Base class for structures that can throw exceptions for non recoverable errors.
Definition: exception.hpp:20
Fixed size buffer fulfilling the requirement of a back insertion sequence needed for textwolf output...
iterator(ThisXMLPathSelect &p_input, XMLScannerBase::ElementType p_type, const char *p_key, int p_keysize)
Constructor by values.
Definition: xmlpathselect.hpp:396
Tag scope definition.
Definition: xmlpathautomaton.hpp:402
const int * operator->() const
Element access.
Definition: xmlpathselect.hpp:430
Mask mask
Definition: xmlpathautomaton.hpp:203
int * pointer
Definition: xmlpathselect.hpp:344
int operator*() const
Element access.
Definition: xmlpathselect.hpp:423
[10] open tag (e.g. "bla" for "<bla...")
Definition: xmlscanner.hpp:185
[13] content element string (separated by spaces or end of line)
Definition: xmlscanner.hpp:188
iterator operator++(int)
Postincrement.
Definition: xmlpathselect.hpp:441
~iterator()
Definition: xmlpathselect.hpp:403
bool operator==(const iterator &iter) const
Compare elements for equality.
Definition: xmlpathselect.hpp:445
iterator(const iterator &orig)
Copy constructor.
Definition: xmlpathselect.hpp:386
iterator & operator++()
Preincrement.
Definition: xmlpathselect.hpp:437
Definition of unicode characters.
textwolf exception class
Definition: exception.hpp:48
int Hash
Definition: xmlpathautomaton.hpp:40
iterator()
Default constructor.
Definition: xmlpathselect.hpp:409
iterator end()
Get the end of results returned by 'push(XMLScannerBase::ElementType,const char*, int)'...
Definition: xmlpathselect.hpp:468
Mask to query for element types, if they match or not.
Definition: xmlpathautomaton.hpp:69
void getTokenTypeMatchingStates(XMLScannerBase::ElementType type, bool withFollows, Buffer &buf) const
Get the next states that match an element of a given type.
Definition: xmlpathselect.hpp:299
bool operator!=(const iterator &iter) const
Compare elements for inequality.
Definition: xmlpathselect.hpp:449
Interface that describes what a character set encoding implementation has to define to be used as cha...
void assign(const iterator &orig)
Assign iterator.
Definition: xmlpathselect.hpp:378
[11] close tag (e.g. "bla" for "</bla>")
Definition: xmlscanner.hpp:186
XML path select template.
Definition: xmlpathselect.hpp:41
XMLPathSelect(const ThisXMLPathSelectAutomaton *p_atm)
Constructor.
Definition: xmlpathselect.hpp:326
iterator & operator=(const iterator &orig)
Assignment.
Definition: xmlpathselect.hpp:415
Core core
Definition: xmlpathautomaton.hpp:387
Definition of exceptions with containing error codes thrown by textwolf.
int value_type
Definition: xmlpathselect.hpp:342
XMLPathSelect< CharSet_, StackType_ > ThisXMLPathSelect
Definition: xmlpathselect.hpp:45
std::input_iterator_tag iterator_category
Definition: xmlpathselect.hpp:346
Active or passive but still valid token of the XML processing (this is a trigger waiting to match) ...
Definition: xmlpathautomaton.hpp:385
XML parser iterator interface for processing the XML elements one by one.
XMLPathSelectAutomaton< CharSet_ > ThisXMLPathSelectAutomaton
Definition: xmlpathselect.hpp:44
std::size_t difference_type
Definition: xmlpathselect.hpp:343
Automaton to select path expressions from an XML iterator.
iterator push(XMLScannerBase::ElementType type, const char *key, int keysize)
Feed the path selector with the next token and get the start iterator for the results.
Definition: xmlpathselect.hpp:454
input iterator for the output of this XMLScanner
Definition: xmlpathselect.hpp:339
XMLPathSelect(const XMLPathSelect &o)
Copy constructor.
Definition: xmlpathselect.hpp:334
Automaton to define XML path expressions and assign types (int values) to them.
Definition: xmlpathautomaton.hpp:32
iterator push(XMLScannerBase::ElementType type, const std::string &key)
Feed the path selector with the next token and get the start iterator for the results.
Definition: xmlpathselect.hpp:461
DefaultStackType(const DefaultStackType &o)
Definition: xmlpathselect.hpp:33
State of an automaton in its definition.
Definition: xmlpathautomaton.hpp:218
bool matches(XMLScannerBase::ElementType e) const
Check if an element type matches the mask.
Definition: xmlpathautomaton.hpp:192
[12] immediate close tag (e.g. "bla" for "<bla />")
Definition: xmlscanner.hpp:187