textwolf  0.2
1 /*
2  * Copyright (c) 2014 Patrick P. Frey
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7  */
13 #include "textwolf/char.hpp"
15 #include "textwolf/exception.hpp"
16 #include "textwolf/textscanner.hpp"
17 #include "textwolf/traits.hpp"
18 #include <map>
19 #include <cstddef>
21 namespace textwolf {
26 {
27 public:
28  enum
29  {
31  };
/// \brief One state of the scanner state machine.
// NOTE(review): this listing appears to have lost lines of this struct (the
// block that assigns 'action' below has no visible header, and sibling code
// reads a 'fallbackState' member that is not declared here) - confirm against
// the original header before editing.
34  struct Element
35  {
// error code stored by addMiss(); -1 is treated there as "not set yet"
37  int missError;
/// \brief Action attached to a state: an operation code plus one argument
41  struct Action
42  {
43  int op;
44  int arg;
45  };
// counter of defined follow transitions (incremented per added transition)
47  unsigned char nofnext;
// follow state per control character, -1 = no transition defined
48  signed char next[ NofControlCharacter];
// initialization: no action (op -1) and no transition for any control character
52  {
53  action.op = -1;
54  action.arg = 0;
55  for (unsigned int ii=0; ii<NofControlCharacter; ii++) next[ii] = -1;
56  }
57  };
61  Element* get( int stateIdx) throw(exception)
62  {
63  if ((unsigned int)stateIdx>size) throw exception(InvalidState);
64  return tab + stateIdx;
65  }
67 private:
68  Element tab[ MaxNofStates];
69  unsigned int size;
73  void newState( int stateIdx) throw(exception)
74  {
75  if (size != (unsigned int)stateIdx) throw exception( StateNumbersNotAscending);
76  if (size >= MaxNofStates) throw exception( DimOutOfRange);
77  size++;
78  }
82  void addOtherTransition( int nextState) throw(exception)
83  {
84  if (size == 0) throw exception( InvalidState);
85  if (nextState < 0 || nextState > MaxNofStates) throw exception( InvalidParamState);
86  for (unsigned int inputchr=0; inputchr<NofControlCharacter; inputchr++)
87  {
88  if (tab[ size-1].next[ inputchr] == -1) tab[ size-1].next[ inputchr] = (unsigned char)nextState;
89  }
90  tab[ size-1].nofnext = NofControlCharacter;
91  }
96  void addTransition( ControlCharacter inputchr, int nextState) throw(exception)
97  {
98  if (size == 0) throw exception( InvalidState);
99  if ((int)inputchr >= (int)NofControlCharacter) throw exception( InvalidParamChar);
100  if (nextState < 0 || nextState > MaxNofStates) throw exception( InvalidParamState);
101  if (tab[ size-1].next[ inputchr] != -1) throw exception( DuplicateStateTransition);
102  tab[ size-1].next[ inputchr] = (unsigned char)nextState;
103  tab[ size-1].nofnext += 1;
104  }
108  void addTransition( ControlCharacter inputchr) throw(exception)
109  {
110  addTransition( inputchr, size-1);
111  }
116  void addAction( int action_op, int action_arg=0) throw(exception)
117  {
118  if (size == 0) throw exception( InvalidState);
119  if (tab[ size-1].action.op != -1) throw exception( InvalidState);
120  tab[ size-1].action.op = action_op;
121  tab[ size-1].action.arg = action_arg;
122  }
126  void addMiss( int error) throw(exception)
127  {
128  if (size == 0) throw exception( InvalidState);
129  if (tab[ size-1].missError != -1) throw exception( InvalidState);
130  tab[ size-1].missError = error;
131  }
135  void addFallback( int stateIdx) throw(exception)
136  {
137  if (size == 0) throw exception( InvalidState);
138  if (tab[ size-1].fallbackState != -1) throw exception( InvalidState);
139  if (stateIdx < 0 || stateIdx > MaxNofStates) throw exception( InvalidParamState);
140  tab[ size-1].fallbackState = stateIdx;
141  }
142 public:
144  ScannerStatemachine() :size(0){}
147  ScannerStatemachine& operator[]( int stateIdx) {newState(stateIdx); return *this;}
149  ScannerStatemachine& operator()( ControlCharacter inputchr, int ns) {addTransition(inputchr,ns); return *this;}
151  ScannerStatemachine& operator()( ControlCharacter i1, ControlCharacter i2, int ns) {addTransition(i1,ns); addTransition(i2,ns); return *this;}
153  ScannerStatemachine& operator()( ControlCharacter i1, ControlCharacter i2, ControlCharacter i3, int ns) {addTransition(i1,ns); addTransition(i2,ns); addTransition(i3,ns); return *this;}
155  ScannerStatemachine& operator()( ControlCharacter inputchr) {addTransition(inputchr); return *this;}
157  ScannerStatemachine& action( int aa, int arg=0) {addAction(aa,arg); return *this;}
159  ScannerStatemachine& miss( int ee) {addMiss(ee); return *this;}
161  ScannerStatemachine& fallback( int stateIdx) {addFallback(stateIdx); return *this;}
163  ScannerStatemachine& other( int stateIdx) {addOtherTransition(stateIdx); return *this;}
164 };
169 {
170 public:
174  {
190  };
191  enum
192  {
194  };
199  static const char* getElementTypeName( ElementType ee)
200  {
201  static const char* names[ NofElementTypes] = {"None","ErrorOccurred","HeaderStart","HeaderAttribName","HeaderAttribValue","HeaderEnd", "DocAttribValue", "DocAttribEnd", "TagAttribName","TagAttribValue","OpenTag","CloseTag","CloseTagIm","Content","Exit"};
202  return names[ (unsigned int)ee];
203  }
207  enum Error
208  {
209  Ok,
225  };
230  static const char* getErrorString( Error ee)
231  {
232  enum {NofErrors=16};
233  static const char* sError[NofErrors]
234  = {0,"illegal document attribute definition",
235  "expected open tag",
236  "expected XML tag",
237  "unexpected end of text",
238  "syntax token",
239  "string not terminated",
240  "undefined character entity",
241  "expected tag end",
242  "expected equal",
243  "expected tag attribute",
244  "expected CDATA tag",
245  "internal (illegal state)",
246  "unexpected end of input",
247  "expected end of line",
248  "expected 2nd '-' to complete marker for start of comment '<!--'"
249  };
250  return sError[(unsigned int)ee];
251  }
255  enum STMState
256  {
261  };
/// \brief Get the name of a scanner state as string
/// \param [in] s state, used unchecked as index into the name table
/// \return the name as null terminated string constant
// NOTE(review): most lines of the initializer are missing from this listing,
// only the last row of names is visible - confirm that the table really has
// NofStates entries matching the STMState enum.
266  static const char* getStateString( STMState s)
267  {
268  enum Constant {NofStates=48};
269  static const char* sState[NofStates]
270  = {
280  "CDATA", "CDATA1", "CDATA2", "CDATA3", "EXIT"
281  };
282  return sState[(unsigned int)s];
283  }
288  {
291  };
296  static const char* getActionString( STMAction a)
297  {
298  static const char* name[ NofSTMActions] = {"Return", "ReturnWord", "ReturnContent", "ReturnIdentifier", "ReturnSQString", "ReturnDQString", "ExpectIdentifierXML", "ExpectIdentifierCDATA", "ReturnEOF"};
299  return name[ (unsigned int)a];
300  };
305  {
308  {
309  (*this)
357  [ EXIT ].action(Return,Exit);
358  }
359  };
368  {
370  {
371  (*this)(Undef,true)(Any,true)(Dash,true);
372  }
373  };
379  {
381  {
382  (*this)(Undef,true)(Equal,true)(Gt,true)(Slash,true)(Dash,true)(Exclam,true)(Questm,true)(Sq,true)(Dq,true)(Osb,true)(Csb,true)(Any,true);
383  }
384  };
389  {
391  {
392  (*this)(Cntrl,true)(Space,true)(EndOfLine,true)(Undef,true)(Equal,true)(Gt,true)(Slash,true)(Dash,true)(Exclam,true)(Questm,true)(Sq,true)(Dq,true)(Osb,true)(Csb,true)(Any,true);
393  }
394  };
399  {
401  {
402  (*this)(Sq,false)(Space,true);
403  }
404  };
409  {
411  {
412  (*this)(Dq,false)(Space,true);
413  }
414  };
415 };
424 template
425 <
426  class InputIterator,
427  class InputCharSet_,
428  class OutputCharSet_,
429  class OutputBuffer_
430 >
432 {
433 private:
436  struct TokState
437  {
441  enum Id
442  {
443  Start,
444  ParsingDone,
445  ParsingKey,
446  ParsingEntity,
447  ParsingNumericEntity,
448  ParsingNumericBaseEntity,
449  ParsingNamedEntity,
450  ParsingToken
451  };
452  Id id;
454  enum EolnState
455  {
456  SRC,CR
457  };
458  EolnState eolnState;
460  unsigned int pos;
461  unsigned int base;
462  EChar value;
463  char buf[ 16];
464  UChar curchr_saved;
467  TokState() :id(Start),eolnState(SRC),pos(0),base(0),value(0),curchr_saved(0) {}
472  void init(Id id_=Start, EolnState eolnState_=SRC)
473  {
474  id=id_;eolnState=eolnState_;pos=0;base=0;value=0;curchr_saved=0;
475  }
476  };
477  TokState tokstate;
479 public:
480  typedef InputCharSet_ InputCharSet;
481  typedef OutputCharSet_ OutputCharSet;
482  class iterator;
484 public:
487  typedef std::map<const char*,UChar> EntityMap;
488  typedef OutputBuffer_ OutputBuffer;
490 private:
/// \brief Print the character ch with the output character set to the output buffer
/// \param [in] ch character to print
493  void push( UChar ch)
494  {
495  m_output.print( ch, m_outputBuf);
496  }
/// \brief Variant of copychar selected by a YES tag: delegates the copy of
///        the current character to the input reader
498  void copychar_impl( const traits::TypeCheck::YES&)
499  {
500  m_src.copychar( m_output, m_outputBuf);
501  }
/// \brief Variant of copychar selected by a NO tag: fetches the current
///        character with chr() and prints it with push()
503  void copychar_impl( const traits::TypeCheck::NO&)
504  {
505  push( m_src.chr());
506  }
/// \brief Copy the current input character to the output buffer. The variant
///        is selected with the is_same tag of the input and output character set
508  void copychar()
509  {
510  copychar_impl( traits::TypeCheck::is_same<InputCharSet,OutputCharSet>::type());
511  }
515  static unsigned char HEX( unsigned char ch)
516  {
517  struct HexCharMap :public CharMap<unsigned char, 0xFF>
518  {
519  HexCharMap()
520  {
521  (*this)
522  ('0',0) ('1', 1)('2', 2)('3', 3)('4', 4)('5', 5)('6', 6)('7', 7)('8', 8)('9', 9)
523  ('A',10)('B',11)('C',12)('D',13)('E',14)('F',15)('a',10)('b',11)('c',12)('d',13)('e',14)('f',15);
524  }
525  };
526  static HexCharMap hexCharMap;
527  return hexCharMap[ch];
528  }
533  static UChar parseStaticNumericEntityValue( InputReader& ir)
534  {
535  EChar value = 0;
536  unsigned char ch = ir.ascii();
537  unsigned int base;
538  if (ch != '#') return 0;
539  ir.skip();
540  ch = ir.ascii();
541  if (ch == 'x')
542  {
543  ir.skip();
544  ch = ir.ascii();
545  base = 16;
546  }
547  else
548  {
549  base = 10;
550  }
551  while (ch != ';')
552  {
553  unsigned char chval = HEX(ch);
554  if (value >= base) return 0;
555  value = value * base + chval;
556  if (value >= 0xFFFFFFFF) return 0;
557  ir.skip();
558  ch = ir.ascii();
559  }
560  return (UChar)value;
561  }
565  void fallbackEntity()
566  {
567  switch (tokstate.id)
568  {
569  case TokState::Start:
570  case TokState::ParsingDone:
571  case TokState::ParsingKey:
572  case TokState::ParsingToken:
573  break;
574  case TokState::ParsingEntity:
575  push('&');
576  break;
577  case TokState::ParsingNumericEntity:
578  push('&');
579  push('#');
580  break;
581  case TokState::ParsingNumericBaseEntity:
582  push('&');
583  push('#');
584  for (unsigned int ii=0; ii<tokstate.pos; ii++) push( tokstate.buf[ii]);
585  break;
586  case TokState::ParsingNamedEntity:
587  push('&');
588  for (unsigned int ii=0; ii<tokstate.pos; ii++) push( tokstate.buf[ii]);
589  break;
590  }
591  }
595  bool parseEntity()
596  {
597  unsigned char ch;
598  tokstate.id = TokState::ParsingEntity;
599  ch = m_src.ascii();
600  if (ch == '#')
601  {
602  m_src.skip();
603  return parseNumericEntity();
604  }
605  else
606  {
607  return parseNamedEntity();
608  }
609  }
613  bool parseNumericEntity()
614  {
615  unsigned char ch;
616  tokstate.id = TokState::ParsingNumericEntity;
617  ch = m_src.ascii();
618  if (ch == 'x')
619  {
620  tokstate.base = 16;
621  m_src.skip();
622  return parseNumericBaseEntity();
623  }
624  else
625  {
626  tokstate.base = 10;
627  return parseNumericBaseEntity();
628  }
629  }
633  bool parseNumericBaseEntity()
634  {
635  unsigned char ch;
636  tokstate.id = TokState::ParsingNumericBaseEntity;
638  while (tokstate.pos < sizeof(tokstate.buf))
639  {
640  ch = m_src.ascii();
641  if (ch == ';')
642  {
643  if (tokstate.value > 0xFFFFFFFF)
644  {
645  tokstate.buf[ tokstate.pos++] = ch;
646  fallbackEntity();
647  return true;
648  }
649  push( (UChar)tokstate.value);
650  tokstate.init( TokState::ParsingToken);
651  m_src.skip();
652  return true;
653  }
654  else
655  {
656  unsigned char chval = HEX(ch);
657  if (chval >= tokstate.base)
658  {
659  fallbackEntity();
660  return true;
661  }
662  tokstate.buf[ tokstate.pos++] = ch;
663  tokstate.value = tokstate.value * tokstate.base + chval;
664  m_src.skip();
665  }
666  }
667  fallbackEntity();
668  return true;
669  }
673  bool parseNamedEntity()
674  {
675  unsigned char ch;
676  tokstate.id = TokState::ParsingNamedEntity;
677  ch = m_src.ascii();
678  while (tokstate.pos < sizeof(tokstate.buf)-1 && ch != ';' && m_src.control() == Any)
679  {
680  tokstate.buf[ tokstate.pos] = ch;
681  m_src.skip();
682  tokstate.pos++;
683  ch = m_src.ascii();
684  }
685  if (ch == ';')
686  {
687  tokstate.buf[ tokstate.pos] = '\0';
688  if (!pushEntity( tokstate.buf)) return false;
689  tokstate.init( TokState::ParsingToken);
690  m_src.skip();
691  return true;
692  }
693  else
694  {
695  fallbackEntity();
696  return true;
697  }
698  }
702  bool parseTokenRecover()
703  {
704  bool rt = false;
705  if (tokstate.curchr_saved)
706  {
707  push( tokstate.curchr_saved);
708  tokstate.curchr_saved = 0;
709  }
710  switch (tokstate.id)
711  {
712  case TokState::Start:
713  case TokState::ParsingDone:
714  case TokState::ParsingKey:
715  case TokState::ParsingToken:
716  error = ErrInternal;
717  return false;
718  case TokState::ParsingEntity: rt = parseEntity(); break;
719  case TokState::ParsingNumericEntity: rt = parseNumericEntity(); break;
720  case TokState::ParsingNumericBaseEntity: rt = parseNumericBaseEntity(); break;
721  case TokState::ParsingNamedEntity: rt = parseNamedEntity(); break;
722  }
723  tokstate.init( TokState::ParsingToken);
724  return rt;
725  }
730  bool parseToken( const IsTokenCharMap& isTok)
731  {
732  if (tokstate.id == TokState::Start)
733  {
734  m_tokenpos = m_src.getPosition();
735  tokstate.id = TokState::ParsingToken;
736  m_outputBuf.clear();
737  }
738  else if (tokstate.id != TokState::ParsingToken)
739  {
740  if (!parseTokenRecover())
741  {
742  tokstate.init();
743  return false;
744  }
745  }
746  for (;;)
747  {
751  ControlCharacter ch;
752  while (isTok[ (unsigned char)(ch=m_src.control())])
753  {
754  unsigned char aa = m_src.ascii();
755  if (aa <= 0xD)
756  {
757  //handling W3C requirements for end of line translation in XML:
758  if (aa == '\r')
759  {
760  push( (unsigned char)'\n');
761  tokstate.eolnState = TokState::CR;
762  }
763  else if (aa == '\n')
764  {
765  if (tokstate.eolnState != TokState::CR)
766  {
767  push( (unsigned char)'\n');
768  }
769  tokstate.eolnState = TokState::SRC;
770  }
771  else
772  {
773  copychar();
774  tokstate.eolnState = TokState::SRC;
775  }
776  }
777  else
778  {
779  copychar();
780  tokstate.eolnState = TokState::SRC;
781  }
782  m_src.skip();
783  }
784  if (ch == Amp)
785  {
786  m_src.skip();
787  if (!parseEntity()) break;
788  tokstate.init( TokState::ParsingToken);
789  continue;
790  }
791  else
792  {
793  tokstate.init( TokState::ParsingDone);
794  return true;
795  }
796  }
797  tokstate.init();
798  return false;
799  }
801 public:
808  template <class OutputBufferType>
809  static bool parseStaticToken( const IsTokenCharMap& isTok, InputReader ir, OutputBufferType& buf)
810  {
811  static OutputCharSet output;
812  buf.clear();
813  for (;;)
814  {
815  ControlCharacter ch;
816  for (;;)
817  {
818  UChar pc;
819  if (isTok[ (unsigned char)(ch=ir.control())])
820  {
821  pc = ir.chr();
822  }
823  else if (ch == Amp)
824  {
825  pc = parseStaticNumericEntityValue( ir);
826  }
827  else
828  {
829  return true;
830  }
831  output.print( pc, buf);
832  ir.skip();
833  }
834  }
835  }
837 private:
841  bool skipToken( const IsTokenCharMap& isTok)
842  {
843  do
844  {
845  ControlCharacter ch;
846  while (isTok[ (unsigned char)(ch=m_src.control())] || ch == Amp)
847  {
848  m_src.skip();
849  }
850  }
851  while (m_src.control() == Any);
852  return true;
853  }
858  bool expectStr( const char* str)
859  {
860  bool rt = true;
861  tokstate.id = TokState::ParsingKey;
862  for (; str[tokstate.pos] != '\0'; m_src.skip(),tokstate.pos++)
863  {
864  if (m_src.ascii() == str[ tokstate.pos]) continue;
865  ControlCharacter ch = m_src.control();
866  if (ch == EndOfText)
867  {
868  error = ErrUnexpectedEndOfText;
869  }
870  else
871  {
872  error = ErrSyntaxToken;
873  }
874  rt = false;
875  break;
876  }
877  tokstate.init( TokState::ParsingDone);
878  return rt;
879  }
884  bool pushPredefinedEntity( const char* str)
885  {
886  switch (str[0])
887  {
888  case 'q':
889  if (str[1] == 'u' && str[2] == 'o' && str[3] == 't' && str[4] == '\0')
890  {
891  push( '\"');
892  return true;
893  }
894  break;
896  case 'a':
897  if (str[1] == 'm')
898  {
899  if (str[2] == 'p' && str[3] == '\0')
900  {
901  push( '&');
902  return true;
903  }
904  }
905  else if (str[1] == 'p')
906  {
907  if (str[2] == 'o' && str[3] == 's' && str[4] == '\0')
908  {
909  push( '\'');
910  return true;
911  }
912  }
913  break;
915  case 'l':
916  if (str[1] == 't' && str[2] == '\0')
917  {
918  push( '<');
919  return true;
920  }
921  break;
923  case 'g':
924  if (str[1] == 't' && str[2] == '\0')
925  {
926  push( '>');
927  return true;
928  }
929  break;
931  case 'n':
932  if (str[1] == 'b' && str[2] == 's' && str[3] == 'p' && str[4] == '\0')
933  {
934  push( ' ');
935  return true;
936  }
937  break;
938  }
939  return false;
940  }
/// \brief Print the character of a named entity: the names known by the
///        scanner itself are tried first (pushPredefinedEntity), then the
///        entity map passed to the scanner, if there is one
/// \param [in] str name of the entity as null terminated string
/// \return true if the entity was found and its character printed
// NOTE(review): one line is missing from this listing in each of the two
// branches returning false - confirm against the original header what
// happens there before the return.
945  bool pushEntity( const char* str)
946  {
947  if (pushPredefinedEntity( str))
948  {
949  return true;
950  }
951  else if (m_entityMap)
952  {
// NOTE(review): EntityMap is a std::map with const char* as key and no
// comparator given, so this find() compares pointer values and not the
// characters of the names. 'str' points into the scanner's own buffer here -
// verify that lookups can succeed at all.
953  EntityMap::const_iterator itr = m_entityMap->find( str);
954  if (itr == m_entityMap->end())
955  {
957  return false;
958  }
959  else
960  {
961  UChar ch = itr->second;
962  push( ch);
963  return true;
964  }
965  }
966  else
967  {
969  return false;
970  }
971  }
973 private:
974  STMState state;
975  Error error;
976  InputReader m_src;
977  const EntityMap* m_entityMap;
978  OutputBuffer m_outputBuf;
979  OutputCharSet m_output;
980  std::size_t m_tokenpos;
982 public:
/// \brief Constructor with a default constructed input character set
/// \param [in] p_src source iterator
/// \param [in] p_entityMap map of named entities; only its address is stored
986  XMLScanner( const InputIterator& p_src, const EntityMap& p_entityMap)
987  :state(START),error(Ok),m_src(InputCharSet(),p_src),m_entityMap(&p_entityMap),m_output(OutputCharSet()),m_tokenpos(0)
988  {}
/// \brief Constructor with a default constructed input character set and
///        without entity map
/// \param [in] p_src source iterator
991  explicit XMLScanner( const InputIterator& p_src)
992  :state(START),error(Ok),m_src(InputCharSet(),p_src),m_entityMap(0),m_output(OutputCharSet()),m_tokenpos(0)
993  {}
/// \brief Constructor
/// \param [in] p_charset input character set instance
/// \param [in] p_src source iterator
/// \param [in] p_entityMap map of named entities; only its address is stored
998  XMLScanner( const InputCharSet& p_charset, const InputIterator& p_src, const EntityMap& p_entityMap)
999  :state(START),error(Ok),m_src(p_charset,p_src),m_entityMap(&p_entityMap),m_output(OutputCharSet()),m_tokenpos(0)
1000  {}
/// \brief Constructor without entity map
/// \param [in] p_charset input character set instance
/// \param [in] p_src source iterator
1004  XMLScanner( const InputCharSet& p_charset, const InputIterator& p_src)
1005  :state(START),error(Ok),m_src(p_charset,p_src),m_entityMap(0),m_output(OutputCharSet()),m_tokenpos(0)
1006  {}
/// \brief Constructor without source iterator and without entity map
/// \param [in] p_charset input character set instance
1009  explicit XMLScanner( const InputCharSet& p_charset)
1010  :state(START),error(Ok),m_src(p_charset),m_entityMap(0),m_tokenpos(0)
1011  {}
1014  :state(START),error(Ok),m_src(InputCharSet()),m_entityMap(0),m_tokenpos(0)
1015  {}
1020  :state(o.state)
1021  ,error(o.error)
1022  ,m_src(o.m_src)
1023  ,m_entityMap(o.m_entityMap)
1024  ,m_outputBuf(o.m_outputBuf)
1025  ,m_tokenpos(o.m_tokenpos)
1026  {}
1030  template <class IteratorAssignment>
1031  void setSource( const IteratorAssignment& a)
1032  {
1033  m_src.setSource( a);
1034  }
1038  std::size_t getPosition() const
1039  {
1040  return m_src.getPosition();
1041  }
1043  std::size_t getTokenPosition() const
1044  {
1045  return m_tokenpos;
1046  }
1050  const char* getItemPtr() const {return m_outputBuf.size()?&m_outputBuf.at(0):"\0\0\0\0";}
1054  std::size_t getItemSize() const {return m_outputBuf.size();}
1058  const OutputBuffer& getItem() const
1059  {
1060  return m_outputBuf;
1061  }
1066  {
1067  static Statemachine stm;
1068  return stm.get( state);
1069  }
1074  Error getError( const char** str=0)
1075  {
1076  Error rt = error;
1077  error = Ok;
1078  if (str) *str=getErrorString(rt);
1079  return rt;
1080  }
/// \brief Get the source iterator held by the input reader (read only)
1083  const InputIterator& getIterator() const
1084  {
1085  return m_src.getIterator();
1086  }
/// \brief Get the source iterator held by the input reader
1089  InputIterator& getIterator()
1090  {
1091  return m_src.getIterator();
1092  }
/// \brief Scan the next XML element
/// \param [in] mask bit set of element types: for a token of a type whose bit
///        (1 << type) is not set, the content is skipped instead of parsed
/// \return the type of the element found, or ErrorOccurred (the error code is
///         stored in the scanner, except where a callee already set it)
// NOTE(review): the declaration of 'sd' is missing from this listing; from its
// use it is the definition of the current state of the state machine -
// confirm against the original header.
1097  ElementType nextItem( unsigned short mask=0xFFFF)
1098  {
1099  static const IsWordCharMap wordC;
1100  static const IsContentCharMap contentC;
1101  static const IsTagCharMap tagC;
1102  static const IsSQStringCharMap sqC;
1103  static const IsDQStringCharMap dqC;
// both tables are indexed by the operation code of the action of a state:
// tokenDefs gives the character set of the token to parse, stringDefs the
// fixed string to expect; an entry 0 means "does not apply to this action"
1104  static const IsTokenCharMap* tokenDefs[ NofSTMActions] = {0,&wordC,&contentC,&tagC,&sqC,&dqC,0,0,0};
1105  static const char* stringDefs[ NofSTMActions] = {0,0,0,0,0,0,"xml","CDATA",0};
1107  ElementType rt = None;
1108  ControlCharacter ch;
1109  do
1110  {
1112  if (sd->action.op != -1)
1113  {
1114  if (tokenDefs[sd->action.op])
1115  {
// ParsingDone means the token of this state was already parsed completely
1116  if (tokstate.id != TokState::ParsingDone)
1117  {
1118  if ((mask&(1<<sd->action.arg)) != 0)
1119  {
1120  if (!parseToken( *tokenDefs[ sd->action.op])) return ErrorOccurred;
1121  }
1122  else
1123  {
1124  if (!skipToken( *tokenDefs[ sd->action.op])) return ErrorOccurred;
1125  }
1126  }
1127  rt = (ElementType)sd->action.arg;
1128  }
1129  else if (stringDefs[sd->action.op])
1130  {
1131  if (tokstate.id != TokState::ParsingDone)
1132  {
1133  if (!expectStr( stringDefs[sd->action.op])) return ErrorOccurred;
1134  if (sd->action.op == ExpectIdentifierXML)
1135  {
1136  //... special treatement for xml header for not
1137  // enforcing the model too much just for this case
1138  push( '?'); push( 'x'); push( 'm'); push( 'l');
1139  rt = HeaderStart;
1140  }
1141  }
1142  else if (sd->action.op == ExpectIdentifierXML)
1143  {
1144  //... special treatement for xml header for not
1145  // enforcing the model too much just for this case
1146  rt = HeaderStart;
1147  }
1148  }
1149  else
1150  {
// action without token and without string: an element with empty content
1151  m_tokenpos = m_src.getPosition();
1152  m_outputBuf.clear();
1153  rt = (ElementType)sd->action.arg;
1154  }
// a state without follow transitions ends the call; the fallback state,
// if defined, is where the next call continues
1155  if (sd->nofnext == 0)
1156  {
1157  if (sd->fallbackState != -1)
1158  {
1159  state = (STMState)sd->fallbackState;
1160  }
1161  return rt;
1162  }
1163  }
1164  ch = m_src.control();
1165  tokstate.id = TokState::Start;
// state transition: the input character is consumed only if a transition is
// defined for it; the fallback state is entered without consuming it
1167  if (sd->next[ ch] != -1)
1168  {
1169  state = (STMState)sd->next[ ch];
1170  m_src.skip();
1171  }
1172  else if (sd->fallbackState != -1)
1173  {
1174  state = (STMState)sd->fallbackState;
1175  }
1176  else if (sd->missError != -1)
1177  {
1178  error = (Error)sd->missError;
1179  return ErrorOccurred;
1180  }
1181  else if (ch == EndOfText)
1182  {
1183  error = ErrUnexpectedEndOfText;
1184  return ErrorOccurred;
1185  }
1186  else
1187  {
1188  error = ErrInternal;
1189  return ErrorOccurred;
1190  }
1191  }
1192  while (rt == None);
1193  return rt;
1194  }
1198  struct End {};
1202  class iterator
1203  {
1204  public:
1207  class Element
1208  {
1209  private:
1210  friend class iterator;
1211  ElementType m_type;
1212  const char* m_content;
1213  std::size_t m_size;
1214  public:
1217  bool valid() const {return m_type != Exit && m_type != ErrorOccurred;}
1220  const char* error() const {return m_type == ErrorOccurred ? m_content : 0;}
1223  const char* name() const {return getElementTypeName( m_type);}
1226  ElementType type() const {return m_type;}
1229  const char* content() const {return m_content;}
1232  std::size_t size() const {return m_size;}
1234  Element() :m_type(None),m_content(0),m_size(0) {}
1236  Element( const End&) :m_type(Exit),m_content(0),m_size(0) {}
1239  Element( const Element& orig) :m_type(orig.m_type),m_content(orig.m_content),m_size(orig.m_size) {}
1240  };
1241  // input iterator traits
1243  typedef std::size_t difference_type;
1244  typedef std::size_t size_type;
1245  typedef Element* pointer;
1246  typedef Element& reference;
1247  typedef std::input_iterator_tag iterator_category;
1249  private:
1250  Element element;
1251  ThisXMLScanner* input;
1256  iterator& skip( unsigned short mask=0xFFFF)
1257  {
1258  if (input != 0)
1259  {
1260  element.m_type = input->nextItem(mask);
1261  element.m_content = input->getItemPtr();
1262  element.m_size = input->getItemSize();
1263  }
1264  return *this;
1265  }
1270  bool compare( const iterator& iter) const
1271  {
1272  if (element.type() == iter.element.type())
1273  {
1274  if (element.type() == Exit || element.type() == None) return true; //equal only at beginning and end
1275  }
1276  return false;
1277  }
1278  public:
1281  void assign( const iterator& orig)
1282  {
1283  input = orig.input;
1284  element = orig.element;
1285  }
1288  iterator( const iterator& orig)
1289  {
1290  assign( orig);
1291  }
1295  iterator( ThisXMLScanner& p_input, bool doSkipToFirst=true)
1296  :input( &p_input)
1297  {
1298  if (doSkipToFirst)
1299  {
1300  element.m_type = input->nextItem();
1301  element.m_content = input->getItemPtr();
1302  element.m_size = input->getItemSize();
1303  }
1304  }
1306  iterator( const End& et) :element(et),input(0) {}
1308  iterator() :input(0) {}
1312  {
1313  assign( orig);
1314  return *this;
1315  }
1317  const Element& operator*() const
1318  {
1319  return element;
1320  }
1322  const Element* operator->() const
1323  {
1324  return &element;
1325  }
1328  iterator& operator++() {return skip();}
1331  iterator operator++(int) {iterator tmp(*this); skip(); return tmp;}
1335  bool operator==( const iterator& iter) const {return compare( iter);}
1338  bool operator!=( const iterator& iter) const {return !compare( iter);}
1339  };
1344  iterator begin( bool doSkipToFirst=true)
1345  {
1346  return iterator( *this, doSkipToFirst);
1347  }
1351  {
1352  return iterator( End());
1353  }
1354 };
1356 }//namespace
1357 #endif
