textwolf  0.2
xmlprinter.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014 Patrick P. Frey
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7  */
10 
11 #ifndef __TEXTWOLF_XML_PRINTER_HPP__
12 #define __TEXTWOLF_XML_PRINTER_HPP__
14 #include "textwolf/textscanner.hpp"
15 #include "textwolf/xmlscanner.hpp"
16 #include "textwolf/charset.hpp"
17 #include "textwolf/xmltagstack.hpp"
18 #include <cstring>
19 #include <cstdlib>
20 
23 namespace textwolf {
24 
30 template <class IOCharset, class AppCharset,class BufferType>
32 {
33 public:
38  void printToBuffer( const char* src, std::size_t srcsize, BufferType& buf) const
39  {
40  CStringIterator itr( src, srcsize);
42 
43  UChar ch;
44  while ((ch = ts.chr()) != 0)
45  {
46  m_output.print( ch, buf);
47  ++ts;
48  }
49  }
50 
57  void printEsc( char ch, BufferType& buf, unsigned int nof_echr, const char* echr, const char** estr) const
58  {
59  const char* cc = (const char*)memchr( echr, ch, nof_echr);
60  if (cc)
61  {
62  unsigned int ii = 0;
63  const char* tt = estr[ cc-echr];
64  while (tt[ii]) m_output.print( tt[ii++], buf);
65  }
66  else
67  {
68  m_output.print( ch, buf);
69  }
70  }
71 
79  void printToBufferSubstChr( const char* src, std::size_t srcsize, BufferType& buf, unsigned int nof_echr, const char* echr, const char** estr) const
80  {
81  CStringIterator itr( src, srcsize);
83 
84  textwolf::UChar ch;
85  while ((ch = ts.chr()) != 0)
86  {
87  if (ch < 128)
88  {
89  printEsc( (char)ch, buf, nof_echr, echr, estr);
90  }
91  else
92  {
93  m_output.print( ch, buf);
94  }
95  ++ts;
96  }
97  }
98 
103  void printToBufferAttributeValue( const char* src, std::size_t srcsize, BufferType& buf) const
104  {
105  enum {nof_echr = 12};
106  static const char* estr[nof_echr] = {"&lt;", "&gt;", "&apos;", "&quot;", "&amp;", "&#0;", "&#8;", "&#9;", "&#10;", "&#13;"};
107  static const char echr[nof_echr+1] = "<>'\"&\0\b\t\n\r";
108  m_output.print( '"', buf);
109  printToBufferSubstChr( src, srcsize, buf, nof_echr, echr, estr);
110  m_output.print( '"', buf);
111  }
112 
117  void printToBufferContent( const char* src, std::size_t srcsize, BufferType& buf) const
118  {
119  enum {nof_echr = 6};
120  static const char* estr[nof_echr] = {"&lt;", "&gt;", "&amp;", "&#0;", "&#8;"};
121  static const char echr[nof_echr+1] = "<>&\0\b";
122  printToBufferSubstChr( src, srcsize, buf, nof_echr, echr, estr);
123  }
124 
128  void printToBuffer( char ch, BufferType& buf) const
129  {
130  m_output.print( (textwolf::UChar)(unsigned char)ch, buf);
131  }
132 
133 public:
137  explicit XMLPrinter( bool subDocument=false)
138  :m_state(subDocument?Content:Init),m_lasterror(0){}
139 
143  explicit XMLPrinter( const IOCharset& output_, bool subDocument=false)
144  :m_state(subDocument?Content:Init),m_output(output_),m_lasterror(0){}
145 
148  :m_state(o.m_state),m_tagstack(o.m_tagstack),m_output(o.m_output),m_lasterror(o.m_lasterror)
149  {}
150 
153  void reset( bool subDocument=false)
154  {
155  m_state = subDocument?Content:Init;
156  m_tagstack.clear();
157  m_lasterror = 0;
158  }
159 
165  bool printHeader( const char* encoding, const char* standalone, BufferType& buf)
166  {
167  if (m_state != Init)
168  {
169  m_lasterror = "printing xml header not at the beginning of the document";
170  return false;
171  }
172  std::string enc = encoding?encoding:"UTF-8";
173  printToBuffer( "<?xml version=\"1.0\" encoding=\"", 30, buf);
174  printToBuffer( enc.c_str(), enc.size(), buf);
175  if (standalone)
176  {
177  printToBuffer( "\" standalone=\"", 14, buf);
178  printToBuffer( standalone, std::strlen(standalone), buf);
179  printToBuffer( "\"?>\n", 4, buf);
180  }
181  else
182  {
183  printToBuffer( "\"?>\n", 4, buf);
184  }
185  m_state = Content;
186  return true;
187  }
188 
195  bool printDoctype( const char* rootid, const char* publicid, const char* systemid, BufferType& buf)
196  {
197  if (rootid)
198  {
199  if (publicid)
200  {
201  if (!systemid)
202  {
203  m_lasterror = "defined DOCTYPE with PUBLIC id but no SYSTEM id";
204  return false;
205  }
206  printToBuffer( "<!DOCTYPE ", 10, buf);
207  printToBuffer( rootid, std::strlen( rootid), buf);
208  printToBuffer( " PUBLIC \"", 9, buf);
209  printToBuffer( publicid, std::strlen( publicid), buf);
210  printToBuffer( "\" \"", 3, buf);
211  printToBuffer( systemid, std::strlen( systemid), buf);
212  printToBuffer( "\">", 2, buf);
213  }
214  else if (systemid)
215  {
216  printToBuffer( "<!DOCTYPE ", 10, buf);
217  printToBuffer( rootid, std::strlen( rootid), buf);
218  printToBuffer( " SYSTEM \"", 9, buf);
219  printToBuffer( systemid, std::strlen( systemid), buf);
220  printToBuffer( "\">", 2, buf);
221  }
222  else
223  {
224  printToBuffer( "<!DOCTYPE ", 11, buf);
225  printToBuffer( rootid, std::strlen( rootid), buf);
226  printToBuffer( ">", 2, buf);
227  }
228  }
229  return true;
230  }
231 
235  bool exitTagContext( BufferType& buf)
236  {
237  if (m_state != Content)
238  {
239  if (m_state == Init)
240  {
241  m_lasterror = "printed xml without root element";
242  return false;
243  }
244  printToBuffer( '>', buf);
245  m_state = Content;
246  }
247  return true;
248  }
249 
255  bool printOpenTag( const char* src, std::size_t srcsize, BufferType& buf)
256  {
257  if (!exitTagContext( buf)) return false;
258  printToBuffer( '<', buf);
259  printToBuffer( (const char*)src, srcsize, buf);
260 
261  m_tagstack.push( src, srcsize);
262  m_state = TagElement;
263  return true;
264  }
265 
271  bool printAttribute( const char* src, std::size_t srcsize, BufferType& buf)
272  {
273  if (m_state == TagElement)
274  {
275  printToBuffer( ' ', buf);
276  printToBuffer( (const char*)src, srcsize, buf);
277  printToBuffer( '=', buf);
278  m_state = TagAttribute;
279  return true;
280  }
281  return false;
282  }
283 
289  bool printValue( const char* src, std::size_t srcsize, BufferType& buf)
290  {
291  if (m_state == TagAttribute)
292  {
293  printToBufferAttributeValue( (const char*)src, srcsize, buf);
294  m_state = TagElement;
295  }
296  else
297  {
298  if (!exitTagContext( buf)) return false;
299  printToBufferContent( (const char*)src, srcsize, buf);
300  }
301  return true;
302  }
303 
307  bool printCloseTag( BufferType& buf)
308  {
309  const void* cltag;
310  std::size_t cltagsize;
311 
312  if (!m_tagstack.top( cltag, cltagsize) || !cltagsize)
313  {
314  return false;
315  }
316  if (m_state == TagElement)
317  {
318  printToBuffer( '/', buf);
319  printToBuffer( '>', buf);
320  m_state = Content;
321  }
322  else if (m_state != Content)
323  {
324  return false;
325  }
326  else
327  {
328  printToBuffer( '<', buf);
329  printToBuffer( '/', buf);
330  printToBuffer( (const char*)cltag, cltagsize, buf);
331  printToBuffer( '>', buf);
332  }
333  m_tagstack.pop();
334  if (m_tagstack.empty())
335  {
336  printToBuffer( '\n', buf);
337  }
338  return true;
339  }
340 
/// \brief Internal state
// NOTE(review): enumerator names are the ones used by the methods of this class;
// their declaration order is reconstructed - confirm against the original header
enum State
{
	Init,			///< start of the document, nothing printed yet
	Content,		///< printing content (outside of an open tag)
	TagAttribute,		///< attribute name printed, attribute value expected
	TagElement		///< inside an open tag, attributes may follow
};
349 
352  State state() const
353  {
354  return m_state;
355  }
356 
359  const char* lasterror() const
360  {
361  return m_lasterror;
362  }
363 
364 private:
365  State m_state; ///< current state of the printer
366  TagStack m_tagstack; ///< names of the tags opened and not yet closed
367  IOCharset m_output; ///< character set encoding used to print into the buffer
368  const char* m_lasterror; ///< message of the last error or 0 if none was recorded
369 };
370 
371 } //namespace
372 #endif
bool printDoctype(const char *rootid, const char *publicid, const char *systemid, BufferType &buf)
Prints an XML <!DOCTYPE ...> declaration.
Definition: xmlprinter.hpp:195
XMLPrinter(const XMLPrinter &o)
Copy constructor.
Definition: xmlprinter.hpp:147
textwolf XML printer tag stack
State
Internal state.
Definition: xmlprinter.hpp:342
Definition: xmlprinter.hpp:347
Character encoding dependent XML printer.
Definition: xmlprinter.hpp:31
bool printAttribute(const char *src, std::size_t srcsize, BufferType &buf)
Print the start of an attribute name.
Definition: xmlprinter.hpp:271
bool top(const void *&element, std::size_t &elementsize)
Get the topmost tag.
Definition: xmltagstack.hpp:73
void printToBuffer(char ch, BufferType &buf) const
Prints a character to an STL back insertion sequence buffer in the IO character set encoding...
Definition: xmlprinter.hpp:128
stack of tag names
Definition: xmltagstack.hpp:23
uint32_t UChar
Unicode character type.
Definition: char.hpp:37
void printToBuffer(const char *src, std::size_t srcsize, BufferType &buf) const
Prints a character string to an STL back insertion sequence buffer in the IO character set encoding...
Definition: xmlprinter.hpp:38
const char * lasterror() const
Get the last error that occurred.
Definition: xmlprinter.hpp:359
void printToBufferAttributeValue(const char *src, std::size_t srcsize, BufferType &buf) const
print attribute value string
Definition: xmlprinter.hpp:103
void clear()
Definition: xmltagstack.hpp:97
bool empty() const
Find out if the stack is empty.
Definition: xmltagstack.hpp:92
State state() const
Get the current internal state.
Definition: xmlprinter.hpp:352
bool printOpenTag(const char *src, std::size_t srcsize, BufferType &buf)
Print the start of an open tag.
Definition: xmlprinter.hpp:255
void pop()
Pop (remove) the topmost tag.
Definition: xmltagstack.hpp:82
XMLPrinter(const IOCharset &output_, bool subDocument=false)
Constructor.
Definition: xmlprinter.hpp:143
void printToBufferSubstChr(const char *src, std::size_t srcsize, BufferType &buf, unsigned int nof_echr, const char *echr, const char **estr) const
print a value with some characters replaced by a string
Definition: xmlprinter.hpp:79
void printEsc(char ch, BufferType &buf, unsigned int nof_echr, const char *echr, const char **estr) const
print a character substitute or the character itself
Definition: xmlprinter.hpp:57
UChar chr()
Get the unicode representation of the current character.
Definition: textscanner.hpp:161
Reader for scanning the input character by character.
Definition: textscanner.hpp:68
Input iterator on a constant string returning null characters after EOF as required by textwolf scanners.
Definition: cstringiterator.hpp:23
void push(const char *pp, std::size_t nn)
Push a tag on top.
Definition: xmltagstack.hpp:50
XML parser iterator interface for processing the XML elements one by one.
textwolf iterator on strings
bool printCloseTag(BufferType &buf)
Print the closing tag of the currently open tag.
Definition: xmlprinter.hpp:307
void printToBufferContent(const char *src, std::size_t srcsize, BufferType &buf) const
print content value string
Definition: xmlprinter.hpp:117
bool printValue(const char *src, std::size_t srcsize, BufferType &buf)
Print a content or attribute value depending on context.
Definition: xmlprinter.hpp:289
bool printHeader(const char *encoding, const char *standalone, BufferType &buf)
Prints an XML header (version "1.0")
Definition: xmlprinter.hpp:165
Definition: xmlprinter.hpp:346
XMLPrinter(bool subDocument=false)
Default constructor.
Definition: xmlprinter.hpp:137
Definition: xmlprinter.hpp:344
void reset(bool subDocument=false)
Reset the state.
Definition: xmlprinter.hpp:153
Implementation of iterator for character-wise parsing of input.
bool exitTagContext(BufferType &buf)
Close the currently open tag attribute context.
Definition: xmlprinter.hpp:235
Definition: xmlprinter.hpp:345
Character set encodings already implemented in textwolf.