PTLib  Version 2.14.3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
pxml.h
Go to the documentation of this file.
1 /*
2  * pxml.h
3  *
4  * XML parser support
5  *
6  * Portable Windows Library
7  *
8  * Copyright (c) 2002 Equivalence Pty. Ltd.
9  *
10  * The contents of this file are subject to the Mozilla Public License
11  * Version 1.0 (the "License"); you may not use this file except in
12  * compliance with the License. You may obtain a copy of the License at
13  * http://www.mozilla.org/MPL/
14  *
15  * Software distributed under the License is distributed on an "AS IS"
16  * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
17  * the License for the specific language governing rights and limitations
18  * under the License.
19  *
20  * The Original Code is Portable Windows Library.
21  *
22  * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
23  *
24  * Contributor(s): ______________________________________.
25  *
26  * $Revision: 29535 $
27  * $Author: rjongbloed $
28  * $Date: 2013-04-19 18:51:15 +1000 (Fri, 19 Apr 2013) $
29  */
30 
31 #ifndef PTLIB_PXML_H
32 #define PTLIB_PXML_H
33 
34 #ifdef P_USE_PRAGMA
35 #pragma interface
36 #endif
37 
38 #include <ptlib.h>
39 
40 #ifdef P_EXPAT
41 
42 #include <ptlib/bitwise_enum.h>
43 #include <ptclib/http.h>
44 
45 
// Forward declarations: class PXML below refers to both types by pointer
// (CreateElement / CreateRootElement) before their definitions appear.
46 class PXMLElement;
47 class PXMLRootElement;
48 
49 
51 
// PXMLBase: common base of PXML and PXMLParser (both derive from it further
// down). It carries the Options value and the maximum entity length, plus
// overridable hooks used when producing output.
//
// NOTE(review): this listing has dropped lines (numbering gaps 56->58, 59->61,
// 75->81 and 99->101). The body of the anonymous enum, the macro invocation
// that opens the Options declaration, and the declaration of
// m_maxEntityLength are not visible here - consult the real header.
52 class PXMLBase : public PObject
53 {
54  PCLASSINFO(PXMLBase, PObject);
55  public:
56  enum {
58  };
59 
    // Arguments of the (not visible) macro that declares the Options type.
    // Seven names follow NoOptions; presumably each one is given its own bit
    // - TODO confirm against the macro definition in ptlib/bitwise_enum.h.
61  Options,
62  7,
63  (
64  NoOptions,
65  Indent,
66  IndentWithTabs,
67  NewLineAfterElement,
68  NoIgnoreWhiteSpace,
69  CloseExtended,
70  WithNS,
71  FragmentOnly
72  ),
    // (1<<(6+1))-1 == 127, i.e. the seven low-order bits all set.
73  AllOptions = (1<<(6+1))-1
74  );
75 
81  };
82 
    // Construct with an initial option set; defaults to NoOptions.
83  PXMLBase(Options opts = NoOptions);
84 
    // Plain accessors for the stored option set.
85  void SetOptions(Options opts) { m_options = opts; }
86  Options GetOptions() const { return m_options; }
87 
    // Plain accessors for m_maxEntityLength (declaration not visible here).
88  void SetMaxEntityLength(unsigned len) { m_maxEntityLength = len; }
89  unsigned GetMaxEntityLength() const { return m_maxEntityLength; }
90 
    // Overridable query; this base implementation ignores the element name
    // and always answers false.
91  virtual PBoolean IsNoIndentElement(const PString & /*elementName*/) const
92  { return false; }
93 
    // Overridable hook; this base implementation always returns true.
    // PXML redeclares it as virtual further down.
94  virtual bool OutputProgress() const { return true; }
95 
    // Writes indentation to strm; elementName defaults to the empty string.
    // NOTE(review): the meaning of the bool result is not visible in this header.
96  bool OutputIndent(ostream & strm, int indent, const PString & elementName = PString::Empty()) const;
97 
98  protected:
99  Options m_options;
101 };
102 
103 
// PXML: an XML document object built on PXMLBase. This header declares
// loading and saving (from strings and files), element lookup, validation
// against a ValidationInfo table, and accessors for the document's version,
// encoding, doctype and last error position.
//
// NOTE(review): this listing has dropped many lines (numbering gaps such as
// 153->155, 157->177, 177->182, 183->187, 227->240). Several members used by
// the inline accessors below (m_rootElement, m_version, m_encoding,
// m_docType, m_publicId, m_dtdURI) and the ValidationContext type are not
// visible here; the stray "};" lines at 182 and 187 close declarations whose
// openings were dropped.
104 class PXML : public PXMLBase
105 {
106  PCLASSINFO(PXML, PXMLBase);
107  public:
    // Both constructor arguments are optional: options defaults to NoOptions
    // and noIndentElements to NULL.
108  PXML(
109  Options options = NoOptions,
110  const char * noIndentElements = NULL
111  );
112  PXML(const PXML & xml);
113  ~PXML();
114 
    // Stream input/output and string conversion.
115  void ReadFrom(istream & strm);
116  void PrintOn(ostream & strm) const;
117  PString AsString() const;
118 
119  bool IsDirty() const;
120 
    // Load from an in-memory string or from a file. Each comes in two forms:
    // one taking explicit Options and one without.
    // NOTE(review): which options the shorter overloads apply is not visible
    // in this header.
121  bool Load(const PString & data);
122  bool Load(const PString & data, Options options);
123  bool LoadFile(const PFilePath & fn);
124  bool LoadFile(const PFilePath & fn, Options options);
125 
    // Overridable load hooks. The default progress hook ignores its argument
    // and returns true; the default OnLoaded does nothing.
126  virtual bool OnLoadProgress(unsigned /*percent*/) const { return true; }
127  virtual void OnLoaded() { }
128 
    // Save / serialise, again with and without explicit Options. Note that a
    // non-const AsString() is declared here in addition to the const one above.
129  bool Save();
130  bool Save(Options options);
131  PString AsString();
132  PString AsString(Options options);
133  bool SaveFile(const PFilePath & fn);
134  bool SaveFile(const PFilePath & fn, Options options);
    // Overridable save hook; the default ignores its argument and returns true.
135  virtual bool OnSaveProgress(unsigned /*percent*/) const { return true; }
    // Redeclares the PXMLBase hook; defined out of line.
136  virtual bool OutputProgress() const;
137 
138  virtual PINDEX GetObjectCount() const;
139 
140  void RemoveAll();
141 
    // Redeclares the PXMLBase query; defined out of line.
142  virtual PBoolean IsNoIndentElement(
143  const PString & elementName
144  ) const;
145 
146 
    // Overridable factories for elements and for the root element.
147  virtual PXMLElement * CreateElement(const PCaselessString & name, const char * data = NULL);
148  virtual PXMLRootElement * CreateRootElement(const PCaselessString & name);
149 
    // Element lookup: by name plus attribute name/value, by name plus index
    // (index defaults to 0), or by index alone.
150  PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const;
151  PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const;
152  PXMLElement * GetElement(PINDEX idx) const;
153  PINDEX GetNumElements() const;
155  PXMLElement * SetRootElement(const PString & documentType);
156 
157 
177 
182  };
183 
187  };
188 
    // One entry of a validation table passed to Validate() and friends.
189  struct ValidationInfo {
191  const char * m_name;
192 
    // The three pointers share storage (anonymous union); which one is
    // meaningful presumably depends on a field dropped from this listing
    // (line 190) - TODO confirm.
193  union {
194  const void * m_placeHolder;
195  const char * m_attributeValues;
197  const char * m_namespace;
198  };
199 
200  PINDEX m_minCount;
201  PINDEX m_maxCount;
202  };
203 
    // Validation entry points. ValidationContext is declared on lines that
    // are missing from this listing.
204  bool Validate(const ValidationInfo * validator);
205  bool ValidateElements(ValidationContext & context, PXMLElement * baseElement, const ValidationInfo * elements);
206  bool ValidateElement(ValidationContext & context, PXMLElement * element, const ValidationInfo * elements);
207  bool LoadAndValidate(const PString & body, const PXML::ValidationInfo * validator, PString & error, Options options = NoOptions);
208 
    // Read-only accessors returning references to stored strings.
209  const PCaselessString & GetVersion() const { return m_version; }
210  const PCaselessString & GetEncoding() const { return m_encoding; }
212 
    // True when a root element pointer is set.
213  bool IsLoaded() const { return m_rootElement != NULL; }
215 
217  const PCaselessString & GetDocType() const { return m_docType; }
    // NOTE(review): the name is spelled "Pubic" although it returns
    // m_publicId; it looks like a typo for "Public", but the name is part of
    // the public interface, so renaming it would break existing callers.
218  const PCaselessString & GetPubicIdentifier() const { return m_publicId; }
219  const PCaselessString & GetDtdURI() const { return m_dtdURI; }
220 
    // Position recorded for the last error.
222  unsigned GetErrorColumn() const { return m_errorColumn; }
223  unsigned GetErrorLine() const { return m_errorLine; }
224 
    // Static helper; the non-Expat branch at the end of this header declares
    // a free function of the same qualified name.
225  static PString EscapeSpecialChars(const PString & string);
226 
227  protected:
229 
236 
238 
240  unsigned m_errorLine;
241  unsigned m_errorColumn;
242 
244 
246 
    // Declared mutable so const member functions can update them.
248  mutable PINDEX m_savedObjects;
249  mutable unsigned m_percent;
250 
    // The parser is granted access to the protected members.
251  friend class PXMLParser;
252 };
253 
254 
255 #if P_HTTP
// PXML_HTTP: PXML extended with loading from a URL and a periodic
// auto-reload facility. Only compiled when P_HTTP is set (see the
// surrounding #if / #endif).
//
// NOTE(review): listing lines 272 and 284-288 are missing from this
// extraction, so at least one member function and the data members are not
// visible here.
256 class PXML_HTTP : public PXML
257 {
258  PCLASSINFO(PXML_HTTP, PXML);
259  public:
    // Same optional arguments as the PXML constructor.
260  PXML_HTTP(
261  Options options = NoOptions,
262  const char * noIndentElements = NULL
263  );
264 
    // Start / stop automatic reloading from a URL. timeout and refreshTime
    // are separate intervals; options defaults to NoOptions.
    // NOTE(review): exact semantics of the two intervals and of the bool
    // results are not visible in this header.
265  bool StartAutoReloadURL(
266  const PURL & url,
267  const PTimeInterval & timeout,
268  const PTimeInterval & refreshTime,
269  Options options = NoOptions
270  );
271  bool StopAutoReloadURL();
273  bool AutoLoadURL();
    // Overridable hook taking a success flag.
274  virtual void OnAutoLoad(PBoolean ok);
275 
    // One-shot loads: URL only, URL with timeout, or URL with PURL::LoadParams.
276  bool LoadURL(const PURL & url);
277  bool LoadURL(const PURL & url, const PTimeInterval & timeout, Options options = NoOptions);
278  bool LoadURL(const PURL & url, const PURL::LoadParams & params, Options options = NoOptions);
279 
280  protected:
    // Notifier declarations for a PTimer and a PThread source; presumably the
    // timer callback and worker thread behind auto-reload - TODO confirm.
281  PDECLARE_NOTIFIER(PTimer, PXML_HTTP, AutoReloadTimeout);
282  PDECLARE_NOTIFIER(PThread, PXML_HTTP, AutoReloadThread);
283 
289 };
290 #endif // P_HTTP
291 
292 
294 
295 class PConfig; // explicit forward declaration: gcc 4 otherwise does not recognize PConfig as a class in the declarations below
296 
// PXMLSettings: a PXML document addressed as (section, key) -> value
// attributes, with conversion to and from a PConfig object.
297 class PXMLSettings : public PXML
298 {
299  PCLASSINFO(PXMLSettings, PXML);
300  public:
    // Unlike PXML, the default option here is NewLineAfterElement.
301  PXMLSettings(Options options = NewLineAfterElement);
302 
    // Store a value under the given section and key.
303  void SetAttribute(const PCaselessString & section, const PString & key, const PString & value);
304 
    // Read-only queries by section and key.
    // NOTE(review): what GetAttribute returns for a missing key is not
    // visible in this header.
305  PString GetAttribute(const PCaselessString & section, const PString & key) const;
306  bool HasAttribute(const PCaselessString & section, const PString & key) const;
307 
    // ToConfig is const and writes into cfg; FromConfig reads a const cfg and
    // modifies this object.
308  void ToConfig(PConfig & cfg) const;
309  void FromConfig(const PConfig & cfg);
310 };
311 
312 
314 
// PXMLObject: abstract base for nodes of the XML tree (PXMLData and
// PXMLElement derive from it below). Output(), IsElement() and Clone() are
// pure virtual; the constructor is protected.
//
// NOTE(review): listing lines 322, 346 and 351 are missing from this
// extraction. The head of the accessor whose body is "{ return m_parent; }"
// and the declaration of m_parent are therefore not visible here.
315 class PXMLObject : public PObject
316 {
317  PCLASSINFO(PXMLObject, PObject);
318  protected:
319  PXMLObject();
320 
321  public:
    // Body of an accessor returning m_parent (its declaration line is missing).
323  { return m_parent; }
324 
325  bool SetParent(PXMLElement * parent);
326 
    // A single object counts as one; PXMLElement redeclares this below.
327  virtual PINDEX GetObjectCount() const { return 1; }
328 
329  PXMLObject * GetNextObject() const;
330 
331  PString AsString() const;
332 
    // Write this node to strm; xml supplies the PXMLBase settings and indent
    // the current indentation. Implemented by derived classes.
333  virtual void Output(ostream & strm, const PXMLBase & xml, int indent) const = 0;
334 
    // Derived classes report whether they are an element (PXMLElement returns
    // true, PXMLData returns false).
335  virtual PBoolean IsElement() const = 0;
336 
337  void SetDirty();
338  bool IsDirty() const { return m_dirty; }
339 
    // Note the argument order is (column, line) in both functions.
340  void GetFilePosition(unsigned & col, unsigned & line) const { col = m_column; line = m_lineNumber; }
341  void SetFilePosition(unsigned col, unsigned line) { m_column = col; m_lineNumber = line; }
342 
    // Polymorphic copy; implemented by derived classes.
343  virtual PXMLObject * Clone() const = 0;
344 
345  protected:
347  bool m_dirty;
348  unsigned m_lineNumber;
349  unsigned m_column;
350 
352 };
353 
// Declares PXMLObjectArray, the type PXMLElement::GetSubObjects() returns.
// PARRAY is defined outside this header; presumably it generates an array
// container of PXMLObject - TODO confirm against the PTLib array macros.
354 PARRAY(PXMLObjectArray, PXMLObject);
355 
357 
// PXMLData: a tree node holding a text string (as opposed to an element).
//
// NOTE(review): listing line 376 is missing from this extraction, so the
// declaration of m_value (returned by GetString) is not visible here.
358 class PXMLData : public PXMLObject
359 {
360  PCLASSINFO(PXMLData, PXMLObject);
361  public:
    // Construct from a PString, or from a character buffer with explicit length.
362  PXMLData(const PString & data);
363  PXMLData(const char * data, int len);
364 
    // A data node is never an element.
365  PBoolean IsElement() const { return false; }
366 
    // Replace the text; the dirty argument defaults to true.
367  void SetString(const PString & str, bool dirty = true);
368 
    // Returns a reference to the stored text, not a copy.
369  const PString & GetString() const { return m_value; }
370 
    // Implementations of the PXMLObject pure virtuals.
371  void Output(ostream & strm, const PXMLBase & xml, int indent) const;
372 
373  PXMLObject * Clone() const;
374 
375  protected:
377 };
378 
379 
381 
// PXMLElement: a tree node with a name, attributes and an ordered list of
// sub-objects (child elements and data nodes). The copy constructor is
// protected; copies are obtained through Clone().
//
// NOTE(review): listing lines 403-407, 460 and 464-469 are missing from this
// extraction. The declarations of m_name, m_attributes and m_subObjects used
// by the inline functions below are therefore not visible here.
382 class PXMLElement : public PXMLObject
383 {
384  PCLASSINFO(PXMLElement, PXMLObject);
385  protected:
386  PXMLElement(const PXMLElement & copy);
387  public:
    // Both arguments are optional and default to NULL.
388  PXMLElement(const char * name = NULL, const char * data = NULL);
389 
390  virtual PINDEX GetObjectCount() const;
391 
    // An element node always reports true.
392  PBoolean IsElement() const { return true; }
393 
394  void PrintOn(ostream & strm) const;
395  void Output(ostream & strm, const PXMLBase & xml, int indent) const;
396 
    // Element name accessors; the getter returns a reference to the stored
    // PCaselessString.
397  const PCaselessString & GetName() const
398  { return m_name; }
399 
400  void SetName(const PString & v)
401  { m_name = v; }
402 
408 
    // Number of direct sub-objects.
409  PINDEX GetSize() const
410  { return m_subObjects.GetSize(); }
411 
    // NOTE(review): the value returned when ptr is not found is not visible
    // in this header.
412  PINDEX FindObject(const PXMLObject * ptr) const;
413 
414  bool HasSubObjects() const
415  { return !m_subObjects.IsEmpty(); }
416 
    // Add / remove direct sub-objects; dirty and dispose both default to true.
417  virtual PXMLObject * AddSubObject(PXMLObject * elem, bool dirty = true);
418  bool RemoveSubObject(PINDEX idx, bool dispose = true);
419 
    // Overridable factory; PXMLRootElement redeclares it below.
420  virtual PXMLElement * CreateElement(const PCaselessString & name, const char * data = NULL);
421 
    // Convenience overloads: name only, name with data, or name with a single
    // attribute name/value pair.
422  PXMLElement * AddElement(const char * name);
423  PXMLElement * AddElement(const PString & name, const PString & data);
424  PXMLElement * AddElement(const PString & name, const PString & attrName, const PString & attrVal);
425 
    // Attribute access. Keys are PCaselessString; setDirty defaults to true.
426  void SetAttribute(const PCaselessString & key,
427  const PString & value,
428  bool setDirty = true);
429 
430  PString GetAttribute(const PCaselessString & key) const;
431  bool HasAttribute(const PCaselessString & key) const;
432  bool HasAttributes() const { return m_attributes.GetSize() > 0; }
433  const PStringToString & GetAttributes() const { return m_attributes; }
434 
    // Child lookup: any sub-object by index, or child elements by index, by
    // name plus index, or by name plus attribute name/value.
435  PXMLObject * GetSubObject(PINDEX idx) const;
436  PXMLElement * GetElement(PINDEX idx = 0) const;
437  PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const;
438  PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const;
439 
    // Typed variants of the three GetElement overloads: the result is passed
    // through dynamic_cast<T *>, which yields a null pointer when the element
    // found is not a T.
440  template <class T> T * GetElementAs(PINDEX idx = 0) const { return dynamic_cast<T *>(GetElement(idx)); }
441  template <class T> T * GetElementAs(const PCaselessString & name, PINDEX idx = 0) const { return dynamic_cast<T *>(GetElement(name, idx)); }
442  template <class T> T * GetElementAs(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const { return dynamic_cast<T *>(GetElement(name, attr, attrval)); }
443 
    // trim defaults to true.
444  PString GetData(bool trim = true) const;
445 
    // Returns the sub-object array by value, not by reference.
446  PXMLObjectArray GetSubObjects() const
447  { return m_subObjects; }
448 
449  void SetData(const PString & data);
450  virtual PXMLData * AddData(const PString & data);
    // Overridable hook; the default does nothing.
451  virtual void EndData() { }
452 
453  PXMLObject * Clone() const;
454 
    // Namespace handling by prefix / URI. The bool-returning functions write
    // their result through the non-const reference argument.
    // NOTE(review): what a false return leaves in that argument is not
    // visible in this header.
455  void AddNamespace(const PString & prefix, const PString & uri);
456  void RemoveNamespace(const PString & prefix);
457 
458  bool GetDefaultNamespace(PCaselessString & str) const;
459  bool GetNamespace(const PCaselessString & prefix, PCaselessString & str) const;
461  bool GetURIForNamespace(const PCaselessString & prefix, PCaselessString & uri) const;
462 
463  protected:
468 
470 };
471 
472 
474 
// PXMLRootElement: an element that also keeps a reference to its PXML document.
//
// NOTE(review): the class-head line (listing line 475) and the member
// declaration at line 493 are missing from this extraction. The PCLASSINFO
// line below names the class PXMLRootElement with parent PXMLElement, and
// both constructors initialise a PXMLElement base and an m_document member.
476 {
477  PCLASSINFO(PXMLRootElement, PXMLElement);
478  public:
    // Create a root element for doc; name is optional and defaults to NULL.
479  PXMLRootElement(PXML & doc, const char * name = NULL)
480  : PXMLElement(name)
481  , m_document(doc)
482  { }
483 
    // Create a root element for doc from an existing element, via the
    // protected PXMLElement copy constructor.
484  PXMLRootElement(PXML & doc, const PXMLElement & copy)
485  : PXMLElement(copy)
486  , m_document(doc)
487  { }
488 
    // NOTE(review): this Clone() is non-const, whereas PXMLObject and
    // PXMLElement declare Clone() const. Because the const-qualification
    // differs, this declares a separate virtual function rather than an
    // override of those - confirm whether that is intended.
489  virtual PObject * Clone();
490  virtual PXMLElement * CreateElement(const PCaselessString & name, const char * data = NULL);
491 
492  protected:
494 };
495 
496 
498 
// PXMLParserBase: abstract parser interface. Derived classes must implement
// StartElement, EndElement and AddCharacterData; the other handlers are
// virtual but not pure. This whole section is compiled only when P_EXPAT is
// defined, so the handlers presumably map onto Expat callbacks - TODO confirm.
//
// NOTE(review): the class-head line (listing line 499) and line 532 are
// missing from this extraction; the class name is taken from the
// constructor and destructor below.
500 {
501  protected:
    // Only derived classes may construct; withNS presumably selects
    // namespace-aware parsing - TODO confirm.
502  PXMLParserBase(bool withNS);
503 
504  public:
    // NOTE(review): the destructor is not declared virtual although the class
    // has virtual functions; deleting a derived object through a
    // PXMLParserBase pointer would be undefined behaviour. Confirm no caller
    // does that.
505  ~PXMLParserBase();
506 
    // Parse a whole stream, or one buffer of dataLen bytes; the final flag
    // presumably marks the last buffer - TODO confirm.
507  bool Parse(istream & strm);
508  bool Parse(const char * data, size_t dataLen, bool final);
509 
    // Handler interface. The last three are pure virtual.
510  virtual void StartDocTypeDecl(const char * docType, const char * sysid, const char * pubid, int hasInternalSubSet);
511  virtual void EndDocTypeDecl();
512  virtual void XmlDecl(const char * version, const char * encoding, int standAlone);
513  virtual void StartNamespaceDeclHandler(const char * prefix, const char * uri);
514  virtual void EndNamespaceDeclHandler(const char * prefix);
515  virtual void StartElement(const char * name, const char **attrs) = 0;
516  virtual void EndElement(const char * name) = 0;
517  virtual void AddCharacterData(const char * data, int len) = 0;
518 
    // Overridable hook; the default returns true.
519  virtual bool Progress() { return true; }
520 
    // Position and error reporting; results are written through the
    // reference arguments.
521  void GetFilePosition(unsigned & col, unsigned & line) const;
522  void GetErrorInfo(PString & errorString, unsigned & errorCol, unsigned & errorLine) const;
523 
524  bool IsParsing() const { return m_parsing; }
525 
526  protected:
    // Untyped handle; presumably the underlying parser instance - TODO confirm.
527  void * m_context;
528  bool m_parsing;
529  off_t m_total;
530  off_t m_consumed;
531  unsigned m_percent;
533 };
534 
535 
537 
// PXMLParser: concrete parser bound to a PXML document. It inherits from both
// PXMLBase and PXMLParserBase, but PCLASSINFO names only PXMLBase as the
// parent. PXML declares this class a friend.
//
// NOTE(review): listing lines 560-564 are missing from this extraction, so
// the data members (including m_document, returned by GetDocument) are not
// visible here.
538 class PXMLParser : public PXMLBase, public PXMLParserBase
539 {
540  PCLASSINFO(PXMLParser, PXMLBase);
541  public:
    // No defaults here: the document, options and progress total must all be
    // supplied.
542  PXMLParser(
543  PXML & doc,
544  Options options,
545  off_t progressTotal
546  );
547 
    // Redeclarations of PXMLParserBase handlers, including all three pure
    // virtuals. EndDocTypeDecl and EndNamespaceDeclHandler are not redeclared.
548  virtual void StartDocTypeDecl(const char * docType, const char * sysid, const char * pubid, int hasInternalSubSet);
549  virtual void XmlDecl(const char * version, const char * encoding, int standAlone);
550  virtual void StartNamespaceDeclHandler(const char * prefix, const char * uri);
551  virtual void StartElement(const char * name, const char **attrs);
552  virtual void EndElement(const char * name);
553  virtual void AddCharacterData(const char * data, int len);
554 
555  virtual bool Progress();
556 
    // Returns a non-const reference to the document even though the function
    // itself is const.
557  PXML & GetDocument() const { return m_document; }
558 
559  protected:
561 
565 };
566 
567 
569 
// PXMLStreamParser: parser variant that reads from a PChannel.
//
// NOTE(review): the class-head line (listing line 570) and the member at
// line 580 are missing from this extraction. The PCLASSINFO line below names
// the class PXMLStreamParser with parent PXMLParser.
571 {
572  PCLASSINFO(PXMLStreamParser, PXMLParser);
573  public:
    // Unlike PXMLParser, options defaults to NoOptions and no progress total
    // is taken.
574  PXMLStreamParser(PXML & doc, Options options = NoOptions);
575 
576  virtual void EndElement(const char * name);
    // Reads from channel and returns an element pointer.
    // NOTE(review): ownership of the returned element and the value returned
    // on failure are not visible in this header.
577  virtual PXMLElement * Read(PChannel * channel);
578 
579  protected:
581 };
582 
583 
584 #else
585 
// Fallback for builds without P_EXPAT: the PXML class does not exist, so
// EscapeSpecialChars is declared as a free function in a namespace of the
// same name. Callers can write PXML::EscapeSpecialChars(...) in either
// configuration.
586 namespace PXML {
587  extern PString EscapeSpecialChars(const PString & str);
588 };
589 
590 #endif // P_EXPAT
591 
592 #endif // PTLIB_PXML_H
593 
594 
595 // End Of File ///////////////////////////////////////////////////////////////