xml.h 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729
  1. /////////////////////////////////////////////////////////////////////////////
  2. // Name: xml/xml.h
  3. // Purpose: interface of wxXmlNode, wxXmlAttribute, wxXmlDocument
  4. // Author: wxWidgets team
  5. // Licence: wxWindows licence
  6. /////////////////////////////////////////////////////////////////////////////
  /**
      Kinds of node that can appear in an XML tree.

      @note The numeric values are kept synchronized with @c xmlElementType
            from libxml, so they must not be renumbered.
  */
  enum wxXmlNodeType
  {
      wxXML_ELEMENT_NODE       = 1,
      wxXML_ATTRIBUTE_NODE     = 2,
      wxXML_TEXT_NODE          = 3,
      wxXML_CDATA_SECTION_NODE = 4,
      wxXML_ENTITY_REF_NODE    = 5,
      wxXML_ENTITY_NODE        = 6,
      wxXML_PI_NODE            = 7,
      wxXML_COMMENT_NODE       = 8,
      wxXML_DOCUMENT_NODE      = 9,
      wxXML_DOCUMENT_TYPE_NODE = 10,
      wxXML_DOCUMENT_FRAG_NODE = 11,
      wxXML_NOTATION_NODE      = 12,
      wxXML_HTML_DOCUMENT_NODE = 13
  };
  25. /**
  26. @class wxXmlNode
  27. Represents a node in an XML document. See wxXmlDocument.
  28. Node has a name and may have content and attributes.
  29. Most common node types are @c wxXML_TEXT_NODE (name and attributes are irrelevant)
  30. and @c wxXML_ELEMENT_NODE.
  31. Example: in <tt>\<title\>hi\</title\></tt> there is an element with the name
  32. @c title and irrelevant content and one child of type @c wxXML_TEXT_NODE
  33. with @c hi as content.
  34. The @c wxXML_PI_NODE type sets the name to the PI target and the contents to
  35. the instructions. Note that whilst the PI instructions are often in the form
  36. of pseudo-attributes these do not use the nodes attribute system. It is the users
  37. responsibility to code and decode the instruction text.
  38. If @c wxUSE_UNICODE is 0, all strings are encoded in the encoding given to
  39. wxXmlDocument::Load (default is UTF-8).
  40. @library{wxxml}
  41. @category{xml}
  42. @see wxXmlDocument, wxXmlAttribute
  43. */
  44. class wxXmlNode
  45. {
  46. public:
  47. /**
  48. Creates this XML node and eventually insert it into an existing XML tree.
  49. @param parent
  50. The parent node to which append this node instance.
  51. If this argument is @NULL this new node will be floating and it can
  52. be appended later to another one using the AddChild() or InsertChild()
  53. functions. Otherwise the child is already added to the XML tree by
  54. this constructor and it shouldn't be done again.
  55. @param type
  56. One of the ::wxXmlNodeType enumeration value.
  57. @param name
  58. The name of the node. This is the string which appears between angular brackets.
  59. @param content
  60. The content of the node.
  61. Only meaningful when type is @c wxXML_TEXT_NODE or @c wxXML_CDATA_SECTION_NODE.
  62. @param attrs
  63. If not @NULL, this wxXmlAttribute object and its eventual siblings are attached to the node.
  64. @param next
  65. If not @NULL, this node and its eventual siblings are attached to the node.
  66. @param lineNo
  67. Number of line this node was present at in input file or -1.
  68. */
  69. wxXmlNode(wxXmlNode* parent, wxXmlNodeType type,
  70. const wxString& name,
  71. const wxString& content = wxEmptyString,
  72. wxXmlAttribute* attrs = NULL,
  73. wxXmlNode* next = NULL, int lineNo = -1);
  74. /**
  75. A simplified version of the first constructor form, assuming a @NULL parent.
  76. @param type
  77. One of the ::wxXmlNodeType enumeration value.
  78. @param name
  79. The name of the node. This is the string which appears between angular brackets.
  80. @param content
  81. The content of the node.
  82. Only meaningful when type is @c wxXML_TEXT_NODE or @c wxXML_CDATA_SECTION_NODE.
  83. @param lineNo
  84. Number of line this node was present at in input file or -1.
  85. */
  86. wxXmlNode(wxXmlNodeType type, const wxString& name,
  87. const wxString& content = wxEmptyString,
  88. int lineNo = -1);
  89. /**
  90. Copy constructor.
  91. Note that this does NOT copy siblings and parent pointer, i.e. GetParent()
  92. and GetNext() will return @NULL after using copy ctor and are never unmodified by operator=().
  93. On the other hand, it DOES copy children and attributes.
  94. */
  95. wxXmlNode(const wxXmlNode& node);
  96. /**
  97. The virtual destructor. Deletes attached children and attributes.
  98. */
  99. virtual ~wxXmlNode();
  100. /**
  101. Appends a attribute with given @a name and @a value to the list of
  102. attributes for this node.
  103. */
  104. virtual void AddAttribute(const wxString& name, const wxString& value);
  105. /**
  106. Appends given attribute to the list of attributes for this node.
  107. */
  108. virtual void AddAttribute(wxXmlAttribute* attr);
  109. /**
  110. Adds node @a child as the last child of this node.
  111. @note
  112. Note that this function works in O(n) time where @e n is the number
  113. of existing children. Consequently, adding large number of child
  114. nodes using this method can be expensive, because it has O(n^2) time
  115. complexity in number of nodes to be added. Use InsertChildAfter() to
  116. populate XML tree in linear time.
  117. @see InsertChild(), InsertChildAfter()
  118. */
  119. virtual void AddChild(wxXmlNode* child);
  120. /**
  121. Removes the first attributes which has the given @a name from the list of
  122. attributes for this node.
  123. */
  124. virtual bool DeleteAttribute(const wxString& name);
  125. /**
  126. Returns true if a attribute named attrName could be found.
  127. The value of that attribute is saved in value (which must not be @NULL).
  128. */
  129. bool GetAttribute(const wxString& attrName, wxString* value) const;
  130. /**
  131. Returns the value of the attribute named @a attrName if it does exist.
  132. If it does not exist, the @a defaultVal is returned.
  133. */
  134. wxString GetAttribute(const wxString& attrName,
  135. const wxString& defaultVal = wxEmptyString) const;
  136. /**
  137. Return a pointer to the first attribute of this node.
  138. */
  139. wxXmlAttribute* GetAttributes() const;
  140. /**
  141. Returns the first child of this node.
  142. To get a pointer to the second child of this node (if it does exist), use the
  143. GetNext() function on the returned value.
  144. */
  145. wxXmlNode* GetChildren() const;
  146. /**
  147. Returns the content of this node. Can be an empty string.
  148. Be aware that for nodes of type @c wxXML_ELEMENT_NODE (the most used node type)
  149. the content is an empty string. See GetNodeContent() for more details.
  150. */
  151. const wxString& GetContent() const;
  152. /**
  153. Returns the number of nodes which separate this node from @c grandparent.
  154. This function searches only the parents of this node until it finds
  155. @a grandparent or the @NULL node (which is the parent of non-linked
  156. nodes or the parent of a wxXmlDocument's root element node).
  157. */
  158. int GetDepth(wxXmlNode* grandparent = NULL) const;
  159. /**
  160. Returns a flag indicating whether encoding conversion is necessary when saving. The default is @false.
  161. You can improve saving efficiency considerably by setting this value.
  162. */
  163. bool GetNoConversion() const;
  164. /**
  165. Returns line number of the node in the input XML file or @c -1 if it is unknown.
  166. */
  167. int GetLineNumber() const;
  168. /**
  169. Returns the name of this node.
  170. Can be an empty string (e.g. for nodes of type @c wxXML_TEXT_NODE or
  171. @c wxXML_CDATA_SECTION_NODE).
  172. */
  173. const wxString& GetName() const;
  174. /**
  175. Returns a pointer to the sibling of this node or @NULL if there are no
  176. siblings.
  177. */
  178. wxXmlNode* GetNext() const;
  179. /**
  180. Returns the content of the first child node of type @c wxXML_TEXT_NODE
  181. or @c wxXML_CDATA_SECTION_NODE.
  182. This function is very useful since the XML snippet @c "tagnametagcontent/tagname"
  183. is represented by expat with the following tag tree:
  184. @code
  185. wxXML_ELEMENT_NODE name="tagname", content=""
  186. |-- wxXML_TEXT_NODE name="", content="tagcontent"
  187. @endcode
  188. or eventually:
  189. @code
  190. wxXML_ELEMENT_NODE name="tagname", content=""
  191. |-- wxXML_CDATA_SECTION_NODE name="", content="tagcontent"
  192. @endcode
  193. An empty string is returned if the node has no children of type
  194. @c wxXML_TEXT_NODE or @c wxXML_CDATA_SECTION_NODE, or if the content
  195. of the first child of such types is empty.
  196. */
  197. wxString GetNodeContent() const;
  198. /**
  199. Returns a pointer to the parent of this node or @NULL if this node has no
  200. parent.
  201. */
  202. wxXmlNode* GetParent() const;
  203. /**
  204. Returns the type of this node.
  205. */
  206. wxXmlNodeType GetType() const;
  207. /**
  208. Returns @true if this node has a attribute named @a attrName.
  209. */
  210. bool HasAttribute(const wxString& attrName) const;
  211. /**
  212. Inserts the @a child node immediately before @a followingNode in the
  213. children list.
  214. @return @true if @a followingNode has been found and the @a child
  215. node has been inserted.
  216. @note
  217. For historical reasons, @a followingNode may be @NULL. In that case,
  218. then @a child is prepended to the list of children and becomes the
  219. first child of this node, i.e. it behaves identically to using the
  220. first children (as returned by GetChildren()) for @a followingNode).
  221. @see AddChild(), InsertChildAfter()
  222. */
  223. virtual bool InsertChild(wxXmlNode* child, wxXmlNode* followingNode);
  224. /**
  225. Inserts the @a child node immediately after @a precedingNode in the
  226. children list.
  227. @return @true if @a precedingNode has been found and the @a child
  228. node has been inserted.
  229. @param child
  230. The child to insert.
  231. @param precedingNode
  232. The node to insert @a child after. As a special case, this can be
  233. @NULL if this node has no children yet -- in that case, @a child
  234. will become this node's only child node.
  235. @since 2.8.8
  236. @see InsertChild(), AddChild()
  237. */
  238. virtual bool InsertChildAfter(wxXmlNode* child, wxXmlNode* precedingNode);
  239. /**
  240. Returns @true if the content of this node is a string containing only
  241. whitespaces (spaces, tabs, new lines, etc).
  242. Note that this function is locale-independent since the parsing of XML
  243. documents must always produce the exact same tree regardless of the
  244. locale it runs under.
  245. */
  246. bool IsWhitespaceOnly() const;
  247. /**
  248. Removes the given node from the children list.
  249. Returns @true if the node was found and removed or @false if the node
  250. could not be found.
  251. Note that the caller is responsible for deleting the removed node in order
  252. to avoid memory leaks.
  253. */
  254. virtual bool RemoveChild(wxXmlNode* child);
  255. /**
  256. Sets as first attribute the given wxXmlAttribute object.
  257. The caller is responsible for deleting any previously present attributes
  258. attached to this node.
  259. */
  260. void SetAttributes(wxXmlAttribute* attr);
  261. /**
  262. Sets as first child the given node.
  263. The caller is responsible for deleting any previously present children node.
  264. */
  265. void SetChildren(wxXmlNode* child);
  266. /**
  267. Sets the content of this node.
  268. */
  269. void SetContent(const wxString& con);
  270. /**
  271. Sets the name of this node.
  272. */
  273. void SetName(const wxString& name);
  274. /**
  275. Sets as sibling the given node.
  276. The caller is responsible for deleting any previously present sibling node.
  277. */
  278. void SetNext(wxXmlNode* next);
  279. /**
  280. Sets a flag to indicate whether encoding conversion is necessary when saving. The default is @false.
  281. You can improve saving efficiency considerably by setting this value.
  282. */
  283. void SetNoConversion(bool noconversion);
  284. /**
  285. Sets as parent the given node.
  286. The caller is responsible for deleting any previously present parent node.
  287. */
  288. void SetParent(wxXmlNode* parent);
  289. /**
  290. Sets the type of this node.
  291. */
  292. void SetType(wxXmlNodeType type);
  293. /**
  294. See the copy constructor for more info.
  295. */
  296. wxXmlNode& operator=(const wxXmlNode& node);
  297. };
  298. /**
  299. @class wxXmlAttribute
  300. Represents a node attribute.
  301. Example: in <tt>\<img src="hello.gif" id="3"/\></tt>, @c src is an attribute
  302. with value @c hello.gif and @c id is an attribute with value @c 3.
  303. @library{wxxml}
  304. @category{xml}
  305. @see wxXmlDocument, wxXmlNode
  306. */
  307. class wxXmlAttribute
  308. {
  309. public:
  310. /**
  311. Default constructor.
  312. */
  313. wxXmlAttribute();
  314. /**
  315. Creates the attribute with given @a name and @a value.
  316. If @a next is not @NULL, then sets it as sibling of this attribute.
  317. */
  318. wxXmlAttribute(const wxString& name, const wxString& value,
  319. wxXmlAttribute* next = NULL);
  320. /**
  321. The virtual destructor.
  322. */
  323. virtual ~wxXmlAttribute();
  324. /**
  325. Returns the name of this attribute.
  326. */
  327. wxString GetName() const;
  328. /**
  329. Returns the sibling of this attribute or @NULL if there are no siblings.
  330. */
  331. wxXmlAttribute* GetNext() const;
  332. /**
  333. Returns the value of this attribute.
  334. */
  335. wxString GetValue() const;
  336. /**
  337. Sets the name of this attribute.
  338. */
  339. void SetName(const wxString& name);
  340. /**
  341. Sets the sibling of this attribute.
  342. */
  343. void SetNext(wxXmlAttribute* next);
  344. /**
  345. Sets the value of this attribute.
  346. */
  347. void SetValue(const wxString& value);
  348. };
  /// Special @a indentstep value for wxXmlDocument::Save(): turns automatic indentation off.
  #define wxXML_NO_INDENTATION (-1)
  /// Flags that can be passed to wxXmlDocument::Load().
  enum wxXmlDocumentLoadFlag
  {
      /// No flag set; this is the default used by wxXmlDocument::Load().
      wxXMLDOC_NONE = 0,

      /// Do not skip text nodes that contain whitespace only while loading.
      wxXMLDOC_KEEP_WHITESPACE_NODES = 1
  };
  357. /**
  358. @class wxXmlDocument
  359. This class holds XML data/document as parsed by XML parser in the root node.
  360. wxXmlDocument internally uses the expat library which comes with wxWidgets to
  361. parse the given stream.
  362. A simple example of using XML classes is:
  363. @code
  364. wxXmlDocument doc;
  365. if (!doc.Load("myfile.xml"))
  366. return false;
  367. // start processing the XML file
  368. if (doc.GetRoot()->GetName() != "myroot-node")
  369. return false;
  370. // examine prologue
  371. wxXmlNode *prolog = doc.GetDocumentNode()->GetChildren();
  372. while (prolog) {
  373. if (prolog->GetType() == wxXML_PI_NODE && prolog->GetName() == "target") {
  374. // process Process Instruction contents
  375. wxString pi = prolog->GetContent();
  376. ...
  377. }
  378. }
  379. wxXmlNode *child = doc.GetRoot()->GetChildren();
  380. while (child) {
  381. if (child->GetName() == "tag1") {
  382. // process text enclosed by tag1/tag1
  383. wxString content = child->GetNodeContent();
  384. ...
  385. // process attributes of tag1
  386. wxString attrvalue1 =
  387. child->GetAttribute("attr1", "default-value");
  388. wxString attrvalue2 =
  389. child->GetAttribute("attr2", "default-value");
  390. ...
  391. } else if (child->GetName() == "tag2") {
  392. // process tag2 ...
  393. }
  394. child = child->GetNext();
  395. }
  396. @endcode
  397. Note that if you want to preserve the original formatting of the loaded file
  398. including whitespaces and indentation, you need to turn off whitespace-only
  399. textnode removal and automatic indentation:
  400. @code
  401. wxXmlDocument doc;
  402. doc.Load("myfile.xml", "UTF-8", wxXMLDOC_KEEP_WHITESPACE_NODES);
  403. // myfile2.xml will be identical to myfile.xml saving it this way:
  404. doc.Save("myfile2.xml", wxXML_NO_INDENTATION);
  405. @endcode
  406. Using default parameters, you will get a reformatted document which in general
  407. is different from the original loaded content:
  408. @code
  409. wxXmlDocument doc;
  410. doc.Load("myfile.xml");
  411. doc.Save("myfile2.xml"); // myfile2.xml != myfile.xml
  412. @endcode
  413. @library{wxxml}
  414. @category{xml}
  415. @see wxXmlNode, wxXmlAttribute
  416. */
  417. class wxXmlDocument : public wxObject
  418. {
  419. public:
  420. /**
  421. Default constructor.
  422. */
  423. wxXmlDocument();
  424. /**
  425. Copy constructor. Deep copies all the XML tree of the given document.
  426. */
  427. wxXmlDocument(const wxXmlDocument& doc);
  428. /**
  429. Loads the given filename using the given encoding. See Load().
  430. */
  431. wxXmlDocument(const wxString& filename,
  432. const wxString& encoding = "UTF-8"));
  433. /**
  434. Loads the XML document from given stream using the given encoding. See Load().
  435. */
  436. wxXmlDocument(wxInputStream& stream,
  437. const wxString& encoding = "UTF-8");
  438. /**
  439. Virtual destructor. Frees the document root node.
  440. */
  441. virtual ~wxXmlDocument();
  442. /**
  443. Appends a Process Instruction or Comment node to the document prologue.
  444. Calling this function will create a prologue or attach the node to the
  445. end of an existing prologue.
  446. @since 2.9.2
  447. */
  448. void AppendToProlog(wxXmlNode* node);
  449. /**
  450. Detaches the document node and returns it.
  451. The document node will be set to @NULL and thus IsOk() will
  452. return @false after calling this function.
  453. Note that the caller is responsible for deleting the returned node in order
  454. to avoid memory leaks.
  455. @since 2.9.2
  456. */
  457. wxXmlNode* DetachDocumentNode();
  458. /**
  459. Detaches the root entity node and returns it.
  460. After calling this function, the document node will remain together with
  461. any prologue nodes, but IsOk() will return @false since the root entity
  462. will be missing.
  463. Note that the caller is responsible for deleting the returned node in order
  464. to avoid memory leaks.
  465. */
  466. wxXmlNode* DetachRoot();
  467. /**
  468. Returns encoding of in-memory representation of the document
  469. (same as passed to Load() or constructor, defaults to UTF-8).
  470. @note this is meaningless in Unicode build where data are stored as @c wchar_t*.
  471. */
  472. wxString GetEncoding() const;
  473. /**
  474. Returns encoding of document (may be empty).
  475. @note This is the encoding original file was saved in, @b not the
  476. encoding of in-memory representation!
  477. */
  478. const wxString& GetFileEncoding() const;
  479. /**
  480. Returns the document node of the document.
  481. @since 2.9.2
  482. */
  483. wxXmlNode* GetDocumentNode() const;
  484. /**
  485. Returns the root element node of the document.
  486. */
  487. wxXmlNode* GetRoot() const;
  488. /**
  489. Returns the version of document.
  490. This is the value in the @c \<?xml version="1.0"?\> header of the XML document.
  491. If the version attribute was not explicitly given in the header, this function
  492. returns an empty string.
  493. */
  494. const wxString& GetVersion() const;
  495. /**
  496. Returns @true if the document has been loaded successfully.
  497. */
  498. bool IsOk() const;
  499. /**
  500. Parses @a filename as an xml document and loads its data.
  501. If @a flags does not contain wxXMLDOC_KEEP_WHITESPACE_NODES, then, while loading,
  502. all nodes of type @c wxXML_TEXT_NODE (see wxXmlNode) are automatically skipped
  503. if they contain whitespaces only.
  504. The removal of these nodes makes the load process slightly faster and requires
  505. less memory however makes impossible to recreate exactly the loaded text with a
  506. Save() call later. Read the initial description of this class for more info.
  507. Returns true on success, false otherwise.
  508. */
  509. virtual bool Load(const wxString& filename,
  510. const wxString& encoding = "UTF-8", int flags = wxXMLDOC_NONE);
  511. /**
  512. Like Load(const wxString&, const wxString&, int) but takes the data from
  513. given input stream.
  514. */
  515. virtual bool Load(wxInputStream& stream,
  516. const wxString& encoding = "UTF-8", int flags = wxXMLDOC_NONE);
  517. /**
  518. Saves XML tree creating a file named with given string.
  519. If @a indentstep is greater than or equal to zero, then, while saving,
  520. an automatic indentation is added with steps composed by indentstep spaces.
  521. If @a indentstep is @c wxXML_NO_INDENTATION, then, automatic indentation
  522. is turned off.
  523. */
  524. virtual bool Save(const wxString& filename, int indentstep = 2) const;
  525. /**
  526. Saves XML tree in the given output stream.
  527. See Save(const wxString&, int) for a description of @a indentstep.
  528. */
  529. virtual bool Save(wxOutputStream& stream, int indentstep = 2) const;
  530. /**
  531. Sets the document node of this document.
  532. Deletes any previous document node.
  533. Use DetachDocumentNode() and then SetDocumentNode() if you want to
  534. replace the document node without deleting the old document tree.
  535. @since 2.9.2
  536. */
  537. void SetDocumentNode(wxXmlNode* node);
  538. /**
  539. Sets the encoding of the document.
  540. */
  541. void SetEncoding(const wxString& enc);
  542. /**
  543. Sets the enconding of the file which will be used to save the document.
  544. */
  545. void SetFileEncoding(const wxString& encoding);
  546. /**
  547. Sets the root element node of this document.
  548. Will create the document node if necessary. Any previous
  549. root element node is deleted.
  550. */
  551. void SetRoot(wxXmlNode* node);
  552. /**
  553. Sets the version of the XML file which will be used to save the document.
  554. */
  555. void SetVersion(const wxString& version);
  556. /**
  557. Deep copies the given document.
  558. */
  559. wxXmlDocument& operator=(const wxXmlDocument& doc);
  560. /**
  561. Get expat library version information.
  562. @since 2.9.2
  563. @see wxVersionInfo
  564. */
  565. static wxVersionInfo GetLibraryVersionInfo();
  566. };