| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435 |
- // Copyright (C) 2004-2024 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #ifndef MUPDF_FITZ_XML_H
- #define MUPDF_FITZ_XML_H
- #include "mupdf/fitz/system.h"
- #include "mupdf/fitz/context.h"
- #include "mupdf/fitz/buffer.h"
- #include "mupdf/fitz/pool.h"
- #include "mupdf/fitz/archive.h"
- /**
- XML document model
- */
- typedef struct fz_xml fz_xml;
- /* For backwards compatibility */
- typedef fz_xml fz_xml_doc;
- /**
- Parse the contents of buffer into a tree of xml nodes.
- preserve_white: whether to keep or delete all-whitespace nodes.
- */
- fz_xml *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white);
- /**
- Parse the contents of stream into a tree of xml nodes.
- preserve_white: whether to keep or delete all-whitespace nodes.
- */
- fz_xml *fz_parse_xml_stream(fz_context *ctx, fz_stream *stream, int preserve_white);
- /**
- Parse the contents of an archive entry into a tree of xml nodes.
- dir: the archive that holds the entry.
- filename: the name of the entry within the archive.
- preserve_white: whether to keep or delete all-whitespace nodes.
- */
- fz_xml *fz_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white);
- /**
- Try to parse the contents of an archive entry into a tree of xml nodes.
- preserve_white: whether to keep or delete all-whitespace nodes.
- Returns NULL if the archive entry can't be found. Otherwise behaves
- the same as fz_parse_xml_archive_entry. May throw exceptions.
- */
- fz_xml *fz_try_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white);
- /**
- Parse the contents of a buffer into a tree of XML nodes,
- using the HTML5 parsing algorithm.
- */
- fz_xml *fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf);
- /**
- Add a reference to the XML.
- */
- fz_xml *fz_keep_xml(fz_context *ctx, fz_xml *xml);
- /**
- Drop a reference to the XML. When the last reference is
- dropped, the node and all its children and siblings will
- be freed.
- */
- void fz_drop_xml(fz_context *ctx, fz_xml *xml);
- /**
- Detach a node from the tree, unlinking it from its parent,
- and setting the document root to the node.
- */
- void fz_detach_xml(fz_context *ctx, fz_xml *node);
- /**
- Return the topmost XML node of a document.
- */
- fz_xml *fz_xml_root(fz_xml_doc *xml);
- /**
- Return previous sibling of XML node.
- */
- fz_xml *fz_xml_prev(fz_xml *item);
- /**
- Return next sibling of XML node.
- */
- fz_xml *fz_xml_next(fz_xml *item);
- /**
- Return parent of XML node.
- */
- fz_xml *fz_xml_up(fz_xml *item);
- /**
- Return first child of XML node.
- */
- fz_xml *fz_xml_down(fz_xml *item);
- /**
- Return true if the tag name matches.
- */
- int fz_xml_is_tag(fz_xml *item, const char *name);
- /**
- Return tag of XML node. Return NULL for text nodes.
- */
- char *fz_xml_tag(fz_xml *item);
- /**
- Return the value of an attribute of an XML node.
- NULL if the attribute doesn't exist.
- */
- char *fz_xml_att(fz_xml *item, const char *att);
- /**
- Return the value of an attribute of an XML node.
- If the first attribute doesn't exist, try the second.
- NULL if neither attribute exists.
- */
- char *fz_xml_att_alt(fz_xml *item, const char *one, const char *two);
- /**
- Check for a matching attribute on an XML node.
- If the node has the requested attribute (name), and the value
- matches (match) then return 1. Otherwise, 0.
- */
- int fz_xml_att_eq(fz_xml *item, const char *name, const char *match);
- /**
- Add an attribute to an XML node.
- */
- void fz_xml_add_att(fz_context *ctx, fz_pool *pool, fz_xml *node, const char *key, const char *val);
- /**
- Return the text content of an XML node.
- Return NULL if the node is a tag.
- */
- char *fz_xml_text(fz_xml *item);
- /**
- Pretty-print an XML tree to given output.
- */
- void fz_output_xml(fz_context *ctx, fz_output *out, fz_xml *item, int level);
- /**
- Pretty-print an XML tree to stdout. (Deprecated, use
- fz_output_xml in preference).
- */
- void fz_debug_xml(fz_xml *item, int level);
- /**
- Search the siblings of XML nodes starting with item looking for
- the first with the given tag.
- Return NULL if none found.
- */
- fz_xml *fz_xml_find(fz_xml *item, const char *tag);
- /**
- Search the siblings of XML nodes starting with the first sibling
- of item looking for the first with the given tag.
- Return NULL if none found.
- NOTE(review): "the first sibling of item" presumably means the sibling
- immediately following item (so item itself is not tested), in contrast
- to fz_xml_find, which starts with item. Confirm against the implementation.
- */
- fz_xml *fz_xml_find_next(fz_xml *item, const char *tag);
- /**
- Search the siblings of XML nodes starting with the first child
- of item looking for the first with the given tag.
- Return NULL if none found.
- */
- fz_xml *fz_xml_find_down(fz_xml *item, const char *tag);
- /**
- Search the siblings of XML nodes starting with item looking for
- the first with the given tag (or any tag if tag is NULL), and
- with a matching attribute.
- Return NULL if none found.
- */
- fz_xml *fz_xml_find_match(fz_xml *item, const char *tag, const char *att, const char *match);
- /**
- Search the siblings of XML nodes starting with the first sibling
- of item looking for the first with the given tag (or any tag if tag
- is NULL), and with a matching attribute.
- Return NULL if none found.
- NOTE(review): "the first sibling of item" presumably means the sibling
- immediately following item (so item itself is not tested), in contrast
- to fz_xml_find_match, which starts with item. Confirm against the
- implementation.
- */
- fz_xml *fz_xml_find_next_match(fz_xml *item, const char *tag, const char *att, const char *match);
- /**
- Search the siblings of XML nodes starting with the first child
- of item looking for the first with the given tag (or any tag if
- tag is NULL), and with a matching attribute.
- Return NULL if none found.
- */
- fz_xml *fz_xml_find_down_match(fz_xml *item, const char *tag, const char *att, const char *match);
- /**
- Perform a depth first search from item, returning the first
- child that matches the given tag (or any tag if tag is NULL),
- with the given attribute (if att is non NULL), that matches
- match (if match is non NULL).
- */
- fz_xml *fz_xml_find_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
- /**
- Perform a depth first search from item, returning the first
- child that matches the given tag (or any tag if tag is NULL),
- with the given attribute (if att is non NULL), that matches
- match (if match is non NULL). The search stops if it ever
- reaches the top of the tree, or the declared 'top' item.
- */
- fz_xml *fz_xml_find_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
- /**
- Perform a depth first search onwards from item, returning the first
- child that matches the given tag (or any tag if tag is NULL),
- with the given attribute (if att is non NULL), that matches
- match (if match is non NULL).
- */
- fz_xml *fz_xml_find_next_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
- /**
- Perform a depth first search onwards from item, returning the first
- child that matches the given tag (or any tag if tag is NULL),
- with the given attribute (if att is non NULL), that matches
- match (if match is non NULL). The search stops if it ever reaches
- the top of the tree, or the declared 'top' item.
- */
- fz_xml *fz_xml_find_next_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
- /**
- DOM-like functions for html in xml.
- */
- /**
- Return a borrowed reference for the 'body' element of
- the given DOM.
- */
- fz_xml *fz_dom_body(fz_context *ctx, fz_xml *dom);
- /**
- Return a borrowed reference for the document (the top
- level element) of the DOM.
- */
- fz_xml *fz_dom_document_element(fz_context *ctx, fz_xml *dom);
- /**
- Create an element of a given tag type for the given DOM.
- The element is not linked into the DOM yet.
- */
- fz_xml *fz_dom_create_element(fz_context *ctx, fz_xml *dom, const char *tag);
- /**
- Create a text node for the given DOM.
- The text node is not linked into the DOM yet.
- */
- fz_xml *fz_dom_create_text_node(fz_context *ctx, fz_xml *dom, const char *text);
- /**
- Find the first element matching the requirements in a depth first traversal from elt.
- The tag name must match tag, unless tag is NULL, in which case any tag name is accepted.
- If att is NULL, then every node that passes the tag test matches.
- Otherwise:
- If match is NULL, then only nodes that have an att attribute are accepted.
- If match is non-NULL, then only nodes that have an att attribute whose value matches match are accepted.
- Returns NULL (if no match found), or a borrowed reference to the first matching element.
- */
- fz_xml *fz_dom_find(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
- /**
- Find the next element matching the requirements.
- */
- fz_xml *fz_dom_find_next(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
- /**
- Insert an element as the last child of a parent, unlinking the
- child from its current position if required.
- */
- void fz_dom_append_child(fz_context *ctx, fz_xml *parent, fz_xml *child);
- /**
- Insert an element (new_elt), before another element (node),
- unlinking the new_elt from its current position if required.
- */
- void fz_dom_insert_before(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
- /**
- Insert an element (new_elt), after another element (node),
- unlinking the new_elt from its current position if required.
- */
- void fz_dom_insert_after(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
- /**
- Remove an element from the DOM. The element can be added back elsewhere
- if required.
- No reference counting changes for the element.
- */
- void fz_dom_remove(fz_context *ctx, fz_xml *elt);
- /**
- Clone an element (and its children).
- A borrowed reference to the clone is returned. The clone is not
- yet linked into the DOM.
- */
- fz_xml *fz_dom_clone(fz_context *ctx, fz_xml *elt);
- /**
- Return a borrowed reference to the first child of a node,
- or NULL if there isn't one.
- */
- fz_xml *fz_dom_first_child(fz_context *ctx, fz_xml *elt);
- /**
- Return a borrowed reference to the parent of a node,
- or NULL if there isn't one.
- */
- fz_xml *fz_dom_parent(fz_context *ctx, fz_xml *elt);
- /**
- Return a borrowed reference to the next sibling of a node,
- or NULL if there isn't one.
- */
- fz_xml *fz_dom_next(fz_context *ctx, fz_xml *elt);
- /**
- Return a borrowed reference to the previous sibling of a node,
- or NULL if there isn't one.
- */
- fz_xml *fz_dom_previous(fz_context *ctx, fz_xml *elt);
- /**
- Add an attribute to an element.
- Ownership of att and value remain with the caller.
- */
- void fz_dom_add_attribute(fz_context *ctx, fz_xml *elt, const char *att, const char *value);
- /**
- Remove an attribute from an element.
- */
- void fz_dom_remove_attribute(fz_context *ctx, fz_xml *elt, const char *att);
- /**
- Retrieve the value of a given attribute from a given element.
- Returns a borrowed pointer to the value or NULL if not found.
- */
- const char *fz_dom_attribute(fz_context *ctx, fz_xml *elt, const char *att);
- /**
- Enumerate through the attributes of an element.
- Call with i=0,1,2,3... to enumerate attributes.
- On return *att and the return value will be NULL if there are not
- that many attributes to read. Otherwise, *att will be filled in
- with a borrowed pointer to the attribute name, and the return
- value will be a borrowed pointer to the value.
- */
- const char *fz_dom_get_attribute(fz_context *ctx, fz_xml *elt, int i, const char **att);
- /**
- Make new xml dom root element.
- */
- fz_xml *fz_new_dom(fz_context *ctx, const char *tag);
- /**
- Create a new dom node.
- This will NOT be linked in yet.
- */
- fz_xml *fz_new_dom_node(fz_context *ctx, fz_xml *dom, const char *tag);
- /**
- Create a new dom text node.
- This will NOT be linked in yet.
- */
- fz_xml *fz_new_dom_text_node(fz_context *ctx, fz_xml *dom, const char *text);
- /**
- Write our xml structure out to an xml stream.
- Properly formatted XML is only allowed to have a single top-level node
- under which everything must sit. Our structures allow for multiple
- top level nodes. If required, we will output an extra 'ROOT' node
- at the top so that the xml is well-formed.
- If 'indented' is non-zero then additional whitespace will be added to
- make the XML easier to read in a text editor. The indented output will
- NOT be properly compliant.
- */
- void fz_write_xml(fz_context *ctx, fz_xml *root, fz_output *out, int indented);
- /**
- As for fz_write_xml, but direct to a file.
- */
- void fz_save_xml(fz_context *ctx, fz_xml *root, const char *path, int indented);
- #endif
|