aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/xml.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/xml.c')
-rw-r--r-- src/backend/utils/adt/xml.c 212
1 files changed, 200 insertions, 12 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 079bcb12085..15adbd6a016 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -52,6 +52,7 @@
#include <libxml/tree.h>
#include <libxml/uri.h>
#include <libxml/xmlerror.h>
+#include <libxml/xmlsave.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlwriter.h>
#include <libxml/xpath.h>
@@ -146,6 +147,8 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version,
static bool xml_doctype_in_content(const xmlChar *str);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding,
+ XmlOptionType *parsed_xmloptiontype,
+ xmlNodePtr *parsed_nodes,
Node *escontext);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
@@ -273,7 +276,7 @@ xml_in(PG_FUNCTION_ARGS)
* Note: we don't need to worry about whether a soft error is detected.
*/
doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
- fcinfo->context);
+ NULL, NULL, fcinfo->context);
if (doc != NULL)
xmlFreeDoc(doc);
@@ -400,7 +403,7 @@ xml_recv(PG_FUNCTION_ARGS)
* Parse the data to check if it is well-formed XML data. Assume that
* xml_parse will throw ERROR if not.
*/
- doc = xml_parse(result, xmloption, true, encoding, NULL);
+ doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
xmlFreeDoc(doc);
/* Now that we know what we're dealing with, convert to server encoding */
@@ -619,15 +622,182 @@ xmltotext(PG_FUNCTION_ARGS)
text *
-xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
{
- if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
+#ifdef USE_LIBXML
+ text *volatile result;
+ xmlDocPtr doc;
+ XmlOptionType parsed_xmloptiontype;
+ xmlNodePtr content_nodes;
+ volatile xmlBufferPtr buf = NULL;
+ volatile xmlSaveCtxtPtr ctxt = NULL;
+ ErrorSaveContext escontext = {T_ErrorSaveContext};
+ PgXmlErrorContext *xmlerrcxt;
+#endif
+
+ if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+ {
+ /*
+ * We don't actually need to do anything, so just return the
+ * binary-compatible input. For backwards-compatibility reasons,
+ * allow such cases to succeed even without USE_LIBXML.
+ */
+ return (text *) data;
+ }
+
+#ifdef USE_LIBXML
+ /* Parse the input according to the xmloption */
+ doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+ &parsed_xmloptiontype, &content_nodes,
+ (Node *) &escontext);
+ if (doc == NULL || escontext.error_occurred)
+ {
+ if (doc)
+ xmlFreeDoc(doc);
+ /* A soft error must be a failure to conform to XMLOPTION_DOCUMENT */
ereport(ERROR,
(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
errmsg("not an XML document")));
+ }
+
+ /* If we weren't asked to indent, we're done. */
+ if (!indent)
+ {
+ xmlFreeDoc(doc);
+ return (text *) data;
+ }
+
+ /* Otherwise, we need to set up libxml error handling. */
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+ PG_TRY();
+ {
+ size_t decl_len = 0;
+
+ /* The serialized data will go into this buffer. */
+ buf = xmlBufferCreate();
+
+ if (buf == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlBuffer");
+
+ /* Detect whether there's an XML declaration */
+ parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
+
+ /*
+ * Emit declaration only if the input had one. Note: some versions of
+ * xmlSaveToBuffer leak memory if a non-null encoding argument is
+ * passed, so don't do that. We don't want any encoding conversion
+ * anyway.
+ */
+ if (decl_len == 0)
+ ctxt = xmlSaveToBuffer(buf, NULL,
+ XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+ else
+ ctxt = xmlSaveToBuffer(buf, NULL,
+ XML_SAVE_FORMAT);
+
+ if (ctxt == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlSaveCtxt");
+
+ if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
+ {
+ /* If it's a document, saving is easy. */
+ if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not save document to xmlBuffer");
+ }
+ else if (content_nodes != NULL)
+ {
+ /*
+ * Deal with the case where we have non-singly-rooted XML.
+ * libxml's dump functions don't work well for that without help.
+ * We build a fake root node that serves as a container for the
+ * content nodes, and then iterate over the nodes.
+ */
+ xmlNodePtr root;
+ xmlNodePtr newline;
+
+ root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+ if (root == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xml node");
+
+ /* This attaches root to doc, so we need not free it separately. */
+ xmlDocSetRootElement(doc, root);
+ xmlAddChild(root, content_nodes);
- /* It's actually binary compatible, save for the above check. */
- return (text *) data;
+ /*
+ * We use this node to insert newlines in the dump. Note: in at
+ * least some libxml versions, xmlNewDocText would not attach the
+ * node to the document even if we passed it. Therefore, manage
+ * freeing of this node manually, and pass NULL here to make sure
+ * there's not a dangling link.
+ */
+ newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
+ if (newline == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xml node");
+
+ for (xmlNodePtr node = root->children; node; node = node->next)
+ {
+ /* insert newlines between nodes */
+ if (node->type != XML_TEXT_NODE && node->prev != NULL)
+ {
+ if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+ {
+ xmlFreeNode(newline);
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not save newline to xmlBuffer");
+ }
+ }
+
+ if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
+ {
+ xmlFreeNode(newline);
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not save content to xmlBuffer");
+ }
+ }
+
+ xmlFreeNode(newline);
+ }
+
+ if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+ {
+ ctxt = NULL; /* don't try to close it again */
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not close xmlSaveCtxtPtr");
+ }
+
+ result = (text *) xmlBuffer_to_xmltype(buf);
+ }
+ PG_CATCH();
+ {
+ if (ctxt)
+ xmlSaveClose(ctxt);
+ if (buf)
+ xmlBufferFree(buf);
+ if (doc)
+ xmlFreeDoc(doc);
+
+ pg_xml_done(xmlerrcxt, true);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xmlBufferFree(buf);
+ xmlFreeDoc(doc);
+
+ pg_xml_done(xmlerrcxt, false);
+
+ return result;
+#else
+ NO_XML_SUPPORT();
+ return NULL;
+#endif
}
@@ -762,7 +932,7 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
xmlDocPtr doc;
doc = xml_parse(data, xmloption_arg, preserve_whitespace,
- GetDatabaseEncoding(), NULL);
+ GetDatabaseEncoding(), NULL, NULL, NULL);
xmlFreeDoc(doc);
return (xmltype *) data;
@@ -902,7 +1072,7 @@ xml_is_document(xmltype *arg)
* We'll report "true" if no soft error is reported by xml_parse().
*/
doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
- GetDatabaseEncoding(), (Node *) &escontext);
+ GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
if (doc)
xmlFreeDoc(doc);
@@ -1491,6 +1661,14 @@ xml_doctype_in_content(const xmlChar *str)
* and xmloption_arg and preserve_whitespace are options for the
* transformation.
*
+ * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
+ * XmlOptionType actually used to parse the input (typically the same as
+ * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
+ *
+ * If parsed_nodes isn't NULL, *parsed_nodes is first set to NULL; when the
+ * input is parsed as content rather than as a document, it then receives the
+ * node list produced by the xmlParseBalancedChunkMemory call.
+ *
* Errors normally result in ereport(ERROR), but if escontext is an
* ErrorSaveContext, then "safe" errors are reported there instead, and the
* caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
@@ -1503,8 +1681,10 @@ xml_doctype_in_content(const xmlChar *str)
* yet do not use SAX - see xmlreader.c)
*/
static xmlDocPtr
-xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
- int encoding, Node *escontext)
+xml_parse(text *data, XmlOptionType xmloption_arg,
+ bool preserve_whitespace, int encoding,
+ XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
+ Node *escontext)
{
int32 len;
xmlChar *string;
@@ -1574,6 +1754,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
parse_as_document = true;
}
+ /* initialize output parameters */
+ if (parsed_xmloptiontype != NULL)
+ *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
+ XMLOPTION_CONTENT;
+ if (parsed_nodes != NULL)
+ *parsed_nodes = NULL;
+
if (parse_as_document)
{
/*
@@ -1620,7 +1807,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
if (*(utf8string + count))
{
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
- utf8string + count, NULL);
+ utf8string + count,
+ parsed_nodes);
if (res_code != 0 || xmlerrcxt->err_occurred)
{
xml_errsave(escontext, xmlerrcxt,
@@ -4305,7 +4493,7 @@ wellformed_xml(text *data, XmlOptionType xmloption_arg)
* We'll report "true" if no soft error is reported by xml_parse().
*/
doc = xml_parse(data, xmloption_arg, true,
- GetDatabaseEncoding(), (Node *) &escontext);
+ GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
if (doc)
xmlFreeDoc(doc);