aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/xml.c
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2024-07-10 20:15:52 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2024-07-10 20:15:52 -0400
commit f85c91a1867b45742bb28e4578ca2b4a0976383f (patch)
tree 6ad6b2a69f2e42442e0fbb107c305d04ded9f77c /src/backend/utils/adt/xml.c
parent 0d483ad4cc4c4d4ebdbe456a5565f11fa137bd24 (diff)
downloadpostgresql-f85c91a1867b45742bb28e4578ca2b4a0976383f.tar.gz
postgresql-f85c91a1867b45742bb28e4578ca2b4a0976383f.zip
Make our back branches compatible with libxml2 2.13.x.
This back-patches HEAD commits 066e8ac6e, 6082b3d5d, e7192486d, and 896cd266f into supported branches. Changes: * Use xmlAddChildList not xmlAddChild in XMLSERIALIZE (affects v16 and up only). This was a flat-out coding mistake that we got away with due to lax checking in previous versions of xmlAddChild. * Use xmlParseInNodeContext not xmlParseBalancedChunkMemory. This is to dodge a bug in xmlParseBalancedChunkMemory in libxml2 releases 2.13.0-2.13.2. While that bug is now fixed upstream and will probably never be seen in any production-oriented distro, it is currently a problem on some more-bleeding-edge-friendly platforms. * Suppress "chunk is not well balanced" errors from libxml2, unless it is the only error. This eliminates an error-reporting discrepancy between 2.13 and older releases. This error is almost always redundant with previous errors, if not flat-out inappropriate, which is why 2.13 changed the behavior and why nobody's likely to miss it. Erik Wienhold and Tom Lane, per report from Frank Streitzig. Discussion: https://postgr.es/m/trinity-b0161630-d230-4598-9ebc-7a23acdb37cb-1720186432160@3c-app-gmx-bap25 Discussion: https://postgr.es/m/trinity-361ba18b-541a-4fe7-bc63-655ae3a7d599-1720259822452@3c-app-gmx-bs01
Diffstat (limited to 'src/backend/utils/adt/xml.c')
-rw-r--r-- src/backend/utils/adt/xml.c 90
1 files changed, 66 insertions, 24 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 9f4e7750030..0255349aa43 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -736,7 +736,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
/* This attaches root to doc, so we need not free it separately. */
xmlDocSetRootElement(doc, root);
- xmlAddChild(root, content_nodes);
+ xmlAddChildList(root, content_nodes);
/*
* We use this node to insert newlines in the dump. Note: in at
@@ -1675,9 +1675,9 @@ xml_doctype_in_content(const xmlChar *str)
* XmlOptionType actually used to parse the input (typically the same as
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
*
- * If parsed_nodes isn't NULL and the input is not an XML document, the list
- * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
- * to *parsed_nodes.
+ * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
+ * of parsed nodes from the xmlParseInNodeContext call will be returned
+ * to *parsed_nodes. (It is caller's responsibility to free that.)
*
* Errors normally result in ereport(ERROR), but if escontext is an
* ErrorSaveContext, then "safe" errors are reported there instead, and the
@@ -1729,6 +1729,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
PG_TRY();
{
bool parse_as_document = false;
+ int options;
int res_code;
size_t count = 0;
xmlChar *version = NULL;
@@ -1737,11 +1738,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
/* Any errors here are reported as hard ereport's */
xmlInitParser();
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL || xmlerrcxt->err_occurred)
- xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
- "could not allocate parser context");
-
/* Decide whether to parse as document or content */
if (xmloption_arg == XMLOPTION_DOCUMENT)
parse_as_document = true;
@@ -1764,6 +1760,18 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
parse_as_document = true;
}
+ /*
+ * Select parse options.
+ *
+ * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
+ * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
+ * internal DTD are applied'. As for external DTDs, we try to support
+ * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
+ * happen because xmlPgEntityLoader prevents it.
+ */
+ options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
+ | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
+
/* initialize output parameters */
if (parsed_xmloptiontype != NULL)
*parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
@@ -1773,18 +1781,16 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
if (parse_as_document)
{
- /*
- * Note, that here we try to apply DTD defaults
- * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
- * 'Default values defined by internal DTD are applied'. As for
- * external DTDs, we try to support them too, (see SQL/XML:2008 GR
- * 10.16.7.e)
- */
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate parser context");
+
doc = xmlCtxtReadDoc(ctxt, utf8string,
- NULL,
+ NULL, /* no URL */
"UTF-8",
- XML_PARSE_NOENT | XML_PARSE_DTDATTR
- | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
+ options);
+
if (doc == NULL || xmlerrcxt->err_occurred)
{
/* Use original option to decide which error code to report */
@@ -1801,6 +1807,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
}
else
{
+ xmlNodePtr root;
+
+ /* set up document with empty root node to be the context node */
doc = xmlNewDoc(version);
if (doc == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
@@ -1813,19 +1822,38 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
"could not allocate XML document");
doc->standalone = standalone;
+ root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+ if (root == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xml node");
+ /* This attaches root to doc, so we need not free it separately. */
+ xmlDocSetRootElement(doc, root);
+
/* allow empty content */
if (*(utf8string + count))
{
- res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
- utf8string + count,
- parsed_nodes);
- if (res_code != 0 || xmlerrcxt->err_occurred)
+ xmlNodePtr node_list = NULL;
+ xmlParserErrors res;
+
+ res = xmlParseInNodeContext(root,
+ (char *) utf8string + count,
+ strlen((char *) utf8string + count),
+ options,
+ &node_list);
+
+ if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
{
+ xmlFreeNodeList(node_list);
xml_errsave(escontext, xmlerrcxt,
ERRCODE_INVALID_XML_CONTENT,
"invalid XML content");
goto fail;
}
+
+ if (parsed_nodes != NULL)
+ *parsed_nodes = node_list;
+ else
+ xmlFreeNodeList(node_list);
}
}
@@ -1845,7 +1873,8 @@ fail:
}
PG_END_TRY();
- xmlFreeParserCtxt(ctxt);
+ if (ctxt != NULL)
+ xmlFreeParserCtxt(ctxt);
pg_xml_done(xmlerrcxt, false);
@@ -2064,6 +2093,19 @@ xml_errorHandler(void *data, PgXmlErrorPtr error)
switch (domain)
{
case XML_FROM_PARSER:
+
+ /*
+ * XML_ERR_NOT_WELL_BALANCED is typically reported after some
+ * other, more on-point error. Furthermore, libxml2 2.13 reports
+ * it under a completely different set of rules than prior
+ * versions. To avoid cross-version behavioral differences,
+ * suppress it so long as we already logged some error.
+ */
+ if (error->code == XML_ERR_NOT_WELL_BALANCED &&
+ xmlerrcxt->err_occurred)
+ return;
+ /* fall through */
+
case XML_FROM_NONE:
case XML_FROM_MEMORY:
case XML_FROM_IO: