aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/xml.c
diff options
context:
space:
mode:
author Noah Misch <noah@leadboat.com> 2017-11-11 11:10:53 -0800
committer Noah Misch <noah@leadboat.com> 2017-11-11 11:11:15 -0800
commit 30a5e940ac9a96c5fee449223528a1b5555502ed (patch)
tree 1f2eb22ef3787972c8d207d2db09ffcb0b7b830d /src/backend/utils/adt/xml.c
parent f9e2885d51bf585bec2d5ffeda4f9e5e1a723f4d (diff)
download postgresql-30a5e940ac9a96c5fee449223528a1b5555502ed.tar.gz
postgresql-30a5e940ac9a96c5fee449223528a1b5555502ed.zip
Ignore XML declaration in xpath_internal(), for UTF8 databases.
When a value contained an XML declaration naming some other encoding, this function interpreted UTF8 bytes as the named encoding, yielding mojibake. xml_parse() already has similar logic. This would be necessary but not sufficient for non-UTF8 databases, so preserve behavior there until the xpath facility can support such databases comprehensively. Back-patch to 9.3 (all supported versions).

Pavel Stehule and Noah Misch

Discussion: https://postgr.es/m/CAFj8pRC-dM=tT=QkGi+Achkm+gwPmjyOayGuUfXVumCxkDgYWg@mail.gmail.com
Diffstat (limited to 'src/backend/utils/adt/xml.c')
-rw-r--r-- src/backend/utils/adt/xml.c 14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 323614c183d..233dd63e89b 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -3842,6 +3842,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
int32 xpath_len;
xmlChar *string;
xmlChar *xpath_expr;
+ size_t xmldecl_len = 0;
int i;
int ndim;
Datum *ns_names_uris;
@@ -3897,6 +3898,16 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
string = pg_xmlCharStrndup(datastr, len);
xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
+ /*
+ * In a UTF8 database, skip any xml declaration, which might assert
+ * another encoding. Ignore parse_xml_decl() failure, letting
+ * xmlCtxtReadMemory() report parse errors. Documentation disclaims
+ * xpath() support for non-ASCII data in non-UTF8 databases, so leave
+ * those scenarios bug-compatible with historical behavior.
+ */
+ if (GetDatabaseEncoding() == PG_UTF8)
+ parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
+
xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
PG_TRY();
@@ -3911,7 +3922,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
if (ctxt == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
- doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+ doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
+ len - xmldecl_len, NULL, NULL, 0);
if (doc == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
"could not parse XML document");