aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2024-09-10 16:20:31 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2024-09-10 16:20:31 -0400
commit: 06c285018a81ce4364e370d276be796a632115f8 (patch)
tree: 4ba84c86537d511921803fda749043e59034c2c0
parent: edb0f6e41b09d261326c5340acad3a2cea59d718 (diff)
download: postgresql-06c285018a81ce4364e370d276be796a632115f8.tar.gz
          postgresql-06c285018a81ce4364e370d276be796a632115f8.zip
Fix some whitespace issues in XMLSERIALIZE(... INDENT).
We must drop whitespace while parsing the input, else libxml2 will include "blank" nodes that interfere with the desired indentation behavior. The end result is that we didn't indent nodes separated by whitespace.

Also, it seems that libxml2 may add a trailing newline when working in DOCUMENT mode. This is semantically insignificant, so strip it.

This is in the gray area between being a bug fix and a definition change. However, the INDENT option is still pretty new (since v16), so I think we can get away with changing this in stable branches. Hence, back-patch to v16.

Jim Jones

Discussion: https://postgr.es/m/872865a8-548b-48e1-bfcd-4e38e672c1e4@uni-muenster.de
-rw-r--r-- src/backend/utils/adt/xml.c 27
-rw-r--r-- src/test/regress/expected/xml.out 33
-rw-r--r-- src/test/regress/expected/xml_1.out 11
-rw-r--r-- src/test/regress/expected/xml_2.out 33
-rw-r--r-- src/test/regress/sql/xml.sql 3
5 files changed, 84 insertions, 23 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 0255349aa43..68bbf86cc46 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -656,8 +656,14 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
}
#ifdef USE_LIBXML
- /* Parse the input according to the xmloption */
- doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+
+ /*
+ * Parse the input according to the xmloption.
+ *
+ * preserve_whitespace is set to false in case we are indenting, otherwise
+ * libxml2 will fail to indent elements that have whitespace between them.
+ */
+ doc = xml_parse(data, xmloption_arg, !indent, GetDatabaseEncoding(),
&parsed_xmloptiontype, &content_nodes,
(Node *) &escontext);
if (doc == NULL || escontext.error_occurred)
@@ -781,7 +787,22 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
"could not close xmlSaveCtxtPtr");
}
- result = (text *) xmlBuffer_to_xmltype(buf);
+ /*
+ * xmlDocContentDumpOutput may add a trailing newline, so remove that.
+ */
+ if (xmloption_arg == XMLOPTION_DOCUMENT)
+ {
+ const char *str = (const char *) xmlBufferContent(buf);
+ int len = xmlBufferLength(buf);
+
+ while (len > 0 && (str[len - 1] == '\n' ||
+ str[len - 1] == '\r'))
+ len--;
+
+ result = cstring_to_text_with_len(str, len);
+ }
+ else
+ result = (text *) xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 15d99185794..894ee6bd2b7 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -485,8 +485,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
<bar> +
<val x="y">42</val>+
</bar> +
- </foo> +
-
+ </foo>
(1 row)
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text INDENT);
@@ -546,8 +545,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val><val x="y">text node<
<val x="y">42</val> +
<val x="y">text node<val>73</val></val>+
</bar> +
- </foo> +
-
+ </foo>
(1 row)
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val><val x="y">text node<val>73</val></val></bar></foo>' AS text INDENT);
@@ -601,8 +599,7 @@ SELECT xmlserialize(DOCUMENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><v
<bar> +
<val>73</val> +
</bar> +
- </foo> +
-
+ </foo>
(1 row)
SELECT xmlserialize(CONTENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><val>73</val></bar></foo>' AS text INDENT);
@@ -620,8 +617,7 @@ SELECT xmlserialize(DOCUMENT '<!DOCTYPE a><a/>' AS text INDENT);
xmlserialize
--------------
<!DOCTYPE a>+
- <a/> +
-
+ <a/>
(1 row)
SELECT xmlserialize(CONTENT '<!DOCTYPE a><a/>' AS text INDENT);
@@ -638,8 +634,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar></bar></foo>' AS text INDENT);
--------------
<foo> +
<bar/> +
- </foo> +
-
+ </foo>
(1 row)
SELECT xmlserialize(CONTENT '<foo><bar></bar></foo>' AS text INDENT);
@@ -663,6 +658,24 @@ SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
t
(1 row)
+-- indent xml strings containing blank nodes
+SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT);
+ xmlserialize
+--------------
+ <foo> +
+ <bar/> +
+ </foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT);
+ xmlserialize
+--------------
+ text node +
+ <foo> +
+ <bar/> +
+ </foo>
+(1 row)
+
SELECT xml '<foo>bar</foo>' IS DOCUMENT;
?column?
----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 63b779470ff..7e9611f1d38 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,17 @@ ERROR: unsupported XML feature
LINE 1: SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val><...
^
DETAIL: This functionality requires the server to be built with libxml support.
+-- indent xml strings containing blank nodes
+SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT);
+ERROR: unsupported XML feature
+LINE 1: SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>'...
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT);
+ERROR: unsupported XML feature
+LINE 1: SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> ...
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
SELECT xml '<foo>bar</foo>' IS DOCUMENT;
ERROR: unsupported XML feature
LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 8894f7b4a84..7d5c961e240 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -471,8 +471,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
<bar> +
<val x="y">42</val>+
</bar> +
- </foo> +
-
+ </foo>
(1 row)
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text INDENT);
@@ -532,8 +531,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val><val x="y">text node<
<val x="y">42</val> +
<val x="y">text node<val>73</val></val>+
</bar> +
- </foo> +
-
+ </foo>
(1 row)
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val><val x="y">text node<val>73</val></val></bar></foo>' AS text INDENT);
@@ -587,8 +585,7 @@ SELECT xmlserialize(DOCUMENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><v
<bar> +
<val>73</val> +
</bar> +
- </foo> +
-
+ </foo>
(1 row)
SELECT xmlserialize(CONTENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><val>73</val></bar></foo>' AS text INDENT);
@@ -606,8 +603,7 @@ SELECT xmlserialize(DOCUMENT '<!DOCTYPE a><a/>' AS text INDENT);
xmlserialize
--------------
<!DOCTYPE a>+
- <a/> +
-
+ <a/>
(1 row)
SELECT xmlserialize(CONTENT '<!DOCTYPE a><a/>' AS text INDENT);
@@ -624,8 +620,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar></bar></foo>' AS text INDENT);
--------------
<foo> +
<bar/> +
- </foo> +
-
+ </foo>
(1 row)
SELECT xmlserialize(CONTENT '<foo><bar></bar></foo>' AS text INDENT);
@@ -649,6 +644,24 @@ SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
t
(1 row)
+-- indent xml strings containing blank nodes
+SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT);
+ xmlserialize
+--------------
+ <foo> +
+ <bar/> +
+ </foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT);
+ xmlserialize
+--------------
+ text node +
+ <foo> +
+ <bar/> +
+ </foo>
+(1 row)
+
SELECT xml '<foo>bar</foo>' IS DOCUMENT;
?column?
----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index a591eea2e5d..0b07075414e 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,9 @@ SELECT xmlserialize(CONTENT '<foo><bar></bar></foo>' AS text INDENT);
-- 'no indent' = not using 'no indent'
SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
SELECT xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- indent xml strings containing blank nodes
+SELECT xmlserialize(DOCUMENT '<foo> <bar></bar> </foo>' AS text INDENT);
+SELECT xmlserialize(CONTENT 'text node<foo> <bar></bar> </foo>' AS text INDENT);
SELECT xml '<foo>bar</foo>' IS DOCUMENT;
SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;