about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- doc/src/sgml/json.sgml 19
-rw-r--r-- doc/src/sgml/release-9.4.sgml 16
-rw-r--r-- src/backend/utils/adt/json.c 49
-rw-r--r-- src/test/regress/expected/json.out 58
-rw-r--r-- src/test/regress/expected/json_1.out 62
-rw-r--r-- src/test/regress/expected/jsonb.out 78
-rw-r--r-- src/test/regress/expected/jsonb_1.out 84
-rw-r--r-- src/test/regress/sql/json.sql 18
-rw-r--r-- src/test/regress/sql/jsonb.sql 26
9 files changed, 250 insertions, 160 deletions
diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml
index 8feb2fbf0ad..6282ab88539 100644
--- a/doc/src/sgml/json.sgml
+++ b/doc/src/sgml/json.sgml
@@ -69,12 +69,14 @@
regardless of the database encoding, and are checked only for syntactic
correctness (that is, that four hex digits follow <literal>\u</>).
However, the input function for <type>jsonb</> is stricter: it disallows
- Unicode escapes for non-ASCII characters (those
- above <literal>U+007F</>) unless the database encoding is UTF8. It also
- insists that any use of Unicode surrogate pairs to designate characters
- outside the Unicode Basic Multilingual Plane be correct. Valid Unicode
- escapes, except for <literal>\u0000</>, are then converted to the
- equivalent ASCII or UTF8 character for storage.
+ Unicode escapes for non-ASCII characters (those above <literal>U+007F</>)
+ unless the database encoding is UTF8. The <type>jsonb</> type also
+ rejects <literal>\u0000</> (because that cannot be represented in
+ <productname>PostgreSQL</productname>'s <type>text</> type), and it insists
+ that any use of Unicode surrogate pairs to designate characters outside
+ the Unicode Basic Multilingual Plane be correct. Valid Unicode escapes
+ are converted to the equivalent ASCII or UTF8 character for storage;
+ this includes folding surrogate pairs into a single character.
</para>
<note>
@@ -101,7 +103,7 @@
constitutes valid <type>jsonb</type> data that do not apply to
the <type>json</type> type, nor to JSON in the abstract, corresponding
to limits on what can be represented by the underlying data type.
- Specifically, <type>jsonb</> will reject numbers that are outside the
+ Notably, <type>jsonb</> will reject numbers that are outside the
range of the <productname>PostgreSQL</productname> <type>numeric</> data
type, while <type>json</> will not. Such implementation-defined
restrictions are permitted by <acronym>RFC</> 7159. However, in
@@ -134,7 +136,8 @@
<row>
<entry><type>string</></entry>
<entry><type>text</></entry>
- <entry>See notes above concerning encoding restrictions</entry>
+ <entry><literal>\u0000</> is disallowed, as are non-ASCII Unicode
+ escapes if the database encoding is not UTF8</entry>
</row>
<row>
<entry><type>number</></entry>
diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml
index 961e4617978..11bbf3bf36c 100644
--- a/doc/src/sgml/release-9.4.sgml
+++ b/doc/src/sgml/release-9.4.sgml
@@ -103,22 +103,6 @@
<listitem>
<para>
- Unicode escapes in <link linkend="datatype-json"><type>JSON</type></link>
- text values are no longer rendered with the backslash escaped
- (Andrew Dunstan)
- </para>
-
- <para>
- Previously, all backslashes in text values being formed into JSON
- were escaped. Now a backslash followed by <literal>u</> and four
- hexadecimal digits is not escaped, as this is a legal sequence in a
- JSON string value, and escaping the backslash led to some perverse
- results.
- </para>
- </listitem>
-
- <listitem>
- <para>
When converting values of type <type>date</>, <type>timestamp</>
or <type>timestamptz</>
to <link linkend="datatype-json"><type>JSON</type></link>, render the
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 3c137ead1d0..951b6554007 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -806,14 +806,17 @@ json_lex_string(JsonLexContext *lex)
* For UTF8, replace the escape sequence by the actual
* utf8 character in lex->strval. Do this also for other
* encodings if the escape designates an ASCII character,
- * otherwise raise an error. We don't ever unescape a
- * \u0000, since that would result in an impermissible nul
- * byte.
+ * otherwise raise an error.
*/
if (ch == 0)
{
- appendStringInfoString(lex->strval, "\\u0000");
+ /* We can't allow this, since our TEXT type doesn't */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+ errmsg("unsupported Unicode escape sequence"),
+ errdetail("\\u0000 cannot be converted to text."),
+ report_json_context(lex)));
}
else if (GetDatabaseEncoding() == PG_UTF8)
{
@@ -833,8 +836,8 @@ json_lex_string(JsonLexContext *lex)
else
{
ereport(ERROR,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type json"),
+ (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+ errmsg("unsupported Unicode escape sequence"),
errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
report_json_context(lex)));
}
@@ -1284,8 +1287,8 @@ json_categorize_type(Oid typoid,
/*
* We need to get the output function for everything except date and
- * timestamp types, array and composite types, booleans,
- * and non-builtin types where there's a cast to json.
+ * timestamp types, array and composite types, booleans, and non-builtin
+ * types where there's a cast to json.
*/
switch (typoid)
@@ -1335,11 +1338,12 @@ json_categorize_type(Oid typoid,
/* but let's look for a cast to json, if it's not built-in */
if (typoid >= FirstNormalObjectId)
{
- Oid castfunc;
+ Oid castfunc;
CoercionPathType ctype;
ctype = find_coercion_pathway(JSONOID, typoid,
- COERCION_EXPLICIT, &castfunc);
+ COERCION_EXPLICIT,
+ &castfunc);
if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc))
{
*tcategory = JSONTYPE_CAST;
@@ -2382,30 +2386,7 @@ escape_json(StringInfo buf, const char *str)
appendStringInfoString(buf, "\\\"");
break;
case '\\':
-
- /*
- * Unicode escapes are passed through as is. There is no
- * requirement that they denote a valid character in the
- * server encoding - indeed that is a big part of their
- * usefulness.
- *
- * All we require is that they consist of \uXXXX where the Xs
- * are hexadecimal digits. It is the responsibility of the
- * caller of, say, to_json() to make sure that the unicode
- * escape is valid.
- *
- * In the case of a jsonb string value being escaped, the only
- * unicode escape that should be present is \u0000, all the
- * other unicode escapes will have been resolved.
- */
- if (p[1] == 'u' &&
- isxdigit((unsigned char) p[2]) &&
- isxdigit((unsigned char) p[3]) &&
- isxdigit((unsigned char) p[4]) &&
- isxdigit((unsigned char) p[5]))
- appendStringInfoCharMacro(buf, *p);
- else
- appendStringInfoString(buf, "\\\\");
+ appendStringInfoString(buf, "\\\\");
break;
default:
if ((unsigned char) *p < ' ')
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index e435d3e1650..16704363dc6 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json
-----------
- "\\abcd"
-(1 row)
-
--json_agg
SELECT json_agg(q)
FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1400,6 +1386,36 @@ ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
--handling of simple unicode escapes
+select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ correct_in_utf8
+---------------------------------------
+ { "a": "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+ correct_everywhere
+-------------------------------------
+ { "a": "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+ not_an_escape
+--------------------------------------
+ { "a": "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a": "null \u0000 escape" }' as not_unescaped;
+ not_unescaped
+--------------------------------
+ { "a": "null \u0000 escape" }
+(1 row)
+
+select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
+ not_an_escape
+---------------------------------
+ { "a": "null \\u0000 escape" }
+(1 row)
+
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
correct_in_utf8
----------------------
@@ -1412,8 +1428,18 @@ select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
dollar $ character
(1 row)
-select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped;
- not_unescaped
+select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+ not_an_escape
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+ERROR: unsupported Unicode escape sequence
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+ not_an_escape
--------------------
null \u0000 escape
(1 row)
diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out
index 106b481fab9..807814641dd 100644
--- a/src/test/regress/expected/json_1.out
+++ b/src/test/regress/expected/json_1.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json
-----------
- "\\abcd"
-(1 row)
-
--json_agg
SELECT json_agg(q)
FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1378,7 +1364,7 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3
-- handling of unicode surrogate pairs
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
-ERROR: invalid input syntax for type json
+ERROR: unsupported Unicode escape sequence
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
@@ -1398,8 +1384,38 @@ ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
--handling of simple unicode escapes
+select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ correct_in_utf8
+---------------------------------------
+ { "a": "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+ correct_everywhere
+-------------------------------------
+ { "a": "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+ not_an_escape
+--------------------------------------
+ { "a": "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a": "null \u0000 escape" }' as not_unescaped;
+ not_unescaped
+--------------------------------
+ { "a": "null \u0000 escape" }
+(1 row)
+
+select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
+ not_an_escape
+---------------------------------
+ { "a": "null \\u0000 escape" }
+(1 row)
+
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
-ERROR: invalid input syntax for type json
+ERROR: unsupported Unicode escape sequence
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
@@ -1408,8 +1424,18 @@ select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
dollar $ character
(1 row)
-select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped;
- not_unescaped
+select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+ not_an_escape
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+ERROR: unsupported Unicode escape sequence
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+ not_an_escape
--------------------
null \u0000 escape
(1 row)
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index aa5686ffb69..6c6ed950f08 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -60,12 +60,18 @@ LINE 1: SELECT '"\u000g"'::jsonb;
^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb; -- OK, legal escape
- jsonb
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb; -- OK, legal escape
+ jsonb
+-------
+ "E"
(1 row)
+SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: ...
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
-select to_jsonb(text '\uabcd');
- to_jsonb
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_jsonb(text '\abcd');
- to_jsonb
-----------
- "\\abcd"
-(1 row)
-
--jsonb_agg
CREATE TEMP TABLE rows AS
SELECT x, 'txt' || x as y
@@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
-- handling of simple unicode escapes
-SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ correct_in_utf8
+-------------------------------
+ {"a": "the Copyright © sign"}
+(1 row)
+
+SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+ correct_everywhere
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+ not_an_escape
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
+ not_an_escape
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
correct_in_utf8
----------------------
the Copyright © sign
(1 row)
-SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
dollar $ character
(1 row)
-SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped;
- not_unescaped
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+ not_an_escape
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+ not_an_escape
--------------------
null \u0000 escape
(1 row)
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out
index 687ae63b707..f30148d51c1 100644
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -60,16 +60,22 @@ LINE 1: SELECT '"\u000g"'::jsonb;
^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb; -- OK, legal escape
- jsonb
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb; -- OK, legal escape
+ jsonb
+-------
+ "E"
(1 row)
+SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: ...
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
-ERROR: invalid input syntax for type json
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
-select to_jsonb(text '\uabcd');
- to_jsonb
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_jsonb(text '\abcd');
- to_jsonb
-----------
- "\\abcd"
-(1 row)
-
--jsonb_agg
CREATE TEMP TABLE rows AS
SELECT x, 'txt' || x as y
@@ -1941,7 +1933,7 @@ SELECT * FROM jsonb_populate_recordset(row('def',99,NULL)::jbpop,'[{"a":[100,200
-- handling of unicode surrogate pairs
SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
-ERROR: invalid input syntax for type json
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3...
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
-- handling of simple unicode escapes
-SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-ERROR: invalid input syntax for type json
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr...
+ ^
+DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+ correct_everywhere
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+ not_an_escape
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
+ not_an_escape
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'...
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":...
-SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
dollar $ character
(1 row)
-SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped;
- not_unescaped
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+ not_an_escape
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+ not_an_escape
--------------------
null \u0000 escape
(1 row)
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 36a6674ff91..53a37a88439 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -111,14 +111,6 @@ SET LOCAL TIME ZONE -8;
select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
COMMIT;
--- unicode escape - backslash is not escaped
-
-select to_json(text '\uabcd');
-
--- any other backslash is escaped
-
-select to_json(text '\abcd');
-
--json_agg
SELECT json_agg(q)
@@ -401,9 +393,17 @@ select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
--handling of simple unicode escapes
+select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+select json '{ "a": "null \u0000 escape" }' as not_unescaped;
+select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
+
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
-select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped;
+select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
--json_typeof() function
select value, json_typeof(value)
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index a8461039330..53cc2393c62 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -10,7 +10,8 @@ SELECT '"\v"'::jsonb; -- ERROR, not a valid JSON escape
SELECT '"\u"'::jsonb; -- ERROR, incomplete escape
SELECT '"\u00"'::jsonb; -- ERROR, incomplete escape
SELECT '"\u000g"'::jsonb; -- ERROR, g is not a hex digit
-SELECT '"\u0000"'::jsonb; -- OK, legal escape
+SELECT '"\u0045"'::jsonb; -- OK, legal escape
+SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -73,14 +74,6 @@ SET LOCAL TIME ZONE -8;
select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
COMMIT;
--- unicode escape - backslash is not escaped
-
-select to_jsonb(text '\uabcd');
-
--- any other backslash is escaped
-
-select to_jsonb(text '\abcd');
-
--jsonb_agg
CREATE TEMP TABLE rows AS
@@ -488,9 +481,18 @@ SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
-- handling of simple unicode escapes
-SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
-SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped;
+
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
+
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
-- jsonb_to_record and jsonb_to_recordset