diff options
-rw-r--r-- | doc/src/sgml/json.sgml | 19 | ||||
-rw-r--r-- | doc/src/sgml/release-9.4.sgml | 16 | ||||
-rw-r--r-- | src/backend/utils/adt/json.c | 49 | ||||
-rw-r--r-- | src/test/regress/expected/json.out | 58 | ||||
-rw-r--r-- | src/test/regress/expected/json_1.out | 62 | ||||
-rw-r--r-- | src/test/regress/expected/jsonb.out | 78 | ||||
-rw-r--r-- | src/test/regress/expected/jsonb_1.out | 84 | ||||
-rw-r--r-- | src/test/regress/sql/json.sql | 18 | ||||
-rw-r--r-- | src/test/regress/sql/jsonb.sql | 26 |
9 files changed, 250 insertions, 160 deletions
diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml index 8feb2fbf0ad..6282ab88539 100644 --- a/doc/src/sgml/json.sgml +++ b/doc/src/sgml/json.sgml @@ -69,12 +69,14 @@ regardless of the database encoding, and are checked only for syntactic correctness (that is, that four hex digits follow <literal>\u</>). However, the input function for <type>jsonb</> is stricter: it disallows - Unicode escapes for non-ASCII characters (those - above <literal>U+007F</>) unless the database encoding is UTF8. It also - insists that any use of Unicode surrogate pairs to designate characters - outside the Unicode Basic Multilingual Plane be correct. Valid Unicode - escapes, except for <literal>\u0000</>, are then converted to the - equivalent ASCII or UTF8 character for storage. + Unicode escapes for non-ASCII characters (those above <literal>U+007F</>) + unless the database encoding is UTF8. The <type>jsonb</> type also + rejects <literal>\u0000</> (because that cannot be represented in + <productname>PostgreSQL</productname>'s <type>text</> type), and it insists + that any use of Unicode surrogate pairs to designate characters outside + the Unicode Basic Multilingual Plane be correct. Valid Unicode escapes + are converted to the equivalent ASCII or UTF8 character for storage; + this includes folding surrogate pairs into a single character. </para> <note> @@ -101,7 +103,7 @@ constitutes valid <type>jsonb</type> data that do not apply to the <type>json</type> type, nor to JSON in the abstract, corresponding to limits on what can be represented by the underlying data type. - Specifically, <type>jsonb</> will reject numbers that are outside the + Notably, <type>jsonb</> will reject numbers that are outside the range of the <productname>PostgreSQL</productname> <type>numeric</> data type, while <type>json</> will not. Such implementation-defined restrictions are permitted by <acronym>RFC</> 7159. However, in @@ -134,7 +136,8 @@ <row> <entry><type>string</></entry> <entry><type>text</></entry> - <entry>See notes above concerning encoding restrictions</entry> + <entry><literal>\u0000</> is disallowed, as are non-ASCII Unicode + escapes if database encoding is not UTF8</entry> </row> <row> <entry><type>number</></entry> diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml index 961e4617978..11bbf3bf36c 100644 --- a/doc/src/sgml/release-9.4.sgml +++ b/doc/src/sgml/release-9.4.sgml @@ -103,22 +103,6 @@ <listitem> <para> - Unicode escapes in <link linkend="datatype-json"><type>JSON</type></link> - text values are no longer rendered with the backslash escaped - (Andrew Dunstan) - </para> - - <para> - Previously, all backslashes in text values being formed into JSON - were escaped. Now a backslash followed by <literal>u</> and four - hexadecimal digits is not escaped, as this is a legal sequence in a - JSON string value, and escaping the backslash led to some perverse - results. - </para> - </listitem> - - <listitem> - <para> When converting values of type <type>date</>, <type>timestamp</> or <type>timestamptz</> to <link linkend="datatype-json"><type>JSON</type></link>, render the diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 3c137ead1d0..951b6554007 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -806,14 +806,17 @@ json_lex_string(JsonLexContext *lex) * For UTF8, replace the escape sequence by the actual * utf8 character in lex->strval. Do this also for other * encodings if the escape designates an ASCII character, - * otherwise raise an error. We don't ever unescape a - * \u0000, since that would result in an impermissible nul - * byte. + * otherwise raise an error. */ if (ch == 0) { - appendStringInfoString(lex->strval, "\\u0000"); + /* We can't allow this, since our TEXT type doesn't */ + ereport(ERROR, + (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), + errmsg("unsupported Unicode escape sequence"), + errdetail("\\u0000 cannot be converted to text."), + report_json_context(lex))); } else if (GetDatabaseEncoding() == PG_UTF8) { @@ -833,8 +836,8 @@ json_lex_string(JsonLexContext *lex) else { ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type json"), + (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), + errmsg("unsupported Unicode escape sequence"), errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."), report_json_context(lex))); } @@ -1284,8 +1287,8 @@ json_categorize_type(Oid typoid, /* * We need to get the output function for everything except date and - * timestamp types, array and composite types, booleans, - * and non-builtin types where there's a cast to json. + * timestamp types, array and composite types, booleans, and non-builtin + * types where there's a cast to json. */ switch (typoid) @@ -1335,11 +1338,12 @@ json_categorize_type(Oid typoid, /* but let's look for a cast to json, if it's not built-in */ if (typoid >= FirstNormalObjectId) { - Oid castfunc; + Oid castfunc; CoercionPathType ctype; ctype = find_coercion_pathway(JSONOID, typoid, - COERCION_EXPLICIT, &castfunc); + COERCION_EXPLICIT, + &castfunc); if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc)) { *tcategory = JSONTYPE_CAST; @@ -2382,30 +2386,7 @@ escape_json(StringInfo buf, const char *str) appendStringInfoString(buf, "\\\""); break; case '\\': - - /* - * Unicode escapes are passed through as is. There is no - * requirement that they denote a valid character in the - * server encoding - indeed that is a big part of their - * usefulness. - * - * All we require is that they consist of \uXXXX where the Xs - * are hexadecimal digits. It is the responsibility of the - * caller of, say, to_json() to make sure that the unicode - * escape is valid. - * - * In the case of a jsonb string value being escaped, the only - * unicode escape that should be present is \u0000, all the - * other unicode escapes will have been resolved. - */ - if (p[1] == 'u' && - isxdigit((unsigned char) p[2]) && - isxdigit((unsigned char) p[3]) && - isxdigit((unsigned char) p[4]) && - isxdigit((unsigned char) p[5])) - appendStringInfoCharMacro(buf, *p); - else - appendStringInfoString(buf, "\\\\"); + appendStringInfoString(buf, "\\\\"); break; default: if ((unsigned char) *p < ' ') diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index e435d3e1650..16704363dc6 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04'); (1 row) COMMIT; --- unicode escape - backslash is not escaped -select to_json(text '\uabcd'); - to_json ----------- - "\uabcd" -(1 row) - --- any other backslash is escaped -select to_json(text '\abcd'); - to_json ----------- - "\\abcd" -(1 row) - --json_agg SELECT json_agg(q) FROM ( SELECT $$a$$ || x AS b, y AS c, @@ -1400,6 +1386,36 @@ ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. CONTEXT: JSON data, line 1: { "a":... --handling of simple unicode escapes +select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; + correct_in_utf8 +--------------------------------------- + { "a": "the Copyright \u00a9 sign" } +(1 row) + +select json '{ "a": "dollar \u0024 character" }' as correct_everywhere; + correct_everywhere +------------------------------------- + { "a": "dollar \u0024 character" } +(1 row) + +select json '{ "a": "dollar \\u0024 character" }' as not_an_escape; + not_an_escape +-------------------------------------- + { "a": "dollar \\u0024 character" } +(1 row) + +select json '{ "a": "null \u0000 escape" }' as not_unescaped; + not_unescaped +-------------------------------- + { "a": "null \u0000 escape" } +(1 row) + +select json '{ "a": "null \\u0000 escape" }' as not_an_escape; + not_an_escape +--------------------------------- + { "a": "null \\u0000 escape" } +(1 row) + select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; correct_in_utf8 ---------------------- @@ -1412,8 +1428,18 @@ select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; dollar $ character (1 row) -select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped; - not_unescaped +select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; + not_an_escape +------------------------- + dollar \u0024 character +(1 row) + +select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +ERROR: unsupported Unicode escape sequence +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... +select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; + not_an_escape -------------------- null \u0000 escape (1 row) diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out index 106b481fab9..807814641dd 100644 --- a/src/test/regress/expected/json_1.out +++ b/src/test/regress/expected/json_1.out @@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04'); (1 row) COMMIT; --- unicode escape - backslash is not escaped -select to_json(text '\uabcd'); - to_json ----------- - "\uabcd" -(1 row) - --- any other backslash is escaped -select to_json(text '\abcd'); - to_json ----------- - "\\abcd" -(1 row) - --json_agg SELECT json_agg(q) FROM ( SELECT $$a$$ || x AS b, y AS c, @@ -1378,7 +1364,7 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3 -- handling of unicode surrogate pairs select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8; -ERROR: invalid input syntax for type json +ERROR: unsupported Unicode escape sequence DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. CONTEXT: JSON data, line 1: { "a":... select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row @@ -1398,8 +1384,38 @@ ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. CONTEXT: JSON data, line 1: { "a":... --handling of simple unicode escapes +select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; + correct_in_utf8 +--------------------------------------- + { "a": "the Copyright \u00a9 sign" } +(1 row) + +select json '{ "a": "dollar \u0024 character" }' as correct_everywhere; + correct_everywhere +------------------------------------- + { "a": "dollar \u0024 character" } +(1 row) + +select json '{ "a": "dollar \\u0024 character" }' as not_an_escape; + not_an_escape +-------------------------------------- + { "a": "dollar \\u0024 character" } +(1 row) + +select json '{ "a": "null \u0000 escape" }' as not_unescaped; + not_unescaped +-------------------------------- + { "a": "null \u0000 escape" } +(1 row) + +select json '{ "a": "null \\u0000 escape" }' as not_an_escape; + not_an_escape +--------------------------------- + { "a": "null \\u0000 escape" } +(1 row) + select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; -ERROR: invalid input syntax for type json +ERROR: unsupported Unicode escape sequence DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. CONTEXT: JSON data, line 1: { "a":... select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; @@ -1408,8 +1424,18 @@ select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; dollar $ character (1 row) -select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped; - not_unescaped +select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; + not_an_escape +------------------------- + dollar \u0024 character +(1 row) + +select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +ERROR: unsupported Unicode escape sequence +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... +select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; + not_an_escape -------------------- null \u0000 escape (1 row) diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index aa5686ffb69..6c6ed950f08 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -60,12 +60,18 @@ LINE 1: SELECT '"\u000g"'::jsonb; ^ DETAIL: "\u" must be followed by four hexadecimal digits. CONTEXT: JSON data, line 1: "\u000g... -SELECT '"\u0000"'::jsonb; -- OK, legal escape - jsonb ----------- - "\u0000" +SELECT '"\u0045"'::jsonb; -- OK, legal escape + jsonb +------- + "E" (1 row) +SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000 +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT '"\u0000"'::jsonb; + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: ... -- use octet_length here so we don't get an odd unicode char in the -- output SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK @@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04'); (1 row) COMMIT; --- unicode escape - backslash is not escaped -select to_jsonb(text '\uabcd'); - to_jsonb ----------- - "\uabcd" -(1 row) - --- any other backslash is escaped -select to_jsonb(text '\abcd'); - to_jsonb ----------- - "\\abcd" -(1 row) - --jsonb_agg CREATE TEMP TABLE rows AS SELECT x, 'txt' || x as y @@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; DETAIL: Unicode low surrogate must follow a high surrogate. CONTEXT: JSON data, line 1: { "a":... -- handling of simple unicode escapes -SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8; +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; + correct_in_utf8 +------------------------------- + {"a": "the Copyright © sign"} +(1 row) + +SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere; + correct_everywhere +----------------------------- + {"a": "dollar $ character"} +(1 row) + +SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape; + not_an_escape +----------------------------------- + {"a": "dollar \\u0024 character"} +(1 row) + +SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... +SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; + not_an_escape +------------------------------ + {"a": "null \\u0000 escape"} +(1 row) + +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; correct_in_utf8 ---------------------- the Copyright © sign (1 row) -SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE; +SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; correct_everywhere -------------------- dollar $ character (1 row) -SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped; - not_unescaped +SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; + not_an_escape +------------------------- + dollar \u0024 character +(1 row) + +SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai... + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... +SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; + not_an_escape -------------------- null \u0000 escape (1 row) diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out index 687ae63b707..f30148d51c1 100644 --- a/src/test/regress/expected/jsonb_1.out +++ b/src/test/regress/expected/jsonb_1.out @@ -60,16 +60,22 @@ LINE 1: SELECT '"\u000g"'::jsonb; ^ DETAIL: "\u" must be followed by four hexadecimal digits. CONTEXT: JSON data, line 1: "\u000g... -SELECT '"\u0000"'::jsonb; -- OK, legal escape - jsonb ----------- - "\u0000" +SELECT '"\u0045"'::jsonb; -- OK, legal escape + jsonb +------- + "E" (1 row) +SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000 +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT '"\u0000"'::jsonb; + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: ... -- use octet_length here so we don't get an odd unicode char in the -- output SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK -ERROR: invalid input syntax for type json +ERROR: unsupported Unicode escape sequence LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text); ^ DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. @@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04'); (1 row) COMMIT; --- unicode escape - backslash is not escaped -select to_jsonb(text '\uabcd'); - to_jsonb ----------- - "\uabcd" -(1 row) - --- any other backslash is escaped -select to_jsonb(text '\abcd'); - to_jsonb ----------- - "\\abcd" -(1 row) - --jsonb_agg CREATE TEMP TABLE rows AS SELECT x, 'txt' || x as y @@ -1941,7 +1933,7 @@ SELECT * FROM jsonb_populate_recordset(row('def',99,NULL)::jbpop,'[{"a":[100,200 -- handling of unicode surrogate pairs SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8; -ERROR: invalid input syntax for type json +ERROR: unsupported Unicode escape sequence LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3... ^ DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. @@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; DETAIL: Unicode low surrogate must follow a high surrogate. CONTEXT: JSON data, line 1: { "a":... -- handling of simple unicode escapes -SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8; -ERROR: invalid input syntax for type json +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr... + ^ +DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. +CONTEXT: JSON data, line 1: { "a":... +SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere; + correct_everywhere +----------------------------- + {"a": "dollar $ character"} +(1 row) + +SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape; + not_an_escape +----------------------------------- + {"a": "dollar \\u0024 character"} +(1 row) + +SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... +SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; + not_an_escape +------------------------------ + {"a": "null \\u0000 escape"} +(1 row) + +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; +ERROR: unsupported Unicode escape sequence LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'... ^ DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. CONTEXT: JSON data, line 1: { "a":... -SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE; +SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; correct_everywhere -------------------- dollar $ character (1 row) -SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped; - not_unescaped +SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; + not_an_escape +------------------------- + dollar \u0024 character +(1 row) + +SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai... + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... +SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; + not_an_escape -------------------- null \u0000 escape (1 row) diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 36a6674ff91..53a37a88439 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -111,14 +111,6 @@ SET LOCAL TIME ZONE -8; select to_json(timestamptz '2014-05-28 12:22:35.614298-04'); COMMIT; --- unicode escape - backslash is not escaped - -select to_json(text '\uabcd'); - --- any other backslash is escaped - -select to_json(text '\abcd'); - --json_agg SELECT json_agg(q) @@ -401,9 +393,17 @@ select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate --handling of simple unicode escapes +select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; +select json '{ "a": "dollar \u0024 character" }' as correct_everywhere; +select json '{ "a": "dollar \\u0024 character" }' as not_an_escape; +select json '{ "a": "null \u0000 escape" }' as not_unescaped; +select json '{ "a": "null \\u0000 escape" }' as not_an_escape; + select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; -select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped; +select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; +select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; --json_typeof() function select value, json_typeof(value) diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index a8461039330..53cc2393c62 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -10,7 +10,8 @@ SELECT '"\v"'::jsonb; -- ERROR, not a valid JSON escape SELECT '"\u"'::jsonb; -- ERROR, incomplete escape SELECT '"\u00"'::jsonb; -- ERROR, incomplete escape SELECT '"\u000g"'::jsonb; -- ERROR, g is not a hex digit -SELECT '"\u0000"'::jsonb; -- OK, legal escape +SELECT '"\u0045"'::jsonb; -- OK, legal escape +SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000 -- use octet_length here so we don't get an odd unicode char in the -- output SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK @@ -73,14 +74,6 @@ SET LOCAL TIME ZONE -8; select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04'); COMMIT; --- unicode escape - backslash is not escaped - -select to_jsonb(text '\uabcd'); - --- any other backslash is escaped - -select to_jsonb(text '\abcd'); - --jsonb_agg CREATE TEMP TABLE rows AS @@ -488,9 +481,18 @@ SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate -- handling of simple unicode escapes -SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8; -SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE; -SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped; + +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; +SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere; +SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape; +SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; +SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; + +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; +SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; +SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; +SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; -- jsonb_to_record and jsonb_to_recordset |