diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/utils/adt/json.c | 40
-rw-r--r-- | src/test/regress/expected/json.out | 58
-rw-r--r-- | src/test/regress/expected/json_1.out | 62
-rw-r--r-- | src/test/regress/expected/jsonb.out | 64
-rw-r--r-- | src/test/regress/expected/jsonb_1.out | 70
-rw-r--r-- | src/test/regress/sql/json.sql | 18
-rw-r--r-- | src/test/regress/sql/jsonb.sql | 18
7 files changed, 234 insertions, 96 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 1d6b752a28b..48f03e0b36a 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -807,14 +807,17 @@ json_lex_string(JsonLexContext *lex) * For UTF8, replace the escape sequence by the actual * utf8 character in lex->strval. Do this also for other * encodings if the escape designates an ASCII character, - * otherwise raise an error. We don't ever unescape a - * \u0000, since that would result in an impermissible nul - * byte. + * otherwise raise an error. */ if (ch == 0) { - appendStringInfoString(lex->strval, "\\u0000"); + /* We can't allow this, since our TEXT type doesn't */ + ereport(ERROR, + (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), + errmsg("unsupported Unicode escape sequence"), + errdetail("\\u0000 cannot be converted to text."), + report_json_context(lex))); } else if (GetDatabaseEncoding() == PG_UTF8) { @@ -834,8 +837,8 @@ json_lex_string(JsonLexContext *lex) else { ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type json"), + (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), + errmsg("unsupported Unicode escape sequence"), errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."), report_json_context(lex))); } @@ -2374,30 +2377,7 @@ escape_json(StringInfo buf, const char *str) appendStringInfoString(buf, "\\\""); break; case '\\': - - /* - * Unicode escapes are passed through as is. There is no - * requirement that they denote a valid character in the - * server encoding - indeed that is a big part of their - * usefulness. - * - * All we require is that they consist of \uXXXX where the Xs - * are hexadecimal digits. It is the responsibility of the - * caller of, say, to_json() to make sure that the unicode - * escape is valid. 
- * - * In the case of a jsonb string value being escaped, the only - * unicode escape that should be present is \u0000, all the - * other unicode escapes will have been resolved. - */ - if (p[1] == 'u' && - isxdigit((unsigned char) p[2]) && - isxdigit((unsigned char) p[3]) && - isxdigit((unsigned char) p[4]) && - isxdigit((unsigned char) p[5])) - appendStringInfoCharMacro(buf, *p); - else - appendStringInfoString(buf, "\\\\"); + appendStringInfoString(buf, "\\\\"); break; default: if ((unsigned char) *p < ' ') diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index bb4d9ed4beb..c916678427d 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04'); (1 row) COMMIT; --- unicode escape - backslash is not escaped -select to_json(text '\uabcd'); - to_json ----------- - "\uabcd" -(1 row) - --- any other backslash is escaped -select to_json(text '\abcd'); - to_json ----------- - "\\abcd" -(1 row) - --json_agg SELECT json_agg(q) FROM ( SELECT $$a$$ || x AS b, y AS c, @@ -1400,6 +1386,36 @@ ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. CONTEXT: JSON data, line 1: { "a":... 
--handling of simple unicode escapes +select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; + correct_in_utf8 +--------------------------------------- + { "a": "the Copyright \u00a9 sign" } +(1 row) + +select json '{ "a": "dollar \u0024 character" }' as correct_everywhere; + correct_everywhere +------------------------------------- + { "a": "dollar \u0024 character" } +(1 row) + +select json '{ "a": "dollar \\u0024 character" }' as not_an_escape; + not_an_escape +-------------------------------------- + { "a": "dollar \\u0024 character" } +(1 row) + +select json '{ "a": "null \u0000 escape" }' as not_unescaped; + not_unescaped +-------------------------------- + { "a": "null \u0000 escape" } +(1 row) + +select json '{ "a": "null \\u0000 escape" }' as not_an_escape; + not_an_escape +--------------------------------- + { "a": "null \\u0000 escape" } +(1 row) + select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; correct_in_utf8 ---------------------- @@ -1412,8 +1428,18 @@ select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; dollar $ character (1 row) -select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped; - not_unescaped +select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; + not_an_escape +------------------------- + dollar \u0024 character +(1 row) + +select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +ERROR: unsupported Unicode escape sequence +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... 
+select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; + not_an_escape -------------------- null \u0000 escape (1 row) diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out index 83c1d7d492c..ce63bfb227e 100644 --- a/src/test/regress/expected/json_1.out +++ b/src/test/regress/expected/json_1.out @@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04'); (1 row) COMMIT; --- unicode escape - backslash is not escaped -select to_json(text '\uabcd'); - to_json ----------- - "\uabcd" -(1 row) - --- any other backslash is escaped -select to_json(text '\abcd'); - to_json ----------- - "\\abcd" -(1 row) - --json_agg SELECT json_agg(q) FROM ( SELECT $$a$$ || x AS b, y AS c, @@ -1378,7 +1364,7 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3 -- handling of unicode surrogate pairs select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8; -ERROR: invalid input syntax for type json +ERROR: unsupported Unicode escape sequence DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. CONTEXT: JSON data, line 1: { "a":... select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row @@ -1398,8 +1384,38 @@ ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. CONTEXT: JSON data, line 1: { "a":... 
--handling of simple unicode escapes +select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; + correct_in_utf8 +--------------------------------------- + { "a": "the Copyright \u00a9 sign" } +(1 row) + +select json '{ "a": "dollar \u0024 character" }' as correct_everywhere; + correct_everywhere +------------------------------------- + { "a": "dollar \u0024 character" } +(1 row) + +select json '{ "a": "dollar \\u0024 character" }' as not_an_escape; + not_an_escape +-------------------------------------- + { "a": "dollar \\u0024 character" } +(1 row) + +select json '{ "a": "null \u0000 escape" }' as not_unescaped; + not_unescaped +-------------------------------- + { "a": "null \u0000 escape" } +(1 row) + +select json '{ "a": "null \\u0000 escape" }' as not_an_escape; + not_an_escape +--------------------------------- + { "a": "null \\u0000 escape" } +(1 row) + select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; -ERROR: invalid input syntax for type json +ERROR: unsupported Unicode escape sequence DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. CONTEXT: JSON data, line 1: { "a":... select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; @@ -1408,8 +1424,18 @@ select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; dollar $ character (1 row) -select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped; - not_unescaped +select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; + not_an_escape +------------------------- + dollar \u0024 character +(1 row) + +select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +ERROR: unsupported Unicode escape sequence +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... 
+select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; + not_an_escape -------------------- null \u0000 escape (1 row) diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index 9146f59435b..523f50c5465 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -60,12 +60,18 @@ LINE 1: SELECT '"\u000g"'::jsonb; ^ DETAIL: "\u" must be followed by four hexadecimal digits. CONTEXT: JSON data, line 1: "\u000g... -SELECT '"\u0000"'::jsonb; -- OK, legal escape - jsonb ----------- - "\u0000" +SELECT '"\u0045"'::jsonb; -- OK, legal escape + jsonb +------- + "E" (1 row) +SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000 +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT '"\u0000"'::jsonb; + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: ... -- use octet_length here so we don't get an odd unicode char in the -- output SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK @@ -1798,20 +1804,62 @@ LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; DETAIL: Unicode low surrogate must follow a high surrogate. CONTEXT: JSON data, line 1: { "a":... 
-- handling of simple unicode escapes -SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8; +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; + correct_in_utf8 +------------------------------- + {"a": "the Copyright © sign"} +(1 row) + +SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere; + correct_everywhere +----------------------------- + {"a": "dollar $ character"} +(1 row) + +SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape; + not_an_escape +----------------------------------- + {"a": "dollar \\u0024 character"} +(1 row) + +SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... +SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; + not_an_escape +------------------------------ + {"a": "null \\u0000 escape"} +(1 row) + +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; correct_in_utf8 ---------------------- the Copyright © sign (1 row) -SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE; +SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; correct_everywhere -------------------- dollar $ character (1 row) -SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped; - not_unescaped +SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; + not_an_escape +------------------------- + dollar \u0024 character +(1 row) + +SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai... + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... 
+SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; + not_an_escape -------------------- null \u0000 escape (1 row) diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out index 83d61f8c7e0..eee22b4883c 100644 --- a/src/test/regress/expected/jsonb_1.out +++ b/src/test/regress/expected/jsonb_1.out @@ -60,16 +60,22 @@ LINE 1: SELECT '"\u000g"'::jsonb; ^ DETAIL: "\u" must be followed by four hexadecimal digits. CONTEXT: JSON data, line 1: "\u000g... -SELECT '"\u0000"'::jsonb; -- OK, legal escape - jsonb ----------- - "\u0000" +SELECT '"\u0045"'::jsonb; -- OK, legal escape + jsonb +------- + "E" (1 row) +SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000 +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT '"\u0000"'::jsonb; + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: ... -- use octet_length here so we don't get an odd unicode char in the -- output SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK -ERROR: invalid input syntax for type json +ERROR: unsupported Unicode escape sequence LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text); ^ DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. @@ -1768,7 +1774,7 @@ SELECT * FROM jsonb_populate_recordset(row('def',99,NULL)::jbpop,'[{"a":[100,200 -- handling of unicode surrogate pairs SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8; -ERROR: invalid input syntax for type json +ERROR: unsupported Unicode escape sequence LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3... ^ DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. @@ -1798,20 +1804,62 @@ LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; DETAIL: Unicode low surrogate must follow a high surrogate. CONTEXT: JSON data, line 1: { "a":... 
-- handling of simple unicode escapes -SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8; -ERROR: invalid input syntax for type json +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr... + ^ +DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. +CONTEXT: JSON data, line 1: { "a":... +SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere; + correct_everywhere +----------------------------- + {"a": "dollar $ character"} +(1 row) + +SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape; + not_an_escape +----------------------------------- + {"a": "dollar \\u0024 character"} +(1 row) + +SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... +SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; + not_an_escape +------------------------------ + {"a": "null \\u0000 escape"} +(1 row) + +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; +ERROR: unsupported Unicode escape sequence LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'... ^ DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. CONTEXT: JSON data, line 1: { "a":... 
-SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE; +SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; correct_everywhere -------------------- dollar $ character (1 row) -SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped; - not_unescaped +SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; + not_an_escape +------------------------- + dollar \u0024 character +(1 row) + +SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai... + ^ +DETAIL: \u0000 cannot be converted to text. +CONTEXT: JSON data, line 1: { "a":... +SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; + not_an_escape -------------------- null \u0000 escape (1 row) diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index c9801321e09..a4eaa1fbc0b 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -111,14 +111,6 @@ SET LOCAL TIME ZONE -8; select to_json(timestamptz '2014-05-28 12:22:35.614298-04'); COMMIT; --- unicode escape - backslash is not escaped - -select to_json(text '\uabcd'); - --- any other backslash is escaped - -select to_json(text '\abcd'); - --json_agg SELECT json_agg(q) @@ -401,9 +393,17 @@ select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate --handling of simple unicode escapes +select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; +select json '{ "a": "dollar \u0024 character" }' as correct_everywhere; +select json '{ "a": "dollar \\u0024 character" }' as not_an_escape; +select json '{ "a": "null \u0000 escape" }' as not_unescaped; +select json '{ "a": "null \\u0000 escape" }' as not_an_escape; + select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; -select 
json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped; +select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; +select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; --json_typeof() function select value, json_typeof(value) diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index f1ed021be2d..a8665848731 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -10,7 +10,8 @@ SELECT '"\v"'::jsonb; -- ERROR, not a valid JSON escape SELECT '"\u"'::jsonb; -- ERROR, incomplete escape SELECT '"\u00"'::jsonb; -- ERROR, incomplete escape SELECT '"\u000g"'::jsonb; -- ERROR, g is not a hex digit -SELECT '"\u0000"'::jsonb; -- OK, legal escape +SELECT '"\u0045"'::jsonb; -- OK, legal escape +SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000 -- use octet_length here so we don't get an odd unicode char in the -- output SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK @@ -373,9 +374,18 @@ SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate -- handling of simple unicode escapes -SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8; -SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE; -SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped; + +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; +SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere; +SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape; +SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; +SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; + +SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; +SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as 
correct_everywhere; +SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; +SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails; +SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; -- jsonb_to_record and jsonb_to_recordset |