aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/utils/adt/json.c29
-rw-r--r--src/test/regress/expected/json_encoding.out24
-rw-r--r--src/test/regress/expected/json_encoding_1.out36
3 files changed, 57 insertions, 32 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 26d293709aa..40d1dff39c9 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -750,6 +750,13 @@ json_lex(JsonLexContext *lex)
/*
* The next token in the input stream is known to be a string; lex it.
+ *
+ * If lex->strval isn't NULL, fill it with the decoded string.
+ * Set lex->token_terminator to the end of the decoded input, and in
+ * success cases, transfer its previous value to lex->prev_token_terminator.
+ *
+ * Note: be careful that all error cases advance lex->token_terminator
+ * to the point after the character we detected the error on.
*/
static inline void
json_lex_string(JsonLexContext *lex)
@@ -837,33 +844,42 @@ json_lex_string(JsonLexContext *lex)
if (ch >= 0xd800 && ch <= 0xdbff)
{
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s",
"json"),
errdetail("Unicode high surrogate must not follow a high surrogate."),
report_json_context(lex)));
+ }
hi_surrogate = (ch & 0x3ff) << 10;
continue;
}
else if (ch >= 0xdc00 && ch <= 0xdfff)
{
if (hi_surrogate == -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
hi_surrogate = -1;
}
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
/*
* For UTF8, replace the escape sequence by the actual
@@ -875,6 +891,7 @@ json_lex_string(JsonLexContext *lex)
if (ch == 0)
{
/* We can't allow this, since our TEXT type doesn't */
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"),
@@ -898,24 +915,27 @@ json_lex_string(JsonLexContext *lex)
}
else
{
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"),
errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
report_json_context(lex)));
}
-
}
}
else if (lex->strval != NULL)
{
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s",
"json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
switch (*s)
{
@@ -968,16 +988,18 @@ json_lex_string(JsonLexContext *lex)
extract_mb_char(s)),
report_json_context(lex)));
}
-
}
else if (lex->strval != NULL)
{
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
appendStringInfoChar(lex->strval, *s);
}
@@ -985,11 +1007,14 @@ json_lex_string(JsonLexContext *lex)
}
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
/* Hooray, we found the end of the string! */
lex->prev_token_terminator = lex->token_terminator;
diff --git a/src/test/regress/expected/json_encoding.out b/src/test/regress/expected/json_encoding.out
index d8d34f4ff6a..3156c63c6ff 100644
--- a/src/test/regress/expected/json_encoding.out
+++ b/src/test/regress/expected/json_encoding.out
@@ -41,19 +41,19 @@ select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json
DETAIL: Unicode high surrogate must not follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83dX...
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
--handling of simple unicode escapes
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8
@@ -106,7 +106,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
ERROR: unsupported Unicode escape sequence
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------
@@ -144,7 +144,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT '"\u0000"'::jsonb;
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: ...
+CONTEXT: JSON data, line 1: "\u0000...
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -165,25 +165,25 @@ ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
^
DETAIL: Unicode high surrogate must not follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83dX...
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
-- handling of simple unicode escapes
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8
@@ -208,7 +208,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
not_an_escape
------------------------------
@@ -238,7 +238,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------
diff --git a/src/test/regress/expected/json_encoding_1.out b/src/test/regress/expected/json_encoding_1.out
index 79ed78e1c5f..d320bfa8118 100644
--- a/src/test/regress/expected/json_encoding_1.out
+++ b/src/test/regress/expected/json_encoding_1.out
@@ -35,23 +35,23 @@ SELECT '"\uaBcD"'::json; -- OK, uppercase and lower case both OK
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
ERROR: unsupported Unicode escape sequence
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ude04...
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json
DETAIL: Unicode high surrogate must not follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83dX...
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
--handling of simple unicode escapes
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8
@@ -86,7 +86,7 @@ select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
ERROR: unsupported Unicode escape sequence
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "the Copyright \u00a9...
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
@@ -102,7 +102,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
ERROR: unsupported Unicode escape sequence
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------
@@ -140,7 +140,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT '"\u0000"'::jsonb;
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: ...
+CONTEXT: JSON data, line 1: "\u0000...
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -148,45 +148,45 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT: JSON data, line 1: ...
+CONTEXT: JSON data, line 1: "\uaBcD...
-- handling of unicode surrogate pairs
SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3...
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ude04...
SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
^
DETAIL: Unicode high surrogate must not follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83dX...
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
-- handling of simple unicode escapes
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr...
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "the Copyright \u00a9...
SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
correct_everywhere
-----------------------------
@@ -204,7 +204,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
not_an_escape
------------------------------
@@ -216,7 +216,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'...
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "the Copyright \u00a9...
SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
@@ -234,7 +234,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------