about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/adt/jsonfuncs.c | 1
-rw-r--r-- src/common/jsonapi.c | 77
-rw-r--r-- src/test/regress/expected/json_encoding.out | 24
-rw-r--r-- src/test/regress/expected/json_encoding_1.out | 24
4 files changed, 72 insertions, 54 deletions
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index b342c81f27b..f6a074aa7d0 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -656,6 +656,7 @@ report_json_context(JsonLexContext *lex)
line_start = lex->line_start;
context_start = line_start;
context_end = lex->token_terminator;
+ Assert(context_end >= context_start);
/* Advance until we are close enough to context_end */
while (context_end - context_start >= 50)
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index ade13aed3a4..3d0fbfa7be1 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -675,6 +675,14 @@ json_lex(JsonLexContext *lex)
/*
* The next token in the input stream is known to be a string; lex it.
+ *
+ * If lex->strval isn't NULL, fill it with the decoded string.
+ * Set lex->token_terminator to the end of the decoded input, and in
+ * success cases, transfer its previous value to lex->prev_token_terminator.
+ * Return JSON_SUCCESS or an error code.
+ *
+ * Note: every error exit must set lex->token_terminator, normally to just
+ * past the offending character (or to its start, to keep it out of context).
*/
static inline JsonParseErrorType
json_lex_string(JsonLexContext *lex)
@@ -683,6 +691,19 @@ json_lex_string(JsonLexContext *lex)
int len;
int hi_surrogate = -1;
+ /* Convenience macros for error exits */
+#define FAIL_AT_CHAR_START(code) \
+ do { \
+ lex->token_terminator = s; \
+ return code; \
+ } while (0)
+#define FAIL_AT_CHAR_END(code) \
+ do { \
+ lex->token_terminator = \
+ s + pg_encoding_mblen_bounded(lex->input_encoding, s); \
+ return code; \
+ } while (0)
+
if (lex->strval != NULL)
resetStringInfo(lex->strval);
@@ -695,18 +716,14 @@ json_lex_string(JsonLexContext *lex)
len++;
/* Premature end of the string. */
if (len >= lex->input_length)
- {
- lex->token_terminator = s;
- return JSON_INVALID_TOKEN;
- }
+ FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
else if (*s == '"')
break;
else if ((unsigned char) *s < 32)
{
/* Per RFC4627, these characters MUST be escaped. */
/* Since *s isn't printable, exclude it from the context string */
- lex->token_terminator = s;
- return JSON_ESCAPING_REQUIRED;
+ FAIL_AT_CHAR_START(JSON_ESCAPING_REQUIRED);
}
else if (*s == '\\')
{
@@ -714,10 +731,7 @@ json_lex_string(JsonLexContext *lex)
s++;
len++;
if (len >= lex->input_length)
- {
- lex->token_terminator = s;
- return JSON_INVALID_TOKEN;
- }
+ FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
else if (*s == 'u')
{
int i;
@@ -728,10 +742,7 @@ json_lex_string(JsonLexContext *lex)
s++;
len++;
if (len >= lex->input_length)
- {
- lex->token_terminator = s;
- return JSON_INVALID_TOKEN;
- }
+ FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
else if (*s >= '0' && *s <= '9')
ch = (ch * 16) + (*s - '0');
else if (*s >= 'a' && *s <= 'f')
@@ -739,10 +750,7 @@ json_lex_string(JsonLexContext *lex)
else if (*s >= 'A' && *s <= 'F')
ch = (ch * 16) + (*s - 'A') + 10;
else
- {
- lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
- return JSON_UNICODE_ESCAPE_FORMAT;
- }
+ FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
}
if (lex->strval != NULL)
{
@@ -752,20 +760,20 @@ json_lex_string(JsonLexContext *lex)
if (is_utf16_surrogate_first(ch))
{
if (hi_surrogate != -1)
- return JSON_UNICODE_HIGH_SURROGATE;
+ FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
hi_surrogate = ch;
continue;
}
else if (is_utf16_surrogate_second(ch))
{
if (hi_surrogate == -1)
- return JSON_UNICODE_LOW_SURROGATE;
+ FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
hi_surrogate = -1;
}
if (hi_surrogate != -1)
- return JSON_UNICODE_LOW_SURROGATE;
+ FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
/*
* Reject invalid cases. We can't have a value above
@@ -775,7 +783,7 @@ json_lex_string(JsonLexContext *lex)
if (ch == 0)
{
/* We can't allow this, since our TEXT type doesn't */
- return JSON_UNICODE_CODE_POINT_ZERO;
+ FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
}
/*
@@ -812,14 +820,14 @@ json_lex_string(JsonLexContext *lex)
appendStringInfoChar(lex->strval, (char) ch);
}
else
- return JSON_UNICODE_HIGH_ESCAPE;
+ FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
#endif /* FRONTEND */
}
}
else if (lex->strval != NULL)
{
if (hi_surrogate != -1)
- return JSON_UNICODE_LOW_SURROGATE;
+ FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
switch (*s)
{
@@ -844,10 +852,14 @@ json_lex_string(JsonLexContext *lex)
appendStringInfoChar(lex->strval, '\t');
break;
default:
- /* Not a valid string escape, so signal error. */
+
+ /*
+ * Not a valid string escape, so signal error. We
+ * adjust token_start so that just the escape sequence
+ * is reported, not the whole string.
+ */
lex->token_start = s;
- lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
- return JSON_ESCAPING_INVALID;
+ FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
}
}
else if (strchr("\"\\/bfnrt", *s) == NULL)
@@ -860,15 +872,14 @@ json_lex_string(JsonLexContext *lex)
* shown it's not a performance win.
*/
lex->token_start = s;
- lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
- return JSON_ESCAPING_INVALID;
+ FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
}
}
else if (lex->strval != NULL)
{
if (hi_surrogate != -1)
- return JSON_UNICODE_LOW_SURROGATE;
+ FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
appendStringInfoChar(lex->strval, *s);
}
@@ -876,12 +887,18 @@ json_lex_string(JsonLexContext *lex)
}
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + 1;
return JSON_UNICODE_LOW_SURROGATE;
+ }
/* Hooray, we found the end of the string! */
lex->prev_token_terminator = lex->token_terminator;
lex->token_terminator = s + 1;
return JSON_SUCCESS;
+
+#undef FAIL_AT_CHAR_START
+#undef FAIL_AT_CHAR_END
}
/*
diff --git a/src/test/regress/expected/json_encoding.out b/src/test/regress/expected/json_encoding.out
index f343f74fe18..fa41b401030 100644
--- a/src/test/regress/expected/json_encoding.out
+++ b/src/test/regress/expected/json_encoding.out
@@ -56,19 +56,19 @@ select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json
DETAIL: Unicode high surrogate must not follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83dX...
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
--handling of simple unicode escapes
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8
@@ -121,7 +121,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
ERROR: unsupported Unicode escape sequence
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------
@@ -159,7 +159,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT '"\u0000"'::jsonb;
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: ...
+CONTEXT: JSON data, line 1: "\u0000...
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -180,25 +180,25 @@ ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
^
DETAIL: Unicode high surrogate must not follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83dX...
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
-- handling of simple unicode escapes
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8
@@ -223,7 +223,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
not_an_escape
------------------------------
@@ -253,7 +253,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------
diff --git a/src/test/regress/expected/json_encoding_1.out b/src/test/regress/expected/json_encoding_1.out
index e2fc131b0fa..938f8e24aaf 100644
--- a/src/test/regress/expected/json_encoding_1.out
+++ b/src/test/regress/expected/json_encoding_1.out
@@ -52,19 +52,19 @@ ERROR: conversion between UTF8 and SQL_ASCII is not supported
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
ERROR: invalid input syntax for type json
DETAIL: Unicode high surrogate must not follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83dX...
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
--handling of simple unicode escapes
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
correct_in_utf8
@@ -113,7 +113,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
ERROR: unsupported Unicode escape sequence
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------
@@ -151,7 +151,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT '"\u0000"'::jsonb;
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: ...
+CONTEXT: JSON data, line 1: "\u0000...
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -168,25 +168,25 @@ ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
^
DETAIL: Unicode high surrogate must not follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ud83dX...
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
ERROR: invalid input syntax for type json
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
^
DETAIL: Unicode low surrogate must follow a high surrogate.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "\ude04...
-- handling of simple unicode escapes
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
ERROR: conversion between UTF8 and SQL_ASCII is not supported
@@ -209,7 +209,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
not_an_escape
------------------------------
@@ -237,7 +237,7 @@ ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
^
DETAIL: \u0000 cannot be converted to text.
-CONTEXT: JSON data, line 1: { "a":...
+CONTEXT: JSON data, line 1: { "a": "null \u0000...
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
not_an_escape
--------------------