aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/json.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/json.c')
-rw-r--r--src/backend/utils/adt/json.c29
1 files changed, 27 insertions, 2 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 26d293709aa..40d1dff39c9 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -750,6 +750,13 @@ json_lex(JsonLexContext *lex)
/*
* The next token in the input stream is known to be a string; lex it.
+ *
+ * If lex->strval isn't NULL, fill it with the decoded string.
+ * Set lex->token_terminator to the end of the decoded input, and in
+ * success cases, transfer its previous value to lex->prev_token_terminator.
+ *
+ * Note: be careful that all error cases advance lex->token_terminator
+ * to the point after the character we detected the error on.
*/
static inline void
json_lex_string(JsonLexContext *lex)
@@ -837,33 +844,42 @@ json_lex_string(JsonLexContext *lex)
if (ch >= 0xd800 && ch <= 0xdbff)
{
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s",
"json"),
errdetail("Unicode high surrogate must not follow a high surrogate."),
report_json_context(lex)));
+ }
hi_surrogate = (ch & 0x3ff) << 10;
continue;
}
else if (ch >= 0xdc00 && ch <= 0xdfff)
{
if (hi_surrogate == -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
hi_surrogate = -1;
}
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
/*
* For UTF8, replace the escape sequence by the actual
@@ -875,6 +891,7 @@ json_lex_string(JsonLexContext *lex)
if (ch == 0)
{
/* We can't allow this, since our TEXT type doesn't */
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"),
@@ -898,24 +915,27 @@ json_lex_string(JsonLexContext *lex)
}
else
{
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"),
errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
report_json_context(lex)));
}
-
}
}
else if (lex->strval != NULL)
{
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s",
"json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
switch (*s)
{
@@ -968,16 +988,18 @@ json_lex_string(JsonLexContext *lex)
extract_mb_char(s)),
report_json_context(lex)));
}
-
}
else if (lex->strval != NULL)
{
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
appendStringInfoChar(lex->strval, *s);
}
@@ -985,11 +1007,14 @@ json_lex_string(JsonLexContext *lex)
}
if (hi_surrogate != -1)
+ {
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
+ }
/* Hooray, we found the end of the string! */
lex->prev_token_terminator = lex->token_terminator;