diff options
Diffstat (limited to 'src/backend/utils/adt/json.c')
-rw-r--r-- | src/backend/utils/adt/json.c | 29 |
1 file changed, 27 insertions, 2 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 26d293709aa..40d1dff39c9 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -750,6 +750,13 @@ json_lex(JsonLexContext *lex) /* * The next token in the input stream is known to be a string; lex it. + * + * If lex->strval isn't NULL, fill it with the decoded string. + * Set lex->token_terminator to the end of the decoded input, and in + * success cases, transfer its previous value to lex->prev_token_terminator. + * + * Note: be careful that all error cases advance lex->token_terminator + * to the point after the character we detected the error on. */ static inline void json_lex_string(JsonLexContext *lex) @@ -837,33 +844,42 @@ json_lex_string(JsonLexContext *lex) if (ch >= 0xd800 && ch <= 0xdbff) { if (hi_surrogate != -1) + { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s", "json"), errdetail("Unicode high surrogate must not follow a high surrogate."), report_json_context(lex))); + } hi_surrogate = (ch & 0x3ff) << 10; continue; } else if (ch >= 0xdc00 && ch <= 0xdfff) { if (hi_surrogate == -1) + { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s", "json"), errdetail("Unicode low surrogate must follow a high surrogate."), report_json_context(lex))); + } ch = 0x10000 + hi_surrogate + (ch & 0x3ff); hi_surrogate = -1; } if (hi_surrogate != -1) + { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s", "json"), errdetail("Unicode low surrogate must follow a high surrogate."), report_json_context(lex))); + } /* * For UTF8, replace the escape sequence by the actual @@ -875,6 +891,7 @@ json_lex_string(JsonLexContext *lex) if (ch == 0) { /* We can't allow this, since our TEXT type 
doesn't */ + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), errmsg("unsupported Unicode escape sequence"), @@ -898,24 +915,27 @@ json_lex_string(JsonLexContext *lex) } else { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), errmsg("unsupported Unicode escape sequence"), errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."), report_json_context(lex))); } - } } else if (lex->strval != NULL) { if (hi_surrogate != -1) + { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s", "json"), errdetail("Unicode low surrogate must follow a high surrogate."), report_json_context(lex))); + } switch (*s) { @@ -968,16 +988,18 @@ json_lex_string(JsonLexContext *lex) extract_mb_char(s)), report_json_context(lex))); } - } else if (lex->strval != NULL) { if (hi_surrogate != -1) + { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s", "json"), errdetail("Unicode low surrogate must follow a high surrogate."), report_json_context(lex))); + } appendStringInfoChar(lex->strval, *s); } @@ -985,11 +1007,14 @@ json_lex_string(JsonLexContext *lex) } if (hi_surrogate != -1) + { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s", "json"), errdetail("Unicode low surrogate must follow a high surrogate."), report_json_context(lex))); + } /* Hooray, we found the end of the string! */ lex->prev_token_terminator = lex->token_terminator; |