diff options
Diffstat (limited to 'src/backend/utils/adt/json.c')
-rw-r--r-- | src/backend/utils/adt/json.c | 211 |
1 files changed, 161 insertions, 50 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index e79c2946d0c..a7a7c2b3ade 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -43,8 +43,6 @@ typedef struct /* state of JSON lexer */ char *token_start; /* start of current token within input */ char *token_terminator; /* end of previous or current token */ JsonValueType token_type; /* type of current token, once it's known */ - int line_number; /* current line number (counting from 1) */ - char *line_start; /* start of current line within input (BROKEN!!) */ } JsonLexContext; typedef enum /* states of JSON parser */ @@ -78,6 +76,7 @@ static void json_lex_string(JsonLexContext *lex); static void json_lex_number(JsonLexContext *lex, char *s); static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex); static void report_invalid_token(JsonLexContext *lex); +static int report_json_context(JsonLexContext *lex); static char *extract_mb_char(char *s); static void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds); @@ -185,8 +184,6 @@ json_validate_cstring(char *input) /* Set up lexing context. */ lex.input = input; lex.token_terminator = lex.input; - lex.line_number = 1; - lex.line_start = input; /* Set up parse stack. */ stacksize = 32; @@ -335,11 +332,7 @@ json_lex(JsonLexContext *lex) /* Skip leading whitespace. */ s = lex->token_terminator; while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') - { - if (*s == '\n') - lex->line_number++; s++; - } lex->token_start = s; /* Determine token type. */ @@ -350,7 +343,7 @@ json_lex(JsonLexContext *lex) { /* End of string. */ lex->token_start = NULL; - lex->token_terminator = NULL; + lex->token_terminator = s; } else { @@ -397,7 +390,8 @@ json_lex(JsonLexContext *lex) /* * We got some sort of unexpected punctuation or an otherwise * unexpected character, so just complain about that one - * character. + * character. (It can't be multibyte because the above loop + * will advance over any multibyte characters.) */ lex->token_terminator = s + 1; report_invalid_token(lex); @@ -443,11 +437,14 @@ json_lex_string(JsonLexContext *lex) lex->token_terminator = s; report_invalid_token(lex); } + /* Since *s isn't printable, exclude it from the context string */ + lex->token_terminator = s; ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: Character with value \"0x%02x\" must be escaped.", - lex->line_number, (unsigned char) *s))); + errdetail("Character with value 0x%02x must be escaped.", + (unsigned char) *s), + report_json_context(lex))); } else if (*s == '\\') { @@ -465,38 +462,39 @@ json_lex_string(JsonLexContext *lex) for (i = 1; i <= 4; i++) { - if (s[i] == '\0') + s++; + if (*s == '\0') { - lex->token_terminator = s + i; + lex->token_terminator = s; report_invalid_token(lex); } - else if (s[i] >= '0' && s[i] <= '9') - ch = (ch * 16) + (s[i] - '0'); - else if (s[i] >= 'a' && s[i] <= 'f') - ch = (ch * 16) + (s[i] - 'a') + 10; - else if (s[i] >= 'A' && s[i] <= 'F') - ch = (ch * 16) + (s[i] - 'A') + 10; + else if (*s >= '0' && *s <= '9') + ch = (ch * 16) + (*s - '0'); + else if (*s >= 'a' && *s <= 'f') + ch = (ch * 16) + (*s - 'a') + 10; + else if (*s >= 'A' && *s <= 'F') + ch = (ch * 16) + (*s - 'A') + 10; else { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: \"\\u\" must be followed by four hexadecimal digits.", - lex->line_number))); + errdetail("\"\\u\" must be followed by four hexadecimal digits."), + report_json_context(lex))); } } - - /* Account for the four additional bytes we just parsed. */ - s += 4; } else if (strchr("\"\\/bfnrt", *s) == NULL) { /* Not a valid string escape, so error out. */ + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: Invalid escape \"\\%s\".", - lex->line_number, extract_mb_char(s)))); + errdetail("Escape sequence \"\\%s\" is invalid.", + extract_mb_char(s)), + report_json_context(lex))); } } } @@ -599,68 +597,108 @@ json_lex_number(JsonLexContext *lex, char *s) /* * Report a parse error. + * + * lex->token_start and lex->token_terminator must identify the current token. */ static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex) { - char *detail = NULL; - char *token = NULL; + char *token; int toklen; /* Handle case where the input ended prematurely. */ if (lex->token_start == NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type json: \"%s\"", - lex->input), - errdetail("The input string ended unexpectedly."))); + errmsg("invalid input syntax for type json"), + errdetail("The input string ended unexpectedly."), + report_json_context(lex))); - /* Separate out the offending token. */ + /* Separate out the current token. */ toklen = lex->token_terminator - lex->token_start; token = palloc(toklen + 1); memcpy(token, lex->token_start, toklen); token[toklen] = '\0'; - /* Select correct detail message. */ + /* Complain, with the appropriate detail message. */ if (stack == NULL) - detail = "line %d: Expected end of input, but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected end of input, but found \"%s\".", + token), + report_json_context(lex))); else { switch (stack->state) { case JSON_PARSE_VALUE: - detail = "line %d: Expected string, number, object, array, true, false, or null, but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected JSON value, but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_ARRAY_START: - detail = "line %d: Expected array element or \"]\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected array element or \"]\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_ARRAY_NEXT: - detail = "line %d: Expected \",\" or \"]\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected \",\" or \"]\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_START: - detail = "line %d: Expected string or \"}\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected string or \"}\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_LABEL: - detail = "line %d: Expected \":\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected \":\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_NEXT: - detail = "line %d: Expected \",\" or \"}\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected \",\" or \"}\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_COMMA: - detail = "line %d: Expected string, but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected string, but found \"%s\".", + token), + report_json_context(lex))); break; + default: + elog(ERROR, "unexpected json parse state: %d", + (int) stack->state); } } - - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type json: \"%s\"", - lex->input), - detail ? errdetail(detail, lex->line_number, token) : 0)); } /* * Report an invalid input token. + * + * lex->token_start and lex->token_terminator must identify the token. */ static void report_invalid_token(JsonLexContext *lex) @@ -668,6 +706,7 @@ report_invalid_token(JsonLexContext *lex) char *token; int toklen; + /* Separate out the offending token. */ toklen = lex->token_terminator - lex->token_start; token = palloc(toklen + 1); memcpy(token, lex->token_start, toklen); @@ -676,8 +715,80 @@ report_invalid_token(JsonLexContext *lex) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: Token \"%s\" is invalid.", - lex->line_number, token))); + errdetail("Token \"%s\" is invalid.", token), + report_json_context(lex))); +} + +/* + * Report a CONTEXT line for bogus JSON input. + * + * lex->token_terminator must be set to identify the spot where we detected + * the error. Note that lex->token_start might be NULL, in case we recognized + * error at EOF. + * + * The return value isn't meaningful, but we make it non-void so that this + * can be invoked inside ereport(). + */ +static int +report_json_context(JsonLexContext *lex) +{ + const char *context_start; + const char *context_end; + const char *line_start; + int line_number; + char *ctxt; + int ctxtlen; + const char *prefix; + const char *suffix; + + /* Choose boundaries for the part of the input we will display */ + context_start = lex->input; + context_end = lex->token_terminator; + line_start = context_start; + line_number = 1; + for (;;) + { + /* Always advance over newlines (context_end test is just paranoia) */ + if (*context_start == '\n' && context_start < context_end) + { + context_start++; + line_start = context_start; + line_number++; + continue; + } + /* Otherwise, done as soon as we are close enough to context_end */ + if (context_end - context_start < 50) + break; + /* Advance to next multibyte character */ + if (IS_HIGHBIT_SET(*context_start)) + context_start += pg_mblen(context_start); + else + context_start++; + } + + /* + * We add "..." to indicate that the excerpt doesn't start at the + * beginning of the line ... but if we're within 3 characters of the + * beginning of the line, we might as well just show the whole line. + */ + if (context_start - line_start <= 3) + context_start = line_start; + + /* Get a null-terminated copy of the data to present */ + ctxtlen = context_end - context_start; + ctxt = palloc(ctxtlen + 1); + memcpy(ctxt, context_start, ctxtlen); + ctxt[ctxtlen] = '\0'; + + /* + * Show the context, prefixing "..." if not starting at start of line, and + * suffixing "..." if not ending at end of line. + */ + prefix = (context_start > line_start) ? "..." : ""; + suffix = (*context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." : ""; + + return errcontext("JSON data, line %d: %s%s%s", + line_number, prefix, ctxt, suffix); } /* |