about | summary | refs | log | tree | commit | diff
path: root/src/backend/utils/adt/json.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/json.c')
-rw-r--r-- src/backend/utils/adt/json.c 211
1 files changed, 161 insertions, 50 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index e79c2946d0c..a7a7c2b3ade 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -43,8 +43,6 @@ typedef struct /* state of JSON lexer */
char *token_start; /* start of current token within input */
char *token_terminator; /* end of previous or current token */
JsonValueType token_type; /* type of current token, once it's known */
- int line_number; /* current line number (counting from 1) */
- char *line_start; /* start of current line within input (BROKEN!!) */
} JsonLexContext;
typedef enum /* states of JSON parser */
@@ -78,6 +76,7 @@ static void json_lex_string(JsonLexContext *lex);
static void json_lex_number(JsonLexContext *lex, char *s);
static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex);
static void report_invalid_token(JsonLexContext *lex);
+static int report_json_context(JsonLexContext *lex);
static char *extract_mb_char(char *s);
static void composite_to_json(Datum composite, StringInfo result,
bool use_line_feeds);
@@ -185,8 +184,6 @@ json_validate_cstring(char *input)
/* Set up lexing context. */
lex.input = input;
lex.token_terminator = lex.input;
- lex.line_number = 1;
- lex.line_start = input;
/* Set up parse stack. */
stacksize = 32;
@@ -335,11 +332,7 @@ json_lex(JsonLexContext *lex)
/* Skip leading whitespace. */
s = lex->token_terminator;
while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
- {
- if (*s == '\n')
- lex->line_number++;
s++;
- }
lex->token_start = s;
/* Determine token type. */
@@ -350,7 +343,7 @@ json_lex(JsonLexContext *lex)
{
/* End of string. */
lex->token_start = NULL;
- lex->token_terminator = NULL;
+ lex->token_terminator = s;
}
else
{
@@ -397,7 +390,8 @@ json_lex(JsonLexContext *lex)
/*
* We got some sort of unexpected punctuation or an otherwise
* unexpected character, so just complain about that one
- * character.
+ * character. (It can't be multibyte because the above loop
+ * will advance over any multibyte characters.)
*/
lex->token_terminator = s + 1;
report_invalid_token(lex);
@@ -443,11 +437,14 @@ json_lex_string(JsonLexContext *lex)
lex->token_terminator = s;
report_invalid_token(lex);
}
+ /* Since *s isn't printable, exclude it from the context string */
+ lex->token_terminator = s;
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type json"),
- errdetail("line %d: Character with value \"0x%02x\" must be escaped.",
- lex->line_number, (unsigned char) *s)));
+ errdetail("Character with value 0x%02x must be escaped.",
+ (unsigned char) *s),
+ report_json_context(lex)));
}
else if (*s == '\\')
{
@@ -465,38 +462,39 @@ json_lex_string(JsonLexContext *lex)
for (i = 1; i <= 4; i++)
{
- if (s[i] == '\0')
+ s++;
+ if (*s == '\0')
{
- lex->token_terminator = s + i;
+ lex->token_terminator = s;
report_invalid_token(lex);
}
- else if (s[i] >= '0' && s[i] <= '9')
- ch = (ch * 16) + (s[i] - '0');
- else if (s[i] >= 'a' && s[i] <= 'f')
- ch = (ch * 16) + (s[i] - 'a') + 10;
- else if (s[i] >= 'A' && s[i] <= 'F')
- ch = (ch * 16) + (s[i] - 'A') + 10;
+ else if (*s >= '0' && *s <= '9')
+ ch = (ch * 16) + (*s - '0');
+ else if (*s >= 'a' && *s <= 'f')
+ ch = (ch * 16) + (*s - 'a') + 10;
+ else if (*s >= 'A' && *s <= 'F')
+ ch = (ch * 16) + (*s - 'A') + 10;
else
{
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type json"),
- errdetail("line %d: \"\\u\" must be followed by four hexadecimal digits.",
- lex->line_number)));
+ errdetail("\"\\u\" must be followed by four hexadecimal digits."),
+ report_json_context(lex)));
}
}
-
- /* Account for the four additional bytes we just parsed. */
- s += 4;
}
else if (strchr("\"\\/bfnrt", *s) == NULL)
{
/* Not a valid string escape, so error out. */
+ lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type json"),
- errdetail("line %d: Invalid escape \"\\%s\".",
- lex->line_number, extract_mb_char(s))));
+ errdetail("Escape sequence \"\\%s\" is invalid.",
+ extract_mb_char(s)),
+ report_json_context(lex)));
}
}
}
@@ -599,68 +597,108 @@ json_lex_number(JsonLexContext *lex, char *s)
/*
* Report a parse error.
+ *
+ * lex->token_start and lex->token_terminator must identify the current token.
*/
static void
report_parse_error(JsonParseStack *stack, JsonLexContext *lex)
{
- char *detail = NULL;
- char *token = NULL;
+ char *token;
int toklen;
/* Handle case where the input ended prematurely. */
if (lex->token_start == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type json: \"%s\"",
- lex->input),
- errdetail("The input string ended unexpectedly.")));
+ errmsg("invalid input syntax for type json"),
+ errdetail("The input string ended unexpectedly."),
+ report_json_context(lex)));
- /* Separate out the offending token. */
+ /* Separate out the current token. */
toklen = lex->token_terminator - lex->token_start;
token = palloc(toklen + 1);
memcpy(token, lex->token_start, toklen);
token[toklen] = '\0';
- /* Select correct detail message. */
+ /* Complain, with the appropriate detail message. */
if (stack == NULL)
- detail = "line %d: Expected end of input, but found \"%s\".";
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("Expected end of input, but found \"%s\".",
+ token),
+ report_json_context(lex)));
else
{
switch (stack->state)
{
case JSON_PARSE_VALUE:
- detail = "line %d: Expected string, number, object, array, true, false, or null, but found \"%s\".";
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("Expected JSON value, but found \"%s\".",
+ token),
+ report_json_context(lex)));
break;
case JSON_PARSE_ARRAY_START:
- detail = "line %d: Expected array element or \"]\", but found \"%s\".";
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("Expected array element or \"]\", but found \"%s\".",
+ token),
+ report_json_context(lex)));
break;
case JSON_PARSE_ARRAY_NEXT:
- detail = "line %d: Expected \",\" or \"]\", but found \"%s\".";
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("Expected \",\" or \"]\", but found \"%s\".",
+ token),
+ report_json_context(lex)));
break;
case JSON_PARSE_OBJECT_START:
- detail = "line %d: Expected string or \"}\", but found \"%s\".";
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("Expected string or \"}\", but found \"%s\".",
+ token),
+ report_json_context(lex)));
break;
case JSON_PARSE_OBJECT_LABEL:
- detail = "line %d: Expected \":\", but found \"%s\".";
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("Expected \":\", but found \"%s\".",
+ token),
+ report_json_context(lex)));
break;
case JSON_PARSE_OBJECT_NEXT:
- detail = "line %d: Expected \",\" or \"}\", but found \"%s\".";
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("Expected \",\" or \"}\", but found \"%s\".",
+ token),
+ report_json_context(lex)));
break;
case JSON_PARSE_OBJECT_COMMA:
- detail = "line %d: Expected string, but found \"%s\".";
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type json"),
+ errdetail("Expected string, but found \"%s\".",
+ token),
+ report_json_context(lex)));
break;
+ default:
+ elog(ERROR, "unexpected json parse state: %d",
+ (int) stack->state);
}
}
-
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type json: \"%s\"",
- lex->input),
- detail ? errdetail(detail, lex->line_number, token) : 0));
}
/*
* Report an invalid input token.
+ *
+ * lex->token_start and lex->token_terminator must identify the token.
*/
static void
report_invalid_token(JsonLexContext *lex)
@@ -668,6 +706,7 @@ report_invalid_token(JsonLexContext *lex)
char *token;
int toklen;
+ /* Separate out the offending token. */
toklen = lex->token_terminator - lex->token_start;
token = palloc(toklen + 1);
memcpy(token, lex->token_start, toklen);
@@ -676,8 +715,80 @@ report_invalid_token(JsonLexContext *lex)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type json"),
- errdetail("line %d: Token \"%s\" is invalid.",
- lex->line_number, token)));
+ errdetail("Token \"%s\" is invalid.", token),
+ report_json_context(lex)));
+}
+
+/*
+ * Report a CONTEXT line for bogus JSON input: a short excerpt of the input
+ * ending at the error spot, labeled with the line on which the excerpt starts.
+ * lex->token_terminator must be set to identify the spot where we detected
+ * the error. lex->token_start is not consulted here, so it may be NULL (as
+ * it is when the error is detected at EOF).
+ *
+ * Returns errcontext()'s result, which isn't meaningful; the function is
+ * non-void only so that it can be invoked inside ereport().
+ */
+static int
+report_json_context(JsonLexContext *lex)
+{
+ const char *context_start;
+ const char *context_end;
+ const char *line_start;
+ int line_number;
+ char *ctxt;
+ int ctxtlen;
+ const char *prefix;
+ const char *suffix;
+
+ /* Choose the excerpt's boundaries; it always ends at token_terminator */
+ context_start = lex->input;
+ context_end = lex->token_terminator;
+ line_start = context_start;
+ line_number = 1;
+ for (;;)
+ {
+ /* Always advance over newlines, counting lines (context_end test is just paranoia) */
+ if (*context_start == '\n' && context_start < context_end)
+ {
+ context_start++;
+ line_start = context_start;
+ line_number++;
+ continue;
+ }
+ /* Otherwise, done once fewer than 50 bytes remain before context_end */
+ if (context_end - context_start < 50)
+ break;
+ /* Advance by one character, which may be multibyte */
+ if (IS_HIGHBIT_SET(*context_start))
+ context_start += pg_mblen(context_start);
+ else
+ context_start++;
+ }
+
+ /*
+ * We add "..." to indicate that the excerpt doesn't start at the
+ * beginning of the line ... but if we're within 3 bytes of the
+ * beginning of the line, we might as well just show the whole line.
+ */
+ if (context_start - line_start <= 3)
+ context_start = line_start;
+
+ /* Make a palloc'd, null-terminated copy of the excerpt to present */
+ ctxtlen = context_end - context_start;
+ ctxt = palloc(ctxtlen + 1);
+ memcpy(ctxt, context_start, ctxtlen);
+ ctxt[ctxtlen] = '\0';
+
+ /*
+ * Show the context, prefixing "..." if not starting at start of line, and
+ * suffixing "..." unless the excerpt ends at NUL, newline or carriage return.
+ */
+ prefix = (context_start > line_start) ? "..." : "";
+ suffix = (*context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." : "";
+
+ return errcontext("JSON data, line %d: %s%s%s",
+ line_number, prefix, ctxt, suffix);
}
/*