about | summary | refs | log | tree | commit | diff
path: root/src/backend/utils/adt/misc.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/misc.c')
-rw-r--r-- src/backend/utils/adt/misc.c | 199
1 files changed, 74 insertions, 125 deletions
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index faa8ef3c913..6f7c4078161 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
/*
- * This simple parser utility are compatible with lexer implementation,
- * used only in parse_ident function
+ * Is character a valid identifier start?
+ * Must match scan.l's {ident_start} character class.
*/
static bool
is_ident_start(unsigned char c)
{
+ /* Underscores and ASCII letters are OK */
if (c == '_')
return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true;
-
- if (c >= 0200 && c <= 0377)
+ /* Any high-bit-set character is OK (might be part of a multibyte char) */
+ if (IS_HIGHBIT_SET(c))
return true;
-
return false;
}
+/*
+ * Is character a valid identifier continuation?
+ * Must match scan.l's {ident_cont} character class.
+ */
static bool
is_ident_cont(unsigned char c)
{
- if (c >= '0' && c <= '9')
+ /* Can be digit or dollar sign ... */
+ if ((c >= '0' && c <= '9') || c == '$')
return true;
-
+ /* ... or an identifier start character */
return is_ident_start(c);
}
/*
- * Sanitize SQL string for using in error message.
- */
-static char *
-sanitize_text(text *t)
-{
- int len = VARSIZE_ANY_EXHDR(t);
- const char *p = VARDATA_ANY(t);
- StringInfo dstr;
-
- dstr = makeStringInfo();
-
- appendStringInfoChar(dstr, '"');
-
- while (len--)
- {
- switch (*p)
- {
- case '\b':
- appendStringInfoString(dstr, "\\b");
- break;
- case '\f':
- appendStringInfoString(dstr, "\\f");
- break;
- case '\n':
- appendStringInfoString(dstr, "\\n");
- break;
- case '\r':
- appendStringInfoString(dstr, "\\r");
- break;
- case '\t':
- appendStringInfoString(dstr, "\\t");
- break;
- case '\'':
- appendStringInfoString(dstr, "''");
- break;
- case '\\':
- appendStringInfoString(dstr, "\\\\");
- break;
- default:
- if ((unsigned char) *p < ' ')
- appendStringInfo(dstr, "\\u%04x", (int) *p);
- else
- appendStringInfoCharMacro(dstr, *p);
- break;
- }
- p++;
- }
-
- appendStringInfoChar(dstr, '"');
-
- return dstr->data;
-}
-
-/*
- * parse_ident - parse SQL composed identifier to separate identifiers.
+ * parse_ident - parse a SQL qualified identifier into separate identifiers.
* When strict mode is active (second parameter), then any chars after
- * last identifiers are disallowed.
+ * the last identifier are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
- text *qualname;
- char *qualname_str;
- bool strict;
+ text *qualname = PG_GETARG_TEXT_PP(0);
+ bool strict = PG_GETARG_BOOL(1);
+ char *qualname_str = text_to_cstring(qualname);
+ ArrayBuildState *astate = NULL;
char *nextp;
bool after_dot = false;
- ArrayBuildState *astate = NULL;
-
- qualname = PG_GETARG_TEXT_PP(0);
- qualname_str = text_to_cstring(qualname);
- strict = PG_GETARG_BOOL(1);
+ /*
+ * The code below scribbles on qualname_str in some cases, so we should
+ * reconvert qualname if we need to show the original string in error
+ * messages.
+ */
nextp = qualname_str;
/* skip leading whitespace */
@@ -830,25 +782,24 @@ parse_ident(PG_FUNCTION_ARGS)
for (;;)
{
- char *curname;
- char *endp;
- bool missing_ident;
-
- missing_ident = true;
+ char *curname;
+ bool missing_ident = true;
- if (*nextp == '\"')
+ if (*nextp == '"')
{
+ char *endp;
+
curname = nextp + 1;
for (;;)
{
- endp = strchr(nextp + 1, '\"');
+ endp = strchr(nextp + 1, '"');
if (endp == NULL)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unclosed double quotes"),
- errdetail("string %s is not valid identifier",
- sanitize_text(qualname))));
- if (endp[1] != '\"')
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("String has unclosed double quotes.")));
+ if (endp[1] != '"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
@@ -856,44 +807,40 @@ parse_ident(PG_FUNCTION_ARGS)
nextp = endp + 1;
*endp = '\0';
- /* Show complete input string in this case. */
if (endp - curname == 0)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("identifier should not be empty: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("Quoted identifier must not be empty.")));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
- else
+ else if (is_ident_start((unsigned char) *nextp))
{
- if (is_ident_start((unsigned char) *nextp))
- {
- char *downname;
- int len;
- text *part;
-
- curname = nextp++;
- while (is_ident_cont((unsigned char) *nextp))
- nextp++;
-
- len = nextp - curname;
-
- /*
- * Unlike name, we don't implicitly truncate identifiers. This
- * is useful for allowing the user to check for specific parts
- * of the identifier being too long. It's easy enough for the
- * user to get the truncated names by casting our output to
- * name[].
- */
- downname = downcase_identifier(curname, len, false, false);
- part = cstring_to_text_with_len(downname, len);
- astate = accumArrayResult(astate, PointerGetDatum(part), false,
- TEXTOID, CurrentMemoryContext);
- missing_ident = false;
- }
+ char *downname;
+ int len;
+ text *part;
+
+ curname = nextp++;
+ while (is_ident_cont((unsigned char) *nextp))
+ nextp++;
+
+ len = nextp - curname;
+
+ /*
+ * We don't implicitly truncate identifiers. This is useful for
+ * allowing the user to check for specific parts of the identifier
+ * being too long. It's easy enough for the user to get the
+ * truncated names by casting our output to name[].
+ */
+ downname = downcase_identifier(curname, len, false, false);
+ part = cstring_to_text_with_len(downname, len);
+ astate = accumArrayResult(astate, PointerGetDatum(part), false,
+ TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
}
if (missing_ident)
@@ -901,19 +848,21 @@ parse_ident(PG_FUNCTION_ARGS)
/* Different error messages based on where we failed. */
if (*nextp == '.')
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier before \".\" symbol: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier before \".\" symbol.")));
else if (after_dot)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier after \".\" symbol: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier after \".\" symbol.")));
else
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
}
while (isspace((unsigned char) *nextp))
@@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS)
{
if (strict)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("identifier contains disallowed characters: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
break;
}
}