about | summary | refs | log | tree | commit | diff
path: root/src/backend/utils/adt/misc.c
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2016-03-28 01:00:30 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2016-03-28 01:00:30 -0400
commit d12e5bb79bb535c2df13b76cd7d01f0bb8dc8e4d (patch)
tree fffb6e2f681baf125f307fc8519175bc529ecb34 /src/backend/utils/adt/misc.c
parent 499a50571c72f41bb1365970d55dae5c8afcb6ba (diff)
download postgresql-d12e5bb79bb535c2df13b76cd7d01f0bb8dc8e4d.tar.gz
postgresql-d12e5bb79bb535c2df13b76cd7d01f0bb8dc8e4d.zip
Code and docs review for commit 3187d6de0e5a9e805b27c48437897e8c39071d45.
Fix up check for high-bit-set characters, which provoked "comparison is always true due to limited range of data type" warnings on some compilers, and was unlike the way we do it elsewhere anyway. Fix omission of "$" from the set of valid identifier continuation characters. Get rid of sanitize_text(), which was utterly inconsistent with any other error report anywhere in the system, and wasn't even well designed on its own terms (double-quoting the result string without escaping contained double quotes doesn't seem very well thought out). Fix up error messages, which didn't follow the message style guidelines very well, and were overly specific in situations where the actual mistake might not be what they said. Improve documentation. (I started out just intending to fix the compiler warning, but the more I looked at the patch the less I liked it.)
Diffstat (limited to 'src/backend/utils/adt/misc.c')
-rw-r--r-- src/backend/utils/adt/misc.c 199
1 files changed, 74 insertions, 125 deletions
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index faa8ef3c913..6f7c4078161 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
/*
- * This simple parser utility are compatible with lexer implementation,
- * used only in parse_ident function
+ * Is character a valid identifier start?
+ * Must match scan.l's {ident_start} character class.
*/
static bool
is_ident_start(unsigned char c)
{
+ /* Underscores and ASCII letters are OK */
if (c == '_')
return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true;
-
- if (c >= 0200 && c <= 0377)
+ /* Any high-bit-set character is OK (might be part of a multibyte char) */
+ if (IS_HIGHBIT_SET(c))
return true;
-
return false;
}
+/*
+ * Is character a valid identifier continuation?
+ * Must match scan.l's {ident_cont} character class.
+ */
static bool
is_ident_cont(unsigned char c)
{
- if (c >= '0' && c <= '9')
+ /* Can be digit or dollar sign ... */
+ if ((c >= '0' && c <= '9') || c == '$')
return true;
-
+ /* ... or an identifier start character */
return is_ident_start(c);
}
/*
- * Sanitize SQL string for using in error message.
- */
-static char *
-sanitize_text(text *t)
-{
- int len = VARSIZE_ANY_EXHDR(t);
- const char *p = VARDATA_ANY(t);
- StringInfo dstr;
-
- dstr = makeStringInfo();
-
- appendStringInfoChar(dstr, '"');
-
- while (len--)
- {
- switch (*p)
- {
- case '\b':
- appendStringInfoString(dstr, "\\b");
- break;
- case '\f':
- appendStringInfoString(dstr, "\\f");
- break;
- case '\n':
- appendStringInfoString(dstr, "\\n");
- break;
- case '\r':
- appendStringInfoString(dstr, "\\r");
- break;
- case '\t':
- appendStringInfoString(dstr, "\\t");
- break;
- case '\'':
- appendStringInfoString(dstr, "''");
- break;
- case '\\':
- appendStringInfoString(dstr, "\\\\");
- break;
- default:
- if ((unsigned char) *p < ' ')
- appendStringInfo(dstr, "\\u%04x", (int) *p);
- else
- appendStringInfoCharMacro(dstr, *p);
- break;
- }
- p++;
- }
-
- appendStringInfoChar(dstr, '"');
-
- return dstr->data;
-}
-
-/*
- * parse_ident - parse SQL composed identifier to separate identifiers.
+ * parse_ident - parse a SQL qualified identifier into separate identifiers.
* When strict mode is active (second parameter), then any chars after
- * last identifiers are disallowed.
+ * the last identifier are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
- text *qualname;
- char *qualname_str;
- bool strict;
+ text *qualname = PG_GETARG_TEXT_PP(0);
+ bool strict = PG_GETARG_BOOL(1);
+ char *qualname_str = text_to_cstring(qualname);
+ ArrayBuildState *astate = NULL;
char *nextp;
bool after_dot = false;
- ArrayBuildState *astate = NULL;
-
- qualname = PG_GETARG_TEXT_PP(0);
- qualname_str = text_to_cstring(qualname);
- strict = PG_GETARG_BOOL(1);
+ /*
+ * The code below scribbles on qualname_str in some cases, so we should
+ * reconvert qualname if we need to show the original string in error
+ * messages.
+ */
nextp = qualname_str;
/* skip leading whitespace */
@@ -830,25 +782,24 @@ parse_ident(PG_FUNCTION_ARGS)
for (;;)
{
- char *curname;
- char *endp;
- bool missing_ident;
-
- missing_ident = true;
+ char *curname;
+ bool missing_ident = true;
- if (*nextp == '\"')
+ if (*nextp == '"')
{
+ char *endp;
+
curname = nextp + 1;
for (;;)
{
- endp = strchr(nextp + 1, '\"');
+ endp = strchr(nextp + 1, '"');
if (endp == NULL)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unclosed double quotes"),
- errdetail("string %s is not valid identifier",
- sanitize_text(qualname))));
- if (endp[1] != '\"')
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("String has unclosed double quotes.")));
+ if (endp[1] != '"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
@@ -856,44 +807,40 @@ parse_ident(PG_FUNCTION_ARGS)
nextp = endp + 1;
*endp = '\0';
- /* Show complete input string in this case. */
if (endp - curname == 0)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("identifier should not be empty: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("Quoted identifier must not be empty.")));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
- else
+ else if (is_ident_start((unsigned char) *nextp))
{
- if (is_ident_start((unsigned char) *nextp))
- {
- char *downname;
- int len;
- text *part;
-
- curname = nextp++;
- while (is_ident_cont((unsigned char) *nextp))
- nextp++;
-
- len = nextp - curname;
-
- /*
- * Unlike name, we don't implicitly truncate identifiers. This
- * is useful for allowing the user to check for specific parts
- * of the identifier being too long. It's easy enough for the
- * user to get the truncated names by casting our output to
- * name[].
- */
- downname = downcase_identifier(curname, len, false, false);
- part = cstring_to_text_with_len(downname, len);
- astate = accumArrayResult(astate, PointerGetDatum(part), false,
- TEXTOID, CurrentMemoryContext);
- missing_ident = false;
- }
+ char *downname;
+ int len;
+ text *part;
+
+ curname = nextp++;
+ while (is_ident_cont((unsigned char) *nextp))
+ nextp++;
+
+ len = nextp - curname;
+
+ /*
+ * We don't implicitly truncate identifiers. This is useful for
+ * allowing the user to check for specific parts of the identifier
+ * being too long. It's easy enough for the user to get the
+ * truncated names by casting our output to name[].
+ */
+ downname = downcase_identifier(curname, len, false, false);
+ part = cstring_to_text_with_len(downname, len);
+ astate = accumArrayResult(astate, PointerGetDatum(part), false,
+ TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
}
if (missing_ident)
@@ -901,19 +848,21 @@ parse_ident(PG_FUNCTION_ARGS)
/* Different error messages based on where we failed. */
if (*nextp == '.')
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier before \".\" symbol: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier before \".\" symbol.")));
else if (after_dot)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier after \".\" symbol: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier after \".\" symbol.")));
else
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
}
while (isspace((unsigned char) *nextp))
@@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS)
{
if (strict)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("identifier contains disallowed characters: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
break;
}
}