diff options
author | Teodor Sigaev <teodor@sigaev.ru> | 2016-03-18 18:16:14 +0300 |
---|---|---|
committer | Teodor Sigaev <teodor@sigaev.ru> | 2016-03-18 18:16:14 +0300 |
commit | 3187d6de0e5a9e805b27c48437897e8c39071d45 (patch) | |
tree | 73c8b2d0ffb1f9410ca5d59bc3b62c43febbf0ea /src/backend/utils/adt/misc.c | |
parent | 992b5ba30dcafdc222341505b072a6b009b248a7 (diff) | |
download | postgresql-3187d6de0e5a9e805b27c48437897e8c39071d45.tar.gz postgresql-3187d6de0e5a9e805b27c48437897e8c39071d45.zip |
Introduce parse_ident()
SQL-layer function to split qualified identifier into array parts.
Author: Pavel Stehule with minor editorization by me and Jim Nasby
Diffstat (limited to 'src/backend/utils/adt/misc.c')
-rw-r--r-- | src/backend/utils/adt/misc.c | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index 43f36db47bb..4dcc5a63be7 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -27,6 +27,7 @@ #include "commands/dbcommands.h" #include "funcapi.h" #include "miscadmin.h" +#include "parser/scansup.h" #include "parser/keywords.h" #include "postmaster/syslogger.h" #include "rewrite/rewriteHandler.h" @@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS) PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS); } + + +/* + * This simple parser utility are compatible with lexer implementation, + * used only in parse_ident function + */ +static bool +is_ident_start(unsigned char c) +{ + if (c == '_') + return true; + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) + return true; + + if (c >= 0200 && c <= 0377) + return true; + + return false; +} + +static bool +is_ident_cont(unsigned char c) +{ + if (c >= '0' && c <= '9') + return true; + + return is_ident_start(c); +} + +/* + * Sanitize SQL string for using in error message. + */ +static char * +sanitize_text(text *t) +{ + int len = VARSIZE_ANY_EXHDR(t); + const char *p = VARDATA_ANY(t); + StringInfo dstr; + + dstr = makeStringInfo(); + + appendStringInfoChar(dstr, '"'); + + while (len--) + { + switch (*p) + { + case '\b': + appendStringInfoString(dstr, "\\b"); + break; + case '\f': + appendStringInfoString(dstr, "\\f"); + break; + case '\n': + appendStringInfoString(dstr, "\\n"); + break; + case '\r': + appendStringInfoString(dstr, "\\r"); + break; + case '\t': + appendStringInfoString(dstr, "\\t"); + break; + case '\'': + appendStringInfoString(dstr, "''"); + break; + case '\\': + appendStringInfoString(dstr, "\\\\"); + break; + default: + if ((unsigned char) *p < ' ') + appendStringInfo(dstr, "\\u%04x", (int) *p); + else + appendStringInfoCharMacro(dstr, *p); + break; + } + p++; + } + + appendStringInfoChar(dstr, '"'); + + return dstr->data; +} + +/* + * parse_ident - parse SQL composed identifier to separate identifiers. + * When strict mode is active (second parameter), then any chars after + * last identifiers are disallowed. + */ +Datum +parse_ident(PG_FUNCTION_ARGS) +{ + text *qualname; + char *qualname_str; + bool strict; + char *nextp; + bool after_dot = false; + ArrayBuildState *astate = NULL; + + qualname = PG_GETARG_TEXT_PP(0); + qualname_str = text_to_cstring(qualname); + strict = PG_GETARG_BOOL(1); + + nextp = qualname_str; + + /* skip leading whitespace */ + while (isspace((unsigned char) *nextp)) + nextp++; + + for (;;) + { + char *curname; + char *endp; + bool missing_ident; + + missing_ident = true; + + if (*nextp == '\"') + { + curname = nextp + 1; + for (;;) + { + endp = strchr(nextp + 1, '\"'); + if (endp == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unclosed double quotes"), + errdetail("string %s is not valid identifier", + sanitize_text(qualname)))); + if (endp[1] != '\"') + break; + memmove(endp, endp + 1, strlen(endp)); + nextp = endp; + } + nextp = endp + 1; + *endp = '\0'; + + /* Show complete input string in this case. */ + if (endp - curname == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("identifier should not be empty: %s", + sanitize_text(qualname)))); + + astate = accumArrayResult(astate, CStringGetTextDatum(curname), + false, TEXTOID, CurrentMemoryContext); + missing_ident = false; + } + else + { + if (is_ident_start((unsigned char) *nextp)) + { + char *downname; + int len; + text *part; + + curname = nextp++; + while (is_ident_cont((unsigned char) *nextp)) + nextp++; + + len = nextp - curname; + + /* + * Unlike name, we don't implicitly truncate identifiers. This + * is useful for allowing the user to check for specific parts + * of the identifier being too long. It's easy enough for the + * user to get the truncated names by casting our output to + * name[]. + */ + downname = downcase_identifier(curname, len, false, false); + part = cstring_to_text_with_len(downname, len); + astate = accumArrayResult(astate, PointerGetDatum(part), false, + TEXTOID, CurrentMemoryContext); + missing_ident = false; + } + } + + if (missing_ident) + { + /* Different error messages based on where we failed. */ + if (*nextp == '.') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing valid identifier before \".\" symbol: %s", + sanitize_text(qualname)))); + else if (after_dot) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing valid identifier after \".\" symbol: %s", + sanitize_text(qualname)))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing valid identifier: %s", + sanitize_text(qualname)))); + } + + while (isspace((unsigned char) *nextp)) + nextp++; + + if (*nextp == '.') + { + after_dot = true; + nextp++; + while (isspace((unsigned char) *nextp)) + nextp++; + } + else if (*nextp == '\0') + { + break; + } + else + { + if (strict) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("identifier contains disallowed characters: %s", + sanitize_text(qualname)))); + break; + } + } + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} |