Introduce parse_ident()

SQL-layer function to split a qualified identifier into an array of its parts. Author: Pavel Stehule, with minor editing by me and Jim Nasby
author: Teodor Sigaev <teodor@sigaev.ru> 2016-03-18 18:16:14 +0300
committer: Teodor Sigaev <teodor@sigaev.ru> 2016-03-18 18:16:14 +0300
commit: 3187d6de0e5a9e805b27c48437897e8c39071d45 (patch)
tree: 73c8b2d0ffb1f9410ca5d59bc3b62c43febbf0ea /src/backend/utils/adt/misc.c
parent: 992b5ba30dcafdc222341505b072a6b009b248a7 (diff)
download: postgresql-3187d6de0e5a9e805b27c48437897e8c39071d45.tar.gz
postgresql-3187d6de0e5a9e805b27c48437897e8c39071d45.zip
1 files changed, 224 insertions, 0 deletions
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index 43f36db47bb..4dcc5a63be7 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -27,6 +27,7 @@
 #include "commands/dbcommands.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "parser/scansup.h"
 #include "parser/keywords.h"
 #include "postmaster/syslogger.h"
 #include "rewrite/rewriteHandler.h"
@@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
 
 	PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
 }
+
+
+/*
+ * is_ident_start - can byte c begin an unquoted identifier?
+ *
+ * Accepts '_', ASCII letters and high-bit bytes (0200-0377); meant to
+ * agree with the lexer.  Used by is_ident_cont() and parse_ident().
+ */
+static bool
+is_ident_start(unsigned char c)
+{
+	bool		is_lower = (c >= 'a' && c <= 'z');
+	bool		is_upper = (c >= 'A' && c <= 'Z');
+	bool		is_highbit = (c >= 0200 && c <= 0377);
+
+	if (c == '_')
+		return true;
+	return is_lower || is_upper || is_highbit;
+}
+
+/* is_ident_cont - can byte c continue an unquoted identifier? */
+static bool
+is_ident_cont(unsigned char c)
+{
+	bool		is_digit = (c >= '0' && c <= '9');
+
+	return is_digit || is_ident_start(c);
+}
+
+/*
+ * sanitize_text - render a text value as a quoted, escaped string for use
+ * in an error message.
+ *
+ * Output is wrapped in double quotes.  \b \f \n \r \t and backslash get
+ * backslash escapes, a single quote is doubled, any other byte below ' '
+ * is printed as \uXXXX, and everything else is copied unchanged.  Returns
+ * the data buffer of a StringInfo created here.
+ */
+static char *
+sanitize_text(text *t)
+{
+	int			nbytes = VARSIZE_ANY_EXHDR(t);
+	const char *cur = VARDATA_ANY(t);
+	const char *stop = cur + nbytes;
+	StringInfo	out = makeStringInfo();
+
+	/* Opening delimiter. */
+	appendStringInfoChar(out, '"');
+
+	for (; cur < stop; cur++)
+	{
+		const char *esc = NULL;
+
+		/* Look up the fixed replacement, if this byte has one. */
+		if (*cur == '\b')
+			esc = "\\b";
+		else if (*cur == '\f')
+			esc = "\\f";
+		else if (*cur == '\n')
+			esc = "\\n";
+		else if (*cur == '\r')
+			esc = "\\r";
+		else if (*cur == '\t')
+			esc = "\\t";
+		else if (*cur == '\'')
+			esc = "''";
+		else if (*cur == '\\')
+			esc = "\\\\";
+
+		if (esc != NULL)
+			appendStringInfoString(out, esc);
+		else if ((unsigned char) *cur < ' ')
+			appendStringInfo(out, "\\u%04x", (int) *cur);
+		else
+			appendStringInfoCharMacro(out, *cur);
+	}
+
+	/* Closing delimiter. */
+	appendStringInfoChar(out, '"');
+
+	return out->data;
+}
+
+/*
+ * parse_ident - split a possibly-qualified SQL identifier into its parts.
+ *
+ * Argument 0 is the text to parse; argument 1 is the "strict" flag.  Each
+ * dot-separated part becomes one element of the returned text[] array.
+ * Quoted parts are taken verbatim with "" collapsed to "; unquoted parts
+ * go through downcase_identifier() and are never truncated.  Whitespace,
+ * as recognized by scanner_isspace(), may surround parts and dots.
+ *
+ * Raises ERRCODE_INVALID_PARAMETER_VALUE for an unterminated quote, an
+ * empty or missing part, or (only when strict) trailing characters after
+ * the last part.  Without strict, such trailing characters are ignored.
+ */
+Datum
+parse_ident(PG_FUNCTION_ARGS)
+{
+	text	   *qualname = PG_GETARG_TEXT_PP(0);
+	char	   *qualname_str = text_to_cstring(qualname);
+	bool		strict = PG_GETARG_BOOL(1);
+	char	   *nextp = qualname_str;
+	bool		after_dot = false;
+	ArrayBuildState *astate = NULL;
+
+	/* Skip leading whitespace, as defined by the lexer (not the locale). */
+	while (scanner_isspace(*nextp))
+		nextp++;
+
+	for (;;)
+	{
+		char	   *curname;
+		char	   *endp;
+		bool		missing_ident = true;
+
+		if (*nextp == '\"')
+		{
+			/* Quoted part: scan to the closing quote, undoing "" escapes. */
+			curname = nextp + 1;
+			for (;;)
+			{
+				endp = strchr(nextp + 1, '\"');
+				if (endp == NULL)
+					ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("unclosed double quotes"),
+						 errdetail("string %s is not valid identifier",
+									sanitize_text(qualname))));
+				if (endp[1] != '\"')
+					break;
+				/* Collapse "" to "; strlen(endp) covers the trailing NUL. */
+				memmove(endp, endp + 1, strlen(endp));
+				nextp = endp;
+			}
+			nextp = endp + 1;
+			*endp = '\0';
+
+			/* Show complete input string in this case. */
+			if (endp - curname == 0)
+				ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("identifier should not be empty: %s",
+							sanitize_text(qualname))));
+
+			astate = accumArrayResult(astate, CStringGetTextDatum(curname),
+									  false, TEXTOID, CurrentMemoryContext);
+			missing_ident = false;
+		}
+		else if (is_ident_start((unsigned char) *nextp))
+		{
+			char	   *downname;
+			int			len;
+			text	   *part;
+
+			curname = nextp++;
+			while (is_ident_cont((unsigned char) *nextp))
+				nextp++;
+
+			len = nextp - curname;
+
+			/*
+			 * Unlike name, we don't implicitly truncate identifiers.  This
+			 * lets the user check whether specific parts of the identifier
+			 * are too long; the truncated names are easy to get by casting
+			 * our output to name[].
+			 */
+			downname = downcase_identifier(curname, len, false, false);
+			part = cstring_to_text_with_len(downname, len);
+			astate = accumArrayResult(astate, PointerGetDatum(part), false,
+									  TEXTOID, CurrentMemoryContext);
+			missing_ident = false;
+		}
+
+		if (missing_ident)
+		{
+			/* Different error messages based on where we failed. */
+			if (*nextp == '.')
+				ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("missing valid identifier before \".\" symbol: %s",
+							sanitize_text(qualname))));
+			else if (after_dot)
+				ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("missing valid identifier after \".\" symbol: %s",
+							sanitize_text(qualname))));
+			else
+				ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("missing valid identifier: %s",
+							sanitize_text(qualname))));
+		}
+
+		/* Skip whitespace before and after an optional '.' separator. */
+		while (scanner_isspace(*nextp))
+			nextp++;
+
+		if (*nextp == '.')
+		{
+			after_dot = true;
+			nextp++;
+			while (scanner_isspace(*nextp))
+				nextp++;
+		}
+		else if (*nextp == '\0')
+		{
+			break;
+		}
+		else
+		{
+			/* Trailing junk: an error if strict, otherwise ignored. */
+			if (strict)
+				ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("identifier contains disallowed characters: %s",
+							sanitize_text(qualname))));
+			break;
+		}
+	}
+
+	PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
author	Teodor Sigaev <teodor@sigaev.ru>	2016-03-18 18:16:14 +0300
committer	Teodor Sigaev <teodor@sigaev.ru>	2016-03-18 18:16:14 +0300
commit	3187d6de0e5a9e805b27c48437897e8c39071d45 (patch)
tree	73c8b2d0ffb1f9410ca5d59bc3b62c43febbf0ea /src/backend/utils/adt/misc.c
parent	992b5ba30dcafdc222341505b072a6b009b248a7 (diff)
download	postgresql-3187d6de0e5a9e805b27c48437897e8c39071d45.tar.gz postgresql-3187d6de0e5a9e805b27c48437897e8c39071d45.zip