aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/parser/gram.y35
-rw-r--r--src/backend/parser/parser.c105
-rw-r--r--src/include/parser/gramparse.h2
-rw-r--r--src/interfaces/ecpg/preproc/parse.pl6
-rw-r--r--src/interfaces/ecpg/preproc/parser.c116
-rw-r--r--src/test/regress/expected/foreign_data.out2
-rw-r--r--src/test/regress/expected/with.out15
-rw-r--r--src/test/regress/sql/with.sql5
8 files changed, 173 insertions, 113 deletions
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 6c210028753..581f7a1c1c6 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -633,9 +633,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
/*
* The grammar thinks these are keywords, but they are not in the kwlist.h
* list and so can never be entered directly. The filter in parser.c
- * creates these tokens when required.
+ * creates these tokens when required (based on looking one token ahead).
*/
-%token NULLS_FIRST NULLS_LAST WITH_ORDINALITY WITH_TIME
+%token NULLS_LA WITH_LA
/* Precedence: lowest to highest */
@@ -873,6 +873,7 @@ CreateRoleStmt:
opt_with: WITH {}
+ | WITH_LA {}
| /*EMPTY*/ {}
;
@@ -6673,8 +6674,8 @@ opt_asc_desc: ASC { $$ = SORTBY_ASC; }
| /*EMPTY*/ { $$ = SORTBY_DEFAULT; }
;
-opt_nulls_order: NULLS_FIRST { $$ = SORTBY_NULLS_FIRST; }
- | NULLS_LAST { $$ = SORTBY_NULLS_LAST; }
+opt_nulls_order: NULLS_LA FIRST_P { $$ = SORTBY_NULLS_FIRST; }
+ | NULLS_LA LAST_P { $$ = SORTBY_NULLS_LAST; }
| /*EMPTY*/ { $$ = SORTBY_NULLS_DEFAULT; }
;
@@ -8923,7 +8924,7 @@ AlterTSDictionaryStmt:
;
AlterTSConfigurationStmt:
- ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list WITH any_name_list
+ ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list any_with any_name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
@@ -8933,7 +8934,7 @@ AlterTSConfigurationStmt:
n->replace = false;
$$ = (Node*)n;
}
- | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list WITH any_name_list
+ | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list any_with any_name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
@@ -8943,7 +8944,7 @@ AlterTSConfigurationStmt:
n->replace = false;
$$ = (Node*)n;
}
- | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING REPLACE any_name WITH any_name
+ | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING REPLACE any_name any_with any_name
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
@@ -8953,7 +8954,7 @@ AlterTSConfigurationStmt:
n->replace = true;
$$ = (Node*)n;
}
- | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list REPLACE any_name WITH any_name
+ | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list REPLACE any_name any_with any_name
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
@@ -8981,6 +8982,11 @@ AlterTSConfigurationStmt:
}
;
+/* Use this if TIME or ORDINALITY after WITH should be taken as an identifier */
+any_with: WITH {}
+ | WITH_LA {}
+ ;
+
/*****************************************************************************
*
@@ -9891,6 +9897,8 @@ simple_select:
* AS (query) [ SEARCH or CYCLE clause ]
*
* We don't currently support the SEARCH or CYCLE clause.
+ *
+ * Recognizing WITH_LA here allows a CTE to be named TIME or ORDINALITY.
*/
with_clause:
WITH cte_list
@@ -9900,6 +9908,13 @@ with_clause:
$$->recursive = false;
$$->location = @1;
}
+ | WITH_LA cte_list
+ {
+ $$ = makeNode(WithClause);
+ $$->ctes = $2;
+ $$->recursive = false;
+ $$->location = @1;
+ }
| WITH RECURSIVE cte_list
{
$$ = makeNode(WithClause);
@@ -10601,7 +10616,7 @@ opt_col_def_list: AS '(' TableFuncElementList ')' { $$ = $3; }
| /*EMPTY*/ { $$ = NIL; }
;
-opt_ordinality: WITH_ORDINALITY { $$ = true; }
+opt_ordinality: WITH_LA ORDINALITY { $$ = true; }
| /*EMPTY*/ { $$ = false; }
;
@@ -11057,7 +11072,7 @@ ConstInterval:
;
opt_timezone:
- WITH_TIME ZONE { $$ = TRUE; }
+ WITH_LA TIME ZONE { $$ = TRUE; }
| WITHOUT TIME ZONE { $$ = FALSE; }
| /*EMPTY*/ { $$ = FALSE; }
;
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c
index db49275e00a..b17771d4cca 100644
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -64,13 +64,13 @@ raw_parser(const char *str)
/*
* Intermediate filter between parser and core lexer (core_yylex in scan.l).
*
- * The filter is needed because in some cases the standard SQL grammar
+ * This filter is needed because in some cases the standard SQL grammar
* requires more than one token lookahead. We reduce these cases to one-token
- * lookahead by combining tokens here, in order to keep the grammar LALR(1).
+ * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
*
* Using a filter is simpler than trying to recognize multiword tokens
* directly in scan.l, because we'd have to allow for comments between the
- * words. Furthermore it's not clear how to do it without re-introducing
+ * words. Furthermore it's not clear how to do that without re-introducing
* scanner backtrack, which would cost more performance than this filter
* layer does.
*
@@ -84,7 +84,7 @@ base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
int cur_token;
int next_token;
- core_YYSTYPE cur_yylval;
+ int cur_token_length;
YYLTYPE cur_yylloc;
/* Get next token --- we might already have it */
@@ -93,74 +93,85 @@ base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
cur_token = yyextra->lookahead_token;
lvalp->core_yystype = yyextra->lookahead_yylval;
*llocp = yyextra->lookahead_yylloc;
+ *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
yyextra->have_lookahead = false;
}
else
cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
- /* Do we need to look ahead for a possible multiword token? */
+ /*
+ * If this token isn't one that requires lookahead, just return it. If it
+ * does, determine the token length. (We could get that via strlen(), but
+ * since we have such a small set of possibilities, hardwiring seems
+ * feasible and more efficient.)
+ */
switch (cur_token)
{
case NULLS_P:
+ cur_token_length = 5;
+ break;
+ case WITH:
+ cur_token_length = 4;
+ break;
+ default:
+ return cur_token;
+ }
- /*
- * NULLS FIRST and NULLS LAST must be reduced to one token
- */
- cur_yylval = lvalp->core_yystype;
- cur_yylloc = *llocp;
- next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
+ /*
+ * Identify end+1 of current token. core_yylex() has temporarily stored a
+ * '\0' here, and will undo that when we call it again. We need to redo
+ * it to fully revert the lookahead call for error reporting purposes.
+ */
+ yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
+ *llocp + cur_token_length;
+ Assert(*(yyextra->lookahead_end) == '\0');
+
+ /*
+ * Save and restore *llocp around the call. It might look like we could
+ * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
+ * does not work because flex actually holds onto the last-passed pointer
+ * internally, and will use that for error reporting. We need any error
+ * reports to point to the current token, not the next one.
+ */
+ cur_yylloc = *llocp;
+
+ /* Get next token, saving outputs into lookahead variables */
+ next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
+ yyextra->lookahead_token = next_token;
+ yyextra->lookahead_yylloc = *llocp;
+
+ *llocp = cur_yylloc;
+
+ /* Now revert the un-truncation of the current token */
+ yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
+ *(yyextra->lookahead_end) = '\0';
+
+ yyextra->have_lookahead = true;
+
+ /* Replace cur_token if needed, based on lookahead */
+ switch (cur_token)
+ {
+ case NULLS_P:
+ /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
switch (next_token)
{
case FIRST_P:
- cur_token = NULLS_FIRST;
- break;
case LAST_P:
- cur_token = NULLS_LAST;
- break;
- default:
- /* save the lookahead token for next time */
- yyextra->lookahead_token = next_token;
- yyextra->lookahead_yylval = lvalp->core_yystype;
- yyextra->lookahead_yylloc = *llocp;
- yyextra->have_lookahead = true;
- /* and back up the output info to cur_token */
- lvalp->core_yystype = cur_yylval;
- *llocp = cur_yylloc;
+ cur_token = NULLS_LA;
break;
}
break;
case WITH:
-
- /*
- * WITH TIME and WITH ORDINALITY must each be reduced to one token
- */
- cur_yylval = lvalp->core_yystype;
- cur_yylloc = *llocp;
- next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
+ /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
switch (next_token)
{
case TIME:
- cur_token = WITH_TIME;
- break;
case ORDINALITY:
- cur_token = WITH_ORDINALITY;
- break;
- default:
- /* save the lookahead token for next time */
- yyextra->lookahead_token = next_token;
- yyextra->lookahead_yylval = lvalp->core_yystype;
- yyextra->lookahead_yylloc = *llocp;
- yyextra->have_lookahead = true;
- /* and back up the output info to cur_token */
- lvalp->core_yystype = cur_yylval;
- *llocp = cur_yylloc;
+ cur_token = WITH_LA;
break;
}
break;
-
- default:
- break;
}
return cur_token;
diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h
index d9df30374b2..100fdfb213e 100644
--- a/src/include/parser/gramparse.h
+++ b/src/include/parser/gramparse.h
@@ -46,6 +46,8 @@ typedef struct base_yy_extra_type
int lookahead_token; /* one-token lookahead */
core_YYSTYPE lookahead_yylval; /* yylval for lookahead token */
YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */
+ char *lookahead_end; /* end of current token */
+ char lookahead_hold_char; /* to be put back at *lookahead_end */
/*
* State variables that belong to the grammar.
diff --git a/src/interfaces/ecpg/preproc/parse.pl b/src/interfaces/ecpg/preproc/parse.pl
index f998310ef46..36dce803863 100644
--- a/src/interfaces/ecpg/preproc/parse.pl
+++ b/src/interfaces/ecpg/preproc/parse.pl
@@ -42,10 +42,8 @@ my %replace_token = (
# or in the block
my %replace_string = (
- 'WITH_TIME' => 'with time',
- 'WITH_ORDINALITY' => 'with ordinality',
- 'NULLS_FIRST' => 'nulls first',
- 'NULLS_LAST' => 'nulls last',
+ 'NULLS_LA' => 'nulls',
+ 'WITH_LA' => 'with',
'TYPECAST' => '::',
'DOT_DOT' => '..',
'COLON_EQUALS' => ':=',);
diff --git a/src/interfaces/ecpg/preproc/parser.c b/src/interfaces/ecpg/preproc/parser.c
index f1188269533..099a213d118 100644
--- a/src/interfaces/ecpg/preproc/parser.c
+++ b/src/interfaces/ecpg/preproc/parser.c
@@ -3,11 +3,8 @@
* parser.c
* Main entry point/driver for PostgreSQL grammar
*
- * Note that the grammar is not allowed to perform any table access
- * (since we need to be able to do basic parsing even while inside an
- * aborted transaction). Therefore, the data structures returned by
- * the grammar are "raw" parsetrees that still need to be analyzed by
- * analyze.c and related files.
+ * This should match src/backend/parser/parser.c, except that we do not
+ * need to bother with re-entrant interfaces.
*
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
@@ -29,18 +26,21 @@ static bool have_lookahead; /* is lookahead info valid? */
static int lookahead_token; /* one-token lookahead */
static YYSTYPE lookahead_yylval; /* yylval for lookahead token */
static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */
+static char *lookahead_yytext; /* start current token */
+static char *lookahead_end; /* end of current token */
+static char lookahead_hold_char; /* to be put back at *lookahead_end */
/*
* Intermediate filter between parser and base lexer (base_yylex in scan.l).
*
- * The filter is needed because in some cases the standard SQL grammar
+ * This filter is needed because in some cases the standard SQL grammar
* requires more than one token lookahead. We reduce these cases to one-token
- * lookahead by combining tokens here, in order to keep the grammar LALR(1).
+ * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
*
* Using a filter is simpler than trying to recognize multiword tokens
* directly in scan.l, because we'd have to allow for comments between the
- * words. Furthermore it's not clear how to do it without re-introducing
+ * words. Furthermore it's not clear how to do that without re-introducing
* scanner backtrack, which would cost more performance than this filter
* layer does.
*/
@@ -49,8 +49,10 @@ filtered_base_yylex(void)
{
int cur_token;
int next_token;
+ int cur_token_length;
YYSTYPE cur_yylval;
YYLTYPE cur_yylloc;
+ char *cur_yytext;
/* Get next token --- we might already have it */
if (have_lookahead)
@@ -58,74 +60,86 @@ filtered_base_yylex(void)
cur_token = lookahead_token;
base_yylval = lookahead_yylval;
base_yylloc = lookahead_yylloc;
+ yytext = lookahead_yytext;
+ *lookahead_end = lookahead_hold_char;
have_lookahead = false;
}
else
cur_token = base_yylex();
- /* Do we need to look ahead for a possible multiword token? */
+ /*
+ * If this token isn't one that requires lookahead, just return it. If it
+ * does, determine the token length. (We could get that via strlen(), but
+ * since we have such a small set of possibilities, hardwiring seems
+ * feasible and more efficient.)
+ */
switch (cur_token)
{
case NULLS_P:
+ cur_token_length = 5;
+ break;
+ case WITH:
+ cur_token_length = 4;
+ break;
+ default:
+ return cur_token;
+ }
+
+ /*
+ * Identify end+1 of current token. base_yylex() has temporarily stored a
+ * '\0' here, and will undo that when we call it again. We need to redo
+ * it to fully revert the lookahead call for error reporting purposes.
+ */
+ lookahead_end = yytext + cur_token_length;
+ Assert(*lookahead_end == '\0');
+
+ /* Save and restore lexer output variables around the call */
+ cur_yylval = base_yylval;
+ cur_yylloc = base_yylloc;
+ cur_yytext = yytext;
+
+ /* Get next token, saving outputs into lookahead variables */
+ next_token = base_yylex();
+
+ lookahead_token = next_token;
+ lookahead_yylval = base_yylval;
+ lookahead_yylloc = base_yylloc;
+ lookahead_yytext = yytext;
+
+ base_yylval = cur_yylval;
+ base_yylloc = cur_yylloc;
+ yytext = cur_yytext;
+
+ /* Now revert the un-truncation of the current token */
+ lookahead_hold_char = *lookahead_end;
+ *lookahead_end = '\0';
+
+ have_lookahead = true;
- /*
- * NULLS FIRST and NULLS LAST must be reduced to one token
- */
- cur_yylval = base_yylval;
- cur_yylloc = base_yylloc;
- next_token = base_yylex();
+ /* Replace cur_token if needed, based on lookahead */
+ switch (cur_token)
+ {
+ case NULLS_P:
+ /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
switch (next_token)
{
case FIRST_P:
- cur_token = NULLS_FIRST;
- break;
case LAST_P:
- cur_token = NULLS_LAST;
- break;
- default:
- /* save the lookahead token for next time */
- lookahead_token = next_token;
- lookahead_yylval = base_yylval;
- lookahead_yylloc = base_yylloc;
- have_lookahead = true;
- /* and back up the output info to cur_token */
- base_yylval = cur_yylval;
- base_yylloc = cur_yylloc;
+ cur_token = NULLS_LA;
break;
}
break;
case WITH:
-
- /*
- * WITH TIME must be reduced to one token
- */
- cur_yylval = base_yylval;
- cur_yylloc = base_yylloc;
- next_token = base_yylex();
+ /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
switch (next_token)
{
case TIME:
- cur_token = WITH_TIME;
- break;
case ORDINALITY:
- cur_token = WITH_ORDINALITY;
- break;
- default:
- /* save the lookahead token for next time */
- lookahead_token = next_token;
- lookahead_yylval = base_yylval;
- lookahead_yylloc = base_yylloc;
- have_lookahead = true;
- /* and back up the output info to cur_token */
- base_yylval = cur_yylval;
- base_yylloc = cur_yylloc;
+ cur_token = WITH_LA;
break;
}
break;
-
- default:
- break;
}
return cur_token;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index 4795c83c9b6..632b7e5bffc 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -663,7 +663,7 @@ LINE 1: CREATE FOREIGN TABLE ft1 ();
CREATE FOREIGN TABLE ft1 () SERVER no_server; -- ERROR
ERROR: server "no_server" does not exist
CREATE FOREIGN TABLE ft1 () SERVER s0 WITH OIDS; -- ERROR
-ERROR: syntax error at or near "WITH OIDS"
+ERROR: syntax error at or near "WITH"
LINE 1: CREATE FOREIGN TABLE ft1 () SERVER s0 WITH OIDS;
^
CREATE FOREIGN TABLE ft1 (
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 06b372bf519..524e0ef2c68 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -2155,3 +2155,18 @@ WITH t AS (
VALUES(FALSE);
ERROR: conditional DO INSTEAD rules are not supported for data-modifying statements in WITH
DROP RULE y_rule ON y;
+-- check that parser lookahead for WITH doesn't cause any odd behavior
+create table foo (with baz); -- fail, WITH is a reserved word
+ERROR: syntax error at or near "with"
+LINE 1: create table foo (with baz);
+ ^
+create table foo (with ordinality); -- fail, WITH is a reserved word
+ERROR: syntax error at or near "with"
+LINE 1: create table foo (with ordinality);
+ ^
+with ordinality as (select 1 as x) select * from ordinality;
+ x
+---
+ 1
+(1 row)
+
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index c7163699576..1687c119830 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -956,3 +956,8 @@ WITH t AS (
)
VALUES(FALSE);
DROP RULE y_rule ON y;
+
+-- check that parser lookahead for WITH doesn't cause any odd behavior
+create table foo (with baz); -- fail, WITH is a reserved word
+create table foo (with ordinality); -- fail, WITH is a reserved word
+with ordinality as (select 1 as x) select * from ordinality;