diff options
Diffstat (limited to 'src/interfaces/ecpg/preproc/parser.c')
-rw-r--r-- | src/interfaces/ecpg/preproc/parser.c | 116 |
1 files changed, 65 insertions, 51 deletions
diff --git a/src/interfaces/ecpg/preproc/parser.c b/src/interfaces/ecpg/preproc/parser.c index f1188269533..099a213d118 100644 --- a/src/interfaces/ecpg/preproc/parser.c +++ b/src/interfaces/ecpg/preproc/parser.c @@ -3,11 +3,8 @@ * parser.c * Main entry point/driver for PostgreSQL grammar * - * Note that the grammar is not allowed to perform any table access - * (since we need to be able to do basic parsing even while inside an - * aborted transaction). Therefore, the data structures returned by - * the grammar are "raw" parsetrees that still need to be analyzed by - * analyze.c and related files. + * This should match src/backend/parser/parser.c, except that we do not + * need to bother with re-entrant interfaces. * * * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group @@ -29,18 +26,21 @@ static bool have_lookahead; /* is lookahead info valid? */ static int lookahead_token; /* one-token lookahead */ static YYSTYPE lookahead_yylval; /* yylval for lookahead token */ static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ +static char *lookahead_yytext; /* start current token */ +static char *lookahead_end; /* end of current token */ +static char lookahead_hold_char; /* to be put back at *lookahead_end */ /* * Intermediate filter between parser and base lexer (base_yylex in scan.l). * - * The filter is needed because in some cases the standard SQL grammar + * This filter is needed because in some cases the standard SQL grammar * requires more than one token lookahead. We reduce these cases to one-token - * lookahead by combining tokens here, in order to keep the grammar LALR(1). + * lookahead by replacing tokens here, in order to keep the grammar LALR(1). * * Using a filter is simpler than trying to recognize multiword tokens * directly in scan.l, because we'd have to allow for comments between the - * words. Furthermore it's not clear how to do it without re-introducing + * words. Furthermore it's not clear how to do that without re-introducing * scanner backtrack, which would cost more performance than this filter * layer does. */ @@ -49,8 +49,10 @@ filtered_base_yylex(void) { int cur_token; int next_token; + int cur_token_length; YYSTYPE cur_yylval; YYLTYPE cur_yylloc; + char *cur_yytext; /* Get next token --- we might already have it */ if (have_lookahead) @@ -58,74 +60,86 @@ filtered_base_yylex(void) cur_token = lookahead_token; base_yylval = lookahead_yylval; base_yylloc = lookahead_yylloc; + yytext = lookahead_yytext; + *lookahead_end = lookahead_hold_char; have_lookahead = false; } else cur_token = base_yylex(); - /* Do we need to look ahead for a possible multiword token? */ + /* + * If this token isn't one that requires lookahead, just return it. If it + * does, determine the token length. (We could get that via strlen(), but + * since we have such a small set of possibilities, hardwiring seems + * feasible and more efficient.) + */ switch (cur_token) { case NULLS_P: + cur_token_length = 5; + break; + case WITH: + cur_token_length = 4; + break; + default: + return cur_token; + } + + /* + * Identify end+1 of current token. base_yylex() has temporarily stored a + * '\0' here, and will undo that when we call it again. We need to redo + * it to fully revert the lookahead call for error reporting purposes. + */ + lookahead_end = yytext + cur_token_length; + Assert(*lookahead_end == '\0'); + + /* Save and restore lexer output variables around the call */ + cur_yylval = base_yylval; + cur_yylloc = base_yylloc; + cur_yytext = yytext; + + /* Get next token, saving outputs into lookahead variables */ + next_token = base_yylex(); + + lookahead_token = next_token; + lookahead_yylval = base_yylval; + lookahead_yylloc = base_yylloc; + lookahead_yytext = yytext; + + base_yylval = cur_yylval; + base_yylloc = cur_yylloc; + yytext = cur_yytext; + + /* Now revert the un-truncation of the current token */ + lookahead_hold_char = *lookahead_end; + *lookahead_end = '\0'; + + have_lookahead = true; - /* - * NULLS FIRST and NULLS LAST must be reduced to one token - */ - cur_yylval = base_yylval; - cur_yylloc = base_yylloc; - next_token = base_yylex(); + /* Replace cur_token if needed, based on lookahead */ + switch (cur_token) + { + case NULLS_P: + /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */ switch (next_token) { case FIRST_P: - cur_token = NULLS_FIRST; - break; case LAST_P: - cur_token = NULLS_LAST; - break; - default: - /* save the lookahead token for next time */ - lookahead_token = next_token; - lookahead_yylval = base_yylval; - lookahead_yylloc = base_yylloc; - have_lookahead = true; - /* and back up the output info to cur_token */ - base_yylval = cur_yylval; - base_yylloc = cur_yylloc; + cur_token = NULLS_LA; break; } break; case WITH: - - /* - * WITH TIME must be reduced to one token - */ - cur_yylval = base_yylval; - cur_yylloc = base_yylloc; - next_token = base_yylex(); + /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */ switch (next_token) { case TIME: - cur_token = WITH_TIME; - break; case ORDINALITY: - cur_token = WITH_ORDINALITY; - break; - default: - /* save the lookahead token for next time */ - lookahead_token = next_token; - lookahead_yylval = base_yylval; - lookahead_yylloc = base_yylloc; - have_lookahead = true; - /* and back up the output info to cur_token */ - base_yylval = cur_yylval; - base_yylloc = cur_yylloc; + cur_token = WITH_LA; break; } break; - - default: - break; } return cur_token; |