diff options
Diffstat (limited to 'src/interfaces/ecpg/preproc/parser.c')
-rw-r--r-- | src/interfaces/ecpg/preproc/parser.c | 118 |
1 files changed, 93 insertions, 25 deletions
diff --git a/src/interfaces/ecpg/preproc/parser.c b/src/interfaces/ecpg/preproc/parser.c index c27de59828a..a2eeeba2174 100644 --- a/src/interfaces/ecpg/preproc/parser.c +++ b/src/interfaces/ecpg/preproc/parser.c @@ -6,6 +6,9 @@ * This should match src/backend/parser/parser.c, except that we do not * need to bother with re-entrant interfaces. * + * Note: ECPG doesn't report error location like the backend does. + * This file will need work if we ever want it to. + * * * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -27,8 +30,9 @@ static int lookahead_token; /* one-token lookahead */ static YYSTYPE lookahead_yylval; /* yylval for lookahead token */ static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ static char *lookahead_yytext; /* start current token */ -static char *lookahead_end; /* end of current token */ -static char lookahead_hold_char; /* to be put back at *lookahead_end */ + +static bool check_uescapechar(unsigned char escape); +static bool ecpg_isspace(char ch); /* @@ -43,13 +47,16 @@ static char lookahead_hold_char; /* to be put back at *lookahead_end */ * words. Furthermore it's not clear how to do that without re-introducing * scanner backtrack, which would cost more performance than this filter * layer does. + * + * We also use this filter to convert UIDENT and USCONST sequences into + * plain IDENT and SCONST tokens. While that could be handled by additional + * productions in the main grammar, it's more efficient to do it like this. */ int filtered_base_yylex(void) { int cur_token; int next_token; - int cur_token_length; YYSTYPE cur_yylval; YYLTYPE cur_yylloc; char *cur_yytext; @@ -61,41 +68,26 @@ filtered_base_yylex(void) base_yylval = lookahead_yylval; base_yylloc = lookahead_yylloc; base_yytext = lookahead_yytext; - *lookahead_end = lookahead_hold_char; have_lookahead = false; } else cur_token = base_yylex(); /* - * If this token isn't one that requires lookahead, just return it. If it - * does, determine the token length. (We could get that via strlen(), but - * since we have such a small set of possibilities, hardwiring seems - * feasible and more efficient.) + * If this token isn't one that requires lookahead, just return it. */ switch (cur_token) { case NOT: - cur_token_length = 3; - break; case NULLS_P: - cur_token_length = 5; - break; case WITH: - cur_token_length = 4; + case UIDENT: + case USCONST: break; default: return cur_token; } - /* - * Identify end+1 of current token. base_yylex() has temporarily stored a - * '\0' here, and will undo that when we call it again. We need to redo - * it to fully revert the lookahead call for error reporting purposes. - */ - lookahead_end = base_yytext + cur_token_length; - Assert(*lookahead_end == '\0'); - /* Save and restore lexer output variables around the call */ cur_yylval = base_yylval; cur_yylloc = base_yylloc; @@ -113,10 +105,6 @@ filtered_base_yylex(void) base_yylloc = cur_yylloc; base_yytext = cur_yytext; - /* Now revert the un-truncation of the current token */ - lookahead_hold_char = *lookahead_end; - *lookahead_end = '\0'; - have_lookahead = true; /* Replace cur_token if needed, based on lookahead */ @@ -157,7 +145,87 @@ filtered_base_yylex(void) break; } break; + case UIDENT: + case USCONST: + /* Look ahead for UESCAPE */ + if (next_token == UESCAPE) + { + /* Yup, so get third token, which had better be SCONST */ + const char *escstr; + + /* + * Again save and restore lexer output variables around the + * call + */ + cur_yylval = base_yylval; + cur_yylloc = base_yylloc; + cur_yytext = base_yytext; + + /* Get third token */ + next_token = base_yylex(); + + if (next_token != SCONST) + mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal"); + + /* + * Save and check escape string, which the scanner returns + * with quotes + */ + escstr = base_yylval.str; + if (strlen(escstr) != 3 || !check_uescapechar(escstr[1])) + mmerror(PARSE_ERROR, ET_ERROR, "invalid Unicode escape character"); + + base_yylval = cur_yylval; + base_yylloc = cur_yylloc; + base_yytext = cur_yytext; + + /* Combine 3 tokens into 1 */ + base_yylval.str = psprintf("%s UESCAPE %s", base_yylval.str, escstr); + + /* Clear have_lookahead, thereby consuming all three tokens */ + have_lookahead = false; + } + + if (cur_token == UIDENT) + cur_token = IDENT; + else if (cur_token == USCONST) + cur_token = SCONST; + break; } return cur_token; } + +/* + * check_uescapechar() and ecpg_isspace() should match their equivalents + * in pgc.l. + */ + +/* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */ +static bool +check_uescapechar(unsigned char escape) +{ + if (isxdigit(escape) + || escape == '+' + || escape == '\'' + || escape == '"' + || ecpg_isspace(escape)) + return false; + else + return true; +} + +/* + * ecpg_isspace() --- return true if flex scanner considers char whitespace + */ +static bool +ecpg_isspace(char ch) +{ + if (ch == ' ' || + ch == '\t' || + ch == '\n' || + ch == '\r' || + ch == '\f') + return true; + return false; +} |