diff options
Diffstat (limited to 'src/bin/pgbench/exprscan.l')
-rw-r--r-- | src/bin/pgbench/exprscan.l | 320 |
1 files changed, 251 insertions, 69 deletions
diff --git a/src/bin/pgbench/exprscan.l b/src/bin/pgbench/exprscan.l index 00cb74d7dad..d069c5b05b0 100644 --- a/src/bin/pgbench/exprscan.l +++ b/src/bin/pgbench/exprscan.l @@ -2,7 +2,18 @@ /*------------------------------------------------------------------------- * * exprscan.l - * a lexical scanner for a simple expression syntax + * lexical scanner for pgbench backslash commands + * + * This lexer supports two operating modes: + * + * In INITIAL state, just parse off whitespace-separated words (this mode + * is basically equivalent to strtok(), which is what we used to use). + * + * In EXPR state, lex for the simple expression syntax of exprparse.y. + * + * In either mode, stop upon hitting newline or end of string. + * + * Note that this lexer operates within the framework created by psqlscan.l, * * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -12,19 +23,16 @@ *------------------------------------------------------------------------- */ -/* line and column number for error reporting */ -static int yyline = 0, yycol = 0; +#include "psqlscan_int.h" -/* Handles to the buffer that the lexer uses internally */ -static YY_BUFFER_STATE scanbufhandle; -static char *scanbuf; - -/* context information for error reporting */ +/* context information for reporting errors in expressions */ static const char *expr_source = NULL; -static int expr_lineno = 0; -static const char *expr_full_line = NULL; +static int expr_lineno = 0; +static int expr_start_offset = 0; static const char *expr_command = NULL; -static int expr_col = 0; + +/* indicates whether last yylex() call read a newline */ +static bool last_was_newline = false; /* * Work around a bug in flex 2.5.35: it emits a couple of functions that @@ -48,122 +56,296 @@ extern void expr_yyset_column(int column_no, yyscan_t yyscanner); %option warn %option prefix="expr_yy" +/* Character classes */ alpha [a-zA-Z_] digit [0-9] alnum [a-zA-Z0-9_] -space [ \t\r\f] +/* {space} + {nonspace} + {newline} should cover all characters */ +space [ \t\r\f\v] +nonspace [^ \t\r\f\v\n] +newline [\n] + +/* Exclusive states */ +%x EXPR %% %{ + /* Declare some local variables inside yylex(), for convenience */ + PsqlScanState cur_state = yyextra; + /* - * Force flex into the appropriate start state ... which, for this - * case, is always INITIAL. This ensures that we can transition - * between different lexers sharing the same yyscan_t. + * Force flex into the state indicated by start_state. This has a + * couple of purposes: it lets some of the functions below set a new + * starting state without ugly direct access to flex variables, and it + * allows us to transition from one flex lexer to another so that we + * can lex different parts of the source string using separate lexers. */ - BEGIN(INITIAL); + BEGIN(cur_state->start_state); + + /* Reset was-newline flag */ + last_was_newline = false; %} -"+" { yycol += yyleng; return '+'; } -"-" { yycol += yyleng; return '-'; } -"*" { yycol += yyleng; return '*'; } -"/" { yycol += yyleng; return '/'; } -"%" { yycol += yyleng; return '%'; } -"(" { yycol += yyleng; return '('; } -")" { yycol += yyleng; return ')'; } -"," { yycol += yyleng; return ','; } + /* INITIAL state */ + +{nonspace}+ { + /* Found a word, emit and return it */ + psqlscan_emit(cur_state, yytext, yyleng); + return 1; + } + +{space}+ { /* ignore */ } + +{newline} { + /* report end of command */ + last_was_newline = true; + return 0; + } + + /* EXPR state */ + +<EXPR>{ + +"+" { return '+'; } +"-" { return '-'; } +"*" { return '*'; } +"/" { return '/'; } +"%" { return '%'; } +"(" { return '('; } +")" { return ')'; } +"," { return ','; } :{alnum}+ { - yycol += yyleng; yylval.str = pg_strdup(yytext + 1); return VARIABLE; } {digit}+ { - yycol += yyleng; yylval.ival = strtoint64(yytext); return INTEGER; } {alpha}{alnum}* { - yycol += yyleng; yylval.str = pg_strdup(yytext); return FUNCTION; } -[\n] { yycol = 0; yyline++; } +{newline} { + /* report end of command */ + last_was_newline = true; + return 0; + } -{space}+ { yycol += yyleng; /* otherwise ignore */ } +{space}+ { /* ignore */ } . { - yycol += yyleng; - syntax_error(expr_source, expr_lineno, expr_full_line, expr_command, - "unexpected character", yytext, expr_col + yycol); - /* NOTREACHED, exit is called from syntax_error */ + /* + * must strdup yytext so that expr_yyerror_more doesn't + * change it while finding end of line + */ + expr_yyerror_more(yyscanner, "unexpected character", + pg_strdup(yytext)); + /* NOTREACHED, syntax_error calls exit() */ return 0; } + +} + +<<EOF>> { + if (cur_state->buffer_stack == NULL) + return 0; /* end of input reached */ + + /* + * We were expanding a variable, so pop the inclusion + * stack and keep lexing + */ + psqlscan_pop_buffer_stack(cur_state); + psqlscan_select_top_buffer(cur_state); + } + %% void expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more) { - syntax_error(expr_source, expr_lineno, expr_full_line, expr_command, - message, more, expr_col + yycol); + PsqlScanState state = yyget_extra(yyscanner); + int error_detection_offset = expr_scanner_offset(state) - 1; + char *full_line; + size_t l; + + /* + * While parsing an expression, we may not have collected the whole line + * yet from the input source. Lex till EOL so we can report whole line. + * (If we're at EOF, it's okay to call yylex() an extra time.) + */ + if (!last_was_newline) + { + while (yylex(yyscanner)) + /* skip */ ; + } + + full_line = expr_scanner_get_substring(state, + expr_start_offset, + expr_scanner_offset(state)); + /* Trim trailing newline if any */ + l = strlen(full_line); + while (l > 0 && full_line[l - 1] == '\n') + full_line[--l] = '\0'; + + syntax_error(expr_source, expr_lineno, full_line, expr_command, + message, more, error_detection_offset - expr_start_offset); } void -yyerror(yyscan_t yyscanner, const char *message) +expr_yyerror(yyscan_t yyscanner, const char *message) { expr_yyerror_more(yyscanner, message, NULL); } /* - * Called before any actual parsing is done + * Collect a space-separated word from a backslash command and return it + * in word_buf, along with its starting string offset in *offset. + * Returns true if successful, false if at end of command. */ -yyscan_t -expr_scanner_init(const char *str, const char *source, - int lineno, const char *line, - const char *cmd, int ecol) +bool +expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset) { - yyscan_t yyscanner; - Size slen = strlen(str); + int lexresult; - /* Set up yyscan_t */ - yylex_init(&yyscanner); + /* Must be scanning already */ + Assert(state->scanbufhandle != NULL); - /* save context information for error messages */ - expr_source = source; - expr_lineno = lineno; - expr_full_line = line; - expr_command = cmd; - expr_col = ecol; + /* Set current output target */ + state->output_buf = word_buf; + resetPQExpBuffer(word_buf); + + /* Set input source */ + if (state->buffer_stack != NULL) + yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); + else + yy_switch_to_buffer(state->scanbufhandle, state->scanner); - /* reset error pointers for this scan */ - yycol = yyline = 0; + /* Set start state */ + state->start_state = INITIAL; + + /* And lex. */ + lexresult = yylex(state->scanner); /* - * Make a scan buffer with special termination needed by flex. + * Save start offset of word, if any. We could do this more efficiently, + * but for now this seems fine. */ - scanbuf = pg_malloc(slen + 2); - memcpy(scanbuf, str, slen); - scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; - scanbufhandle = yy_scan_buffer(scanbuf, slen + 2, yyscanner); + if (lexresult) + *offset = expr_scanner_offset(state) - word_buf->len; + else + *offset = -1; - return yyscanner; + /* + * In case the caller returns to using the regular SQL lexer, reselect the + * appropriate initial state. + */ + psql_scan_reselect_sql_lexer(state); + + return (bool) lexresult; } +/* + * Prepare to lex an expression via expr_yyparse(). + * + * Returns the yyscan_t that is to be passed to expr_yyparse(). + * (This is just state->scanner, but callers don't need to know that.) + */ +yyscan_t +expr_scanner_init(PsqlScanState state, + const char *source, int lineno, int start_offset, + const char *command) +{ + /* Save error context info */ + expr_source = source; + expr_lineno = lineno; + expr_start_offset = start_offset; + expr_command = command; + + /* Must be scanning already */ + Assert(state->scanbufhandle != NULL); + + /* Set current output target */ + state->output_buf = NULL; + + /* Set input source */ + if (state->buffer_stack != NULL) + yy_switch_to_buffer(state->buffer_stack->buf, state->scanner); + else + yy_switch_to_buffer(state->scanbufhandle, state->scanner); + + /* Set start state */ + state->start_state = EXPR; + + return state->scanner; +} /* - * Called after parsing is done to clean up after expr_scanner_init() + * Finish lexing an expression. */ void expr_scanner_finish(yyscan_t yyscanner) { - yy_delete_buffer(scanbufhandle, yyscanner); - pg_free(scanbuf); - yylex_destroy(yyscanner); - - expr_source = NULL; - expr_lineno = 0; - expr_full_line = NULL; - expr_command = NULL; - expr_col = 0; + PsqlScanState state = yyget_extra(yyscanner); + + /* + * Reselect appropriate initial state for SQL lexer. + */ + psql_scan_reselect_sql_lexer(state); +} + +/* + * Get offset from start of string to end of current lexer token. + * + * We rely on the knowledge that flex modifies the scan buffer by storing + * a NUL at the end of the current token (yytext). Note that this might + * not work quite right if we were parsing a sub-buffer, but since pgbench + * never invokes that functionality, it doesn't matter. + */ +int +expr_scanner_offset(PsqlScanState state) +{ + return strlen(state->scanbuf); +} + +/* + * Get a malloc'd copy of the lexer input string from start_offset + * to just before end_offset. + */ +char * +expr_scanner_get_substring(PsqlScanState state, + int start_offset, int end_offset) +{ + char *result; + int slen = end_offset - start_offset; + + Assert(slen >= 0); + Assert(end_offset <= strlen(state->scanbuf)); + result = (char *) pg_malloc(slen + 1); + memcpy(result, state->scanbuf + start_offset, slen); + result[slen] = '\0'; + + return result; +} + +/* + * Get the line number associated with the given string offset + * (which must not be past the end of where we've lexed to). + */ +int +expr_scanner_get_lineno(PsqlScanState state, int offset) +{ + int lineno = 1; + const char *p = state->scanbuf; + + while (*p && offset > 0) + { + if (*p == '\n') + lineno++; + p++, offset--; + } + return lineno; } |