diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2016-03-24 20:28:47 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2016-03-24 20:28:47 -0400 |
commit | c1156411ad0879a71956b64aa487babe7572685b (patch) | |
tree | 2213c8da830ec180c397bb84fced8b472f69f341 /src/include/fe_utils | |
parent | d65bea26a867e3bbd053bf87b985b0e113256414 (diff) | |
download | postgresql-c1156411ad0879a71956b64aa487babe7572685b.tar.gz postgresql-c1156411ad0879a71956b64aa487babe7572685b.zip |
Move psql's psqlscan.l into src/fe_utils.
This completes (at least for now) the project of getting rid of ad-hoc
linkages among the src/bin/ subdirectories. Everything they share is now
in src/fe_utils/ and is included from a static library at link time.
A side benefit is that we can restore the FLEX_NO_BACKUP check for
psqlscanslash.l. We might need to think of another way to do that check
if we ever need to build two lexers with that property in the same source
directory, but there's no foreseeable reason to need that.
Diffstat (limited to 'src/include/fe_utils')
-rw-r--r-- | src/include/fe_utils/psqlscan.h | 85 | ||||
-rw-r--r-- | src/include/fe_utils/psqlscan_int.h | 144 |
2 files changed, 229 insertions, 0 deletions
diff --git a/src/include/fe_utils/psqlscan.h b/src/include/fe_utils/psqlscan.h new file mode 100644 index 00000000000..1f10ecc2d44 --- /dev/null +++ b/src/include/fe_utils/psqlscan.h @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * psqlscan.h + * lexical scanner for SQL commands + * + * This lexer used to be part of psql, and that heritage is reflected in + * the file name as well as function and typedef names, though it can now + * be used by other frontend programs as well. It's also possible to extend + * this lexer with a compatible add-on lexer to handle program-specific + * backslash commands. + * + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/fe_utils/psqlscan.h + * + *------------------------------------------------------------------------- + */ +#ifndef PSQLSCAN_H +#define PSQLSCAN_H + +#include "pqexpbuffer.h" + + +/* Abstract type for lexer's internal state */ +typedef struct PsqlScanStateData *PsqlScanState; + +/* Termination states for psql_scan() */ +typedef enum +{ + PSCAN_SEMICOLON, /* found command-ending semicolon */ + PSCAN_BACKSLASH, /* found backslash command */ + PSCAN_INCOMPLETE, /* end of line, SQL statement incomplete */ + PSCAN_EOL /* end of line, SQL possibly complete */ +} PsqlScanResult; + +/* Prompt type returned by psql_scan() */ +typedef enum _promptStatus +{ + PROMPT_READY, + PROMPT_CONTINUE, + PROMPT_COMMENT, + PROMPT_SINGLEQUOTE, + PROMPT_DOUBLEQUOTE, + PROMPT_DOLLARQUOTE, + PROMPT_PAREN, + PROMPT_COPY +} promptStatus_t; + +/* Callback functions to be used by the lexer */ +typedef struct PsqlScanCallbacks +{ + /* Fetch value of a variable, as a pfree'able string; NULL if unknown */ + /* This pointer can be NULL if no variable substitution is wanted */ + char *(*get_variable) (const char *varname, bool escape, bool as_ident); + /* Print an error message someplace appropriate */ + /* (very old gcc versions don't support attributes on function pointers) */ +#if defined(__GNUC__) && __GNUC__ < 4 + void (*write_error) (const char *fmt,...); +#else + void (*write_error) (const char *fmt,...) pg_attribute_printf(1, 2); +#endif +} PsqlScanCallbacks; + + +extern PsqlScanState psql_scan_create(const PsqlScanCallbacks *callbacks); +extern void psql_scan_destroy(PsqlScanState state); + +extern void psql_scan_setup(PsqlScanState state, + const char *line, int line_len, + int encoding, bool std_strings); +extern void psql_scan_finish(PsqlScanState state); + +extern PsqlScanResult psql_scan(PsqlScanState state, + PQExpBuffer query_buf, + promptStatus_t *prompt); + +extern void psql_scan_reset(PsqlScanState state); + +extern void psql_scan_reselect_sql_lexer(PsqlScanState state); + +extern bool psql_scan_in_quote(PsqlScanState state); + +#endif /* PSQLSCAN_H */ diff --git a/src/include/fe_utils/psqlscan_int.h b/src/include/fe_utils/psqlscan_int.h new file mode 100644 index 00000000000..a52929d5abf --- /dev/null +++ b/src/include/fe_utils/psqlscan_int.h @@ -0,0 +1,144 @@ +/*------------------------------------------------------------------------- + * + * psqlscan_int.h + * lexical scanner internal declarations + * + * This file declares the PsqlScanStateData structure used by psqlscan.l + * and shared by other lexers compatible with it, such as psqlscanslash.l. + * + * One difficult aspect of this code is that we need to work in multibyte + * encodings that are not ASCII-safe. A "safe" encoding is one in which each + * byte of a multibyte character has the high bit set (it's >= 0x80). Since + * all our lexing rules treat all high-bit-set characters alike, we don't + * really need to care whether such a byte is part of a sequence or not. + * In an "unsafe" encoding, we still expect the first byte of a multibyte + * sequence to be >= 0x80, but later bytes might not be. If we scan such + * a sequence as-is, the lexing rules could easily be fooled into matching + * such bytes to ordinary ASCII characters. Our solution for this is to + * substitute 0xFF for each non-first byte within the data presented to flex. + * The flex rules will then pass the FF's through unmolested. The + * psqlscan_emit() subroutine is responsible for looking back to the original + * string and replacing FF's with the corresponding original bytes. + * + * Another interesting thing we do here is scan different parts of the same + * input with physically separate flex lexers (ie, lexers written in separate + * .l files). We can get away with this because the only part of the + * persistent state of a flex lexer that depends on its parsing rule tables + * is the start state number, which is easy enough to manage --- usually, + * in fact, we just need to set it to INITIAL when changing lexers. But to + * make that work at all, we must use re-entrant lexers, so that all the + * relevant state is in the yyscanner_t attached to the PsqlScanState; + * if we were using lexers with separate static state we would soon end up + * with dangling buffer pointers in one or the other. Also note that this + * is unlikely to work very nicely if the lexers aren't all built with the + * same flex version, or if they don't use the same flex options. + * + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/fe_utils/psqlscan_int.h + * + *------------------------------------------------------------------------- + */ +#ifndef PSQLSCAN_INT_H +#define PSQLSCAN_INT_H + +#include "fe_utils/psqlscan.h" + +/* + * These are just to allow this file to be compilable standalone for header + * validity checking; in actual use, this file should always be included + * from the body of a flex file, where these symbols are already defined. + */ +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void *yyscan_t; +#endif + +/* + * We use a stack of flex buffers to handle substitution of psql variables. + * Each stacked buffer contains the as-yet-unread text from one psql variable. + * When we pop the stack all the way, we resume reading from the outer buffer + * identified by scanbufhandle. + */ +typedef struct StackElem +{ + YY_BUFFER_STATE buf; /* flex input control structure */ + char *bufstring; /* data actually being scanned by flex */ + char *origstring; /* copy of original data, if needed */ + char *varname; /* name of variable providing data, or NULL */ + struct StackElem *next; +} StackElem; + +/* + * All working state of the lexer must be stored in PsqlScanStateData + * between calls. This allows us to have multiple open lexer operations, + * which is needed for nested include files. The lexer itself is not + * recursive, but it must be re-entrant. + */ +typedef struct PsqlScanStateData +{ + yyscan_t scanner; /* Flex's state for this PsqlScanState */ + + PQExpBuffer output_buf; /* current output buffer */ + + StackElem *buffer_stack; /* stack of variable expansion buffers */ + + /* + * These variables always refer to the outer buffer, never to any stacked + * variable-expansion buffer. + */ + YY_BUFFER_STATE scanbufhandle; + char *scanbuf; /* start of outer-level input buffer */ + const char *scanline; /* current input line at outer level */ + + /* safe_encoding, curline, refline are used by emit() to replace FFs */ + int encoding; /* encoding being used now */ + bool safe_encoding; /* is current encoding "safe"? */ + bool std_strings; /* are string literals standard? */ + const char *curline; /* actual flex input string for cur buf */ + const char *refline; /* original data for cur buffer */ + + /* + * All this state lives across successive input lines, until explicitly + * reset by psql_scan_reset. start_state is adopted by yylex() on entry, + * and updated with its finishing state on exit. + */ + int start_state; /* yylex's starting/finishing state */ + int paren_depth; /* depth of nesting in parentheses */ + int xcdepth; /* depth of nesting in slash-star comments */ + char *dolqstart; /* current $foo$ quote start string */ + + /* + * Callback functions provided by the program making use of the lexer. + */ + const PsqlScanCallbacks *callbacks; +} PsqlScanStateData; + + +/* + * Functions exported by psqlscan.l, but only meant for use within + * compatible lexers. + */ +extern void psqlscan_push_new_buffer(PsqlScanState state, + const char *newstr, const char *varname); +extern void psqlscan_pop_buffer_stack(PsqlScanState state); +extern void psqlscan_select_top_buffer(PsqlScanState state); +extern bool psqlscan_var_is_current_source(PsqlScanState state, + const char *varname); +extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state, + const char *txt, int len, + char **txtcopy); +extern void psqlscan_emit(PsqlScanState state, const char *txt, int len); +extern char *psqlscan_extract_substring(PsqlScanState state, + const char *txt, int len); +extern void psqlscan_escape_variable(PsqlScanState state, + const char *txt, int len, + bool as_ident); + +#endif /* PSQLSCAN_INT_H */ |