diff options
author | Teodor Sigaev <teodor@sigaev.ru> | 2018-04-05 19:55:11 +0300 |
---|---|---|
committer | Teodor Sigaev <teodor@sigaev.ru> | 2018-04-05 19:55:11 +0300 |
commit | 1664ae1978bf0f5ee940dc2fc8313e6400a7e7da (patch) | |
tree | 750d1e406f9db66b039545a98485da6cc855d3c8 /src/backend/utils/adt/tsvector_parser.c | |
parent | fbc27330b8f8693d8fa9b1f8cb450567c3d81640 (diff) | |
download | postgresql-1664ae1978bf0f5ee940dc2fc8313e6400a7e7da.tar.gz postgresql-1664ae1978bf0f5ee940dc2fc8313e6400a7e7da.zip |
Add websearch_to_tsquery
Error-tolerant conversion function with web-like syntax for search queries;
it simplifies constructing a search engine with an interface close to what
users are accustomed to.
Bump catalog version
Authors: Victor Drobny, Dmitry Ivanov, with editing by me
Reviewed by: Aleksander Alekseev, Tomas Vondra, Thomas Munro, Aleksandr Parfenov
Discussion: https://www.postgresql.org/message-id/flat/fe931111ff7e9ad79196486ada79e268@postgrespro.ru
Diffstat (limited to 'src/backend/utils/adt/tsvector_parser.c')
-rw-r--r-- | src/backend/utils/adt/tsvector_parser.c | 36 |
1 files changed, 15 insertions, 21 deletions
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c index 7367ba6a40f..fed411a842e 100644 --- a/src/backend/utils/adt/tsvector_parser.c +++ b/src/backend/utils/adt/tsvector_parser.c @@ -33,6 +33,7 @@ struct TSVectorParseStateData int eml; /* max bytes per character */ bool oprisdelim; /* treat ! | * ( ) as delimiters? */ bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */ + bool is_web; /* we're in websearch_to_tsquery() */ }; @@ -42,7 +43,7 @@ struct TSVectorParseStateData * ! | & ( ) */ TSVectorParseState -init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery) +init_tsvector_parser(char *input, int flags) { TSVectorParseState state; @@ -52,8 +53,9 @@ init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery) state->len = 32; state->word = (char *) palloc(state->len); state->eml = pg_database_encoding_max_length(); - state->oprisdelim = oprisdelim; - state->is_tsquery = is_tsquery; + state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0; + state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0; + state->is_web = (flags & P_TSV_IS_WEB) != 0; return state; } @@ -89,16 +91,6 @@ do { \ } \ } while (0) -/* phrase operator begins with '<' */ -#define ISOPERATOR(x) \ - ( pg_mblen(x) == 1 && ( *(x) == '!' 
|| \ - *(x) == '&' || \ - *(x) == '|' || \ - *(x) == '(' || \ - *(x) == ')' || \ - *(x) == '<' \ - ) ) - /* Fills gettoken_tsvector's output parameters, and returns true */ #define RETURN_TOKEN \ do { \ @@ -183,14 +175,15 @@ gettoken_tsvector(TSVectorParseState state, { if (*(state->prsbuf) == '\0') return false; - else if (t_iseq(state->prsbuf, '\'')) + else if (!state->is_web && t_iseq(state->prsbuf, '\'')) statecode = WAITENDCMPLX; - else if (t_iseq(state->prsbuf, '\\')) + else if (!state->is_web && t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } - else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) + else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) || + (state->is_web && t_iseq(state->prsbuf, '"'))) PRSSYNTAXERROR; else if (!t_isspace(state->prsbuf)) { @@ -217,13 +210,14 @@ gettoken_tsvector(TSVectorParseState state, } else if (statecode == WAITENDWORD) { - if (t_iseq(state->prsbuf, '\\')) + if (!state->is_web && t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || - (state->oprisdelim && ISOPERATOR(state->prsbuf))) + (state->oprisdelim && ISOPERATOR(state->prsbuf)) || + (state->is_web && t_iseq(state->prsbuf, '"'))) { RESIZEPRSBUF; if (curpos == state->word) @@ -250,11 +244,11 @@ gettoken_tsvector(TSVectorParseState state, } else if (statecode == WAITENDCMPLX) { - if (t_iseq(state->prsbuf, '\'')) + if (!state->is_web && t_iseq(state->prsbuf, '\'')) { statecode = WAITCHARCMPLX; } - else if (t_iseq(state->prsbuf, '\\')) + else if (!state->is_web && t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDCMPLX; @@ -270,7 +264,7 @@ gettoken_tsvector(TSVectorParseState state, } else if (statecode == WAITCHARCMPLX) { - if (t_iseq(state->prsbuf, '\'')) + if (!state->is_web && t_iseq(state->prsbuf, '\'')) { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); |