aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/tsvector_parser.c
diff options
context:
space:
mode:
author Teodor Sigaev <teodor@sigaev.ru> 2018-04-05 19:55:11 +0300
committer Teodor Sigaev <teodor@sigaev.ru> 2018-04-05 19:55:11 +0300
commit 1664ae1978bf0f5ee940dc2fc8313e6400a7e7da (patch)
tree 750d1e406f9db66b039545a98485da6cc855d3c8 /src/backend/utils/adt/tsvector_parser.c
parent fbc27330b8f8693d8fa9b1f8cb450567c3d81640 (diff)
download postgresql-1664ae1978bf0f5ee940dc2fc8313e6400a7e7da.tar.gz
postgresql-1664ae1978bf0f5ee940dc2fc8313e6400a7e7da.zip
Add websearch_to_tsquery
Error-tolerant conversion function with a web-like syntax for search queries. It simplifies building a search engine whose query interface is close to what users are accustomed to. Bump catalog version. Authors: Victor Drobny, Dmitry Ivanov, with editing by me. Reviewed by: Aleksander Alekseev, Tomas Vondra, Thomas Munro, Aleksandr Parfenov. Discussion: https://www.postgresql.org/message-id/flat/fe931111ff7e9ad79196486ada79e268@postgrespro.ru
Diffstat (limited to 'src/backend/utils/adt/tsvector_parser.c')
-rw-r--r-- src/backend/utils/adt/tsvector_parser.c 36
1 file changed, 15 insertions, 21 deletions
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
index 7367ba6a40f..fed411a842e 100644
--- a/src/backend/utils/adt/tsvector_parser.c
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -33,6 +33,7 @@ struct TSVectorParseStateData
int eml; /* max bytes per character */
bool oprisdelim; /* treat ! | * ( ) as delimiters? */
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
+ bool is_web; /* we're in websearch_to_tsquery() */
};
@@ -42,7 +43,7 @@ struct TSVectorParseStateData
* ! | & ( )
*/
TSVectorParseState
-init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
+init_tsvector_parser(char *input, int flags)
{
TSVectorParseState state;
@@ -52,8 +53,9 @@ init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
state->len = 32;
state->word = (char *) palloc(state->len);
state->eml = pg_database_encoding_max_length();
- state->oprisdelim = oprisdelim;
- state->is_tsquery = is_tsquery;
+ state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
+ state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
+ state->is_web = (flags & P_TSV_IS_WEB) != 0;
return state;
}
@@ -89,16 +91,6 @@ do { \
} \
} while (0)
-/* phrase operator begins with '<' */
-#define ISOPERATOR(x) \
- ( pg_mblen(x) == 1 && ( *(x) == '!' || \
- *(x) == '&' || \
- *(x) == '|' || \
- *(x) == '(' || \
- *(x) == ')' || \
- *(x) == '<' \
- ) )
-
/* Fills gettoken_tsvector's output parameters, and returns true */
#define RETURN_TOKEN \
do { \
@@ -183,14 +175,15 @@ gettoken_tsvector(TSVectorParseState state,
{
if (*(state->prsbuf) == '\0')
return false;
- else if (t_iseq(state->prsbuf, '\''))
+ else if (!state->is_web && t_iseq(state->prsbuf, '\''))
statecode = WAITENDCMPLX;
- else if (t_iseq(state->prsbuf, '\\'))
+ else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
- else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
+ else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+ (state->is_web && t_iseq(state->prsbuf, '"')))
PRSSYNTAXERROR;
else if (!t_isspace(state->prsbuf))
{
@@ -217,13 +210,14 @@ gettoken_tsvector(TSVectorParseState state,
}
else if (statecode == WAITENDWORD)
{
- if (t_iseq(state->prsbuf, '\\'))
+ if (!state->is_web && t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
- (state->oprisdelim && ISOPERATOR(state->prsbuf)))
+ (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+ (state->is_web && t_iseq(state->prsbuf, '"')))
{
RESIZEPRSBUF;
if (curpos == state->word)
@@ -250,11 +244,11 @@ gettoken_tsvector(TSVectorParseState state,
}
else if (statecode == WAITENDCMPLX)
{
- if (t_iseq(state->prsbuf, '\''))
+ if (!state->is_web && t_iseq(state->prsbuf, '\''))
{
statecode = WAITCHARCMPLX;
}
- else if (t_iseq(state->prsbuf, '\\'))
+ else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
@@ -270,7 +264,7 @@ gettoken_tsvector(TSVectorParseState state,
}
else if (statecode == WAITCHARCMPLX)
{
- if (t_iseq(state->prsbuf, '\''))
+ if (!state->is_web && t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);