about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us>, 2024-06-13 20:34:43 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us>, 2024-06-13 20:34:43 -0400
commit: 5912bf77c526fdb8cfc9bbb56a509a152c6a3632 (patch)
tree: 6b937b2b19b36f1259ec055b95fb8b23e2a12fbd
parent: 1450db793f643991e4684fa8b5cee6ded51ab870 (diff)
download: postgresql-5912bf77c526fdb8cfc9bbb56a509a152c6a3632.tar.gz
download: postgresql-5912bf77c526fdb8cfc9bbb56a509a152c6a3632.zip
Fix parsing of ignored operators in websearch_to_tsquery().

The manual says clearly that punctuation in the input of websearch_to_tsquery() is ignored, except for the special cases of dashes and quotes. However, this failed for cases like "(foo bar) or something", or in general an ISOPERATOR character in front of the "or". We'd switch back to WAITOPERAND state, then ignore the operator character while remaining in that state, and then reach the "or" in WAITOPERAND state which (intentionally) makes us treat it as data.

The fix is simple enough: if we see an ISOPERATOR character while in WAITOPERATOR state, we have to skip it while staying in that state. (We don't need to worry about other punctuation characters: those will be consumed as though they were words, but then rejected by lexizing.)

In v14 and up (since commit eb086056f) we can simplify the code a bit more too, because there is no longer a reason for the WAITOPERAND state to distinguish between quoted and unquoted operands.

Per bug #18479 from Manos Emmanouilidis. Back-patch to all supported branches.

Discussion: https://postgr.es/m/18479-d9b46e2fc242c33e@postgresql.org

-rw-r--r--  src/backend/utils/adt/tsquery.c  22
-rw-r--r--  src/test/regress/expected/tsearch.out  7
-rw-r--r--  src/test/regress/sql/tsearch.sql  3
3 files changed, 19 insertions, 13 deletions
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index ded919b39b0..1a146372f7a 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -420,7 +420,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
}
else if (ISOPERATOR(state->buf))
{
- /* or else gettoken_tsvector() will raise an error */
+ /* ignore, else gettoken_tsvector() will raise an error */
state->buf++;
state->state = WAITOPERAND;
continue;
@@ -452,15 +452,9 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
break;
case WAITOPERATOR:
- if (t_iseq(state->buf, '"'))
+ if (*state->buf == '\0')
{
- /*
- * put implicit AND after an operand and handle this quote
- * in WAITOPERAND
- */
- state->state = WAITOPERAND;
- *operator = OP_AND;
- return PT_OPR;
+ return PT_END;
}
else if (parse_or_operator(state))
{
@@ -468,15 +462,17 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
*operator = OP_OR;
return PT_OPR;
}
- else if (*state->buf == '\0')
+ else if (ISOPERATOR(state->buf))
{
- return PT_END;
+ /* ignore other operators in this state too */
+ state->buf++;
+ continue;
}
else if (!t_isspace(state->buf))
{
- /* put implicit AND after an operand */
- *operator = OP_AND;
+ /* insert implicit AND between operands */
state->state = WAITOPERAND;
+ *operator = OP_AND;
return PT_OPR;
}
break;
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index cfe38aa3062..629147c5b11 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -2539,12 +2539,19 @@ select websearch_to_tsquery('simple', 'abc <-> def');
'abc' & 'def'
(1 row)
+-- parens are ignored, too
select websearch_to_tsquery('simple', 'abc (pg or class)');
websearch_to_tsquery
------------------------
'abc' & 'pg' | 'class'
(1 row)
+select websearch_to_tsquery('simple', '(foo bar) or (ding dong)');
+ websearch_to_tsquery
+---------------------------------
+ 'foo' & 'bar' | 'ding' & 'dong'
+(1 row)
+
-- NOT is ignored in quotes
select websearch_to_tsquery('english', 'My brand new smartphone');
websearch_to_tsquery
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index b5a3a68a647..0a90c1b539d 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -727,7 +727,10 @@ select websearch_to_tsquery('simple', ':');
select websearch_to_tsquery('simple', 'abc & def');
select websearch_to_tsquery('simple', 'abc | def');
select websearch_to_tsquery('simple', 'abc <-> def');
+
+-- parens are ignored, too
select websearch_to_tsquery('simple', 'abc (pg or class)');
+select websearch_to_tsquery('simple', '(foo bar) or (ding dong)');
-- NOT is ignored in quotes
select websearch_to_tsquery('english', 'My brand new smartphone');