aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorAlexander Korotkov <akorotkov@postgresql.org>2021-01-31 20:14:29 +0300
committerAlexander Korotkov <akorotkov@postgresql.org>2021-01-31 20:14:29 +0300
commit0c4f355c6a5fd437f71349f2f3d5d491382572b7 (patch)
tree5a5cdade362e68bb59ef980c9a3678951369edbd /src/backend
parentdfb75e478cacb33d277432e0df5e2f9a2a2b52d9 (diff)
downloadpostgresql-0c4f355c6a5fd437f71349f2f3d5d491382572b7.tar.gz
postgresql-0c4f355c6a5fd437f71349f2f3d5d491382572b7.zip
Fix parsing of complex morphs to tsquery
When to_tsquery() or websearch_to_tsquery() meet a complex morph containing multiple words residing adjacent position, these words are connected with OP_AND operator. That leads to surprising results. For instace, both websearch_to_tsquery('"pg_class pg"') and to_tsquery('pg_class <-> pg') produce '( pg & class ) <-> pg' tsquery. This tsquery requires 'pg' and 'class' words to reside on the same position and doesn't match to to_tsvector('pg_class pg'). It appears to be ridiculous behavior, which needs to be fixed. This commit makes to_tsquery() or websearch_to_tsquery() connect words residing adjacent position with OP_PHRASE. Therefore, now those words are normally chained with other OP_PHRASE operator. The examples of above now produces 'pg <-> class <-> pg' tsquery, which matches to to_tsvector('pg_class pg'). Another effect of this commit is that complex morph word positions now need to match the tsvector even if there is no surrounding OP_PHRASE. This behavior change generally looks like an improvement but making this commit not backpatchable. Reported-by: Barry Pederson Bug: #16592 Discussion: https://postgr.es/m/16592-70b110ff9731c07d@postgresql.org Discussion: https://postgr.es/m/CAPpHfdv0EzVhf6CWfB1_TTZqXV_2Sn-jSY3zSd7ePH%3D-%2B1V2DQ%40mail.gmail.com Author: Alexander Korotkov Reviewed-by: Tom Lane, Neil Chen
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/tsearch/to_tsany.c41
1 files changed, 38 insertions, 3 deletions
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index e4ad661a8ba..f4ddfc01059 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -20,10 +20,20 @@
#include "utils/jsonfuncs.h"
+/*
+ * Opaque data structure, which is passed by parse_tsquery() to pushval_morph().
+ */
typedef struct MorphOpaque
{
Oid cfg_id;
- int qoperator; /* query operator */
+
+ /*
+ * Single tsquery morph could be parsed into multiple words. When these
+ * words reside in adjacent positions, they are connected using this
+ * operator. Usually, that is OP_PHRASE, which requires word positions of
+ * a complex morph to exactly match the tsvector.
+ */
+ int qoperator;
} MorphOpaque;
typedef struct TSVectorBuildState
@@ -573,7 +583,14 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
MorphOpaque data;
data.cfg_id = PG_GETARG_OID(0);
- data.qoperator = OP_AND;
+
+ /*
+ * Passing OP_PHRASE as a qoperator makes tsquery require matching of word
+ * positions of a complex morph exactly match the tsvector. Also, when
+ * the complex morphs are connected with OP_PHRASE operator, we connect
+ * all their words into the OP_PHRASE sequence.
+ */
+ data.qoperator = OP_PHRASE;
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
@@ -603,6 +620,12 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
MorphOpaque data;
data.cfg_id = PG_GETARG_OID(0);
+
+ /*
+ * parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a
+ * single morph. Passing OP_PHRASE as a qoperator makes tsquery require
+ * matching of all words independently on their positions.
+ */
data.qoperator = OP_AND;
query = parse_tsquery(text_to_cstring(in),
@@ -634,6 +657,12 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
MorphOpaque data;
data.cfg_id = PG_GETARG_OID(0);
+
+ /*
+ * parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a
+ * single morph. Passing OP_PHRASE as a qoperator makes tsquery require
+ * matching of word positions.
+ */
data.qoperator = OP_PHRASE;
query = parse_tsquery(text_to_cstring(in),
@@ -665,7 +694,13 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
data.cfg_id = PG_GETARG_OID(0);
- data.qoperator = OP_AND;
+ /*
+ * Passing OP_PHRASE as a qoperator makes tsquery require matching of word
+ * positions of a complex morph exactly match the tsvector. Also, when
+ * the complex morphs are given in quotes, we connect all their words into
+ * the OP_PHRASE sequence.
+ */
+ data.qoperator = OP_PHRASE;
query = parse_tsquery(text_to_cstring(in),
pushval_morph,