aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2017-01-26 12:17:47 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2017-01-26 12:17:47 -0500
commit2c1976a6cc4bc2447bfd1e422c5b9ebf2a4d893f (patch)
tree7b48f31f5a25b6e5d70b795589caa41b334b04a0 /src
parentfb9d2ed6151d1220fc54b86142e3be85ce90ef9d (diff)
downloadpostgresql-2c1976a6cc4bc2447bfd1e422c5b9ebf2a4d893f.tar.gz
postgresql-2c1976a6cc4bc2447bfd1e422c5b9ebf2a4d893f.zip
Ensure that a tsquery like '!foo' matches empty tsvectors.
!foo means "the tsvector does not contain foo", and therefore it should match an empty tsvector. ts_match_vq() overenthusiastically supposed that an empty tsvector could never match any query, so it forcibly returned FALSE, the wrong answer. Remove the premature optimization. Our behavior on this point was inconsistent, because while seqscans and GIST index searches both failed to match empty tsvectors, GIN index searches would find them, since GIN scans don't rely on ts_match_vq(). That makes this certainly a bug, not a debatable definition disagreement, so back-patch to all supported branches. Report and diagnosis by Tom Dunstan (bug #14515); added test cases by me. Discussion: https://postgr.es/m/20170126025524.1434.97828@wrigleys.postgresql.org
Diffstat (limited to 'src')
-rw-r--r--src/backend/utils/adt/tsvector_op.c3
-rw-r--r--src/test/regress/expected/tsearch.out137
-rw-r--r--src/test/regress/expected/tstypes.out12
-rw-r--r--src/test/regress/sql/tsearch.sql33
-rw-r--r--src/test/regress/sql/tstypes.sql3
5 files changed, 187 insertions, 1 deletions
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 8b71de2a17e..2fea327af20 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -784,7 +784,8 @@ ts_match_vq(PG_FUNCTION_ARGS)
CHKVAL chkval;
bool result;
- if (!val->size || !query->size)
+ /* empty query matches nothing */
+ if (!query->size)
{
PG_FREE_IF_COPY(val, 0);
PG_FREE_IF_COPY(query, 1);
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 59e2a06d0e6..909212e1a76 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -98,8 +98,108 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
494
(1 row)
+SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+ count
+-------
+ 158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+ count
+-------
+ 0
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
+ count
+-------
+ 508
+(1 row)
+
create index wowidx on test_tsvector using gist (a);
SET enable_seqscan=OFF;
+SET enable_indexscan=ON;
+SET enable_bitmapscan=OFF;
+explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ QUERY PLAN
+-------------------------------------------------------
+ Aggregate
+ -> Index Scan using wowidx on test_tsvector
+ Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
+(3 rows)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count
+-------
+ 158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count
+-------
+ 17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count
+-------
+ 6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count
+-------
+ 98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count
+-------
+ 23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count
+-------
+ 39
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+ count
+-------
+ 494
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+ count
+-------
+ 158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+ count
+-------
+ 0
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
+ count
+-------
+ 508
+(1 row)
+
+SET enable_indexscan=OFF;
+SET enable_bitmapscan=ON;
+explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ QUERY PLAN
+-------------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on test_tsvector
+ Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
+ -> Bitmap Index Scan on wowidx
+ Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
+(5 rows)
+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
count
-------
@@ -148,10 +248,35 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
158
(1 row)
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+ count
+-------
+ 0
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
+ count
+-------
+ 508
+(1 row)
+
RESET enable_seqscan;
+RESET enable_indexscan;
+RESET enable_bitmapscan;
DROP INDEX wowidx;
CREATE INDEX wowidx ON test_tsvector USING gin (a);
SET enable_seqscan=OFF;
+-- GIN only supports bitmapscan, so no need to test plain indexscan
+explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ QUERY PLAN
+-------------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on test_tsvector
+ Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
+ -> Bitmap Index Scan on wowidx
+ Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
+(5 rows)
+
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
count
-------
@@ -200,6 +325,18 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
158
(1 row)
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+ count
+-------
+ 0
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
+ count
+-------
+ 508
+(1 row)
+
RESET enable_seqscan;
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10;
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out
index 6284fb61813..da4c760175b 100644
--- a/src/test/regress/expected/tstypes.out
+++ b/src/test/regress/expected/tstypes.out
@@ -625,3 +625,15 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
0.1
(1 row)
+select to_tsvector('simple', 'x y q y') @@ '!foo' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', '') @@ '!foo' AS "true";
+ true
+------
+ t
+(1 row)
+
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 1ba2cdb3829..355fbe16368 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -48,10 +48,17 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
create index wowidx on test_tsvector using gist (a);
SET enable_seqscan=OFF;
+SET enable_indexscan=ON;
+SET enable_bitmapscan=OFF;
+
+explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
@@ -61,14 +68,37 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
+
+SET enable_indexscan=OFF;
+SET enable_bitmapscan=ON;
+
+explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
RESET enable_seqscan;
+RESET enable_indexscan;
+RESET enable_bitmapscan;
DROP INDEX wowidx;
CREATE INDEX wowidx ON test_tsvector USING gin (a);
SET enable_seqscan=OFF;
+-- GIN only supports bitmapscan, so no need to test plain indexscan
+
+explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
@@ -78,8 +108,11 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
RESET enable_seqscan;
+
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10;
SELECT * FROM ts_stat('SELECT a FROM test_tsvector', 'AB') ORDER BY ndoc DESC, nentry DESC, word;
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql
index fd7c7024f5e..6bc47c007f7 100644
--- a/src/test/regress/sql/tstypes.sql
+++ b/src/test/regress/sql/tstypes.sql
@@ -115,3 +115,6 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a | s');
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
+
+select to_tsvector('simple', 'x y q y') @@ '!foo' AS "true";
+select to_tsvector('simple', '') @@ '!foo' AS "true";