diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2023-03-11 12:15:41 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2023-03-11 12:15:41 -0500 |
commit | 786528039911c2270589bb690afab20116ee88f3 (patch) | |
tree | 39ef4f5dda76039d7c792195750cc6050a0e56c2 | |
parent | 53a53ea332131b3d29d8d69e1dc2823f4d6ff21a (diff) | |
download | postgresql-786528039911c2270589bb690afab20116ee88f3.tar.gz postgresql-786528039911c2270589bb690afab20116ee88f3.zip |
Fix misbehavior in contrib/pg_trgm with an unsatisfiable regex.
If the regex compiler can see that a regex is unsatisfiable
(for example, '$foo') then it may emit an NFA having no arcs.
pg_trgm's packGraph function did the wrong thing in this case;
it would access off the end of a work array, and with bad luck
could produce a corrupted output data structure causing more
problems later. This could end with wrong answers or crashes
in queries using a pg_trgm GIN or GiST index with such a regex.
Fix by not trying to de-duplicate if there aren't at least 2 arcs.
Per bug #17830 from Alexander Lakhin. Back-patch to all supported
branches.
Discussion: https://postgr.es/m/17830-57ff5f89bdb02b09@postgresql.org
-rw-r--r-- | contrib/pg_trgm/expected/pg_word_trgm.out | 6 | ||||
-rw-r--r-- | contrib/pg_trgm/sql/pg_word_trgm.sql | 3 | ||||
-rw-r--r-- | contrib/pg_trgm/trgm_regexp.c | 26 |
3 files changed, 25 insertions, 10 deletions
diff --git a/contrib/pg_trgm/expected/pg_word_trgm.out b/contrib/pg_trgm/expected/pg_word_trgm.out index 936d489390e..c66a67f30ef 100644 --- a/contrib/pg_trgm/expected/pg_word_trgm.out +++ b/contrib/pg_trgm/expected/pg_word_trgm.out @@ -1044,3 +1044,9 @@ select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kaban Waikala | 0.3 (89 rows) +-- test unsatisfiable pattern +select * from test_trgm2 where t ~ '.*$x'; + t +--- +(0 rows) + diff --git a/contrib/pg_trgm/sql/pg_word_trgm.sql b/contrib/pg_trgm/sql/pg_word_trgm.sql index d9fa1c55e5e..d2ada49133a 100644 --- a/contrib/pg_trgm/sql/pg_word_trgm.sql +++ b/contrib/pg_trgm/sql/pg_word_trgm.sql @@ -43,3 +43,6 @@ select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + +-- test unsatisfiable pattern +select * from test_trgm2 where t ~ '.*$x'; diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c index 71e4ebee4e9..3485a725cde 100644 --- a/contrib/pg_trgm/trgm_regexp.c +++ b/contrib/pg_trgm/trgm_regexp.c @@ -1944,9 +1944,7 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext) arcsCount; HASH_SEQ_STATUS scan_status; TrgmState *state; - TrgmPackArcInfo *arcs, - *p1, - *p2; + TrgmPackArcInfo *arcs; TrgmPackedArc *packedArcs; TrgmPackedGraph *result; int i, @@ -2018,17 +2016,25 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext) qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp); /* We could have duplicates because states were merged. Remove them. */ - /* p1 is probe point, p2 is last known non-duplicate. */ - p2 = arcs; - for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++) + if (arcIndex > 1) { - if (packArcInfoCmp(p1, p2) > 0) + /* p1 is probe point, p2 is last known non-duplicate. */ + TrgmPackArcInfo *p1, + *p2; + + p2 = arcs; + for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++) { - p2++; - *p2 = *p1; + if (packArcInfoCmp(p1, p2) > 0) + { + p2++; + *p2 = *p1; + } } + arcsCount = (p2 - arcs) + 1; } - arcsCount = (p2 - arcs) + 1; + else + arcsCount = arcIndex; /* Create packed representation */ result = (TrgmPackedGraph *) |