aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2023-03-11 12:15:41 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2023-03-11 12:15:41 -0500
commit: 786528039911c2270589bb690afab20116ee88f3 (patch)
tree: 39ef4f5dda76039d7c792195750cc6050a0e56c2
parent: 53a53ea332131b3d29d8d69e1dc2823f4d6ff21a (diff)
download: postgresql-786528039911c2270589bb690afab20116ee88f3.tar.gz
          postgresql-786528039911c2270589bb690afab20116ee88f3.zip
Fix misbehavior in contrib/pg_trgm with an unsatisfiable regex.
If the regex compiler can see that a regex is unsatisfiable (for example, '$foo') then it may emit an NFA having no arcs. pg_trgm's packGraph function did the wrong thing in this case; it would access off the end of a work array, and with bad luck could produce a corrupted output data structure causing more problems later. This could end with wrong answers or crashes in queries using a pg_trgm GIN or GiST index with such a regex. Fix by not trying to de-duplicate if there aren't at least 2 arcs.

Per bug #17830 from Alexander Lakhin. Back-patch to all supported branches.

Discussion: https://postgr.es/m/17830-57ff5f89bdb02b09@postgresql.org
-rw-r--r-- contrib/pg_trgm/expected/pg_word_trgm.out | 6
-rw-r--r-- contrib/pg_trgm/sql/pg_word_trgm.sql | 3
-rw-r--r-- contrib/pg_trgm/trgm_regexp.c | 26
3 files changed, 25 insertions, 10 deletions
diff --git a/contrib/pg_trgm/expected/pg_word_trgm.out b/contrib/pg_trgm/expected/pg_word_trgm.out
index 936d489390e..c66a67f30ef 100644
--- a/contrib/pg_trgm/expected/pg_word_trgm.out
+++ b/contrib/pg_trgm/expected/pg_word_trgm.out
@@ -1044,3 +1044,9 @@ select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kaban
Waikala | 0.3
(89 rows)
+-- test unsatisfiable pattern
+select * from test_trgm2 where t ~ '.*$x';
+ t
+---
+(0 rows)
+
diff --git a/contrib/pg_trgm/sql/pg_word_trgm.sql b/contrib/pg_trgm/sql/pg_word_trgm.sql
index d9fa1c55e5e..d2ada49133a 100644
--- a/contrib/pg_trgm/sql/pg_word_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_word_trgm.sql
@@ -43,3 +43,6 @@ select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
+
+-- test unsatisfiable pattern
+select * from test_trgm2 where t ~ '.*$x';
diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
index 71e4ebee4e9..3485a725cde 100644
--- a/contrib/pg_trgm/trgm_regexp.c
+++ b/contrib/pg_trgm/trgm_regexp.c
@@ -1944,9 +1944,7 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
arcsCount;
HASH_SEQ_STATUS scan_status;
TrgmState *state;
- TrgmPackArcInfo *arcs,
- *p1,
- *p2;
+ TrgmPackArcInfo *arcs;
TrgmPackedArc *packedArcs;
TrgmPackedGraph *result;
int i,
@@ -2018,17 +2016,25 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
/* We could have duplicates because states were merged. Remove them. */
- /* p1 is probe point, p2 is last known non-duplicate. */
- p2 = arcs;
- for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
+ if (arcIndex > 1)
{
- if (packArcInfoCmp(p1, p2) > 0)
+ /* p1 is probe point, p2 is last known non-duplicate. */
+ TrgmPackArcInfo *p1,
+ *p2;
+
+ p2 = arcs;
+ for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
{
- p2++;
- *p2 = *p1;
+ if (packArcInfoCmp(p1, p2) > 0)
+ {
+ p2++;
+ *p2 = *p1;
+ }
}
+ arcsCount = (p2 - arcs) + 1;
}
- arcsCount = (p2 - arcs) + 1;
+ else
+ arcsCount = arcIndex;
/* Create packed representation */
result = (TrgmPackedGraph *)