Fix misbehavior in contrib/pg_trgm with an unsatisfiable regex.

If the regex compiler can see that a regex is unsatisfiable (for example, '$foo') then it may emit an NFA having no arcs. pg_trgm's packGraph function did the wrong thing in this case; it would access off the end of a work array, and with bad luck could produce a corrupted output data structure causing more problems later. This could end with wrong answers or crashes in queries using a pg_trgm GIN or GiST index with such a regex. Fix by not trying to de-duplicate if there aren't at least 2 arcs. Per bug #17830 from Alexander Lakhin. Back-patch to all supported branches. Discussion: https://postgr.es/m/17830-57ff5f89bdb02b09@postgresql.org
author: Tom Lane <tgl@sss.pgh.pa.us> 2023-03-11 12:15:41 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2023-03-11 12:15:41 -0500
commit: 786528039911c2270589bb690afab20116ee88f3 (patch)
tree: 39ef4f5dda76039d7c792195750cc6050a0e56c2
parent: 53a53ea332131b3d29d8d69e1dc2823f4d6ff21a (diff)
download: postgresql-786528039911c2270589bb690afab20116ee88f3.tar.gz
postgresql-786528039911c2270589bb690afab20116ee88f3.zip
3 files changed, 25 insertions, 10 deletions
diff --git a/contrib/pg_trgm/expected/pg_word_trgm.out b/contrib/pg_trgm/expected/pg_word_trgm.out
index 936d489390e..c66a67f30ef 100644
--- a/contrib/pg_trgm/expected/pg_word_trgm.out
+++ b/contrib/pg_trgm/expected/pg_word_trgm.out
@@ -1044,3 +1044,9 @@ select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kaban
  Waikala                          |      0.3
 (89 rows)
 
+-- test unsatisfiable pattern
+select * from test_trgm2 where t ~ '.*$x';
+ t 
+---
+(0 rows)
+
diff --git a/contrib/pg_trgm/sql/pg_word_trgm.sql b/contrib/pg_trgm/sql/pg_word_trgm.sql
index d9fa1c55e5e..d2ada49133a 100644
--- a/contrib/pg_trgm/sql/pg_word_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_word_trgm.sql
@@ -43,3 +43,6 @@ select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t
 select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t;
 select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t;
 select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t;
+
+-- test unsatisfiable pattern
+select * from test_trgm2 where t ~ '.*$x';
diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
index 71e4ebee4e9..3485a725cde 100644
--- a/contrib/pg_trgm/trgm_regexp.c
+++ b/contrib/pg_trgm/trgm_regexp.c
@@ -1944,9 +1944,7 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
 				arcsCount;
 	HASH_SEQ_STATUS scan_status;
 	TrgmState  *state;
-	TrgmPackArcInfo *arcs,
-			   *p1,
-			   *p2;
+	TrgmPackArcInfo *arcs;
 	TrgmPackedArc *packedArcs;
 	TrgmPackedGraph *result;
 	int			i,
@@ -2018,17 +2016,25 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
 	qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
 
 	/* We could have duplicates because states were merged. Remove them. */
-	/* p1 is probe point, p2 is last known non-duplicate. */
-	p2 = arcs;
-	for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
+	if (arcIndex > 1)
 	{
-		if (packArcInfoCmp(p1, p2) > 0)
+		/* p1 is probe point, p2 is last known non-duplicate. */
+		TrgmPackArcInfo *p1,
+				   *p2;
+
+		p2 = arcs;
+		for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
 		{
-			p2++;
-			*p2 = *p1;
+			if (packArcInfoCmp(p1, p2) > 0)
+			{
+				p2++;
+				*p2 = *p1;
+			}
 		}
+		arcsCount = (p2 - arcs) + 1;
 	}
-	arcsCount = (p2 - arcs) + 1;
+	else
+		arcsCount = arcIndex;
 
 	/* Create packed representation */
 	result = (TrgmPackedGraph *)
author	Tom Lane <tgl@sss.pgh.pa.us>	2023-03-11 12:15:41 -0500
committer	Tom Lane <tgl@sss.pgh.pa.us>	2023-03-11 12:15:41 -0500
commit	786528039911c2270589bb690afab20116ee88f3 (patch)
tree	39ef4f5dda76039d7c792195750cc6050a0e56c2
parent	53a53ea332131b3d29d8d69e1dc2823f4d6ff21a (diff)
download	postgresql-786528039911c2270589bb690afab20116ee88f3.tar.gz postgresql-786528039911c2270589bb690afab20116ee88f3.zip