aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlexander Korotkov <akorotkov@postgresql.org>2025-03-28 23:36:49 +0200
committerAlexander Korotkov <akorotkov@postgresql.org>2025-03-28 23:37:49 +0200
commit775a06d44c04e323158826ec256386e7211e154d (patch)
tree0fb5bd5d510deffef81b5ccc8ee8ac75a7b020e3 /src
parent519338ace410d9b1ffb13176b8802b0307ff0531 (diff)
downloadpostgresql-775a06d44c04e323158826ec256386e7211e154d.tar.gz
postgresql-775a06d44c04e323158826ec256386e7211e154d.zip
Make group_similar_or_args() reorder clause list as little as possible
Currently, group_similar_or_args() permutes original positions of clauses independently on whether it manages to find any groups of similar clauses. While we are not providing any strict warranties on saving the original order of OR-clauses, it is preferred that the original order be modified as little as possible. This commit changes the reordering algorithm of group_similar_or_args() in the following way. We reorder each group of similar clauses so that the first item of the group stays in place, but all the other items are moved after it. So, if there are no similar clauses, the order of clauses stays the same. When there are some groups, only required reordering happens while the rest of the clauses remain in their places. Reported-by: Andrei Lepikhov <lepihov@gmail.com> Discussion: https://postgr.es/m/3ac7c436-81e1-4191-9caf-b0dd70b51511%40gmail.com Reviewed-by: Pavel Borisov <pashkin.elfe@gmail.com> Reviewed-by: Andrei Lepikhov <lepihov@gmail.com> Reviewed-by: Alena Rybakina <a.rybakina@postgrespro.ru>
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/path/indxpath.c59
-rw-r--r--src/test/regress/expected/create_index.out49
-rw-r--r--src/test/regress/expected/join.out52
-rw-r--r--src/test/regress/expected/partition_join.out12
-rw-r--r--src/test/regress/sql/create_index.sql11
5 files changed, 141 insertions, 42 deletions
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index a43ca16d683..6386ce82253 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -1189,6 +1189,8 @@ typedef struct
Oid inputcollid; /* OID of the OpClause input collation */
int argindex; /* index of the clause in the list of
* arguments */
+ int groupindex; /* value of argindex for the fist clause in
+ * the group of similar clauses */
} OrArgIndexMatch;
/*
@@ -1230,6 +1232,29 @@ or_arg_index_match_cmp(const void *a, const void *b)
}
/*
+ * Another comparison function for OrArgIndexMatch. It sorts groups together
+ * using groupindex. The group items are then sorted by argindex.
+ */
+static int
+or_arg_index_match_cmp_group(const void *a, const void *b)
+{
+ const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a;
+ const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b;
+
+ if (match_a->groupindex < match_b->groupindex)
+ return -1;
+ else if (match_a->groupindex > match_b->groupindex)
+ return 1;
+
+ if (match_a->argindex < match_b->argindex)
+ return -1;
+ else if (match_a->argindex > match_b->argindex)
+ return 1;
+
+ return 0;
+}
+
+/*
* group_similar_or_args
* Transform incoming OR-restrictinfo into a list of sub-restrictinfos,
* each of them containing a subset of similar OR-clause arguments from
@@ -1282,6 +1307,7 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
i++;
matches[i].argindex = i;
+ matches[i].groupindex = i;
matches[i].indexnum = -1;
matches[i].colnum = -1;
matches[i].opno = InvalidOid;
@@ -1400,9 +1426,40 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
return orargs;
}
- /* Sort clauses to make similar clauses go together */
+ /*
+ * Sort clauses to make similar clauses go together. But at the same
+ * time, we would like to change the order of clauses as little as
+ * possible. To do so, we reorder each group of similar clauses so that
+ * the first item of the group stays in place, and all the other items are
+ * moved after it. So, if there are no similar clauses, the order of
+ * clauses stays the same. When there are some groups, required
+ * reordering happens while the rest of the clauses remain in their
+ * places. That is achieved by assigning a 'groupindex' to each clause:
+ * the number of the first item in the group in the original clause list.
+ */
qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp);
+ /* Assign groupindex to the sorted clauses */
+ for (i = 1; i < n; i++)
+ {
+ /*
+ * When two clauses are similar and should belong to the same group,
+ * copy the 'groupindex' from the previous clause. Given we are
+ * considering clauses in direct order, all the clauses would have a
+ * 'groupindex' equal to the 'groupindex' of the first clause in the
+ * group.
+ */
+ if (matches[i].indexnum == matches[i - 1].indexnum &&
+ matches[i].colnum == matches[i - 1].colnum &&
+ matches[i].opno == matches[i - 1].opno &&
+ matches[i].inputcollid == matches[i - 1].inputcollid &&
+ matches[i].indexnum != -1)
+ matches[i].groupindex = matches[i - 1].groupindex;
+ }
+
+ /* Re-sort clauses first by groupindex then by argindex */
+ qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp_group);
+
/*
* Group similar clauses into single sub-restrictinfo. Side effect: the
* resulting list of restrictions will be sorted by indexnum and colnum.
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index bd5f002cf20..15be0043ad4 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1899,13 +1899,13 @@ SELECT * FROM tenk1
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on tenk1
- Recheck Cond: (((thousand = 42) AND (tenthous IS NULL)) OR ((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))))
+ Recheck Cond: (((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))) OR ((thousand = 42) AND (tenthous IS NULL)))
Filter: ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42) OR (tenthous IS NULL))
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous IS NULL))
- -> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[])))
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: ((thousand = 42) AND (tenthous IS NULL))
(8 rows)
EXPLAIN (COSTS OFF)
@@ -1938,13 +1938,13 @@ SELECT * FROM tenk1
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on tenk1
- Recheck Cond: (((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))) OR ((thousand = 42) AND (tenthous = '1'::smallint)))
+ Recheck Cond: (((thousand = 42) AND (tenthous = '1'::smallint)) OR ((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))))
Filter: ((tenthous = '1'::smallint) OR (tenthous = '3'::bigint) OR (tenthous = '42'::bigint))
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
- -> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous = '1'::smallint))
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
(8 rows)
EXPLAIN (COSTS OFF)
@@ -2129,16 +2129,16 @@ SELECT count(*) FROM tenk1
---------------------------------------------------------------------------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on tenk1
- Recheck Cond: ((hundred = 42) AND (((thousand = 99) AND (tenthous = 2)) OR ((thousand = 42) OR (thousand = 41))))
+ Recheck Cond: ((hundred = 42) AND (((thousand = 42) OR (thousand = 41)) OR ((thousand = 99) AND (tenthous = 2))))
Filter: ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2)))
-> BitmapAnd
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 42)
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 99) AND (tenthous = 2))
- -> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: (thousand = ANY ('{42,41}'::integer[]))
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: ((thousand = 99) AND (tenthous = 2))
(12 rows)
SELECT count(*) FROM tenk1
@@ -3248,6 +3248,37 @@ SELECT b.relname,
(2 rows)
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
+-- No OR-clause groupings should happen, and there should be no clause
+-- permutations in the filtering conditions we could see in the EXPLAIN.
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 WHERE unique1 < 1 OR hundred < 2;
+ QUERY PLAN
+--------------------------------------------------
+ Bitmap Heap Scan on tenk1
+ Recheck Cond: ((unique1 < 1) OR (hundred < 2))
+ -> BitmapOr
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 < 1)
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred < 2)
+(7 rows)
+
+-- OR clauses in the 'unique1' column are grouped, so clause permutation
+-- occurs. W e can see it in the 'Recheck Cond': the second clause involving
+-- the 'unique1' column goes just after the first one.
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 WHERE unique1 < 1 OR unique1 < 3 OR hundred < 2;
+ QUERY PLAN
+---------------------------------------------------------------------
+ Bitmap Heap Scan on tenk1
+ Recheck Cond: (((unique1 < 1) OR (unique1 < 3)) OR (hundred < 2))
+ -> BitmapOr
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 < ANY ('{1,3}'::integer[]))
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred < 2)
+(7 rows)
+
-- Check bitmap scan can consider similar OR arguments separately without
-- grouping them into SAOP.
CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index a57bb18c24f..14da5708451 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4333,20 +4333,20 @@ select * from tenk1 a join tenk1 b on
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
- Recheck Cond: ((hundred = 4) OR (unique1 = 2))
+ Recheck Cond: ((unique1 = 2) OR (hundred = 4))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_hundred
- Index Cond: (hundred = 4)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 2)
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred = 4)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
- Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
+ Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_unique2
- Index Cond: (unique2 = 3)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
+ -> Bitmap Index Scan on tenk1_unique2
+ Index Cond: (unique2 = 3)
(17 rows)
explain (costs off)
@@ -4360,12 +4360,12 @@ select * from tenk1 a join tenk1 b on
Filter: ((unique1 = 2) OR (ten = 4))
-> Materialize
-> Bitmap Heap Scan on tenk1 a
- Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
+ Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_unique2
- Index Cond: (unique2 = 3)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
+ -> Bitmap Index Scan on tenk1_unique2
+ Index Cond: (unique2 = 3)
(12 rows)
explain (costs off)
@@ -4377,21 +4377,21 @@ select * from tenk1 a join tenk1 b on
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
- Recheck Cond: ((hundred = 4) OR (unique1 = 2))
+ Recheck Cond: ((unique1 = 2) OR (hundred = 4))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_hundred
- Index Cond: (hundred = 4)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 2)
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred = 4)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
- Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
+ Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7)))
Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_unique2
- Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
+ -> Bitmap Index Scan on tenk1_unique2
+ Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
(18 rows)
explain (costs off)
@@ -4403,21 +4403,21 @@ select * from tenk1 a join tenk1 b on
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
- Recheck Cond: ((hundred = 4) OR (unique1 = 2))
+ Recheck Cond: ((unique1 = 2) OR (hundred = 4))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_hundred
- Index Cond: (hundred = 4)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 2)
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred = 4)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
- Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
+ Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7)))
Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_unique2
- Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
+ -> Bitmap Index Scan on tenk1_unique2
+ Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
(18 rows)
explain (costs off)
@@ -4431,15 +4431,15 @@ select * from tenk1 a join tenk1 b on
-> Seq Scan on tenk1 b
-> Materialize
-> Bitmap Heap Scan on tenk1 a
- Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR ((unique1 = 3) OR (unique1 = 1)) OR (unique1 < 20))
+ Recheck Cond: ((unique1 < 20) OR ((unique1 = 3) OR (unique1 = 1)) OR ((unique2 = 3) OR (unique2 = 7)))
Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_unique2
- Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
- -> Bitmap Index Scan on tenk1_unique1
- Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 < 20)
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
+ -> Bitmap Index Scan on tenk1_unique2
+ Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
(14 rows)
--
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 938cedd79ad..6101c8c7cf1 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -2568,24 +2568,24 @@ where not exists (select 1 from prtx2
-> Seq Scan on prtx1_1
Filter: ((a < 20) AND (c = 91))
-> Bitmap Heap Scan on prtx2_1
- Recheck Cond: ((c = 99) OR (b = (prtx1_1.b + 1)))
+ Recheck Cond: ((b = (prtx1_1.b + 1)) OR (c = 99))
Filter: (a = prtx1_1.a)
-> BitmapOr
- -> Bitmap Index Scan on prtx2_1_c_idx
- Index Cond: (c = 99)
-> Bitmap Index Scan on prtx2_1_b_idx
Index Cond: (b = (prtx1_1.b + 1))
+ -> Bitmap Index Scan on prtx2_1_c_idx
+ Index Cond: (c = 99)
-> Nested Loop Anti Join
-> Seq Scan on prtx1_2
Filter: ((a < 20) AND (c = 91))
-> Bitmap Heap Scan on prtx2_2
- Recheck Cond: ((c = 99) OR (b = (prtx1_2.b + 1)))
+ Recheck Cond: ((b = (prtx1_2.b + 1)) OR (c = 99))
Filter: (a = prtx1_2.a)
-> BitmapOr
- -> Bitmap Index Scan on prtx2_2_c_idx
- Index Cond: (c = 99)
-> Bitmap Index Scan on prtx2_2_b_idx
Index Cond: (b = (prtx1_2.b + 1))
+ -> Bitmap Index Scan on prtx2_2_c_idx
+ Index Cond: (c = 99)
(23 rows)
select * from prtx1
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index be570da08a0..6b3852dddd8 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -1355,6 +1355,17 @@ SELECT b.relname,
ORDER BY 1;
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
+-- No OR-clause groupings should happen, and there should be no clause
+-- permutations in the filtering conditions we could see in the EXPLAIN.
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 WHERE unique1 < 1 OR hundred < 2;
+
+-- OR clauses in the 'unique1' column are grouped, so clause permutation
+-- occurs. W e can see it in the 'Recheck Cond': the second clause involving
+-- the 'unique1' column goes just after the first one.
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1 WHERE unique1 < 1 OR unique1 < 3 OR hundred < 2;
+
-- Check bitmap scan can consider similar OR arguments separately without
-- grouping them into SAOP.
CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL);