diff options
author | Alexander Korotkov <akorotkov@postgresql.org> | 2025-03-28 23:36:49 +0200 |
---|---|---|
committer | Alexander Korotkov <akorotkov@postgresql.org> | 2025-03-28 23:37:49 +0200 |
commit | 775a06d44c04e323158826ec256386e7211e154d (patch) | |
tree | 0fb5bd5d510deffef81b5ccc8ee8ac75a7b020e3 /src | |
parent | 519338ace410d9b1ffb13176b8802b0307ff0531 (diff) | |
download | postgresql-775a06d44c04e323158826ec256386e7211e154d.tar.gz postgresql-775a06d44c04e323158826ec256386e7211e154d.zip |
Make group_similar_or_args() reorder clause list as little as possible
Currently, group_similar_or_args() permutes original positions of clauses
independently on whether it manages to find any groups of similar clauses.
While we are not providing any strict warranties on saving the original order
of OR-clauses, it is preferred that the original order be modified as little
as possible.
This commit changes the reordering algorithm of group_similar_or_args() in
the following way. We reorder each group of similar clauses so that the
first item of the group stays in place, but all the other items are moved
after it. So, if there are no similar clauses, the order of clauses stays
the same. When there are some groups, only required reordering happens while
the rest of the clauses remain in their places.
Reported-by: Andrei Lepikhov <lepihov@gmail.com>
Discussion: https://postgr.es/m/3ac7c436-81e1-4191-9caf-b0dd70b51511%40gmail.com
Reviewed-by: Pavel Borisov <pashkin.elfe@gmail.com>
Reviewed-by: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: Alena Rybakina <a.rybakina@postgrespro.ru>
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 59 | ||||
-rw-r--r-- | src/test/regress/expected/create_index.out | 49 | ||||
-rw-r--r-- | src/test/regress/expected/join.out | 52 | ||||
-rw-r--r-- | src/test/regress/expected/partition_join.out | 12 | ||||
-rw-r--r-- | src/test/regress/sql/create_index.sql | 11 |
5 files changed, 141 insertions, 42 deletions
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index a43ca16d683..6386ce82253 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1189,6 +1189,8 @@ typedef struct Oid inputcollid; /* OID of the OpClause input collation */ int argindex; /* index of the clause in the list of * arguments */ + int groupindex; /* value of argindex for the fist clause in + * the group of similar clauses */ } OrArgIndexMatch; /* @@ -1230,6 +1232,29 @@ or_arg_index_match_cmp(const void *a, const void *b) } /* + * Another comparison function for OrArgIndexMatch. It sorts groups together + * using groupindex. The group items are then sorted by argindex. + */ +static int +or_arg_index_match_cmp_group(const void *a, const void *b) +{ + const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a; + const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b; + + if (match_a->groupindex < match_b->groupindex) + return -1; + else if (match_a->groupindex > match_b->groupindex) + return 1; + + if (match_a->argindex < match_b->argindex) + return -1; + else if (match_a->argindex > match_b->argindex) + return 1; + + return 0; +} + +/* * group_similar_or_args * Transform incoming OR-restrictinfo into a list of sub-restrictinfos, * each of them containing a subset of similar OR-clause arguments from @@ -1282,6 +1307,7 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo) i++; matches[i].argindex = i; + matches[i].groupindex = i; matches[i].indexnum = -1; matches[i].colnum = -1; matches[i].opno = InvalidOid; @@ -1400,9 +1426,40 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo) return orargs; } - /* Sort clauses to make similar clauses go together */ + /* + * Sort clauses to make similar clauses go together. But at the same + * time, we would like to change the order of clauses as little as + * possible. To do so, we reorder each group of similar clauses so that + * the first item of the group stays in place, and all the other items are + * moved after it. So, if there are no similar clauses, the order of + * clauses stays the same. When there are some groups, required + * reordering happens while the rest of the clauses remain in their + * places. That is achieved by assigning a 'groupindex' to each clause: + * the number of the first item in the group in the original clause list. + */ qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp); + /* Assign groupindex to the sorted clauses */ + for (i = 1; i < n; i++) + { + /* + * When two clauses are similar and should belong to the same group, + * copy the 'groupindex' from the previous clause. Given we are + * considering clauses in direct order, all the clauses would have a + * 'groupindex' equal to the 'groupindex' of the first clause in the + * group. + */ + if (matches[i].indexnum == matches[i - 1].indexnum && + matches[i].colnum == matches[i - 1].colnum && + matches[i].opno == matches[i - 1].opno && + matches[i].inputcollid == matches[i - 1].inputcollid && + matches[i].indexnum != -1) + matches[i].groupindex = matches[i - 1].groupindex; + } + + /* Re-sort clauses first by groupindex then by argindex */ + qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp_group); + /* * Group similar clauses into single sub-restrictinfo. Side effect: the * resulting list of restrictions will be sorted by indexnum and colnum. diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index bd5f002cf20..15be0043ad4 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -1899,13 +1899,13 @@ SELECT * FROM tenk1 QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------- Bitmap Heap Scan on tenk1 - Recheck Cond: (((thousand = 42) AND (tenthous IS NULL)) OR ((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42)))) + Recheck Cond: (((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))) OR ((thousand = 42) AND (tenthous IS NULL))) Filter: ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42) OR (tenthous IS NULL)) -> BitmapOr -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: ((thousand = 42) AND (tenthous IS NULL)) - -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[]))) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: ((thousand = 42) AND (tenthous IS NULL)) (8 rows) EXPLAIN (COSTS OFF) @@ -1938,13 +1938,13 @@ SELECT * FROM tenk1 QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------- Bitmap Heap Scan on tenk1 - Recheck Cond: (((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))) OR ((thousand = 42) AND (tenthous = '1'::smallint))) + Recheck Cond: (((thousand = 42) AND (tenthous = '1'::smallint)) OR ((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint)))) Filter: ((tenthous = '1'::smallint) OR (tenthous = '3'::bigint) OR (tenthous = '42'::bigint)) -> BitmapOr -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[]))) - -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: ((thousand = 42) AND (tenthous = '1'::smallint)) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[]))) (8 rows) EXPLAIN (COSTS OFF) @@ -2129,16 +2129,16 @@ SELECT count(*) FROM tenk1 --------------------------------------------------------------------------------------------------------------------------- Aggregate -> Bitmap Heap Scan on tenk1 - Recheck Cond: ((hundred = 42) AND (((thousand = 99) AND (tenthous = 2)) OR ((thousand = 42) OR (thousand = 41)))) + Recheck Cond: ((hundred = 42) AND (((thousand = 42) OR (thousand = 41)) OR ((thousand = 99) AND (tenthous = 2)))) Filter: ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2))) -> BitmapAnd -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 42) -> BitmapOr -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: ((thousand = 99) AND (tenthous = 2)) - -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: (thousand = ANY ('{42,41}'::integer[])) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: ((thousand = 99) AND (tenthous = 2)) (12 rows) SELECT count(*) FROM tenk1 @@ -3248,6 +3248,37 @@ SELECT b.relname, (2 rows) DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before; +-- No OR-clause groupings should happen, and there should be no clause +-- permutations in the filtering conditions we could see in the EXPLAIN. +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 WHERE unique1 < 1 OR hundred < 2; + QUERY PLAN +-------------------------------------------------- + Bitmap Heap Scan on tenk1 + Recheck Cond: ((unique1 < 1) OR (hundred < 2)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 < 1) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred < 2) +(7 rows) + +-- OR clauses in the 'unique1' column are grouped, so clause permutation +-- occurs. W e can see it in the 'Recheck Cond': the second clause involving +-- the 'unique1' column goes just after the first one. +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 WHERE unique1 < 1 OR unique1 < 3 OR hundred < 2; + QUERY PLAN +--------------------------------------------------------------------- + Bitmap Heap Scan on tenk1 + Recheck Cond: (((unique1 < 1) OR (unique1 < 3)) OR (hundred < 2)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 < ANY ('{1,3}'::integer[])) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred < 2) +(7 rows) + -- Check bitmap scan can consider similar OR arguments separately without -- grouping them into SAOP. CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index a57bb18c24f..14da5708451 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4333,20 +4333,20 @@ select * from tenk1 a join tenk1 b on Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4))) -> Bitmap Heap Scan on tenk1 b - Recheck Cond: ((hundred = 4) OR (unique1 = 2)) + Recheck Cond: ((unique1 = 2) OR (hundred = 4)) -> BitmapOr - -> Bitmap Index Scan on tenk1_hundred - Index Cond: (hundred = 4) -> Bitmap Index Scan on tenk1_unique1 Index Cond: (unique1 = 2) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 4) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique2 = 3) OR (unique1 = 1)) + Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique2 - Index Cond: (unique2 = 3) -> Bitmap Index Scan on tenk1_unique1 Index Cond: (unique1 = 1) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = 3) (17 rows) explain (costs off) @@ -4360,12 +4360,12 @@ select * from tenk1 a join tenk1 b on Filter: ((unique1 = 2) OR (ten = 4)) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique2 = 3) OR (unique1 = 1)) + Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique2 - Index Cond: (unique2 = 3) -> Bitmap Index Scan on tenk1_unique1 Index Cond: (unique1 = 1) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = 3) (12 rows) explain (costs off) @@ -4377,21 +4377,21 @@ select * from tenk1 a join tenk1 b on Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) -> Bitmap Heap Scan on tenk1 b - Recheck Cond: ((hundred = 4) OR (unique1 = 2)) + Recheck Cond: ((unique1 = 2) OR (hundred = 4)) -> BitmapOr - -> Bitmap Index Scan on tenk1_hundred - Index Cond: (hundred = 4) -> Bitmap Index Scan on tenk1_unique1 Index Cond: (unique1 = 2) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 4) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1)) + Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7))) Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique2 - Index Cond: (unique2 = ANY ('{3,7}'::integer[])) -> Bitmap Index Scan on tenk1_unique1 Index Cond: (unique1 = 1) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = ANY ('{3,7}'::integer[])) (18 rows) explain (costs off) @@ -4403,21 +4403,21 @@ select * from tenk1 a join tenk1 b on Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) -> Bitmap Heap Scan on tenk1 b - Recheck Cond: ((hundred = 4) OR (unique1 = 2)) + Recheck Cond: ((unique1 = 2) OR (hundred = 4)) -> BitmapOr - -> Bitmap Index Scan on tenk1_hundred - Index Cond: (hundred = 4) -> Bitmap Index Scan on tenk1_unique1 Index Cond: (unique1 = 2) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 4) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1)) + Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7))) Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique2 - Index Cond: (unique2 = ANY ('{3,7}'::integer[])) -> Bitmap Index Scan on tenk1_unique1 Index Cond: (unique1 = 1) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = ANY ('{3,7}'::integer[])) (18 rows) explain (costs off) @@ -4431,15 +4431,15 @@ select * from tenk1 a join tenk1 b on -> Seq Scan on tenk1 b -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR ((unique1 = 3) OR (unique1 = 1)) OR (unique1 < 20)) + Recheck Cond: ((unique1 < 20) OR ((unique1 = 3) OR (unique1 = 1)) OR ((unique2 = 3) OR (unique2 = 7))) Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique2 - Index Cond: (unique2 = ANY ('{3,7}'::integer[])) - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = ANY ('{3,1}'::integer[])) -> Bitmap Index Scan on tenk1_unique1 Index Cond: (unique1 < 20) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = ANY ('{3,1}'::integer[])) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = ANY ('{3,7}'::integer[])) (14 rows) -- diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 938cedd79ad..6101c8c7cf1 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -2568,24 +2568,24 @@ where not exists (select 1 from prtx2 -> Seq Scan on prtx1_1 Filter: ((a < 20) AND (c = 91)) -> Bitmap Heap Scan on prtx2_1 - Recheck Cond: ((c = 99) OR (b = (prtx1_1.b + 1))) + Recheck Cond: ((b = (prtx1_1.b + 1)) OR (c = 99)) Filter: (a = prtx1_1.a) -> BitmapOr - -> Bitmap Index Scan on prtx2_1_c_idx - Index Cond: (c = 99) -> Bitmap Index Scan on prtx2_1_b_idx Index Cond: (b = (prtx1_1.b + 1)) + -> Bitmap Index Scan on prtx2_1_c_idx + Index Cond: (c = 99) -> Nested Loop Anti Join -> Seq Scan on prtx1_2 Filter: ((a < 20) AND (c = 91)) -> Bitmap Heap Scan on prtx2_2 - Recheck Cond: ((c = 99) OR (b = (prtx1_2.b + 1))) + Recheck Cond: ((b = (prtx1_2.b + 1)) OR (c = 99)) Filter: (a = prtx1_2.a) -> BitmapOr - -> Bitmap Index Scan on prtx2_2_c_idx - Index Cond: (c = 99) -> Bitmap Index Scan on prtx2_2_b_idx Index Cond: (b = (prtx1_2.b + 1)) + -> Bitmap Index Scan on prtx2_2_c_idx + Index Cond: (c = 99) (23 rows) select * from prtx1 diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index be570da08a0..6b3852dddd8 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -1355,6 +1355,17 @@ SELECT b.relname, ORDER BY 1; DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before; +-- No OR-clause groupings should happen, and there should be no clause +-- permutations in the filtering conditions we could see in the EXPLAIN. +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 WHERE unique1 < 1 OR hundred < 2; + +-- OR clauses in the 'unique1' column are grouped, so clause permutation +-- occurs. W e can see it in the 'Recheck Cond': the second clause involving +-- the 'unique1' column goes just after the first one. +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 WHERE unique1 < 1 OR unique1 < 3 OR hundred < 2; + -- Check bitmap scan can consider similar OR arguments separately without -- grouping them into SAOP. CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL); |