aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAmit Langote <amitlan@postgresql.org>2024-11-08 16:30:22 +0900
committerAmit Langote <amitlan@postgresql.org>2024-11-08 17:18:55 +0900
commit33040b1715c7a3450a6b3dc61bd7779a5f9ae709 (patch)
tree0842ca992b40984bb62c1bd9f1a4bd1f59e1f5bd
parent0a620659c549f05881800fddd1859f3e373dd9cf (diff)
downloadpostgresql-33040b1715c7a3450a6b3dc61bd7779a5f9ae709.tar.gz
postgresql-33040b1715c7a3450a6b3dc61bd7779a5f9ae709.zip
Disallow partitionwise join when collations don't match
If the collation of any join key column doesn’t match the collation of the corresponding partition key, partitionwise joins can yield incorrect results. For example, rows that would match under the join key collation might be located in different partitions due to the partitioning collation. In such cases, a partitionwise join would yield different results from a non-partitionwise join, so disallow it in such cases. Reported-by: Tender Wang <tndrwang@gmail.com> Author: Jian He <jian.universality@gmail.com> Reviewed-by: Tender Wang <tndrwang@gmail.com> Reviewed-by: Junwang Zhao <zhjwpku@gmail.com> Discussion: https://postgr.es/m/CAHewXNno_HKiQ6PqyLYfuqDtwp7KKHZiH1J7Pqyz0nr+PS2Dwg@mail.gmail.com Backpatch-through: 12
-rw-r--r--src/backend/optimizer/util/relnode.c4
-rw-r--r--src/test/regress/expected/collate.icu.utf8.out118
-rw-r--r--src/test/regress/sql/collate.icu.utf8.sql22
3 files changed, 144 insertions, 0 deletions
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 6e1c87a4e20..591306735a5 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -1810,6 +1810,10 @@ have_partkey_equi_join(RelOptInfo *joinrel,
if (ipk1 != ipk2)
continue;
+ /* Reject if the partition key collation differs from the clause's. */
+ if (rel1->part_scheme->partcollation[ipk1] != opexpr->inputcollid)
+ return false;
+
/*
* The clause allows partitionwise join only if it uses the same
* operator family as that specified by the partition key.
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index a47e1befa3c..b7d78e32ee0 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -2038,6 +2038,124 @@ SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
b | 5
(4 rows)
+-- Partitionwise join should not be allowed too when the collation used by the
+-- join keys doesn't match the partition key collation.
+SET enable_partitionwise_join TO false;
+EXPLAIN (COSTS OFF)
+SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
+ QUERY PLAN
+-------------------------------------------------------------
+ Sort
+ Sort Key: t1.c COLLATE "C"
+ -> HashAggregate
+ Group Key: t1.c
+ -> Hash Join
+ Hash Cond: (t1.c = t2.c)
+ -> Append
+ -> Seq Scan on pagg_tab3_p2 t1_1
+ -> Seq Scan on pagg_tab3_p1 t1_2
+ -> Hash
+ -> Append
+ -> Seq Scan on pagg_tab3_p2 t2_1
+ -> Seq Scan on pagg_tab3_p1 t2_2
+(13 rows)
+
+SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
+ c | count
+---+-------
+ A | 100
+ B | 100
+(2 rows)
+
+SET enable_partitionwise_join TO true;
+EXPLAIN (COSTS OFF)
+SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
+ QUERY PLAN
+-------------------------------------------------------------
+ Sort
+ Sort Key: t1.c COLLATE "C"
+ -> HashAggregate
+ Group Key: t1.c
+ -> Hash Join
+ Hash Cond: (t1.c = t2.c)
+ -> Append
+ -> Seq Scan on pagg_tab3_p2 t1_1
+ -> Seq Scan on pagg_tab3_p1 t1_2
+ -> Hash
+ -> Append
+ -> Seq Scan on pagg_tab3_p2 t2_1
+ -> Seq Scan on pagg_tab3_p1 t2_2
+(13 rows)
+
+SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
+ c | count
+---+-------
+ A | 100
+ B | 100
+(2 rows)
+
+-- OK when the join clause uses the same collation as the partition key.
+EXPLAIN (COSTS OFF)
+SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: ((t1.c)::text) COLLATE "C"
+ -> Append
+ -> HashAggregate
+ Group Key: (t1.c)::text
+ -> Hash Join
+ Hash Cond: ((t1.c)::text = (t2.c)::text)
+ -> Seq Scan on pagg_tab3_p2 t1
+ -> Hash
+ -> Seq Scan on pagg_tab3_p2 t2
+ -> HashAggregate
+ Group Key: (t1_1.c)::text
+ -> Hash Join
+ Hash Cond: ((t1_1.c)::text = (t2_1.c)::text)
+ -> Seq Scan on pagg_tab3_p1 t1_1
+ -> Hash
+ -> Seq Scan on pagg_tab3_p1 t2_1
+(17 rows)
+
+SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
+ c | count
+---+-------
+ A | 25
+ B | 25
+ a | 25
+ b | 25
+(4 rows)
+
+SET enable_partitionwise_join TO false;
+EXPLAIN (COSTS OFF)
+SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
+ QUERY PLAN
+-------------------------------------------------------------
+ Sort
+ Sort Key: ((t1.c)::text) COLLATE "C"
+ -> HashAggregate
+ Group Key: (t1.c)::text
+ -> Hash Join
+ Hash Cond: ((t1.c)::text = (t2.c)::text)
+ -> Append
+ -> Seq Scan on pagg_tab3_p2 t1_1
+ -> Seq Scan on pagg_tab3_p1 t1_2
+ -> Hash
+ -> Append
+ -> Seq Scan on pagg_tab3_p2 t2_1
+ -> Seq Scan on pagg_tab3_p1 t2_2
+(13 rows)
+
+SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
+ c | count
+---+-------
+ A | 25
+ B | 25
+ a | 25
+ b | 25
+(4 rows)
+
DROP TABLE pagg_tab3;
RESET enable_partitionwise_aggregate;
RESET max_parallel_workers_per_gather;
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index a7b3565f3c6..397b96bb7d0 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -781,6 +781,28 @@ EXPLAIN (COSTS OFF)
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
+-- Partitionwise join should not be allowed too when the collation used by the
+-- join keys doesn't match the partition key collation.
+SET enable_partitionwise_join TO false;
+EXPLAIN (COSTS OFF)
+SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
+SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
+
+SET enable_partitionwise_join TO true;
+EXPLAIN (COSTS OFF)
+SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
+SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
+
+-- OK when the join clause uses the same collation as the partition key.
+EXPLAIN (COSTS OFF)
+SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
+SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
+
+SET enable_partitionwise_join TO false;
+EXPLAIN (COSTS OFF)
+SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
+SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
+
DROP TABLE pagg_tab3;
RESET enable_partitionwise_aggregate;