aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/util/relnode.c
diff options
context:
space:
mode:
authorRichard Guo <rguo@postgresql.org>2024-07-30 15:51:54 +0900
committerRichard Guo <rguo@postgresql.org>2024-07-30 15:51:54 +0900
commit9b282a9359a12831c087eba7f0f5f0b1dba7b7eb (patch)
treec0abcb0fb859f9da8f94f4b9dc86c2a26c681c94 /src/backend/optimizer/util/relnode.c
parent2309eff62b463fb3f19e6dd229243902b3b44501 (diff)
downloadpostgresql-9b282a9359a12831c087eba7f0f5f0b1dba7b7eb.tar.gz
postgresql-9b282a9359a12831c087eba7f0f5f0b1dba7b7eb.zip
Fix partitionwise join with partially-redundant join clauses
To determine if the two relations being joined can use partitionwise join, we need to verify the existence of equi-join conditions involving pairs of matching partition keys for all partition keys. Currently we do that by looking through the join's restriction clauses. However, it has been discovered that this approach is insufficient, because there might be partition keys known equal by a specific EC, but they do not form a join clause because it happens that other members of the EC than the partition keys are constrained to become a join clause. To address this issue, in addition to examining the join's restriction clauses, we also check if any partition keys are known equal by ECs, by leveraging function exprs_known_equal(). To accomplish this, we enhance exprs_known_equal() to check equality per the semantics of the opfamily, if provided. It could be argued that exprs_known_equal() could be called O(N^2) times, where N is the number of partition key expressions, resulting in noticeable performance costs if there are a lot of partition key expressions. But I think this is not a problem. The number of a joinrel's partition key expressions would only be equal to the join degree, since each base relation within the join contributes only one partition key expression. That is to say, it does not scale with the number of partitions. A benchmark with a query involving 5-way joins of partitioned tables, each with 3 partition keys and 1000 partitions, shows that the planning time is not significantly affected by this patch (within the margin of error), particularly when compared to the impact caused by partitionwise join. Thanks to Tom Lane for the idea of leveraging exprs_known_equal() to check if partition keys are known equal by ECs. Author: Richard Guo, Tom Lane Reviewed-by: Tom Lane, Ashutosh Bapat, Robert Haas Discussion: https://postgr.es/m/CAN_9JTzo_2F5dKLqXVtDX5V6dwqB0Xk+ihstpKEt3a1LT6X78A@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/util/relnode.c')
-rw-r--r--src/backend/optimizer/util/relnode.c103
1 files changed, 92 insertions, 11 deletions
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 971d1c7aae5..d7266e4cdba 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -2080,10 +2080,9 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
JoinType jointype, List *restrictlist)
{
PartitionScheme part_scheme = rel1->part_scheme;
+ bool pk_known_equal[PARTITION_MAX_KEYS];
+ int num_equal_pks;
ListCell *lc;
- int cnt_pks;
- bool pk_has_clause[PARTITION_MAX_KEYS];
- bool strict_op;
/*
* This function must only be called when the joined relations have same
@@ -2092,13 +2091,19 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
Assert(rel1->part_scheme == rel2->part_scheme);
Assert(part_scheme);
- memset(pk_has_clause, 0, sizeof(pk_has_clause));
+ /* We use a bool array to track which partkey columns are known equal */
+ memset(pk_known_equal, 0, sizeof(pk_known_equal));
+ /* ... as well as a count of how many are known equal */
+ num_equal_pks = 0;
+
+ /* First, look through the join's restriction clauses */
foreach(lc, restrictlist)
{
RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
OpExpr *opexpr;
Expr *expr1;
Expr *expr2;
+ bool strict_op;
int ipk1;
int ipk2;
@@ -2176,11 +2181,15 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
if (ipk1 != ipk2)
continue;
+ /* Ignore clause if we already proved these keys equal. */
+ if (pk_known_equal[ipk1])
+ continue;
+
/*
* The clause allows partitionwise join only if it uses the same
* operator family as that specified by the partition key.
*/
- if (rel1->part_scheme->strategy == PARTITION_STRATEGY_HASH)
+ if (part_scheme->strategy == PARTITION_STRATEGY_HASH)
{
if (!OidIsValid(rinfo->hashjoinoperator) ||
!op_in_opfamily(rinfo->hashjoinoperator,
@@ -2192,17 +2201,89 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
continue;
/* Mark the partition key as having an equi-join clause. */
- pk_has_clause[ipk1] = true;
+ pk_known_equal[ipk1] = true;
+
+ /* We can stop examining clauses once we prove all keys equal. */
+ if (++num_equal_pks == part_scheme->partnatts)
+ return true;
}
- /* Check whether every partition key has an equi-join condition. */
- for (cnt_pks = 0; cnt_pks < part_scheme->partnatts; cnt_pks++)
+ /*
+ * Also check to see if any keys are known equal by equivclass.c. In most
+ * cases there would have been a join restriction clause generated from
+ * any EC that had such knowledge, but there might be no such clause, or
+ * it might happen to constrain other members of the ECs than the ones we
+ * are looking for.
+ */
+ for (int ipk = 0; ipk < part_scheme->partnatts; ipk++)
{
- if (!pk_has_clause[cnt_pks])
- return false;
+ Oid btree_opfamily;
+
+ /* Ignore if we already proved these keys equal. */
+ if (pk_known_equal[ipk])
+ continue;
+
+ /*
+ * We need a btree opfamily to ask equivclass.c about. If the
+ * partopfamily is a hash opfamily, look up its equality operator, and
+ * select some btree opfamily that that operator is part of. (Any
+ * such opfamily should be good enough, since equivclass.c will track
+ * multiple opfamilies as appropriate.)
+ */
+ if (part_scheme->strategy == PARTITION_STRATEGY_HASH)
+ {
+ Oid eq_op;
+ List *eq_opfamilies;
+
+ eq_op = get_opfamily_member(part_scheme->partopfamily[ipk],
+ part_scheme->partopcintype[ipk],
+ part_scheme->partopcintype[ipk],
+ HTEqualStrategyNumber);
+ if (!OidIsValid(eq_op))
+ break; /* we're not going to succeed */
+ eq_opfamilies = get_mergejoin_opfamilies(eq_op);
+ if (eq_opfamilies == NIL)
+ break; /* we're not going to succeed */
+ btree_opfamily = linitial_oid(eq_opfamilies);
+ }
+ else
+ btree_opfamily = part_scheme->partopfamily[ipk];
+
+ /*
+ * We consider only non-nullable partition keys here; nullable ones
+ * would not be treated as part of the same equivalence classes as
+ * non-nullable ones.
+ */
+ foreach(lc, rel1->partexprs[ipk])
+ {
+ Node *expr1 = (Node *) lfirst(lc);
+ ListCell *lc2;
+
+ foreach(lc2, rel2->partexprs[ipk])
+ {
+ Node *expr2 = (Node *) lfirst(lc2);
+
+ if (exprs_known_equal(root, expr1, expr2, btree_opfamily))
+ {
+ pk_known_equal[ipk] = true;
+ break;
+ }
+ }
+ if (pk_known_equal[ipk])
+ break;
+ }
+
+ if (pk_known_equal[ipk])
+ {
+ /* We can stop examining keys once we prove all keys equal. */
+ if (++num_equal_pks == part_scheme->partnatts)
+ return true;
+ }
+ else
+ break; /* no chance to succeed, give up */
}
- return true;
+ return false;
}
/*