diff options
Diffstat (limited to 'src/backend/optimizer/path/joinrels.c')
-rw-r--r-- | src/backend/optimizer/path/joinrels.c | 316 |
1 files changed, 314 insertions, 2 deletions
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 6ee23509c58..2b868c52de4 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -14,10 +14,17 @@ */ #include "postgres.h" +#include "miscadmin.h" +#include "catalog/partition.h" +#include "nodes/relation.h" +#include "optimizer/clauses.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/prep.h" +#include "optimizer/cost.h" #include "utils/memutils.h" +#include "utils/lsyscache.h" static void make_rels_by_clause_joins(PlannerInfo *root, @@ -29,12 +36,17 @@ static void make_rels_by_clauseless_joins(PlannerInfo *root, static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel); static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); static bool is_dummy_rel(RelOptInfo *rel); -static void mark_dummy_rel(RelOptInfo *rel); static bool restriction_is_constant_false(List *restrictlist, bool only_pushed_down); static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, RelOptInfo *joinrel, SpecialJoinInfo *sjinfo, List *restrictlist); +static void try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *parent_sjinfo, + List *parent_restrictlist); +static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, + bool strict_op); /* @@ -892,6 +904,9 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype); break; } + + /* Apply partition-wise join technique, if possible. */ + try_partition_wise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist); } @@ -1197,7 +1212,7 @@ is_dummy_rel(RelOptInfo *rel) * is that the best solution is to explicitly make the dummy path in the same * context the given RelOptInfo is in. */ -static void +void mark_dummy_rel(RelOptInfo *rel) { MemoryContext oldcontext; @@ -1268,3 +1283,300 @@ restriction_is_constant_false(List *restrictlist, bool only_pushed_down) } return false; } + +/* + * Assess whether join between given two partitioned relations can be broken + * down into joins between matching partitions; a technique called + * "partition-wise join" + * + * Partition-wise join is possible when a. Joining relations have same + * partitioning scheme b. There exists an equi-join between the partition keys + * of the two relations. + * + * Partition-wise join is planned as follows (details: optimizer/README.) + * + * 1. Create the RelOptInfos for joins between matching partitions i.e + * child-joins and add paths to them. + * + * 2. Construct Append or MergeAppend paths across the set of child joins. + * This second phase is implemented by generate_partition_wise_join_paths(). + * + * The RelOptInfo, SpecialJoinInfo and restrictlist for each child join are + * obtained by translating the respective parent join structures. + */ +static void +try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, + RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo, + List *parent_restrictlist) +{ + int nparts; + int cnt_parts; + + /* Guard against stack overflow due to overly deep partition hierarchy. */ + check_stack_depth(); + + /* Nothing to do, if the join relation is not partitioned. */ + if (!IS_PARTITIONED_REL(joinrel)) + return; + + /* + * set_rel_pathlist() may not create paths in children of an empty + * partitioned table and so we can not add paths to child-joins. So, deem + * such a join as unpartitioned. When a partitioned relation is deemed + * empty because all its children are empty, dummy path will be set in + * each of the children. In such a case we could still consider the join + * as partitioned, but it might not help much. + */ + if (IS_DUMMY_REL(rel1) || IS_DUMMY_REL(rel2)) + return; + + /* + * Since this join relation is partitioned, all the base relations + * participating in this join must be partitioned and so are all the + * intermediate join relations. + */ + Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2)); + Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2)); + + /* + * The partition scheme of the join relation should match that of the + * joining relations. + */ + Assert(joinrel->part_scheme == rel1->part_scheme && + joinrel->part_scheme == rel2->part_scheme); + + /* + * Since we allow partition-wise join only when the partition bounds of + * the joining relations exactly match, the partition bounds of the join + * should match those of the joining relations. + */ + Assert(partition_bounds_equal(joinrel->part_scheme->partnatts, + joinrel->part_scheme->parttyplen, + joinrel->part_scheme->parttypbyval, + joinrel->boundinfo, rel1->boundinfo)); + Assert(partition_bounds_equal(joinrel->part_scheme->partnatts, + joinrel->part_scheme->parttyplen, + joinrel->part_scheme->parttypbyval, + joinrel->boundinfo, rel2->boundinfo)); + + nparts = joinrel->nparts; + + /* Allocate space to hold child-joins RelOptInfos, if not already done. */ + if (!joinrel->part_rels) + joinrel->part_rels = + (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * nparts); + + /* + * Create child-join relations for this partitioned join, if those don't + * exist. Add paths to child-joins for a pair of child relations + * corresponding to the given pair of parent relations. + */ + for (cnt_parts = 0; cnt_parts < nparts; cnt_parts++) + { + RelOptInfo *child_rel1 = rel1->part_rels[cnt_parts]; + RelOptInfo *child_rel2 = rel2->part_rels[cnt_parts]; + SpecialJoinInfo *child_sjinfo; + List *child_restrictlist; + RelOptInfo *child_joinrel; + Relids child_joinrelids; + AppendRelInfo **appinfos; + int nappinfos; + + /* We should never try to join two overlapping sets of rels. */ + Assert(!bms_overlap(child_rel1->relids, child_rel2->relids)); + child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids); + appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos); + + /* + * Construct SpecialJoinInfo from parent join relations's + * SpecialJoinInfo. + */ + child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo, + child_rel1->relids, + child_rel2->relids); + + /* + * Construct restrictions applicable to the child join from those + * applicable to the parent join. + */ + child_restrictlist = + (List *) adjust_appendrel_attrs(root, + (Node *) parent_restrictlist, + nappinfos, appinfos); + pfree(appinfos); + + child_joinrel = joinrel->part_rels[cnt_parts]; + if (!child_joinrel) + { + child_joinrel = build_child_join_rel(root, child_rel1, child_rel2, + joinrel, child_restrictlist, + child_sjinfo, + child_sjinfo->jointype); + joinrel->part_rels[cnt_parts] = child_joinrel; + } + + Assert(bms_equal(child_joinrel->relids, child_joinrelids)); + + populate_joinrel_with_paths(root, child_rel1, child_rel2, + child_joinrel, child_sjinfo, + child_restrictlist); + } +} + +/* + * Returns true if there exists an equi-join condition for each pair of + * partition keys from given relations being joined. + */ +bool +have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, + List *restrictlist) +{ + PartitionScheme part_scheme = rel1->part_scheme; + ListCell *lc; + int cnt_pks; + bool pk_has_clause[PARTITION_MAX_KEYS]; + bool strict_op; + + /* + * This function should be called when the joining relations have same + * partitioning scheme. + */ + Assert(rel1->part_scheme == rel2->part_scheme); + Assert(part_scheme); + + memset(pk_has_clause, 0, sizeof(pk_has_clause)); + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + OpExpr *opexpr; + Expr *expr1; + Expr *expr2; + int ipk1; + int ipk2; + + /* If processing an outer join, only use its own join clauses. */ + if (IS_OUTER_JOIN(jointype) && rinfo->is_pushed_down) + continue; + + /* Skip clauses which can not be used for a join. */ + if (!rinfo->can_join) + continue; + + /* Skip clauses which are not equality conditions. */ + if (!rinfo->mergeopfamilies) + continue; + + opexpr = (OpExpr *) rinfo->clause; + Assert(is_opclause(opexpr)); + + /* + * The equi-join between partition keys is strict if equi-join between + * at least one partition key is using a strict operator. See + * explanation about outer join reordering identity 3 in + * optimizer/README + */ + strict_op = op_strict(opexpr->opno); + + /* Match the operands to the relation. */ + if (bms_is_subset(rinfo->left_relids, rel1->relids) && + bms_is_subset(rinfo->right_relids, rel2->relids)) + { + expr1 = linitial(opexpr->args); + expr2 = lsecond(opexpr->args); + } + else if (bms_is_subset(rinfo->left_relids, rel2->relids) && + bms_is_subset(rinfo->right_relids, rel1->relids)) + { + expr1 = lsecond(opexpr->args); + expr2 = linitial(opexpr->args); + } + else + continue; + + /* + * Only clauses referencing the partition keys are useful for + * partition-wise join. + */ + ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op); + if (ipk1 < 0) + continue; + ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op); + if (ipk2 < 0) + continue; + + /* + * If the clause refers to keys at different ordinal positions, it can + * not be used for partition-wise join. + */ + if (ipk1 != ipk2) + continue; + + /* + * The clause allows partition-wise join if only it uses the same + * operator family as that specified by the partition key. + */ + if (!list_member_oid(rinfo->mergeopfamilies, + part_scheme->partopfamily[ipk1])) + continue; + + /* Mark the partition key as having an equi-join clause. */ + pk_has_clause[ipk1] = true; + } + + /* Check whether every partition key has an equi-join condition. */ + for (cnt_pks = 0; cnt_pks < part_scheme->partnatts; cnt_pks++) + { + if (!pk_has_clause[cnt_pks]) + return false; + } + + return true; +} + +/* + * Find the partition key from the given relation matching the given + * expression. If found, return the index of the partition key, else return -1. + */ +static int +match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) +{ + int cnt; + + /* This function should be called only for partitioned relations. */ + Assert(rel->part_scheme); + + /* Remove any relabel decorations. */ + while (IsA(expr, RelabelType)) + expr = (Expr *) (castNode(RelabelType, expr))->arg; + + for (cnt = 0; cnt < rel->part_scheme->partnatts; cnt++) + { + ListCell *lc; + + Assert(rel->partexprs); + foreach(lc, rel->partexprs[cnt]) + { + if (equal(lfirst(lc), expr)) + return cnt; + } + + if (!strict_op) + continue; + + /* + * If it's a strict equi-join a NULL partition key on one side will + * not join a NULL partition key on the other side. So, rows with NULL + * partition key from a partition on one side can not join with those + * from a non-matching partition on the other side. So, search the + * nullable partition keys as well. + */ + Assert(rel->nullable_partexprs); + foreach(lc, rel->nullable_partexprs[cnt]) + { + if (equal(lfirst(lc), expr)) + return cnt; + } + } + + return -1; +} |