diff options
author | Robert Haas <rhaas@postgresql.org> | 2017-10-06 11:11:10 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2017-10-06 11:11:10 -0400 |
commit | f49842d1ee31b976c681322f76025d7732e860f3 (patch) | |
tree | bc86f11819247980137e89404162ddc88200ac1c /src/backend/optimizer/prep/prepunion.c | |
parent | fe9ba28ee852bb968bc8948d172c6bc0c70c50df (diff) | |
download | postgresql-f49842d1ee31b976c681322f76025d7732e860f3.tar.gz postgresql-f49842d1ee31b976c681322f76025d7732e860f3.zip |
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually. This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels. This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation. In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side. All the same, for now,
turn this feature off by default.
Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical. It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.
Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me. A few final adjustments by me.
Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/prep/prepunion.c')
-rw-r--r-- | src/backend/optimizer/prep/prepunion.c | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 3e0c3de86d5..1c84a2cb280 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -2270,6 +2270,59 @@ adjust_child_relids(Relids relids, int nappinfos, AppendRelInfo **appinfos) } /* + * Replace any relid present in top_parent_relids with its child in + * child_relids. Members of child_relids can be multiple levels below top + * parent in the partition hierarchy. + */ +Relids +adjust_child_relids_multilevel(PlannerInfo *root, Relids relids, + Relids child_relids, Relids top_parent_relids) +{ + AppendRelInfo **appinfos; + int nappinfos; + Relids parent_relids = NULL; + Relids result; + Relids tmp_result = NULL; + int cnt; + + /* + * If the given relids set doesn't contain any of the top parent relids, + * it will remain unchanged. + */ + if (!bms_overlap(relids, top_parent_relids)) + return relids; + + appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos); + + /* Construct relids set for the immediate parent of the given child. */ + for (cnt = 0; cnt < nappinfos; cnt++) + { + AppendRelInfo *appinfo = appinfos[cnt]; + + parent_relids = bms_add_member(parent_relids, appinfo->parent_relid); + } + + /* Recurse if immediate parent is not the top parent. */ + if (!bms_equal(parent_relids, top_parent_relids)) + { + tmp_result = adjust_child_relids_multilevel(root, relids, + parent_relids, + top_parent_relids); + relids = tmp_result; + } + + result = adjust_child_relids(relids, nappinfos, appinfos); + + /* Free memory consumed by any intermediate result. */ + if (tmp_result) + bms_free(tmp_result); + bms_free(parent_relids); + pfree(appinfos); + + return result; +} + +/* * Adjust the targetlist entries of an inherited UPDATE operation * * The expressions have already been fixed, but we have to make sure that @@ -2409,6 +2462,48 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node, } /* + * Construct the SpecialJoinInfo for a child-join by translating + * SpecialJoinInfo for the join between parents. left_relids and right_relids + * are the relids of left and right side of the join respectively. + */ +SpecialJoinInfo * +build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo, + Relids left_relids, Relids right_relids) +{ + SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo); + AppendRelInfo **left_appinfos; + int left_nappinfos; + AppendRelInfo **right_appinfos; + int right_nappinfos; + + memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo)); + left_appinfos = find_appinfos_by_relids(root, left_relids, + &left_nappinfos); + right_appinfos = find_appinfos_by_relids(root, right_relids, + &right_nappinfos); + + sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand, + left_nappinfos, left_appinfos); + sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand, + right_nappinfos, + right_appinfos); + sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand, + left_nappinfos, left_appinfos); + sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand, + right_nappinfos, + right_appinfos); + sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root, + (Node *) sjinfo->semi_rhs_exprs, + right_nappinfos, + right_appinfos); + + pfree(left_appinfos); + pfree(right_appinfos); + + return sjinfo; +} + +/* * find_appinfos_by_relids * Find AppendRelInfo structures for all relations specified by relids. * |