aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/prep/prepunion.c
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2017-10-06 11:11:10 -0400
committerRobert Haas <rhaas@postgresql.org>2017-10-06 11:11:10 -0400
commitf49842d1ee31b976c681322f76025d7732e860f3 (patch)
treebc86f11819247980137e89404162ddc88200ac1c /src/backend/optimizer/prep/prepunion.c
parentfe9ba28ee852bb968bc8948d172c6bc0c70c50df (diff)
downloadpostgresql-f49842d1ee31b976c681322f76025d7732e860f3.tar.gz
postgresql-f49842d1ee31b976c681322f76025d7732e860f3.zip
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/prep/prepunion.c')
-rw-r--r--src/backend/optimizer/prep/prepunion.c95
1 files changed, 95 insertions, 0 deletions
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 3e0c3de86d5..1c84a2cb280 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -2270,6 +2270,59 @@ adjust_child_relids(Relids relids, int nappinfos, AppendRelInfo **appinfos)
}
/*
+ * Replace any relid present in top_parent_relids with its child in
+ * child_relids. Members of child_relids can be multiple levels below top
+ * parent in the partition hierarchy.
+ */
+Relids
+adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
+ Relids child_relids, Relids top_parent_relids)
+{
+ AppendRelInfo **appinfos;
+ int nappinfos;
+ Relids parent_relids = NULL;
+ Relids result;
+ Relids tmp_result = NULL;
+ int cnt;
+
+ /*
+ * If the given relids set doesn't contain any of the top parent relids,
+ * it will remain unchanged.
+ */
+ if (!bms_overlap(relids, top_parent_relids))
+ return relids;
+
+ appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
+
+ /* Construct relids set for the immediate parent of the given child. */
+ for (cnt = 0; cnt < nappinfos; cnt++)
+ {
+ AppendRelInfo *appinfo = appinfos[cnt];
+
+ parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
+ }
+
+ /* Recurse if immediate parent is not the top parent. */
+ if (!bms_equal(parent_relids, top_parent_relids))
+ {
+ tmp_result = adjust_child_relids_multilevel(root, relids,
+ parent_relids,
+ top_parent_relids);
+ relids = tmp_result;
+ }
+
+ result = adjust_child_relids(relids, nappinfos, appinfos);
+
+ /* Free memory consumed by any intermediate result. */
+ if (tmp_result)
+ bms_free(tmp_result);
+ bms_free(parent_relids);
+ pfree(appinfos);
+
+ return result;
+}
+
+/*
* Adjust the targetlist entries of an inherited UPDATE operation
*
* The expressions have already been fixed, but we have to make sure that
@@ -2409,6 +2462,48 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
}
/*
+ * Construct the SpecialJoinInfo for a child-join by translating
+ * SpecialJoinInfo for the join between parents. left_relids and right_relids
+ * are the relids of left and right side of the join respectively.
+ */
+SpecialJoinInfo *
+build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
+ Relids left_relids, Relids right_relids)
+{
+ SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
+ AppendRelInfo **left_appinfos;
+ int left_nappinfos;
+ AppendRelInfo **right_appinfos;
+ int right_nappinfos;
+
+ memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
+ left_appinfos = find_appinfos_by_relids(root, left_relids,
+ &left_nappinfos);
+ right_appinfos = find_appinfos_by_relids(root, right_relids,
+ &right_nappinfos);
+
+ sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
+ left_nappinfos, left_appinfos);
+ sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
+ right_nappinfos,
+ right_appinfos);
+ sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
+ left_nappinfos, left_appinfos);
+ sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
+ right_nappinfos,
+ right_appinfos);
+ sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
+ (Node *) sjinfo->semi_rhs_exprs,
+ right_nappinfos,
+ right_appinfos);
+
+ pfree(left_appinfos);
+ pfree(right_appinfos);
+
+ return sjinfo;
+}
+
+/*
* find_appinfos_by_relids
* Find AppendRelInfo structures for all relations specified by relids.
*