aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/nodeModifyTable.c
diff options
context:
space:
mode:
author Alvaro Herrera <alvherre@alvh.no-ip.org> 2018-11-16 14:54:15 -0300
committer Alvaro Herrera <alvherre@alvh.no-ip.org> 2018-11-16 15:01:05 -0300
commit 3f2393edefa5ef2b6970a5a2fa2c7e9c55cc10cf (patch)
tree e80c043f882bfdbd7ded4575d0041c14c924153b /src/backend/executor/nodeModifyTable.c
parent a387a3dff9001225ad571ff2755d139f5bd193b3 (diff)
download postgresql-3f2393edefa5ef2b6970a5a2fa2c7e9c55cc10cf.tar.gz
postgresql-3f2393edefa5ef2b6970a5a2fa2c7e9c55cc10cf.zip
Redesign initialization of partition routing structures
This speeds up write operations (INSERT, UPDATE, DELETE, COPY, as well as the future MERGE) on partitioned tables. This changes the setup for tuple routing so that it does far less work during the initial setup and pushes more work out to when partitions receive tuples. PartitionDispatchData structs for sub-partitioned tables are only created when a tuple gets routed through them. The possibly large arrays in the PartitionTupleRouting struct have largely been removed. The partitions[] array remains but now never contains any NULL gaps. Previously the NULLs had to be skipped during ExecCleanupTupleRouting(), which could add a large overhead to the cleanup when the number of partitions was large. The partitions[] array is allocated small to start with and only enlarged when we route tuples to enough partitions that it runs out of space. This allows us to keep simple single-row partition INSERTs running quickly. The arrays in PartitionTupleRouting which stored the tuple translation maps have now been removed. These have been moved out into a PartitionRoutingInfo struct which is an additional field in ResultRelInfo. The find_all_inheritors() call still remains by far the slowest part of ExecSetupPartitionTupleRouting(). This commit just removes the other slow parts. In passing, also rename the tuple translation maps from being ParentToChild and ChildToParent to being RootToPartition and PartitionToRoot. The old names mislead you into thinking that a partition of some sub-partitioned table would translate to the rowtype of the sub-partitioned table rather than the root partitioned table. Authors: David Rowley and Amit Langote, heavily revised by Álvaro Herrera. Testing help from Jesper Pedersen and Kato Sho. Discussion: https://postgr.es/m/CAKJS1f_1RJyFquuCKRFHTdcXqoPX-PYqAd7nz=GVBwvGh4a6xA@mail.gmail.com
Diffstat (limited to 'src/backend/executor/nodeModifyTable.c')
-rw-r--r-- src/backend/executor/nodeModifyTable.c 165
1 files changed, 36 insertions, 129 deletions
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index bb344a7070a..65d46c8ea8b 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -68,7 +68,6 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
ResultRelInfo *targetRelInfo,
TupleTableSlot *slot);
static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
-static void ExecSetupChildParentMapForTcs(ModifyTableState *mtstate);
static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
int whichplan);
@@ -1157,7 +1156,8 @@ lreplace:;
tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
if (tupconv_map != NULL)
slot = execute_attr_map_slot(tupconv_map->attrMap,
- slot, proute->root_tuple_slot);
+ slot,
+ mtstate->mt_root_tuple_slot);
/*
* Prepare for tuple routing, making it look like we're inserting
@@ -1653,7 +1653,7 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
if (mtstate->mt_transition_capture != NULL ||
mtstate->mt_oc_transition_capture != NULL)
{
- ExecSetupChildParentMapForTcs(mtstate);
+ ExecSetupChildParentMapForSubplan(mtstate);
/*
* Install the conversion map for the first plan for UPDATE and DELETE
@@ -1686,52 +1686,21 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
TupleTableSlot *slot)
{
ModifyTable *node;
- int partidx;
ResultRelInfo *partrel;
+ PartitionRoutingInfo *partrouteinfo;
HeapTuple tuple;
TupleConversionMap *map;
/*
- * Determine the target partition. If ExecFindPartition does not find a
- * partition after all, it doesn't return here; otherwise, the returned
- * value is to be used as an index into the arrays for the ResultRelInfo
- * and TupleConversionMap for the partition.
- */
- partidx = ExecFindPartition(targetRelInfo,
- proute->partition_dispatch_info,
- slot,
- estate);
- Assert(partidx >= 0 && partidx < proute->num_partitions);
-
- /*
- * Get the ResultRelInfo corresponding to the selected partition; if not
- * yet there, initialize it.
+ * Lookup the target partition's ResultRelInfo. If ExecFindPartition does
+ * not find a valid partition for the tuple in 'slot' then an error is
+ * raised. An error may also be raised if the found partition is not a
+ * valid target for INSERTs. This is required since a partitioned table
+ * UPDATE to another partition becomes a DELETE+INSERT.
*/
- partrel = proute->partitions[partidx];
- if (partrel == NULL)
- partrel = ExecInitPartitionInfo(mtstate, targetRelInfo,
- proute, estate,
- partidx);
-
- /*
- * Check whether the partition is routable if we didn't yet
- *
- * Note: an UPDATE of a partition key invokes an INSERT that moves the
- * tuple to a new partition. This check would be applied to a subplan
- * partition of such an UPDATE that is chosen as the partition to route
- * the tuple to. The reason we do this check here rather than in
- * ExecSetupPartitionTupleRouting is to avoid aborting such an UPDATE
- * unnecessarily due to non-routable subplan partitions that may not be
- * chosen for update tuple movement after all.
- */
- if (!partrel->ri_PartitionReadyForRouting)
- {
- /* Verify the partition is a valid target for INSERT. */
- CheckValidResultRel(partrel, CMD_INSERT);
-
- /* Set up information needed for routing tuples to the partition. */
- ExecInitRoutingInfo(mtstate, estate, proute, partrel, partidx);
- }
+ partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, estate);
+ partrouteinfo = partrel->ri_PartitionInfo;
+ Assert(partrouteinfo != NULL);
/*
* Make it look like we are inserting into the partition.
@@ -1743,7 +1712,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
/*
* If we're capturing transition tuples, we might need to convert from the
- * partition rowtype to parent rowtype.
+ * partition rowtype to root partitioned table's rowtype.
*/
if (mtstate->mt_transition_capture != NULL)
{
@@ -1756,7 +1725,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
*/
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
mtstate->mt_transition_capture->tcs_map =
- TupConvMapForLeaf(proute, targetRelInfo, partidx);
+ partrouteinfo->pi_PartitionToRootMap;
}
else
{
@@ -1771,20 +1740,17 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
if (mtstate->mt_oc_transition_capture != NULL)
{
mtstate->mt_oc_transition_capture->tcs_map =
- TupConvMapForLeaf(proute, targetRelInfo, partidx);
+ partrouteinfo->pi_PartitionToRootMap;
}
/*
* Convert the tuple, if necessary.
*/
- map = proute->parent_child_tupconv_maps[partidx];
+ map = partrouteinfo->pi_RootToPartitionMap;
if (map != NULL)
{
- TupleTableSlot *new_slot;
+ TupleTableSlot *new_slot = partrouteinfo->pi_PartitionTupleSlot;
- Assert(proute->partition_tuple_slots != NULL &&
- proute->partition_tuple_slots[partidx] != NULL);
- new_slot = proute->partition_tuple_slots[partidx];
slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
}
@@ -1823,17 +1789,6 @@ ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
int i;
/*
- * First check if there is already a per-subplan array allocated. Even if
- * there is already a per-leaf map array, we won't require a per-subplan
- * one, since we will use the subplan offset array to convert the subplan
- * index to per-leaf index.
- */
- if (mtstate->mt_per_subplan_tupconv_maps ||
- (mtstate->mt_partition_tuple_routing &&
- mtstate->mt_partition_tuple_routing->child_parent_tupconv_maps))
- return;
-
- /*
* Build array of conversion maps from each child's TupleDesc to the one
* used in the target relation. The map pointers may be NULL when no
* conversion is necessary, which is hopefully a common case.
@@ -1855,78 +1810,17 @@ ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
}
/*
- * Initialize the child-to-root tuple conversion map array required for
- * capturing transition tuples.
- *
- * The map array can be indexed either by subplan index or by leaf-partition
- * index. For transition tables, we need a subplan-indexed access to the map,
- * and where tuple-routing is present, we also require a leaf-indexed access.
- */
-static void
-ExecSetupChildParentMapForTcs(ModifyTableState *mtstate)
-{
- PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
-
- /*
- * If partition tuple routing is set up, we will require partition-indexed
- * access. In that case, create the map array indexed by partition; we
- * will still be able to access the maps using a subplan index by
- * converting the subplan index to a partition index using
- * subplan_partition_offsets. If tuple routing is not set up, it means we
- * don't require partition-indexed access. In that case, create just a
- * subplan-indexed map.
- */
- if (proute)
- {
- /*
- * If a partition-indexed map array is to be created, the subplan map
- * array has to be NULL. If the subplan map array is already created,
- * we won't be able to access the map using a partition index.
- */
- Assert(mtstate->mt_per_subplan_tupconv_maps == NULL);
-
- ExecSetupChildParentMapForLeaf(proute);
- }
- else
- ExecSetupChildParentMapForSubplan(mtstate);
-}
-
-/*
* For a given subplan index, get the tuple conversion map.
*/
static TupleConversionMap *
tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
{
- /*
- * If a partition-index tuple conversion map array is allocated, we need
- * to first get the index into the partition array. Exactly *one* of the
- * two arrays is allocated. This is because if there is a partition array
- * required, we don't require subplan-indexed array since we can translate
- * subplan index into partition index. And, we create a subplan-indexed
- * array *only* if partition-indexed array is not required.
- */
+ /* If nobody else set the per-subplan array of maps, do so ourselves. */
if (mtstate->mt_per_subplan_tupconv_maps == NULL)
- {
- int leaf_index;
- PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
-
- /*
- * If subplan-indexed array is NULL, things should have been arranged
- * to convert the subplan index to partition index.
- */
- Assert(proute && proute->subplan_partition_offsets != NULL &&
- whichplan < proute->num_subplan_partition_offsets);
-
- leaf_index = proute->subplan_partition_offsets[whichplan];
+ ExecSetupChildParentMapForSubplan(mtstate);
- return TupConvMapForLeaf(proute, getTargetResultRelInfo(mtstate),
- leaf_index);
- }
- else
- {
- Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
- return mtstate->mt_per_subplan_tupconv_maps[whichplan];
- }
+ Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
+ return mtstate->mt_per_subplan_tupconv_maps[whichplan];
}
/* ----------------------------------------------------------------
@@ -2370,10 +2264,15 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
* descriptor of a source partition does not match the root partitioned
* table descriptor. In such a case we need to convert tuples to the root
* tuple descriptor, because the search for destination partition starts
- * from the root. Skip this setup if it's not a partition key update.
+ * from the root. We'll also need a slot to store these converted tuples.
+ * We can skip this setup if it's not a partition key update.
*/
if (update_tuple_routing_needed)
+ {
ExecSetupChildParentMapForSubplan(mtstate);
+ mtstate->mt_root_tuple_slot = MakeTupleTableSlot(RelationGetDescr(rel),
+ &TTSOpsHeapTuple);
+ }
/*
* Initialize any WITH CHECK OPTION constraints if needed.
@@ -2716,10 +2615,18 @@ ExecEndModifyTable(ModifyTableState *node)
resultRelInfo);
}
- /* Close all the partitioned tables, leaf partitions, and their indices */
+ /*
+ * Close all the partitioned tables, leaf partitions, and their indices
+ * and release the slot used for tuple routing, if set.
+ */
if (node->mt_partition_tuple_routing)
+ {
ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);
+ if (node->mt_root_tuple_slot)
+ ExecDropSingleTupleTableSlot(node->mt_root_tuple_slot);
+ }
+
/*
* Free the exprcontext
*/