aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/nodeModifyTable.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/executor/nodeModifyTable.c')
-rw-r--r--src/backend/executor/nodeModifyTable.c583
1 files changed, 459 insertions, 124 deletions
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index c5eca1bb74c..6c2f8d4ec03 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -62,6 +62,11 @@ static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
EState *estate,
bool canSetTag,
TupleTableSlot **returning);
+static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
+static void ExecSetupChildParentMapForTcs(ModifyTableState *mtstate);
+static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
+static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
+ int whichplan);
/*
* Verify that the tuples to be produced by INSERT or UPDATE match the
@@ -265,6 +270,7 @@ ExecInsert(ModifyTableState *mtstate,
Oid newId;
List *recheckIndexes = NIL;
TupleTableSlot *result = NULL;
+ TransitionCaptureState *ar_insert_trig_tcs;
/*
* get the heap tuple out of the tuple table slot, making sure we have a
@@ -282,7 +288,6 @@ ExecInsert(ModifyTableState *mtstate,
{
int leaf_part_index;
PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
- TupleConversionMap *map;
/*
* Away we go ... If we end up not finding a partition after all,
@@ -331,8 +336,10 @@ ExecInsert(ModifyTableState *mtstate,
* back to tuplestore format.
*/
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
+
mtstate->mt_transition_capture->tcs_map =
- mtstate->mt_transition_tupconv_maps[leaf_part_index];
+ TupConvMapForLeaf(proute, saved_resultRelInfo,
+ leaf_part_index);
}
else
{
@@ -345,30 +352,20 @@ ExecInsert(ModifyTableState *mtstate,
}
}
if (mtstate->mt_oc_transition_capture != NULL)
+ {
mtstate->mt_oc_transition_capture->tcs_map =
- mtstate->mt_transition_tupconv_maps[leaf_part_index];
+ TupConvMapForLeaf(proute, saved_resultRelInfo,
+ leaf_part_index);
+ }
/*
* We might need to convert from the parent rowtype to the partition
* rowtype.
*/
- map = proute->partition_tupconv_maps[leaf_part_index];
- if (map)
- {
- Relation partrel = resultRelInfo->ri_RelationDesc;
-
- tuple = do_convert_tuple(tuple, map);
-
- /*
- * We must use the partition's tuple descriptor from this point
- * on, until we're finished dealing with the partition. Use the
- * dedicated slot for that.
- */
- slot = proute->partition_tuple_slot;
- Assert(slot != NULL);
- ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
- ExecStoreTuple(tuple, slot, InvalidBuffer, true);
- }
+ tuple = ConvertPartitionTupleSlot(proute->parent_child_tupconv_maps[leaf_part_index],
+ tuple,
+ proute->partition_tuple_slot,
+ &slot);
}
resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -449,6 +446,8 @@ ExecInsert(ModifyTableState *mtstate,
}
else
{
+ WCOKind wco_kind;
+
/*
* We always check the partition constraint, including when the tuple
* got here via tuple-routing. However we don't need to in the latter
@@ -466,14 +465,23 @@ ExecInsert(ModifyTableState *mtstate,
tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
/*
- * Check any RLS INSERT WITH CHECK policies
+ * Check any RLS WITH CHECK policies.
*
+ * Normally we should check INSERT policies. But if the insert is the
+ * result of a partition key update that moved the tuple to a new
+ * partition, we should instead check UPDATE policies, because we are
+ * executing policies defined on the target table, and not those
+ * defined on the child partitions.
+ */
+ wco_kind = (mtstate->operation == CMD_UPDATE) ?
+ WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;
+
+ /*
* ExecWithCheckOptions() will skip any WCOs which are not of the kind
* we are looking for at this point.
*/
if (resultRelInfo->ri_WithCheckOptions != NIL)
- ExecWithCheckOptions(WCO_RLS_INSERT_CHECK,
- resultRelInfo, slot, estate);
+ ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);
/*
* No need though if the tuple has been routed, and a BR trigger
@@ -622,9 +630,32 @@ ExecInsert(ModifyTableState *mtstate,
setLastTid(&(tuple->t_self));
}
+ /*
+ * If this insert is the result of a partition key update that moved the
+ * tuple to a new partition, put this row into the transition NEW TABLE,
+ * if there is one. We need to do this separately for DELETE and INSERT
+ * because they happen on different tables.
+ */
+ ar_insert_trig_tcs = mtstate->mt_transition_capture;
+ if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
+ && mtstate->mt_transition_capture->tcs_update_new_table)
+ {
+ ExecARUpdateTriggers(estate, resultRelInfo, NULL,
+ NULL,
+ tuple,
+ NULL,
+ mtstate->mt_transition_capture);
+
+ /*
+ * We've already captured the NEW TABLE row, so make sure any AR
+ * INSERT trigger fired below doesn't capture it again.
+ */
+ ar_insert_trig_tcs = NULL;
+ }
+
/* AFTER ROW INSERT Triggers */
ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes,
- mtstate->mt_transition_capture);
+ ar_insert_trig_tcs);
list_free(recheckIndexes);
@@ -678,6 +709,8 @@ ExecDelete(ModifyTableState *mtstate,
TupleTableSlot *planSlot,
EPQState *epqstate,
EState *estate,
+ bool *tupleDeleted,
+ bool processReturning,
bool canSetTag)
{
ResultRelInfo *resultRelInfo;
@@ -685,6 +718,10 @@ ExecDelete(ModifyTableState *mtstate,
HTSU_Result result;
HeapUpdateFailureData hufd;
TupleTableSlot *slot = NULL;
+ TransitionCaptureState *ar_delete_trig_tcs;
+
+ if (tupleDeleted)
+ *tupleDeleted = false;
/*
* get information on the (current) result relation
@@ -849,12 +886,40 @@ ldelete:;
if (canSetTag)
(estate->es_processed)++;
+ /* Tell caller that the delete actually happened. */
+ if (tupleDeleted)
+ *tupleDeleted = true;
+
+ /*
+ * If this delete is the result of a partition key update that moved the
+ * tuple to a new partition, put this row into the transition OLD TABLE,
+ * if there is one. We need to do this separately for DELETE and INSERT
+ * because they happen on different tables.
+ */
+ ar_delete_trig_tcs = mtstate->mt_transition_capture;
+ if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
+ && mtstate->mt_transition_capture->tcs_update_old_table)
+ {
+ ExecARUpdateTriggers(estate, resultRelInfo,
+ tupleid,
+ oldtuple,
+ NULL,
+ NULL,
+ mtstate->mt_transition_capture);
+
+ /*
+ * We've already captured the OLD TABLE row, so make sure any AR
+ * DELETE trigger fired below doesn't capture it again.
+ */
+ ar_delete_trig_tcs = NULL;
+ }
+
/* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
- mtstate->mt_transition_capture);
+ ar_delete_trig_tcs);
- /* Process RETURNING if present */
- if (resultRelInfo->ri_projectReturning)
+ /* Process RETURNING if present and if requested */
+ if (processReturning && resultRelInfo->ri_projectReturning)
{
/*
* We have to put the target tuple into a slot, which means first we
@@ -947,6 +1012,7 @@ ExecUpdate(ModifyTableState *mtstate,
HTSU_Result result;
HeapUpdateFailureData hufd;
List *recheckIndexes = NIL;
+ TupleConversionMap *saved_tcs_map = NULL;
/*
* abort the operation if not running transactions
@@ -1018,6 +1084,7 @@ ExecUpdate(ModifyTableState *mtstate,
else
{
LockTupleMode lockmode;
+ bool partition_constraint_failed;
/*
* Constraints might reference the tableoid column, so initialize
@@ -1033,22 +1100,142 @@ ExecUpdate(ModifyTableState *mtstate,
* (We don't need to redo triggers, however. If there are any BEFORE
* triggers then trigger.c will have done heap_lock_tuple to lock the
* correct tuple, so there's no need to do them again.)
- *
- * ExecWithCheckOptions() will skip any WCOs which are not of the kind
- * we are looking for at this point.
*/
lreplace:;
- if (resultRelInfo->ri_WithCheckOptions != NIL)
+
+ /*
+ * If partition constraint fails, this row might get moved to another
+ * partition, in which case we should check the RLS CHECK policy just
+ * before inserting into the new partition, rather than doing it here.
+ * This is because a trigger on that partition might again change the
+ * row. So skip the WCO checks if the partition constraint fails.
+ */
+ partition_constraint_failed =
+ resultRelInfo->ri_PartitionCheck &&
+ !ExecPartitionCheck(resultRelInfo, slot, estate);
+
+ if (!partition_constraint_failed &&
+ resultRelInfo->ri_WithCheckOptions != NIL)
+ {
+ /*
+ * ExecWithCheckOptions() will skip any WCOs which are not of the
+ * kind we are looking for at this point.
+ */
ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
resultRelInfo, slot, estate);
+ }
+
+ /*
+ * If a partition check failed, try to move the row into the right
+ * partition.
+ */
+ if (partition_constraint_failed)
+ {
+ bool tuple_deleted;
+ TupleTableSlot *ret_slot;
+ PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
+ int map_index;
+ TupleConversionMap *tupconv_map;
+
+ /*
+ * When an UPDATE is run on a leaf partition, we will not have
+ * partition tuple routing set up. In that case, fail with
+ * partition constraint violation error.
+ */
+ if (proute == NULL)
+ ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
+
+ /*
+ * Row movement, part 1. Delete the tuple, but skip RETURNING
+ * processing. We want to return rows from INSERT.
+ */
+ ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate, estate,
+ &tuple_deleted, false, false);
+
+ /*
+ * For some reason if DELETE didn't happen (e.g. trigger prevented
+ * it, or it was already deleted by self, or it was concurrently
+ * deleted by another transaction), then we should skip the insert
+ * as well; otherwise, an UPDATE could cause an increase in the
+ * total number of rows across all partitions, which is clearly
+ * wrong.
+ *
+ * For a normal UPDATE, the case where the tuple has been the
+ * subject of a concurrent UPDATE or DELETE would be handled by
+ * the EvalPlanQual machinery, but for an UPDATE that we've
+ * translated into a DELETE from this partition and an INSERT into
+ * some other partition, that's not available, because CTID chains
+ * can't span relation boundaries. We mimic the semantics to a
+ * limited extent by skipping the INSERT if the DELETE fails to
+ * find a tuple. This ensures that two concurrent attempts to
+ * UPDATE the same tuple at the same time can't turn one tuple
+ * into two, and that an UPDATE of a just-deleted tuple can't
+ * resurrect it.
+ */
+ if (!tuple_deleted)
+ return NULL;
+
+ /*
+ * Updates set the transition capture map only when a new subplan
+ * is chosen. But for inserts, it is set for each row. So after
+ * INSERT, we need to revert back to the map created for UPDATE;
+ * otherwise the next UPDATE will incorrectly use the one created
+ * for INSERT. So first save the one created for UPDATE.
+ */
+ if (mtstate->mt_transition_capture)
+ saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
+
+ /*
+ * resultRelInfo is one of the per-subplan resultRelInfos. So we
+ * should convert the tuple into root's tuple descriptor, since
+ * ExecInsert() starts the search from root. The tuple conversion
+ * map list is in the order of mtstate->resultRelInfo[], so to
+ * retrieve the one for this resultRel, we need to know the
+ * position of the resultRel in mtstate->resultRelInfo[].
+ */
+ map_index = resultRelInfo - mtstate->resultRelInfo;
+ Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
+ tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
+ tuple = ConvertPartitionTupleSlot(tupconv_map,
+ tuple,
+ proute->root_tuple_slot,
+ &slot);
+
+
+ /*
+ * For ExecInsert(), make it look like we are inserting into the
+ * root.
+ */
+ Assert(mtstate->rootResultRelInfo != NULL);
+ estate->es_result_relation_info = mtstate->rootResultRelInfo;
+
+ ret_slot = ExecInsert(mtstate, slot, planSlot, NULL,
+ ONCONFLICT_NONE, estate, canSetTag);
+
+ /*
+ * Revert back the active result relation and the active
+ * transition capture map that we changed above.
+ */
+ estate->es_result_relation_info = resultRelInfo;
+ if (mtstate->mt_transition_capture)
+ {
+ mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
+ mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
+ }
+ return ret_slot;
+ }
/*
* Check the constraints of the tuple. Note that we pass the same
* slot for the orig_slot argument, because unlike ExecInsert(), no
* tuple-routing is performed here, hence the slot remains unchanged.
+ * We've already checked the partition constraint above; however, we
+ * must still ensure the tuple passes all other constraints, so we
+ * will call ExecConstraints() and have it validate all remaining
+ * checks.
*/
- if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck)
- ExecConstraints(resultRelInfo, slot, estate, true);
+ if (resultRelationDesc->rd_att->constr)
+ ExecConstraints(resultRelInfo, slot, estate, false);
/*
* replace the heap tuple
@@ -1418,17 +1605,20 @@ fireBSTriggers(ModifyTableState *node)
}
/*
- * Return the ResultRelInfo for which we will fire AFTER STATEMENT triggers.
- * This is also the relation into whose tuple format all captured transition
- * tuples must be converted.
+ * Return the target rel ResultRelInfo.
+ *
+ * This relation is the same as:
+ * - the relation for which we will fire AFTER STATEMENT triggers.
+ * - the relation into whose tuple format all captured transition tuples must
+ * be converted.
+ * - the root partitioned table.
*/
static ResultRelInfo *
-getASTriggerResultRelInfo(ModifyTableState *node)
+getTargetResultRelInfo(ModifyTableState *node)
{
/*
- * If the node modifies a partitioned table, we must fire its triggers.
- * Note that in that case, node->resultRelInfo points to the first leaf
- * partition, not the root table.
+ * Note that if the node modifies a partitioned table, node->resultRelInfo
+ * points to the first leaf partition, not the root table.
*/
if (node->rootResultRelInfo != NULL)
return node->rootResultRelInfo;
@@ -1442,7 +1632,7 @@ getASTriggerResultRelInfo(ModifyTableState *node)
static void
fireASTriggers(ModifyTableState *node)
{
- ResultRelInfo *resultRelInfo = getASTriggerResultRelInfo(node);
+ ResultRelInfo *resultRelInfo = getTargetResultRelInfo(node);
switch (node->operation)
{
@@ -1475,8 +1665,7 @@ fireASTriggers(ModifyTableState *node)
static void
ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
{
- ResultRelInfo *targetRelInfo = getASTriggerResultRelInfo(mtstate);
- int i;
+ ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
/* Check for transition tables on the directly targeted relation. */
mtstate->mt_transition_capture =
@@ -1499,62 +1688,141 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
if (mtstate->mt_transition_capture != NULL ||
mtstate->mt_oc_transition_capture != NULL)
{
- int numResultRelInfos;
- PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
-
- numResultRelInfos = (proute != NULL ?
- proute->num_partitions :
- mtstate->mt_nplans);
+ ExecSetupChildParentMapForTcs(mtstate);
/*
- * Build array of conversion maps from each child's TupleDesc to the
- * one used in the tuplestore. The map pointers may be NULL when no
- * conversion is necessary, which is hopefully a common case for
- * partitions.
+ * Install the conversion map for the first plan for UPDATE and DELETE
+ * operations. It will be advanced each time we switch to the next
+ * plan. (INSERT operations set it every time, so we need not update
+ * mtstate->mt_oc_transition_capture here.)
*/
- mtstate->mt_transition_tupconv_maps = (TupleConversionMap **)
- palloc0(sizeof(TupleConversionMap *) * numResultRelInfos);
+ if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT)
+ mtstate->mt_transition_capture->tcs_map =
+ tupconv_map_for_subplan(mtstate, 0);
+ }
+}
- /* Choose the right set of partitions */
- if (proute != NULL)
- {
- /*
- * For tuple routing among partitions, we need TupleDescs based on
- * the partition routing table.
- */
- ResultRelInfo **resultRelInfos = proute->partitions;
+/*
+ * Initialize the child-to-root tuple conversion map array for UPDATE subplans.
+ *
+ * This map array is required to convert the tuple from the subplan result rel
+ * to the target table descriptor. This requirement arises for two independent
+ * scenarios:
+ * 1. For update-tuple-routing.
+ * 2. For capturing tuples in transition tables.
+ */
+void
+ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
+{
+ ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
+ ResultRelInfo *resultRelInfos = mtstate->resultRelInfo;
+ TupleDesc outdesc;
+ int numResultRelInfos = mtstate->mt_nplans;
+ int i;
- for (i = 0; i < numResultRelInfos; ++i)
- {
- mtstate->mt_transition_tupconv_maps[i] =
- convert_tuples_by_name(RelationGetDescr(resultRelInfos[i]->ri_RelationDesc),
- RelationGetDescr(targetRelInfo->ri_RelationDesc),
- gettext_noop("could not convert row type"));
- }
- }
- else
- {
- /* Otherwise we need the ResultRelInfo for each subplan. */
- ResultRelInfo *resultRelInfos = mtstate->resultRelInfo;
+ /*
+ * First check if there is already a per-subplan array allocated. Even if
+ * there is already a per-leaf map array, we won't require a per-subplan
+ * one, since we will use the subplan offset array to convert the subplan
+ * index to per-leaf index.
+ */
+ if (mtstate->mt_per_subplan_tupconv_maps ||
+ (mtstate->mt_partition_tuple_routing &&
+ mtstate->mt_partition_tuple_routing->child_parent_tupconv_maps))
+ return;
- for (i = 0; i < numResultRelInfos; ++i)
- {
- mtstate->mt_transition_tupconv_maps[i] =
- convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
- RelationGetDescr(targetRelInfo->ri_RelationDesc),
- gettext_noop("could not convert row type"));
- }
- }
+ /*
+ * Build array of conversion maps from each child's TupleDesc to the one
+ * used in the target relation. The map pointers may be NULL when no
+ * conversion is necessary, which is hopefully a common case.
+ */
+ /* Get tuple descriptor of the target rel. */
+ outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc);
+
+ mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **)
+ palloc(sizeof(TupleConversionMap *) * numResultRelInfos);
+
+ for (i = 0; i < numResultRelInfos; ++i)
+ {
+ mtstate->mt_per_subplan_tupconv_maps[i] =
+ convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
+ outdesc,
+ gettext_noop("could not convert row type"));
+ }
+}
+
+/*
+ * Initialize the child-to-root tuple conversion map array required for
+ * capturing transition tuples.
+ *
+ * The map array can be indexed either by subplan index or by leaf-partition
+ * index. For transition tables, we need a subplan-indexed access to the map,
+ * and where tuple-routing is present, we also require a leaf-indexed access.
+ */
+static void
+ExecSetupChildParentMapForTcs(ModifyTableState *mtstate)
+{
+ PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
+
+ /*
+ * If partition tuple routing is set up, we will require partition-indexed
+ * access. In that case, create the map array indexed by partition; we
+ * will still be able to access the maps using a subplan index by
+ * converting the subplan index to a partition index using
+ * subplan_partition_offsets. If tuple routing is not set up, it means we
+ * don't require partition-indexed access. In that case, create just a
+ * subplan-indexed map.
+ */
+ if (proute)
+ {
/*
- * Install the conversion map for the first plan for UPDATE and DELETE
- * operations. It will be advanced each time we switch to the next
- * plan. (INSERT operations set it every time, so we need not update
- * mtstate->mt_oc_transition_capture here.)
+ * If a partition-indexed map array is to be created, the subplan map
+ * array has to be NULL. If the subplan map array is already created,
+ * we won't be able to access the map using a partition index.
*/
- if (mtstate->mt_transition_capture)
- mtstate->mt_transition_capture->tcs_map =
- mtstate->mt_transition_tupconv_maps[0];
+ Assert(mtstate->mt_per_subplan_tupconv_maps == NULL);
+
+ ExecSetupChildParentMapForLeaf(proute);
+ }
+ else
+ ExecSetupChildParentMapForSubplan(mtstate);
+}
+
+/*
+ * For a given subplan index, get the tuple conversion map.
+ */
+static TupleConversionMap *
+tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
+{
+ /*
+ * If a partition-index tuple conversion map array is allocated, we need
+ * to first get the index into the partition array. Exactly *one* of the
+ * two arrays is allocated. This is because if there is a partition array
+ * required, we don't require subplan-indexed array since we can translate
+ * subplan index into partition index. And, we create a subplan-indexed
+ * array *only* if partition-indexed array is not required.
+ */
+ if (mtstate->mt_per_subplan_tupconv_maps == NULL)
+ {
+ int leaf_index;
+ PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
+
+ /*
+ * If subplan-indexed array is NULL, things should have been arranged
+ * to convert the subplan index to partition index.
+ */
+ Assert(proute && proute->subplan_partition_offsets != NULL);
+
+ leaf_index = proute->subplan_partition_offsets[whichplan];
+
+ return TupConvMapForLeaf(proute, getTargetResultRelInfo(mtstate),
+ leaf_index);
+ }
+ else
+ {
+ Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
+ return mtstate->mt_per_subplan_tupconv_maps[whichplan];
}
}
@@ -1661,15 +1929,13 @@ ExecModifyTable(PlanState *pstate)
/* Prepare to convert transition tuples from this child. */
if (node->mt_transition_capture != NULL)
{
- Assert(node->mt_transition_tupconv_maps != NULL);
node->mt_transition_capture->tcs_map =
- node->mt_transition_tupconv_maps[node->mt_whichplan];
+ tupconv_map_for_subplan(node, node->mt_whichplan);
}
if (node->mt_oc_transition_capture != NULL)
{
- Assert(node->mt_transition_tupconv_maps != NULL);
node->mt_oc_transition_capture->tcs_map =
- node->mt_transition_tupconv_maps[node->mt_whichplan];
+ tupconv_map_for_subplan(node, node->mt_whichplan);
}
continue;
}
@@ -1786,7 +2052,8 @@ ExecModifyTable(PlanState *pstate)
break;
case CMD_DELETE:
slot = ExecDelete(node, tupleid, oldtuple, planSlot,
- &node->mt_epqstate, estate, node->canSetTag);
+ &node->mt_epqstate, estate,
+ NULL, true, node->canSetTag);
break;
default:
elog(ERROR, "unknown operation");
@@ -1830,9 +2097,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
ResultRelInfo *saved_resultRelInfo;
ResultRelInfo *resultRelInfo;
Plan *subplan;
+ int firstVarno = 0;
+ Relation firstResultRel = NULL;
ListCell *l;
int i;
Relation rel;
+ bool update_tuple_routing_needed = node->partColsUpdated;
PartitionTupleRouting *proute = NULL;
int num_partitions = 0;
@@ -1907,6 +2177,16 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
resultRelInfo->ri_IndexRelationDescs == NULL)
ExecOpenIndices(resultRelInfo, mtstate->mt_onconflict != ONCONFLICT_NONE);
+ /*
+ * If this is an UPDATE and a BEFORE UPDATE trigger is present, the
+ * trigger itself might modify the partition-key values. So arrange
+ * for tuple routing.
+ */
+ if (resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_update_before_row &&
+ operation == CMD_UPDATE)
+ update_tuple_routing_needed = true;
+
/* Now init the plan for this result rel */
estate->es_result_relation_info = resultRelInfo;
mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
@@ -1931,16 +2211,35 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
estate->es_result_relation_info = saved_resultRelInfo;
- /* Build state for INSERT tuple routing */
- rel = mtstate->resultRelInfo->ri_RelationDesc;
- if (operation == CMD_INSERT &&
- rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ /* Get the target relation */
+ rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc;
+
+ /*
+ * If it's not a partitioned table after all, UPDATE tuple routing should
+ * not be attempted.
+ */
+ if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ update_tuple_routing_needed = false;
+
+ /*
+ * Build state for tuple routing if it's an INSERT or if it's an UPDATE of
+ * partition key.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ (operation == CMD_INSERT || update_tuple_routing_needed))
{
proute = mtstate->mt_partition_tuple_routing =
ExecSetupPartitionTupleRouting(mtstate,
rel, node->nominalRelation,
estate);
num_partitions = proute->num_partitions;
+
+ /*
+ * Below are required as reference objects for mapping partition
+ * attno's in expressions such as WithCheckOptions and RETURNING.
+ */
+ firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+ firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
}
/*
@@ -1951,6 +2250,17 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
ExecSetupTransitionCaptureState(mtstate, estate);
/*
+ * Construct mapping from each of the per-subplan partition attnos to the
+ * root attno. This is required when during update row movement the tuple
+ * descriptor of a source partition does not match the root partitioned
+ * table descriptor. In such a case we need to convert tuples to the root
+ * tuple descriptor, because the search for destination partition starts
+ * from the root. Skip this setup if it's not a partition key update.
+ */
+ if (update_tuple_routing_needed)
+ ExecSetupChildParentMapForSubplan(mtstate);
+
+ /*
* Initialize any WITH CHECK OPTION constraints if needed.
*/
resultRelInfo = mtstate->resultRelInfo;
@@ -1980,26 +2290,29 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
* Build WITH CHECK OPTION constraints for each leaf partition rel. Note
* that we didn't build the withCheckOptionList for each partition within
* the planner, but simple translation of the varattnos for each partition
- * will suffice. This only occurs for the INSERT case; UPDATE/DELETE
- * cases are handled above.
+ * will suffice. This only occurs for the INSERT case or for UPDATE row
+ * movement. DELETEs and local UPDATEs are handled above.
*/
if (node->withCheckOptionLists != NIL && num_partitions > 0)
{
- List *wcoList;
- PlanState *plan;
+ List *first_wcoList;
/*
* In case of INSERT on partitioned tables, there is only one plan.
* Likewise, there is only one WITH CHECK OPTIONS list, not one per
- * partition. We make a copy of the WCO qual for each partition; note
- * that, if there are SubPlans in there, they all end up attached to
- * the one parent Plan node.
+ * partition. Whereas for UPDATE, there are as many WCOs as there are
+ * plans. So in either case, use the WCO expression of the first
+ * resultRelInfo as a reference to calculate attno's for the WCO
+ * expression of each of the partitions. We make a copy of the WCO
+ * qual for each partition. Note that, if there are SubPlans in there,
+ * they all end up attached to the one parent Plan node.
*/
- Assert(operation == CMD_INSERT &&
- list_length(node->withCheckOptionLists) == 1 &&
- mtstate->mt_nplans == 1);
- wcoList = linitial(node->withCheckOptionLists);
- plan = mtstate->mt_plans[0];
+ Assert(update_tuple_routing_needed ||
+ (operation == CMD_INSERT &&
+ list_length(node->withCheckOptionLists) == 1 &&
+ mtstate->mt_nplans == 1));
+
+ first_wcoList = linitial(node->withCheckOptionLists);
for (i = 0; i < num_partitions; i++)
{
Relation partrel;
@@ -2008,17 +2321,26 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
ListCell *ll;
resultRelInfo = proute->partitions[i];
+
+ /*
+ * If we are referring to a resultRelInfo from one of the update
+ * result rels, that result rel would already have
+ * WithCheckOptions initialized.
+ */
+ if (resultRelInfo->ri_WithCheckOptions)
+ continue;
+
partrel = resultRelInfo->ri_RelationDesc;
- /* varno = node->nominalRelation */
- mapped_wcoList = map_partition_varattnos(wcoList,
- node->nominalRelation,
- partrel, rel, NULL);
+ mapped_wcoList = map_partition_varattnos(first_wcoList,
+ firstVarno,
+ partrel, firstResultRel,
+ NULL);
foreach(ll, mapped_wcoList)
{
WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
- plan);
+ &mtstate->ps);
wcoExprs = lappend(wcoExprs, wcoExpr);
}
@@ -2035,7 +2357,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
{
TupleTableSlot *slot;
ExprContext *econtext;
- List *returningList;
+ List *firstReturningList;
/*
* Initialize result tuple slot and assign its rowtype using the first
@@ -2071,22 +2393,35 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
* Build a projection for each leaf partition rel. Note that we
* didn't build the returningList for each partition within the
* planner, but simple translation of the varattnos for each partition
- * will suffice. This only occurs for the INSERT case; UPDATE/DELETE
- * are handled above.
+ * will suffice. This only occurs for the INSERT case or for UPDATE
+ * row movement. DELETEs and local UPDATEs are handled above.
*/
- returningList = linitial(node->returningLists);
+ firstReturningList = linitial(node->returningLists);
for (i = 0; i < num_partitions; i++)
{
Relation partrel;
List *rlist;
resultRelInfo = proute->partitions[i];
+
+ /*
+ * If we are referring to a resultRelInfo from one of the update
+ * result rels, that result rel would already have a returningList
+ * built.
+ */
+ if (resultRelInfo->ri_projectReturning)
+ continue;
+
partrel = resultRelInfo->ri_RelationDesc;
- /* varno = node->nominalRelation */
- rlist = map_partition_varattnos(returningList,
- node->nominalRelation,
- partrel, rel, NULL);
+ /*
+ * Use the returning expression of the first resultRelInfo as a
+ * reference to calculate attno's for the returning expression of
+ * each of the partitions.
+ */
+ rlist = map_partition_varattnos(firstReturningList,
+ firstVarno,
+ partrel, firstResultRel, NULL);
resultRelInfo->ri_projectReturning =
ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
resultRelInfo->ri_RelationDesc->rd_att);