From 40ca70ebcc9d0bc1c02937b27c44b2766617e790 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 12 Jul 2018 12:51:39 +0530 Subject: Allow using the updated tuple while moving it to a different partition. An update that causes the tuple to be moved to a different partition was missing out on re-constructing the to-be-updated tuple, based on the latest tuple in the update chain. Instead, it's simply deleting the latest tuple and inserting a new tuple in the new partition based on the old tuple. Commit 2f17844104 didn't consider this case, so some of the updates were getting lost. In passing, change the argument order for output parameter in ExecDelete and add some commentary about it. Reported-by: Pavan Deolasee Author: Amit Khandekar, with minor changes by me Reviewed-by: Dilip Kumar, Amit Kapila and Alvaro Herrera Backpatch-through: 11 Discussion: https://postgr.es/m/CAJ3gD9fRbEzDqdeDq1jxqZUb47kJn+tQ7=Bcgjc8quqKsDViKQ@mail.gmail.com --- src/backend/executor/nodeModifyTable.c | 73 ++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 20 deletions(-) (limited to 'src/backend/executor/nodeModifyTable.c') diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 7e0b8679717..779b3d48940 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -609,7 +609,11 @@ ExecInsert(ModifyTableState *mtstate, * foreign table, tupleid is invalid; the FDW has to figure out * which row to delete using data from the planSlot. oldtuple is * passed to foreign table triggers; it is NULL when the foreign - * table has no relevant triggers. + * table has no relevant triggers. We use tupleDeleted to indicate + * whether the tuple is actually deleted, callers can use it to + * decide whether to continue the operation. When this DELETE is a + * part of an UPDATE of partition-key, then the slot returned by + * EvalPlanQual() is passed back using output parameter epqslot. * * Returns RETURNING result if any, otherwise NULL. * ---------------------------------------------------------------- @@ -621,10 +625,11 @@ ExecDelete(ModifyTableState *mtstate, TupleTableSlot *planSlot, EPQState *epqstate, EState *estate, - bool *tupleDeleted, bool processReturning, bool canSetTag, - bool changingPart) + bool changingPart, + bool *tupleDeleted, + TupleTableSlot **epqslot) { ResultRelInfo *resultRelInfo; Relation resultRelationDesc; @@ -649,7 +654,7 @@ ExecDelete(ModifyTableState *mtstate, bool dodelete; dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, - tupleid, oldtuple); + tupleid, oldtuple, epqslot); if (!dodelete) /* "do nothing" */ return NULL; @@ -769,19 +774,30 @@ ldelete:; if (!ItemPointerEquals(tupleid, &hufd.ctid)) { - TupleTableSlot *epqslot; - - epqslot = EvalPlanQual(estate, - epqstate, - resultRelationDesc, - resultRelInfo->ri_RangeTableIndex, - LockTupleExclusive, - &hufd.ctid, - hufd.xmax); - if (!TupIsNull(epqslot)) + TupleTableSlot *my_epqslot; + + my_epqslot = EvalPlanQual(estate, + epqstate, + resultRelationDesc, + resultRelInfo->ri_RangeTableIndex, + LockTupleExclusive, + &hufd.ctid, + hufd.xmax); + if (!TupIsNull(my_epqslot)) { *tupleid = hufd.ctid; - goto ldelete; + + /* + * If requested, skip delete and pass back the updated + * row. + */ + if (epqslot) + { + *epqslot = my_epqslot; + return NULL; + } + else + goto ldelete; } } /* tuple already deleted; nothing to do */ @@ -1052,6 +1068,7 @@ lreplace:; { bool tuple_deleted; TupleTableSlot *ret_slot; + TupleTableSlot *epqslot = NULL; PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; int map_index; TupleConversionMap *tupconv_map; @@ -1081,8 +1098,8 @@ lreplace:; * processing. We want to return rows from INSERT. */ ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate, - estate, &tuple_deleted, false, - false /* canSetTag */ , true /* changingPart */ ); + estate, false, false /* canSetTag */ , + true /* changingPart */ , &tuple_deleted, &epqslot); /* * For some reason if DELETE didn't happen (e.g. trigger prevented @@ -1105,7 +1122,23 @@ lreplace:; * resurrect it. */ if (!tuple_deleted) - return NULL; + { + /* + * epqslot will be typically NULL. But when ExecDelete() + * finds that another transaction has concurrently updated the + * same row, it re-fetches the row, skips the delete, and + * epqslot is set to the re-fetched tuple slot. In that case, + * we need to do all the checks again. + */ + if (TupIsNull(epqslot)) + return NULL; + else + { + slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot); + tuple = ExecMaterializeSlot(slot); + goto lreplace; + } + } /* * Updates set the transition capture map only when a new subplan @@ -2136,8 +2169,8 @@ ExecModifyTable(PlanState *pstate) case CMD_DELETE: slot = ExecDelete(node, tupleid, oldtuple, planSlot, &node->mt_epqstate, estate, - NULL, true, node->canSetTag, - false /* changingPart */ ); + true, node->canSetTag, + false /* changingPart */ , NULL, NULL); break; default: elog(ERROR, "unknown operation"); -- cgit v1.2.3