author     Alexander Korotkov <akorotkov@postgresql.org>    2023-03-23 00:13:37 +0300
committer  Alexander Korotkov <akorotkov@postgresql.org>    2023-03-23 00:26:59 +0300
commit     11470f544e3729c60fab890145b2e839cbc8905e
tree       8ebd3c5b4aab9023e54e71e892b91a5beb9de8dc
parent     764da7710bf68eebb2c0facb2f871bc3c7a705b6
Allow locking updated tuples in tuple_update() and tuple_delete()
Currently, in read committed transaction isolation mode (the default), we have
the following sequence of actions when tuple_update()/tuple_delete() finds
the tuple updated by a concurrent transaction:

1. Attempt to update/delete the tuple with tuple_update()/tuple_delete(),
   which returns TM_Updated.
2. Lock the tuple with tuple_lock().
3. Re-evaluate the plan qual (recheck whether we still need to update/delete,
   and calculate the new tuple for an update).
4. Make a second attempt to update/delete the tuple with
   tuple_update()/tuple_delete().  This attempt should succeed, since the
   tuple was previously locked.

This patch eliminates step 2 by taking the lock during the first
tuple_update()/tuple_delete() call.  The heap table access method saves some
effort by checking the updated tuple once instead of twice.  Future
undo-based table access methods, which will start from the latest row
version, can immediately place a lock there.  The code in nodeModifyTable.c
is simplified by removing the nested switch/case.

Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com
Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp
Reviewed-by: Andres Freund, Chris Travers
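The diff below hands the EPQ slot to table_tuple_update()/table_tuple_delete()
through a LazyTupleTableSlot wrapper, so the slot is allocated only if the
table AM actually encounters a concurrently updated tuple.  The wrapper itself
is defined elsewhere in this commit, not in this file's diff; as a reading
aid, here is a minimal self-contained sketch of the lazy-slot pattern,
reconstructed from the MAKE_LAZY_TTS call sites below.  The struct layout and
the LAZY_TTS_EVAL accessor are assumptions of this sketch, not the verbatim
header:

/* Sketch only: layout and accessor reconstructed from usage, not verbatim. */
typedef struct TupleTableSlot TupleTableSlot;	/* opaque in this sketch */

/* Callback that produces the slot on first use. */
typedef TupleTableSlot *(*LazyTupleTableSlotGetter) (void *arg);

typedef struct LazyTupleTableSlot
{
	TupleTableSlot *slot;		/* cached slot; NULL until first access */
	LazyTupleTableSlotGetter getter;	/* produces the slot when needed */
	void	   *getterArg;		/* passed through to the getter */
} LazyTupleTableSlot;

/* Initialize the wrapper without allocating anything yet. */
#define MAKE_LAZY_TTS(lazySlot, callback, arg) \
	do { \
		(lazySlot)->slot = NULL; \
		(lazySlot)->getter = (callback); \
		(lazySlot)->getterArg = (arg); \
	} while (0)

/*
 * Resolve the wrapper to a real slot, invoking the getter at most once.
 * A NULL wrapper means the caller did not request lock-on-update, and
 * NULL is returned.
 */
#define LAZY_TTS_EVAL(lazySlot) \
	((lazySlot) ? \
	 ((lazySlot)->slot ? (lazySlot)->slot : \
	  ((lazySlot)->slot = (lazySlot)->getter((lazySlot)->getterArg))) : \
	 NULL)

With this shape, the common TM_Ok path never pays for creating an EPQ slot;
only the conflict path does.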
Diffstat (limited to 'src/backend/executor/nodeModifyTable.c')
-rw-r--r--   src/backend/executor/nodeModifyTable.c   288
1 file changed, 122 insertions(+), 166 deletions(-)
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 93ebfdbb0d8..e3503756818 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1325,25 +1325,61 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
/*
+ * Callback argument and getter implementing the LazyTupleTableSlot wrapper
+ * for the EPQ slot to be passed to table_tuple_update()/table_tuple_delete().
+ */
+typedef struct
+{
+ EPQState *epqstate;
+ ResultRelInfo *resultRelInfo;
+} GetEPQSlotArg;
+
+static TupleTableSlot *
+GetEPQSlot(void *arg)
+{
+ GetEPQSlotArg *slotArg = (GetEPQSlotArg *) arg;
+
+ return EvalPlanQualSlot(slotArg->epqstate,
+ slotArg->resultRelInfo->ri_RelationDesc,
+ slotArg->resultRelInfo->ri_RangeTableIndex);
+}
+
+/*
* ExecDeleteAct -- subroutine for ExecDelete
*
* Actually delete the tuple from a plain table.
*
+ * If the 'lockUpdated' flag is set and the target tuple is updated, then
+ * the latest version gets locked and fetched into the EPQ slot.
+ *
* Caller is in charge of doing EvalPlanQual as necessary
*/
static TM_Result
ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, bool changingPart)
+ ItemPointer tupleid, bool changingPart, bool lockUpdated)
{
EState *estate = context->estate;
+ GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
+ LazyTupleTableSlot lazyEPQSlot,
+ *lazyEPQSlotPtr;
+ if (lockUpdated)
+ {
+ MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
+ lazyEPQSlotPtr = &lazyEPQSlot;
+ }
+ else
+ {
+ lazyEPQSlotPtr = NULL;
+ }
return table_tuple_delete(resultRelInfo->ri_RelationDesc, tupleid,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&context->tmfd,
- changingPart);
+ changingPart,
+ lazyEPQSlotPtr);
}
/*
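The locking promised by the new 'lockUpdated' comment above happens inside
the table AM, not in this file.  For orientation, a hedged sketch of how the
heap AM's delete path might consume the lazy slot: heapam_lock_latest_version
is a hypothetical stand-in for the AM's internal lock routine, and the
surrounding shape is an assumption based on the commit message rather than
the verbatim table AM change.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"

static TM_Result
heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
					Snapshot snapshot, Snapshot crosscheck, bool wait,
					TM_FailureData *tmfd, bool changingPart,
					LazyTupleTableSlot *lockedSlot)
{
	TM_Result	result;

	/* Plain delete attempt, same as before this patch. */
	result = heap_delete(relation, tid, cid, crosscheck, wait,
						 tmfd, changingPart);

	/*
	 * If a concurrent transaction updated the tuple and the caller
	 * supplied a lazy slot, lock the latest row version right away so
	 * the executor's retry succeeds.  This is the only point where the
	 * EPQ slot gets materialized.
	 */
	if (result == TM_Updated && lockedSlot)
	{
		TupleTableSlot *evalSlot;

		Assert(wait);

		evalSlot = LAZY_TTS_EVAL(lockedSlot);

		/*
		 * Hypothetical helper: chase the update chain, lock the latest
		 * version, and fetch it into evalSlot, reporting the outcome
		 * through tmfd.
		 */
		result = heapam_lock_latest_version(relation, tid, snapshot,
											evalSlot, cid, tmfd);
	}

	return result;
}

On success, the executor finds the locked version already sitting in the EPQ
slot, which is why the nodeModifyTable.c hunks below can drop their own
table_tuple_lock() calls.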
@@ -1488,7 +1524,8 @@ ExecDelete(ModifyTableContext *context,
* transaction-snapshot mode transactions.
*/
ldelete:
- result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
+ result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
+ !IsolationUsesXactSnapshot());
switch (result)
{
@@ -1541,103 +1578,49 @@ ldelete:
errmsg("could not serialize access due to concurrent update")));
/*
- * Already know that we're going to need to do EPQ, so
- * fetch tuple directly into the right slot.
+ * ExecDeleteAct() has already locked the old tuple for
+ * us. Now we need to copy it to the right slot.
*/
EvalPlanQualBegin(context->epqstate);
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
- result = table_tuple_lock(resultRelationDesc, tupleid,
- estate->es_snapshot,
- inputslot, estate->es_output_cid,
- LockTupleExclusive, LockWaitBlock,
- TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
- &context->tmfd);
-
- switch (result)
+ /*
+ * Save the locked tuple for further processing of the
+ * RETURNING clause.
+ */
+ if (processReturning &&
+ resultRelInfo->ri_projectReturning &&
+ !resultRelInfo->ri_FdwRoutine)
{
- case TM_Ok:
- Assert(context->tmfd.traversed);
+ TupleTableSlot *returningSlot;
- /*
- * Save locked tuple for further processing of
- * RETURNING clause.
- */
- if (processReturning &&
- resultRelInfo->ri_projectReturning &&
- !resultRelInfo->ri_FdwRoutine)
- {
- TupleTableSlot *returningSlot;
-
- returningSlot = ExecGetReturningSlot(estate, resultRelInfo);
- ExecCopySlot(returningSlot, inputslot);
- ExecMaterializeSlot(returningSlot);
- }
-
- epqslot = EvalPlanQual(context->epqstate,
- resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex,
- inputslot);
- if (TupIsNull(epqslot))
- /* Tuple not passing quals anymore, exiting... */
- return NULL;
-
- /*
- * If requested, skip delete and pass back the
- * updated row.
- */
- if (epqreturnslot)
- {
- *epqreturnslot = epqslot;
- return NULL;
- }
- else
- goto ldelete;
-
- case TM_SelfModified:
-
- /*
- * This can be reached when following an update
- * chain from a tuple updated by another session,
- * reaching a tuple that was already updated in
- * this transaction. If previously updated by this
- * command, ignore the delete, otherwise error
- * out.
- *
- * See also TM_SelfModified response to
- * table_tuple_delete() above.
- */
- if (context->tmfd.cmax != estate->es_output_cid)
- ereport(ERROR,
- (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
- errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
- errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
- return NULL;
-
- case TM_Deleted:
- /* tuple already deleted; nothing to do */
- return NULL;
+ returningSlot = ExecGetReturningSlot(estate,
+ resultRelInfo);
+ ExecCopySlot(returningSlot, inputslot);
+ ExecMaterializeSlot(returningSlot);
+ }
- default:
+ Assert(context->tmfd.traversed);
+ epqslot = EvalPlanQual(context->epqstate,
+ resultRelationDesc,
+ resultRelInfo->ri_RangeTableIndex,
+ inputslot);
+ if (TupIsNull(epqslot))
+ /* Tuple not passing quals anymore, exiting... */
+ return NULL;
- /*
- * TM_Invisible should be impossible because we're
- * waiting for updated row versions, and would
- * already have errored out if the first version
- * is invisible.
- *
- * TM_Updated should be impossible, because we're
- * locking the latest version via
- * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
- */
- elog(ERROR, "unexpected table_tuple_lock status: %u",
- result);
- return NULL;
+ /*
+ * If requested, skip delete and pass back the updated
+ * row.
+ */
+ if (epqreturnslot)
+ {
+ *epqreturnslot = epqslot;
+ return NULL;
}
-
- Assert(false);
- break;
+ else
+ goto ldelete;
}
case TM_Deleted:
@@ -1982,12 +1965,15 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag, UpdateContext *updateCxt)
+ bool canSetTag, bool lockUpdated, UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
bool partition_constraint_failed;
TM_Result result;
+ GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
+ LazyTupleTableSlot lazyEPQSlot,
+ *lazyEPQSlotPtr;
updateCxt->crossPartUpdate = false;
@@ -2113,13 +2099,23 @@ lreplace:
* for referential integrity updates in transaction-snapshot mode
* transactions.
*/
+ if (lockUpdated)
+ {
+ MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
+ lazyEPQSlotPtr = &lazyEPQSlot;
+ }
+ else
+ {
+ lazyEPQSlotPtr = NULL;
+ }
result = table_tuple_update(resultRelationDesc, tupleid, slot,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&context->tmfd, &updateCxt->lockmode,
- &updateCxt->updateIndexes);
+ &updateCxt->updateIndexes,
+ lazyEPQSlotPtr);
if (result == TM_Ok)
updateCxt->updated = true;
@@ -2273,7 +2269,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
static TupleTableSlot *
ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag)
+ bool canSetTag, bool locked)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2335,7 +2331,8 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
- canSetTag, &updateCxt);
+ canSetTag, !IsolationUsesXactSnapshot(),
+ &updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@@ -2394,81 +2391,39 @@ redo_act:
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ Assert(!locked);
/*
- * Already know that we're going to need to do EPQ, so
- * fetch tuple directly into the right slot.
+ * ExecUpdateAct() has already locked the old tuple for
+ * us. Now we need to copy it to the right slot.
*/
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
- result = table_tuple_lock(resultRelationDesc, tupleid,
- estate->es_snapshot,
- inputslot, estate->es_output_cid,
- updateCxt.lockmode, LockWaitBlock,
- TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
- &context->tmfd);
-
- switch (result)
- {
- case TM_Ok:
- Assert(context->tmfd.traversed);
-
- /* Make sure ri_oldTupleSlot is initialized. */
- if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
- ExecInitUpdateProjection(context->mtstate,
- resultRelInfo);
-
- /*
- * Save the locked tuple for further calculation
- * of the new tuple.
- */
- oldSlot = resultRelInfo->ri_oldTupleSlot;
- ExecCopySlot(oldSlot, inputslot);
- ExecMaterializeSlot(oldSlot);
-
- epqslot = EvalPlanQual(context->epqstate,
- resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex,
- inputslot);
- if (TupIsNull(epqslot))
- /* Tuple not passing quals anymore, exiting... */
- return NULL;
-
- slot = ExecGetUpdateNewTuple(resultRelInfo,
- epqslot, oldSlot);
- goto redo_act;
-
- case TM_Deleted:
- /* tuple already deleted; nothing to do */
- return NULL;
-
- case TM_SelfModified:
-
- /*
- * This can be reached when following an update
- * chain from a tuple updated by another session,
- * reaching a tuple that was already updated in
- * this transaction. If previously modified by
- * this command, ignore the redundant update,
- * otherwise error out.
- *
- * See also TM_SelfModified response to
- * table_tuple_update() above.
- */
- if (context->tmfd.cmax != estate->es_output_cid)
- ereport(ERROR,
- (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
- errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
- errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
- return NULL;
+ /* Make sure ri_oldTupleSlot is initialized. */
+ if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+ ExecInitUpdateProjection(context->mtstate,
+ resultRelInfo);
- default:
- /* see table_tuple_lock call in ExecDelete() */
- elog(ERROR, "unexpected table_tuple_lock status: %u",
- result);
- return NULL;
- }
+ /*
+ * Save the locked tuple for further calculation of the
+ * new tuple.
+ */
+ oldSlot = resultRelInfo->ri_oldTupleSlot;
+ ExecCopySlot(oldSlot, inputslot);
+ ExecMaterializeSlot(oldSlot);
+ Assert(context->tmfd.traversed);
+
+ epqslot = EvalPlanQual(context->epqstate,
+ resultRelationDesc,
+ resultRelInfo->ri_RangeTableIndex,
+ inputslot);
+ if (TupIsNull(epqslot))
+ /* Tuple not passing quals anymore, exiting... */
+ return NULL;
+ slot = ExecGetUpdateNewTuple(resultRelInfo,
+ epqslot, oldSlot);
+ goto redo_act;
}
break;
@@ -2710,7 +2665,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
*returning = ExecUpdate(context, resultRelInfo,
conflictTid, NULL,
resultRelInfo->ri_onConflict->oc_ProjSlot,
- canSetTag);
+ canSetTag, true);
/*
* Clear out existing tuple, as there might not be another conflict among
@@ -2913,7 +2868,7 @@ lmerge_matched:
break; /* concurrent update/delete */
}
result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL,
- newslot, false, &updateCxt);
+ newslot, false, false, &updateCxt);
if (result == TM_Ok && updateCxt.updated)
{
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
@@ -2931,7 +2886,8 @@ lmerge_matched:
return true; /* "do nothing" */
break; /* concurrent update/delete */
}
- result = ExecDeleteAct(context, resultRelInfo, tupleid, false);
+ result = ExecDeleteAct(context, resultRelInfo, tupleid,
+ false, false);
if (result == TM_Ok)
{
ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
@@ -3837,7 +3793,7 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
- slot, node->canSetTag);
+ slot, node->canSetTag, false);
break;
case CMD_DELETE: