author     Alexander Korotkov <akorotkov@postgresql.org>	2023-03-23 00:13:37 +0300
committer  Alexander Korotkov <akorotkov@postgresql.org>	2023-03-23 00:26:59 +0300
commit     11470f544e3729c60fab890145b2e839cbc8905e
tree       8ebd3c5b4aab9023e54e71e892b91a5beb9de8dc	/src/backend/executor/nodeModifyTable.c
parent     764da7710bf68eebb2c0facb2f871bc3c7a705b6
Allow locking updated tuples in tuple_update() and tuple_delete()
Currently, in read committed transaction isolation mode (the default), we have
the following sequence of actions when tuple_update()/tuple_delete() finds
the tuple updated by a concurrent transaction (a condensed sketch of this flow
appears after the list).
1. Attempt to update/delete tuple with tuple_update()/tuple_delete(), which
returns TM_Updated.
2. Lock tuple with tuple_lock().
3. Re-evaluate plan qual (recheck if we still need to update/delete and
calculate the new tuple for update).
4. Second attempt to update/delete tuple with tuple_update()/tuple_delete().
This attempt should be successful, since the tuple was previously locked.
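For illustration, the pre-patch flow in ExecDelete() looked roughly like the
condensed sketch below. It is drawn from the code this patch removes further
down; declarations, the TM_SelfModified/TM_Deleted branches, and error handling
are omitted, so it is a sketch rather than compilable code.

	/* Step 1: first delete attempt. */
	result = table_tuple_delete(resultRelationDesc, tupleid,
								estate->es_output_cid,
								estate->es_snapshot,
								estate->es_crosscheck_snapshot,
								true /* wait for commit */ ,
								&context->tmfd, changingPart);

	if (result == TM_Updated)
	{
		/* Step 2: separately lock the latest row version into the EPQ slot. */
		inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
									 resultRelInfo->ri_RangeTableIndex);
		result = table_tuple_lock(resultRelationDesc, tupleid,
								  estate->es_snapshot,
								  inputslot, estate->es_output_cid,
								  LockTupleExclusive, LockWaitBlock,
								  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
								  &context->tmfd);

		/* Step 3: re-evaluate the plan quals against the locked version. */
		epqslot = EvalPlanQual(context->epqstate, resultRelationDesc,
							   resultRelInfo->ri_RangeTableIndex, inputslot);
		if (TupIsNull(epqslot))
			return NULL;	/* row no longer qualifies; nothing to delete */

		/* Step 4: retry the delete; it should succeed now that the row is locked. */
		goto ldelete;
	}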
This patch eliminates step 2 by taking the lock during the first
tuple_update()/tuple_delete() call. The heap table access method saves some
effort by checking the updated tuple once instead of twice. Future
undo-based table access methods, which will start from the latest row version,
can immediately place a lock there.
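With the patch, the caller passes a lazily evaluated slot, so the EPQ slot is
only created if the table AM actually hits a concurrently updated tuple. A
minimal sketch of the new calling convention, taken from the ExecDeleteAct()
changes in the diff below (LazyTupleTableSlot and MAKE_LAZY_TTS come from the
accompanying header changes, which are outside this file's diff):

	GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
	LazyTupleTableSlot lazyEPQSlot,
			   *lazyEPQSlotPtr;

	if (lockUpdated)
	{
		/* Slot is materialized (via GetEPQSlot) only on a concurrent update. */
		MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
		lazyEPQSlotPtr = &lazyEPQSlot;
	}
	else
	{
		lazyEPQSlotPtr = NULL;
	}
	result = table_tuple_delete(resultRelInfo->ri_RelationDesc, tupleid,
								estate->es_output_cid,
								estate->es_snapshot,
								estate->es_crosscheck_snapshot,
								true /* wait for commit */ ,
								&context->tmfd,
								changingPart,
								lazyEPQSlotPtr);

On TM_Updated the table AM has then already locked and fetched the latest row
version into that slot, so the executor can proceed directly to EvalPlanQual()
and retry.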
The code in nodeModifyTable.c is simplified by removing the nested switch/case.
Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com
Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp
Reviewed-by: Andres Freund, Chris Travers
Diffstat (limited to 'src/backend/executor/nodeModifyTable.c')
-rw-r--r--	src/backend/executor/nodeModifyTable.c | 288
1 file changed, 122 insertions(+), 166 deletions(-)
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 93ebfdbb0d8..e3503756818 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1325,25 +1325,61 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 }
 
 /*
+ * The implementation for LazyTupleTableSlot wrapper for EPQ slot to be passed
+ * to table_tuple_update()/table_tuple_delete().
+ */
+typedef struct
+{
+	EPQState   *epqstate;
+	ResultRelInfo *resultRelInfo;
+} GetEPQSlotArg;
+
+static TupleTableSlot *
+GetEPQSlot(void *arg)
+{
+	GetEPQSlotArg *slotArg = (GetEPQSlotArg *) arg;
+
+	return EvalPlanQualSlot(slotArg->epqstate,
+							slotArg->resultRelInfo->ri_RelationDesc,
+							slotArg->resultRelInfo->ri_RangeTableIndex);
+}
+
+/*
  * ExecDeleteAct -- subroutine for ExecDelete
  *
  * Actually delete the tuple from a plain table.
  *
+ * If the 'lockUpdated' flag is set and the target tuple is updated, then
+ * the latest version gets locked and fetched into the EPQ slot.
+ *
  * Caller is in charge of doing EvalPlanQual as necessary
  */
 static TM_Result
 ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-			  ItemPointer tupleid, bool changingPart)
+			  ItemPointer tupleid, bool changingPart, bool lockUpdated)
 {
 	EState	   *estate = context->estate;
+	GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
+	LazyTupleTableSlot lazyEPQSlot,
+			   *lazyEPQSlotPtr;
 
+	if (lockUpdated)
+	{
+		MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
+		lazyEPQSlotPtr = &lazyEPQSlot;
+	}
+	else
+	{
+		lazyEPQSlotPtr = NULL;
+	}
 	return table_tuple_delete(resultRelInfo->ri_RelationDesc, tupleid,
 							  estate->es_output_cid,
 							  estate->es_snapshot,
 							  estate->es_crosscheck_snapshot,
 							  true /* wait for commit */ ,
 							  &context->tmfd,
-							  changingPart);
+							  changingPart,
+							  lazyEPQSlotPtr);
 }
 
 /*
@@ -1488,7 +1524,8 @@ ExecDelete(ModifyTableContext *context,
 	 * transaction-snapshot mode transactions.
 	 */
 ldelete:
-	result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
+	result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
+						   !IsolationUsesXactSnapshot());
 
 	switch (result)
 	{
@@ -1541,103 +1578,49 @@ ldelete:
 							 errmsg("could not serialize access due to concurrent update")));
 
 				/*
-				 * Already know that we're going to need to do EPQ, so
-				 * fetch tuple directly into the right slot.
+				 * ExecDeleteAct() has already locked the old tuple for
+				 * us. Now we need to copy it to the right slot.
 				 */
 				EvalPlanQualBegin(context->epqstate);
 				inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
 											 resultRelInfo->ri_RangeTableIndex);
 
-				result = table_tuple_lock(resultRelationDesc, tupleid,
-										  estate->es_snapshot,
-										  inputslot, estate->es_output_cid,
-										  LockTupleExclusive, LockWaitBlock,
-										  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-										  &context->tmfd);
-
-				switch (result)
+				/*
+				 * Save locked table for further processing for RETURNING
+				 * clause.
+				 */
+				if (processReturning &&
+					resultRelInfo->ri_projectReturning &&
+					!resultRelInfo->ri_FdwRoutine)
 				{
-					case TM_Ok:
-						Assert(context->tmfd.traversed);
+					TupleTableSlot *returningSlot;
 
-						/*
-						 * Save locked tuple for further processing of
-						 * RETURNING clause.
-						 */
-						if (processReturning &&
-							resultRelInfo->ri_projectReturning &&
-							!resultRelInfo->ri_FdwRoutine)
-						{
-							TupleTableSlot *returningSlot;
-
-							returningSlot = ExecGetReturningSlot(estate, resultRelInfo);
-							ExecCopySlot(returningSlot, inputslot);
-							ExecMaterializeSlot(returningSlot);
-						}
-
-						epqslot = EvalPlanQual(context->epqstate,
-											   resultRelationDesc,
-											   resultRelInfo->ri_RangeTableIndex,
-											   inputslot);
-						if (TupIsNull(epqslot))
-							/* Tuple not passing quals anymore, exiting... */
-							return NULL;
-
-						/*
-						 * If requested, skip delete and pass back the
-						 * updated row.
-						 */
-						if (epqreturnslot)
-						{
-							*epqreturnslot = epqslot;
-							return NULL;
-						}
-						else
-							goto ldelete;
-
-					case TM_SelfModified:
-
-						/*
-						 * This can be reached when following an update
-						 * chain from a tuple updated by another session,
-						 * reaching a tuple that was already updated in
-						 * this transaction. If previously updated by this
-						 * command, ignore the delete, otherwise error
-						 * out.
-						 *
-						 * See also TM_SelfModified response to
-						 * table_tuple_delete() above.
-						 */
-						if (context->tmfd.cmax != estate->es_output_cid)
-							ereport(ERROR,
-									(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
-									 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
-									 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
-						return NULL;
-
-					case TM_Deleted:
-						/* tuple already deleted; nothing to do */
-						return NULL;
+					returningSlot = ExecGetReturningSlot(estate,
+														 resultRelInfo);
+					ExecCopySlot(returningSlot, inputslot);
+					ExecMaterializeSlot(returningSlot);
+				}
 
-					default:
+				Assert(context->tmfd.traversed);
+				epqslot = EvalPlanQual(context->epqstate,
+									   resultRelationDesc,
+									   resultRelInfo->ri_RangeTableIndex,
+									   inputslot);
+				if (TupIsNull(epqslot))
+					/* Tuple not passing quals anymore, exiting... */
+					return NULL;
 
-						/*
-						 * TM_Invisible should be impossible because we're
-						 * waiting for updated row versions, and would
-						 * already have errored out if the first version
-						 * is invisible.
-						 *
-						 * TM_Updated should be impossible, because we're
-						 * locking the latest version via
-						 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
-						 */
-						elog(ERROR, "unexpected table_tuple_lock status: %u",
-							 result);
-						return NULL;
+				/*
+				 * If requested, skip delete and pass back the updated
+				 * row.
+				 */
+				if (epqreturnslot)
+				{
+					*epqreturnslot = epqslot;
+					return NULL;
 				}
-
-				Assert(false);
-				break;
+				else
+					goto ldelete;
 			}
 
 		case TM_Deleted:
@@ -1982,12 +1965,15 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
 static TM_Result
 ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 			  ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
-			  bool canSetTag, UpdateContext *updateCxt)
+			  bool canSetTag, bool lockUpdated, UpdateContext *updateCxt)
 {
 	EState	   *estate = context->estate;
 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
 	bool		partition_constraint_failed;
 	TM_Result	result;
+	GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
+	LazyTupleTableSlot lazyEPQSlot,
+			   *lazyEPQSlotPtr;
 
 	updateCxt->crossPartUpdate = false;
 
@@ -2113,13 +2099,23 @@ lreplace:
 	 * for referential integrity updates in transaction-snapshot mode
 	 * transactions.
 	 */
+	if (lockUpdated)
+	{
+		MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
+		lazyEPQSlotPtr = &lazyEPQSlot;
+	}
+	else
+	{
+		lazyEPQSlotPtr = NULL;
+	}
 	result = table_tuple_update(resultRelationDesc, tupleid, slot,
 								estate->es_output_cid,
 								estate->es_snapshot,
 								estate->es_crosscheck_snapshot,
 								true /* wait for commit */ ,
 								&context->tmfd, &updateCxt->lockmode,
-								&updateCxt->updateIndexes);
+								&updateCxt->updateIndexes,
+								lazyEPQSlotPtr);
 	if (result == TM_Ok)
 		updateCxt->updated = true;
 
@@ -2273,7 +2269,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
 static TupleTableSlot *
 ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 		   ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
-		   bool canSetTag)
+		   bool canSetTag, bool locked)
 {
 	EState	   *estate = context->estate;
 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2335,7 +2331,8 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	 */
 redo_act:
 	result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
-						   canSetTag, &updateCxt);
+						   canSetTag, !IsolationUsesXactSnapshot(),
+						   &updateCxt);
 
 	/*
 	 * If ExecUpdateAct reports that a cross-partition update was done,
@@ -2394,81 +2391,39 @@ redo_act:
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
+				Assert(!locked);
 
 				/*
-				 * Already know that we're going to need to do EPQ, so
-				 * fetch tuple directly into the right slot.
+				 * ExecUpdateAct() has already locked the old tuple for
+				 * us. Now we need to copy it to the right slot.
 				 */
 				inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
 											 resultRelInfo->ri_RangeTableIndex);
 
-				result = table_tuple_lock(resultRelationDesc, tupleid,
-										  estate->es_snapshot,
-										  inputslot, estate->es_output_cid,
-										  updateCxt.lockmode, LockWaitBlock,
-										  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-										  &context->tmfd);
-
-				switch (result)
-				{
-					case TM_Ok:
-						Assert(context->tmfd.traversed);
-
-						/* Make sure ri_oldTupleSlot is initialized. */
-						if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
-							ExecInitUpdateProjection(context->mtstate,
-													 resultRelInfo);
-
-						/*
-						 * Save the locked tuple for further calculation
-						 * of the new tuple.
-						 */
-						oldSlot = resultRelInfo->ri_oldTupleSlot;
-						ExecCopySlot(oldSlot, inputslot);
-						ExecMaterializeSlot(oldSlot);
-
-						epqslot = EvalPlanQual(context->epqstate,
-											   resultRelationDesc,
-											   resultRelInfo->ri_RangeTableIndex,
-											   inputslot);
-						if (TupIsNull(epqslot))
-							/* Tuple not passing quals anymore, exiting... */
-							return NULL;
-
-						slot = ExecGetUpdateNewTuple(resultRelInfo,
-													 epqslot, oldSlot);
-						goto redo_act;
-
-					case TM_Deleted:
-						/* tuple already deleted; nothing to do */
-						return NULL;
-
-					case TM_SelfModified:
-
-						/*
-						 * This can be reached when following an update
-						 * chain from a tuple updated by another session,
-						 * reaching a tuple that was already updated in
-						 * this transaction. If previously modified by
-						 * this command, ignore the redundant update,
-						 * otherwise error out.
-						 *
-						 * See also TM_SelfModified response to
-						 * table_tuple_update() above.
-						 */
-						if (context->tmfd.cmax != estate->es_output_cid)
-							ereport(ERROR,
-									(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
-									 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
-									 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
-						return NULL;
+				/* Make sure ri_oldTupleSlot is initialized. */
+				if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+					ExecInitUpdateProjection(context->mtstate,
+											 resultRelInfo);
 
-					default:
-						/* see table_tuple_lock call in ExecDelete() */
-						elog(ERROR, "unexpected table_tuple_lock status: %u",
-							 result);
-						return NULL;
-				}
+				/*
+				 * Save the locked tuple for further calculation of the
+				 * new tuple.
+				 */
+				oldSlot = resultRelInfo->ri_oldTupleSlot;
+				ExecCopySlot(oldSlot, inputslot);
+				ExecMaterializeSlot(oldSlot);
+				Assert(context->tmfd.traversed);
+
+				epqslot = EvalPlanQual(context->epqstate,
+									   resultRelationDesc,
+									   resultRelInfo->ri_RangeTableIndex,
+									   inputslot);
+				if (TupIsNull(epqslot))
+					/* Tuple not passing quals anymore, exiting... */
+					return NULL;
+				slot = ExecGetUpdateNewTuple(resultRelInfo,
											 epqslot, oldSlot);
+				goto redo_act;
 			}
 
 			break;
@@ -2710,7 +2665,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
 	*returning = ExecUpdate(context, resultRelInfo, conflictTid, NULL,
 							resultRelInfo->ri_onConflict->oc_ProjSlot,
-							canSetTag);
+							canSetTag, true);
 
 	/*
 	 * Clear out existing tuple, as there might not be another conflict among
@@ -2913,7 +2868,7 @@ lmerge_matched:
 						break;	/* concurrent update/delete */
 				}
 				result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL,
-									   newslot, false, &updateCxt);
+									   newslot, false, false, &updateCxt);
 				if (result == TM_Ok && updateCxt.updated)
 				{
 					ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
@@ -2931,7 +2886,8 @@ lmerge_matched:
 						return true;	/* "do nothing" */
 					break;		/* concurrent update/delete */
 				}
-				result = ExecDeleteAct(context, resultRelInfo, tupleid, false);
+				result = ExecDeleteAct(context, resultRelInfo, tupleid,
+									   false, false);
 				if (result == TM_Ok)
 				{
 					ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
@@ -3837,7 +3793,7 @@ ExecModifyTable(PlanState *pstate)
 
 				/* Now apply the update. */
 				slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
-								  slot, node->canSetTag);
+								  slot, node->canSetTag, false);
 				break;
 
 			case CMD_DELETE: