author     Alexander Korotkov <akorotkov@postgresql.org>	2023-03-23 00:13:37 +0300
committer  Alexander Korotkov <akorotkov@postgresql.org>	2023-03-23 00:26:59 +0300
commit     11470f544e3729c60fab890145b2e839cbc8905e
tree       8ebd3c5b4aab9023e54e71e892b91a5beb9de8dc	/src/backend/executor/nodeModifyTable.c
parent     764da7710bf68eebb2c0facb2f871bc3c7a705b6
Allow locking updated tuples in tuple_update() and tuple_delete()
Currently, in read committed transaction isolation mode (the default), we have
the following sequence of actions when tuple_update()/tuple_delete() finds
the tuple updated by a concurrent transaction (a condensed sketch of this flow
appears after the list).
1. Attempt to update/delete tuple with tuple_update()/tuple_delete(), which
returns TM_Updated.
2. Lock tuple with tuple_lock().
3. Re-evaluate plan qual (recheck if we still need to update/delete and
calculate the new tuple for update).
4. Second attempt to update/delete tuple with tuple_update()/tuple_delete().
This attempt should be successful, since the tuple was previously locked.
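For illustration, the pre-patch flow in ExecDelete() looked roughly like the
condensed sketch below. It is drawn from the code this patch removes further
down; declarations, the TM_SelfModified/TM_Deleted branches, and error handling
are omitted, so it is a sketch rather than compilable code.

	/* Step 1: first delete attempt. */
	result = table_tuple_delete(resultRelationDesc, tupleid,
								estate->es_output_cid,
								estate->es_snapshot,
								estate->es_crosscheck_snapshot,
								true /* wait for commit */ ,
								&context->tmfd, changingPart);

	if (result == TM_Updated)
	{
		/* Step 2: separately lock the latest row version into the EPQ slot. */
		inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
									 resultRelInfo->ri_RangeTableIndex);
		result = table_tuple_lock(resultRelationDesc, tupleid,
								  estate->es_snapshot,
								  inputslot, estate->es_output_cid,
								  LockTupleExclusive, LockWaitBlock,
								  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
								  &context->tmfd);

		/* Step 3: re-evaluate the plan quals against the locked version. */
		epqslot = EvalPlanQual(context->epqstate, resultRelationDesc,
							   resultRelInfo->ri_RangeTableIndex, inputslot);
		if (TupIsNull(epqslot))
			return NULL;	/* row no longer qualifies; nothing to delete */

		/* Step 4: retry the delete; it should succeed now that the row is locked. */
		goto ldelete;
	}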
This patch eliminates step 2 by taking the lock during the first
tuple_update()/tuple_delete() call. The heap table access method saves some
effort by checking the updated tuple once instead of twice. Future
undo-based table access methods, which will start from the latest row version,
can immediately place a lock there.
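With the patch, the caller passes a lazily evaluated slot, so the EPQ slot is
only created if the table AM actually hits a concurrently updated tuple. A
minimal sketch of the new calling convention, taken from the ExecDeleteAct()
changes in the diff below (LazyTupleTableSlot and MAKE_LAZY_TTS come from the
accompanying header changes, which are outside this file's diff):

	GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
	LazyTupleTableSlot lazyEPQSlot,
			   *lazyEPQSlotPtr;

	if (lockUpdated)
	{
		/* Slot is materialized (via GetEPQSlot) only on a concurrent update. */
		MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
		lazyEPQSlotPtr = &lazyEPQSlot;
	}
	else
	{
		lazyEPQSlotPtr = NULL;
	}
	result = table_tuple_delete(resultRelInfo->ri_RelationDesc, tupleid,
								estate->es_output_cid,
								estate->es_snapshot,
								estate->es_crosscheck_snapshot,
								true /* wait for commit */ ,
								&context->tmfd,
								changingPart,
								lazyEPQSlotPtr);

On TM_Updated the table AM has then already locked and fetched the latest row
version into that slot, so the executor can proceed directly to EvalPlanQual()
and retry.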
The code in nodeModifyTable.c is simplified by removing the nested switch/case.
Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com
Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp
Reviewed-by: Andres Freund, Chris Travers
Diffstat (limited to 'src/backend/executor/nodeModifyTable.c')
-rw-r--r--	src/backend/executor/nodeModifyTable.c | 288
1 file changed, 122 insertions(+), 166 deletions(-)
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 93ebfdbb0d8..e3503756818 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1325,25 +1325,61 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 }
 
 /*
+ * The implementation for LazyTupleTableSlot wrapper for EPQ slot to be passed
+ * to table_tuple_update()/table_tuple_delete().
+ */
+typedef struct
+{
+	EPQState   *epqstate;
+	ResultRelInfo *resultRelInfo;
+} GetEPQSlotArg;
+
+static TupleTableSlot *
+GetEPQSlot(void *arg)
+{
+	GetEPQSlotArg *slotArg = (GetEPQSlotArg *) arg;
+
+	return EvalPlanQualSlot(slotArg->epqstate,
+							slotArg->resultRelInfo->ri_RelationDesc,
+							slotArg->resultRelInfo->ri_RangeTableIndex);
+}
+
+/*
  * ExecDeleteAct -- subroutine for ExecDelete
  *
  * Actually delete the tuple from a plain table.
  *
+ * If the 'lockUpdated' flag is set and the target tuple is updated, then
+ * the latest version gets locked and fetched into the EPQ slot.
+ *
  * Caller is in charge of doing EvalPlanQual as necessary
  */
 static TM_Result
 ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-			  ItemPointer tupleid, bool changingPart)
+			  ItemPointer tupleid, bool changingPart, bool lockUpdated)
 {
 	EState	   *estate = context->estate;
+	GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
+	LazyTupleTableSlot lazyEPQSlot,
+			   *lazyEPQSlotPtr;
 
+	if (lockUpdated)
+	{
+		MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
+		lazyEPQSlotPtr = &lazyEPQSlot;
+	}
+	else
+	{
+		lazyEPQSlotPtr = NULL;
+	}
 	return table_tuple_delete(resultRelInfo->ri_RelationDesc, tupleid,
 							  estate->es_output_cid,
 							  estate->es_snapshot,
 							  estate->es_crosscheck_snapshot,
 							  true /* wait for commit */ ,
 							  &context->tmfd,
-							  changingPart);
+							  changingPart,
+							  lazyEPQSlotPtr);
 }
 
 /*
@@ -1488,7 +1524,8 @@ ExecDelete(ModifyTableContext *context,
 	 * transaction-snapshot mode transactions.
 	 */
 ldelete:
-	result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
+	result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
+						   !IsolationUsesXactSnapshot());
 
 	switch (result)
 	{
@@ -1541,103 +1578,49 @@ ldelete:
 							 errmsg("could not serialize access due to concurrent update")));
 
 				/*
-				 * Already know that we're going to need to do EPQ, so
-				 * fetch tuple directly into the right slot.
+				 * ExecDeleteAct() has already locked the old tuple for
+				 * us. Now we need to copy it to the right slot.
 				 */
 				EvalPlanQualBegin(context->epqstate);
 				inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
 											 resultRelInfo->ri_RangeTableIndex);
 
-				result = table_tuple_lock(resultRelationDesc, tupleid,
-										  estate->es_snapshot,
-										  inputslot, estate->es_output_cid,
-										  LockTupleExclusive, LockWaitBlock,
-										  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-										  &context->tmfd);
-
-				switch (result)
+				/*
+				 * Save locked table for further processing for RETURNING
+				 * clause.
+				 */
+				if (processReturning &&
+					resultRelInfo->ri_projectReturning &&
+					!resultRelInfo->ri_FdwRoutine)
 				{
-					case TM_Ok:
-						Assert(context->tmfd.traversed);
+					TupleTableSlot *returningSlot;
 
-						/*
-						 * Save locked tuple for further processing of
-						 * RETURNING clause.
-						 */
-						if (processReturning &&
-							resultRelInfo->ri_projectReturning &&
-							!resultRelInfo->ri_FdwRoutine)
-						{
-							TupleTableSlot *returningSlot;
-
-							returningSlot = ExecGetReturningSlot(estate, resultRelInfo);
-							ExecCopySlot(returningSlot, inputslot);
-							ExecMaterializeSlot(returningSlot);
-						}
-
-						epqslot = EvalPlanQual(context->epqstate,
-											   resultRelationDesc,
-											   resultRelInfo->ri_RangeTableIndex,
-											   inputslot);
-						if (TupIsNull(epqslot))
-							/* Tuple not passing quals anymore, exiting... */
-							return NULL;
-
-						/*
-						 * If requested, skip delete and pass back the
-						 * updated row.
-						 */
-						if (epqreturnslot)
-						{
-							*epqreturnslot = epqslot;
-							return NULL;
-						}
-						else
-							goto ldelete;
-
-					case TM_SelfModified:
-
-						/*
-						 * This can be reached when following an update
-						 * chain from a tuple updated by another session,
-						 * reaching a tuple that was already updated in
-						 * this transaction. If previously updated by this
-						 * command, ignore the delete, otherwise error
-						 * out.
-						 *
-						 * See also TM_SelfModified response to
-						 * table_tuple_delete() above.
-						 */
-						if (context->tmfd.cmax != estate->es_output_cid)
-							ereport(ERROR,
-									(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
-									 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
-									 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
-						return NULL;
-
-					case TM_Deleted:
-						/* tuple already deleted; nothing to do */
-						return NULL;
+					returningSlot = ExecGetReturningSlot(estate,
+														 resultRelInfo);
+					ExecCopySlot(returningSlot, inputslot);
+					ExecMaterializeSlot(returningSlot);
+				}
 
-					default:
+				Assert(context->tmfd.traversed);
+				epqslot = EvalPlanQual(context->epqstate,
+									   resultRelationDesc,
+									   resultRelInfo->ri_RangeTableIndex,
+									   inputslot);
+				if (TupIsNull(epqslot))
+					/* Tuple not passing quals anymore, exiting... */
+					return NULL;
 
-						/*
-						 * TM_Invisible should be impossible because we're
-						 * waiting for updated row versions, and would
-						 * already have errored out if the first version
-						 * is invisible.
-						 *
-						 * TM_Updated should be impossible, because we're
-						 * locking the latest version via
-						 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
-						 */
-						elog(ERROR, "unexpected table_tuple_lock status: %u",
-							 result);
-						return NULL;
+				/*
+				 * If requested, skip delete and pass back the updated
+				 * row.
+				 */
+				if (epqreturnslot)
+				{
+					*epqreturnslot = epqslot;
+					return NULL;
 				}
-
-				Assert(false);
-				break;
+				else
+					goto ldelete;
 			}
 
 		case TM_Deleted:
@@ -1982,12 +1965,15 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
 static TM_Result
 ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 			  ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
-			  bool canSetTag, UpdateContext *updateCxt)
+			  bool canSetTag, bool lockUpdated, UpdateContext *updateCxt)
 {
 	EState	   *estate = context->estate;
 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
 	bool		partition_constraint_failed;
 	TM_Result	result;
+	GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
+	LazyTupleTableSlot lazyEPQSlot,
+			   *lazyEPQSlotPtr;
 
 	updateCxt->crossPartUpdate = false;
 
@@ -2113,13 +2099,23 @@ lreplace:
 	 * for referential integrity updates in transaction-snapshot mode
 	 * transactions.
 	 */
+	if (lockUpdated)
+	{
+		MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
+		lazyEPQSlotPtr = &lazyEPQSlot;
+	}
+	else
+	{
+		lazyEPQSlotPtr = NULL;
+	}
 	result = table_tuple_update(resultRelationDesc, tupleid, slot,
 								estate->es_output_cid,
 								estate->es_snapshot,
 								estate->es_crosscheck_snapshot,
 								true /* wait for commit */ ,
 								&context->tmfd, &updateCxt->lockmode,
-								&updateCxt->updateIndexes);
+								&updateCxt->updateIndexes,
+								lazyEPQSlotPtr);
 	if (result == TM_Ok)
 		updateCxt->updated = true;
 
@@ -2273,7 +2269,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
 static TupleTableSlot *
 ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 		   ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
-		   bool canSetTag)
+		   bool canSetTag, bool locked)
 {
 	EState	   *estate = context->estate;
 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2335,7 +2331,8 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	 */
 redo_act:
 	result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
-						   canSetTag, &updateCxt);
+						   canSetTag, !IsolationUsesXactSnapshot(),
+						   &updateCxt);
 
 	/*
 	 * If ExecUpdateAct reports that a cross-partition update was done,
@@ -2394,81 +2391,39 @@ redo_act:
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
+				Assert(!locked);
 
 				/*
-				 * Already know that we're going to need to do EPQ, so
-				 * fetch tuple directly into the right slot.
+				 * ExecUpdateAct() has already locked the old tuple for
+				 * us. Now we need to copy it to the right slot.
 				 */
 				inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
 											 resultRelInfo->ri_RangeTableIndex);
 
-				result = table_tuple_lock(resultRelationDesc, tupleid,
-										  estate->es_snapshot,
-										  inputslot, estate->es_output_cid,
-										  updateCxt.lockmode, LockWaitBlock,
-										  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-										  &context->tmfd);
-
-				switch (result)
-				{
-					case TM_Ok:
-						Assert(context->tmfd.traversed);
-
-						/* Make sure ri_oldTupleSlot is initialized. */
-						if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
-							ExecInitUpdateProjection(context->mtstate,
-													 resultRelInfo);
-
-						/*
-						 * Save the locked tuple for further calculation
-						 * of the new tuple.
-						 */
-						oldSlot = resultRelInfo->ri_oldTupleSlot;
-						ExecCopySlot(oldSlot, inputslot);
-						ExecMaterializeSlot(oldSlot);
-
-						epqslot = EvalPlanQual(context->epqstate,
-											   resultRelationDesc,
-											   resultRelInfo->ri_RangeTableIndex,
-											   inputslot);
-						if (TupIsNull(epqslot))
-							/* Tuple not passing quals anymore, exiting... */
-							return NULL;
-
-						slot = ExecGetUpdateNewTuple(resultRelInfo,
-													 epqslot, oldSlot);
-						goto redo_act;
-
-					case TM_Deleted:
-						/* tuple already deleted; nothing to do */
-						return NULL;
-
-					case TM_SelfModified:
-
-						/*
-						 * This can be reached when following an update
-						 * chain from a tuple updated by another session,
-						 * reaching a tuple that was already updated in
-						 * this transaction. If previously modified by
-						 * this command, ignore the redundant update,
-						 * otherwise error out.
-						 *
-						 * See also TM_SelfModified response to
-						 * table_tuple_update() above.
-						 */
-						if (context->tmfd.cmax != estate->es_output_cid)
-							ereport(ERROR,
-									(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
-									 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
-									 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
-						return NULL;
+				/* Make sure ri_oldTupleSlot is initialized. */
+				if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+					ExecInitUpdateProjection(context->mtstate,
+											 resultRelInfo);
 
-					default:
-						/* see table_tuple_lock call in ExecDelete() */
-						elog(ERROR, "unexpected table_tuple_lock status: %u",
-							 result);
-						return NULL;
-				}
+				/*
+				 * Save the locked tuple for further calculation of the
+				 * new tuple.
+				 */
+				oldSlot = resultRelInfo->ri_oldTupleSlot;
+				ExecCopySlot(oldSlot, inputslot);
+				ExecMaterializeSlot(oldSlot);
+				Assert(context->tmfd.traversed);
+
+				epqslot = EvalPlanQual(context->epqstate,
+									   resultRelationDesc,
+									   resultRelInfo->ri_RangeTableIndex,
+									   inputslot);
+				if (TupIsNull(epqslot))
+					/* Tuple not passing quals anymore, exiting... */
+					return NULL;
+				slot = ExecGetUpdateNewTuple(resultRelInfo,
											 epqslot, oldSlot);
+				goto redo_act;
 			}
 
 			break;
@@ -2710,7 +2665,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
 	*returning = ExecUpdate(context, resultRelInfo, conflictTid, NULL,
 							resultRelInfo->ri_onConflict->oc_ProjSlot,
-							canSetTag);
+							canSetTag, true);
 
 	/*
 	 * Clear out existing tuple, as there might not be another conflict among
@@ -2913,7 +2868,7 @@ lmerge_matched:
 						break;	/* concurrent update/delete */
 				}
 				result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL,
-									   newslot, false, &updateCxt);
+									   newslot, false, false, &updateCxt);
 				if (result == TM_Ok && updateCxt.updated)
 				{
 					ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
@@ -2931,7 +2886,8 @@ lmerge_matched:
 						return true;	/* "do nothing" */
 					break;		/* concurrent update/delete */
 				}
-				result = ExecDeleteAct(context, resultRelInfo, tupleid, false);
+				result = ExecDeleteAct(context, resultRelInfo, tupleid,
+									   false, false);
 				if (result == TM_Ok)
 				{
 					ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
@@ -3837,7 +3793,7 @@ ExecModifyTable(PlanState *pstate)
 
 				/* Now apply the update. */
 				slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
-								  slot, node->canSetTag);
+								  slot, node->canSetTag, false);
 				break;
 
 			case CMD_DELETE: