author     Alvaro Herrera <alvherre@alvh.no-ip.org>  2019-06-13 17:28:24 -0400
committer  Alvaro Herrera <alvherre@alvh.no-ip.org>  2019-06-13 17:28:24 -0400
commit     85600b7b5da42c5166eb1188c173beb3cc356178 (patch)
tree       5ff78eeb0df1fb9a86e8000ca2755399900d8776 /src/backend/access/heap/heapam.c
parent     07accce500d81957d059761a1055702d147b29a7 (diff)
Avoid spurious deadlocks when upgrading a tuple lock
When two (or more) transactions are waiting for transaction T1 to release a
tuple-level lock, and transaction T1 upgrades its lock to a higher level, a
spurious deadlock can be reported among the waiting transactions when T1
finishes.  The simplest example case seems to be:

T1: select id from job where name = 'a' for key share;
Y: select id from job where name = 'a' for update; -- starts waiting for T1
Z: select id from job where name = 'a' for key share;
T1: update job set name = 'b' where id = 1;
Z: update job set name = 'c' where id = 1; -- starts waiting for T1
T1: rollback;

At this point, transaction Y is rolled back on account of a deadlock: Y holds
the heavyweight tuple lock and is waiting for the Xmax to be released, while Z
holds part of the multixact and tries to acquire the heavyweight lock (per
protocol) and goes to sleep; once T1 releases its part of the multixact, Z is
awakened only to be put back to sleep on the heavyweight lock that Y is
holding while sleeping.  Kaboom.

This can be avoided by having Z skip the heavyweight lock acquisition.  As far
as I can see, the biggest downside is that if there are multiple Z
transactions, the order in which they resume after T1 finishes is not
guaranteed.

Backpatch to 9.6.  The patch applies cleanly on 9.5, but the new tests don't
work there (because isolationtester is not smart enough), so I'm not going to
risk it.

Author: Oleksii Kliukin
Discussion: https://postgr.es/m/B9C9D7CD-EB94-4635-91B6-E558ACEC0EC3@hintbits.com
Diffstat (limited to 'src/backend/access/heap/heapam.c')
-rw-r--r--  src/backend/access/heap/heapam.c  84
1 file changed, 63 insertions, 21 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 698d08d1d39..e37df8c023d 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -120,7 +120,7 @@ static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
static TransactionId MultiXactIdGetUpdateXid(TransactionId xmax,
uint16 t_infomask);
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask,
- LockTupleMode lockmode);
+ LockTupleMode lockmode, bool *current_is_member);
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask,
Relation rel, ItemPointer ctid, XLTW_Oper oper,
int *remaining);
@@ -3161,15 +3161,20 @@ l1:
*/
if (infomask & HEAP_XMAX_IS_MULTI)
{
- /* wait for multixact */
+ bool current_is_member = false;
+
if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
- LockTupleExclusive))
+ LockTupleExclusive, &current_is_member))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- /* acquire tuple lock, if necessary */
- heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,
- LockWaitBlock, &have_tuple_lock);
+ /*
+ * Acquire the lock, if necessary (but skip it when we're
+ * requesting a lock and already have one; avoids deadlock).
+ */
+ if (!current_is_member)
+ heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,
+ LockWaitBlock, &have_tuple_lock);
/* wait for multixact */
MultiXactIdWait((MultiXactId) xwait, MultiXactStatusUpdate, infomask,
@@ -3768,15 +3773,20 @@ l2:
{
TransactionId update_xact;
int remain;
+ bool current_is_member = false;
if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
- *lockmode))
+ *lockmode, &current_is_member))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- /* acquire tuple lock, if necessary */
- heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
- LockWaitBlock, &have_tuple_lock);
+ /*
+ * Acquire the lock, if necessary (but skip it when we're
+ * requesting a lock and already have one; avoids deadlock).
+ */
+ if (!current_is_member)
+ heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
+ LockWaitBlock, &have_tuple_lock);
/* wait for multixact */
MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
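
Both hunks above make the same change, in heap_delete and heap_update: before
sleeping on the multixact, the caller now learns from DoesMultiXactIdConflict
whether the current transaction is itself a member, and takes the heavyweight
tuple lock only when it is not.  What follows is a minimal standalone sketch
of that control flow; does_multixact_conflict, acquire_tuple_lock and
multixact_wait are hypothetical stand-ins, not the PostgreSQL sources:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int MultiXactId;

/*
 * Stub: pretend the current transaction is a member of the multixact (it
 * holds a weaker lock it is now upgrading) and that some other member
 * conflicts with the requested lock mode.
 */
static bool
does_multixact_conflict(MultiXactId multi, bool *current_is_member)
{
	(void) multi;
	if (current_is_member != NULL)
		*current_is_member = true;
	return true;
}

static void
acquire_tuple_lock(void)
{
	puts("LockTuple: heavyweight tuple lock acquired");
}

static void
multixact_wait(MultiXactId multi)
{
	printf("MultiXactIdWait: sleeping on multixact %u\n", multi);
}

int
main(void)
{
	MultiXactId xwait = 42;
	bool		current_is_member = false;

	if (does_multixact_conflict(xwait, &current_is_member))
	{
		/*
		 * The patched pattern: take the heavyweight tuple lock only if
		 * we are not already a multixact member; acquiring it while
		 * holding a weaker lock is what produced the spurious deadlock.
		 */
		if (!current_is_member)
			acquire_tuple_lock();

		/* Still wait for the conflicting members to finish. */
		multixact_wait(xwait);
	}
	return 0;
}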
@@ -4746,6 +4756,7 @@ l3:
uint16 infomask;
uint16 infomask2;
bool require_sleep;
+ bool skip_tuple_lock;
ItemPointerData t_ctid;
/* must copy state data before unlocking buffer */
@@ -4771,6 +4782,7 @@ l3:
if (first_time)
{
first_time = false;
+ skip_tuple_lock = false;
if (infomask & HEAP_XMAX_IS_MULTI)
{
@@ -4799,6 +4811,21 @@ l3:
result = HeapTupleMayBeUpdated;
goto out_unlocked;
}
+ else
+ {
+ /*
+ * Disable acquisition of the heavyweight tuple lock.
+ * Otherwise, when promoting a weaker lock, we might
+ * deadlock with another locker that has acquired the
+ * heavyweight tuple lock and is waiting for our
+ * transaction to finish.
+ *
+ * Note that in this case we still need to wait for
+ * the multixact if required, to avoid acquiring
+ * conflicting locks.
+ */
+ skip_tuple_lock = true;
+ }
}
if (members)
@@ -4953,7 +4980,7 @@ l3:
if (infomask & HEAP_XMAX_IS_MULTI)
{
if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
- mode))
+ mode, NULL))
{
/*
* No conflict, but if the xmax changed under us in the
@@ -5030,13 +5057,15 @@ l3:
/*
* Acquire tuple lock to establish our priority for the tuple, or
* die trying. LockTuple will release us when we are next-in-line
- * for the tuple. We must do this even if we are share-locking.
+ * for the tuple. We must do this even if we are share-locking,
+ * but not if we already have a weaker lock on the tuple.
*
* If we are forced to "start over" below, we keep the tuple lock;
* this arranges that we stay at the head of the line while
* rechecking tuple state.
*/
- if (!heap_acquire_tuplock(relation, tid, mode, wait_policy,
+ if (!skip_tuple_lock &&
+ !heap_acquire_tuplock(relation, tid, mode, wait_policy,
&have_tuple_lock))
{
/*
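
In heap_lock_tuple the same decision must survive the function's "goto l3"
retries, so it is remembered in skip_tuple_lock: set once, on the first pass,
if the current transaction turns up among the multixact members, and consulted
each time the heavyweight lock would otherwise be taken.  A hedged sketch of
that flag's life cycle, with the retry loop reduced to a counter and the lock
calls replaced by prints:

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
	bool	first_time = true;
	bool	skip_tuple_lock = false;	/* decided once, reused on retries */
	int		attempts = 0;

l3:	/* stands in for heap_lock_tuple's retry label */
	if (first_time)
	{
		first_time = false;

		/*
		 * Pretend the first pass found the current transaction among
		 * the multixact members: we already hold a weaker tuple lock.
		 */
		skip_tuple_lock = true;
	}

	/* Heavyweight lock only when we do not already hold a weaker one. */
	if (!skip_tuple_lock)
		puts("heap_acquire_tuplock(...)");
	else
		puts("skipping heavyweight tuple lock");

	if (++attempts < 2)
		goto l3;	/* e.g. xmax changed under us: start over */
	return 0;
}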
@@ -7214,10 +7243,13 @@ HeapTupleGetUpdateXid(HeapTupleHeader tuple)
* tuple lock of the given strength?
*
* The passed infomask pairs up with the given multixact in the tuple header.
+ *
+ * If current_is_member is not NULL, it is set to 'true' if the current
+ * transaction is a member of the given multixact.
*/
static bool
DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask,
- LockTupleMode lockmode)
+ LockTupleMode lockmode, bool *current_is_member)
{
int nmembers;
MultiXactMember *members;
@@ -7238,15 +7270,24 @@ DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask,
TransactionId memxid;
LOCKMODE memlockmode;
- memlockmode = LOCKMODE_from_mxstatus(members[i].status);
+ if (result && (current_is_member == NULL || *current_is_member))
+ break;
- /* ignore members that don't conflict with the lock we want */
- if (!DoLockModesConflict(memlockmode, wanted))
- continue;
+ memlockmode = LOCKMODE_from_mxstatus(members[i].status);
- /* ignore members from current xact */
+ /* ignore members from current xact (but track their presence) */
memxid = members[i].xid;
if (TransactionIdIsCurrentTransactionId(memxid))
+ {
+ if (current_is_member != NULL)
+ *current_is_member = true;
+ continue;
+ }
+ else if (result)
+ continue;
+
+ /* ignore members that don't conflict with the lock we want */
+ if (!DoLockModesConflict(memlockmode, wanted))
continue;
if (ISUPDATE_from_mxstatus(members[i].status))
@@ -7265,10 +7306,11 @@ DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask,
/*
* Whatever remains are either live lockers that conflict with our
* wanted lock, and updaters that are not aborted. Those conflict
- * with what we want, so return true.
+ * with what we want. Set up to return true, but keep going to
+ * look for the current transaction among the multixact members,
+ * if needed.
*/
result = true;
- break;
}
pfree(members);
}
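
After the rework, the member scan no longer breaks at the first conflict.
Once result is true it keeps going, but only to learn whether the current
transaction is among the members, and it stops early once both answers are
known (or as soon as a conflict is found, when the caller passed NULL for
current_is_member).  A self-contained sketch of that scan over a toy member
array; Member, does_conflict and the conflicts field are illustrative
stand-ins, not PostgreSQL types:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef unsigned int TransactionId;

typedef struct
{
	TransactionId xid;
	bool		conflicts;		/* stands in for DoLockModesConflict() */
} Member;

static const TransactionId current_xid = 7;

/*
 * Sketch of the patched scan: report whether any member conflicts and,
 * if asked, whether current_xid is among the members.
 */
static bool
does_conflict(const Member *members, int n, bool *current_is_member)
{
	bool	result = false;

	for (int i = 0; i < n; i++)
	{
		/* Both questions answered: nothing left to learn. */
		if (result && (current_is_member == NULL || *current_is_member))
			break;

		if (members[i].xid == current_xid)
		{
			if (current_is_member != NULL)
				*current_is_member = true;	/* track, but never conflict */
			continue;
		}
		else if (result)
			continue;			/* only membership left to discover */

		if (members[i].conflicts)
			result = true;		/* keep scanning; don't break */
	}
	return result;
}

int
main(void)
{
	Member	members[] = {{3, true}, {7, false}, {9, false}};
	bool	is_member = false;
	bool	conflict = does_conflict(members, 3, &is_member);

	printf("conflict=%d current_is_member=%d\n", conflict, is_member);
	return 0;
}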