1 files changed, 114 insertions, 29 deletions
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index cd54ae42793..77e81bdbc0f 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -42,6 +42,10 @@ int			max_standby_streaming_delay = 30 * 1000;
 
 static HTAB *RecoveryLockLists;
 
+/* Flags set by timeout handlers */
+static volatile sig_atomic_t got_standby_deadlock_timeout = false;
+static volatile sig_atomic_t got_standby_lock_timeout = false;
+
 static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
 												   ProcSignalReason reason,
 												   uint32 wait_event_info,
@@ -397,8 +401,10 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
  * lock.  As we are already queued to be granted the lock, no new lock
  * requests conflicting with ours will be granted in the meantime.
  *
- * Deadlocks involving the Startup process and an ordinary backend process
- * will be detected by the deadlock detector within the ordinary backend.
+ * We also must check for deadlocks involving the Startup process and
+ * hot-standby backend processes. If deadlock_timeout is reached in
+ * this function, all the backends holding the conflicting locks are
+ * requested to check themselves for deadlocks.
  */
 void
 ResolveRecoveryConflictWithLock(LOCKTAG locktag)
@@ -409,7 +415,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 
 	ltime = GetStandbyLimitTime();
 
-	if (GetCurrentTimestamp() >= ltime)
+	if (GetCurrentTimestamp() >= ltime && ltime != 0)
 	{
 		/*
 		 * We're already behind, so clear a path as quickly as possible.
@@ -431,26 +437,85 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 	else
 	{
 		/*
-		 * Wait (or wait again) until ltime
+		 * Wait (or wait again) until ltime, and check for deadlocks as well
+		 * if we will be waiting longer than deadlock_timeout
 		 */
-		EnableTimeoutParams timeouts[1];
+		EnableTimeoutParams timeouts[2];
+		int			cnt = 0;
+
+		if (ltime != 0)
+		{
+			got_standby_lock_timeout = false;
+			timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
+			timeouts[cnt].type = TMPARAM_AT;
+			timeouts[cnt].fin_time = ltime;
+			cnt++;
+		}
 
-		timeouts[0].id = STANDBY_LOCK_TIMEOUT;
-		timeouts[0].type = TMPARAM_AT;
-		timeouts[0].fin_time = ltime;
-		enable_timeouts(timeouts, 1);
+		got_standby_deadlock_timeout = false;
+		timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
+		timeouts[cnt].type = TMPARAM_AFTER;
+		timeouts[cnt].delay_ms = DeadlockTimeout;
+		cnt++;
+
+		enable_timeouts(timeouts, cnt);
 	}
 
 	/* Wait to be signaled by the release of the Relation Lock */
 	ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
 
 	/*
+	 * Exit if ltime is reached. Then all the backends holding conflicting
+	 * locks will be canceled in the next ResolveRecoveryConflictWithLock()
+	 * call.
+	 */
+	if (got_standby_lock_timeout)
+		goto cleanup;
+
+	if (got_standby_deadlock_timeout)
+	{
+		VirtualTransactionId *backends;
+
+		backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
+
+		/* Quick exit if there's no work to be done */
+		if (!VirtualTransactionIdIsValid(*backends))
+			goto cleanup;
+
+		/*
+		 * Send signals to all the backends holding the conflicting locks, to
+		 * ask them to check themselves for deadlocks.
+		 */
+		while (VirtualTransactionIdIsValid(*backends))
+		{
+			SignalVirtualTransaction(*backends,
+									 PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+									 false);
+			backends++;
+		}
+
+		/*
+		 * Wait again here to be signaled by the release of the Relation Lock,
+		 * so that the next ResolveRecoveryConflictWithLock() call does not hit
+		 * deadlock_timeout and send a request for deadlock checks again.
+		 * Otherwise the request continues to be sent every deadlock_timeout
+		 * until the relation locks are released or ltime is reached.
+		 */
+		got_standby_deadlock_timeout = false;
+		ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
+	}
+
+cleanup:
+
+	/*
 	 * Clear any timeout requests established above.  We assume here that the
 	 * Startup process doesn't have any other outstanding timeouts than those
 	 * used by this function. If that stops being true, we could cancel the
 	 * timeouts individually, but that'd be slower.
 	 */
 	disable_all_timeouts(false);
+	got_standby_lock_timeout = false;
+	got_standby_deadlock_timeout = false;
 }
 
 /*
@@ -489,15 +554,7 @@ ResolveRecoveryConflictWithBufferPin(void)
 
 	ltime = GetStandbyLimitTime();
 
-	if (ltime == 0)
-	{
-		/*
-		 * We're willing to wait forever for conflicts, so set timeout for
-		 * deadlock check only
-		 */
-		enable_timeout_after(STANDBY_DEADLOCK_TIMEOUT, DeadlockTimeout);
-	}
-	else if (GetCurrentTimestamp() >= ltime)
+	if (GetCurrentTimestamp() >= ltime && ltime != 0)
 	{
 		/*
 		 * We're already behind, so clear a path as quickly as possible.
@@ -511,14 +568,23 @@ ResolveRecoveryConflictWithBufferPin(void)
 		 * waiting longer than deadlock_timeout
 		 */
 		EnableTimeoutParams timeouts[2];
+		int			cnt = 0;
 
-		timeouts[0].id = STANDBY_TIMEOUT;
-		timeouts[0].type = TMPARAM_AT;
-		timeouts[0].fin_time = ltime;
-		timeouts[1].id = STANDBY_DEADLOCK_TIMEOUT;
-		timeouts[1].type = TMPARAM_AFTER;
-		timeouts[1].delay_ms = DeadlockTimeout;
-		enable_timeouts(timeouts, 2);
+		if (ltime != 0)
+		{
+			timeouts[cnt].id = STANDBY_TIMEOUT;
+			timeouts[cnt].type = TMPARAM_AT;
+			timeouts[cnt].fin_time = ltime;
+			cnt++;
+		}
+
+		got_standby_deadlock_timeout = false;
+		timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
+		timeouts[cnt].type = TMPARAM_AFTER;
+		timeouts[cnt].delay_ms = DeadlockTimeout;
+		cnt++;
+
+		enable_timeouts(timeouts, cnt);
 	}
 
 	/*
@@ -531,6 +597,25 @@ ResolveRecoveryConflictWithBufferPin(void)
 	 */
 	ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
 
+	if (got_standby_deadlock_timeout)
+	{
+		/*
+		 * Send out a request for hot-standby backends to check themselves for
+		 * deadlocks.
+		 *
+		 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
+		 * to be signaled by UnpinBuffer() again and send a request for
+		 * deadlocks check if deadlock_timeout happens. This causes the
+		 * request to continue to be sent every deadlock_timeout until the
+		 * buffer is unpinned or ltime is reached. This would increase the
+		 * workload in the startup process and backends. In practice it may
+		 * not be so harmful because the period that the buffer is kept pinned
+		 * is basically not so long. But should we fix this?
+		 */
+		SendRecoveryConflictWithBufferPin(
+										  PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+	}
+
 	/*
 	 * Clear any timeout requests established above.  We assume here that the
 	 * Startup process doesn't have any other timeouts than what this function
@@ -538,6 +623,7 @@ ResolveRecoveryConflictWithBufferPin(void)
 	 * individually, but that'd be slower.
 	 */
 	disable_all_timeouts(false);
+	got_standby_deadlock_timeout = false;
 }
 
 static void
@@ -597,13 +683,12 @@ CheckRecoveryConflictDeadlock(void)
 
 /*
  * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT
- * occurs before STANDBY_TIMEOUT.  Send out a request for hot-standby
- * backends to check themselves for deadlocks.
+ * occurs before STANDBY_TIMEOUT.
  */
 void
 StandbyDeadLockHandler(void)
 {
-	SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+	got_standby_deadlock_timeout = true;
 }
 
 /*
@@ -622,11 +707,11 @@ StandbyTimeoutHandler(void)
 
 /*
  * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
- * This doesn't need to do anything, simply waking up is enough.
  */
 void
 StandbyLockTimeoutHandler(void)
 {
+	got_standby_lock_timeout = true;
 }
 
 /*