aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2015-02-03 23:24:38 +0100
committerAndres Freund <andres@anarazel.de>2015-02-03 23:24:38 +0100
commit6753333f55e1d9bcb9da4323556b456583624a07 (patch)
treebf2082418d16dae3371fda014fa78734ce9dc7cd
parent6647248e3708843be93c7ca670cd219fe8e61026 (diff)
downloadpostgresql-6753333f55e1d9bcb9da4323556b456583624a07.tar.gz
postgresql-6753333f55e1d9bcb9da4323556b456583624a07.zip
Move deadlock and other interrupt handling in proc.c out of signal handlers.
Deadlock checking was performed inside signal handlers up to now. While it's a remarkable feat to have made this work reliably, it's quite complex to understand why that is the case. Partially it worked due to the assumption that semaphores are signal safe - which is not actually documented to be the case for sysv semaphores. The reason we had to rely on performing this work inside signal handlers is that semaphores aren't guaranteed to be interruptable by signals on all platforms. But now that latches provide a somewhat similar API, which actually has the guarantee of being interruptible, we can avoid doing so. Signalling between ProcSleep, ProcWakeup, ProcWaitForSignal and ProcSendSignal is now done using latches. This increases the likelihood of spurious wakeups. As spurious wakeup already were possible and aren't likely to be frequent enough to be an actual problem, this seems acceptable. This change would allow for further simplification of the deadlock checking, now that it doesn't have to run in a signal handler. But even if I were motivated to do so right now, it would still be better to do that separately. Such a cleanup shouldn't have to be reviewed a the same time as the more fundamental changes in this commit. There is one possible usability regression due to this commit. Namely it is more likely than before that log_lock_waits messages are output more than once. Reviewed-By: Heikki Linnakangas
-rw-r--r--src/backend/storage/lmgr/proc.c131
-rw-r--r--src/backend/utils/init/postinit.c2
-rw-r--r--src/include/storage/proc.h2
3 files changed, 63 insertions, 72 deletions
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 24636223b1d..33b2f69bf87 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -80,13 +80,15 @@ PGPROC *PreparedXactProcs = NULL;
/* If we are waiting for a lock, this points to the associated LOCALLOCK */
static LOCALLOCK *lockAwaited = NULL;
-/* Mark this volatile because it can be changed by signal handler */
-static volatile DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
+static DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
+/* Is a deadlock check pending? */
+static volatile sig_atomic_t got_deadlock_timeout;
static void RemoveProcFromArray(int code, Datum arg);
static void ProcKill(int code, Datum arg);
static void AuxiliaryProcKill(int code, Datum arg);
+static void CheckDeadLock(void);
/*
@@ -705,16 +707,6 @@ LockErrorCleanup(void)
LWLockRelease(partitionLock);
- /*
- * We used to do PGSemaphoreReset() here to ensure that our proc's wait
- * semaphore is reset to zero. This prevented a leftover wakeup signal
- * from remaining in the semaphore if someone else had granted us the lock
- * we wanted before we were able to remove ourselves from the wait-list.
- * However, now that ProcSleep loops until waitStatus changes, a leftover
- * wakeup signal isn't harmful, and it seems not worth expending cycles to
- * get rid of a signal that most likely isn't there.
- */
-
RESUME_INTERRUPTS();
}
@@ -942,9 +934,6 @@ ProcQueueInit(PROC_QUEUE *queue)
* we release the partition lock.
*
* NOTES: The process queue is now a priority queue for locking.
- *
- * P() on the semaphore should put us to sleep. The process
- * semaphore is normally zero, so when we try to acquire it, we sleep.
*/
int
ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
@@ -1084,6 +1073,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
/* Reset deadlock_state before enabling the timeout handler */
deadlock_state = DS_NOT_YET_CHECKED;
+ got_deadlock_timeout = false;
/*
* Set timer so we can wake up after awhile and check for a deadlock. If a
@@ -1113,32 +1103,37 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
/*
- * If someone wakes us between LWLockRelease and PGSemaphoreLock,
- * PGSemaphoreLock will not block. The wakeup is "saved" by the semaphore
- * implementation. While this is normally good, there are cases where a
- * saved wakeup might be leftover from a previous operation (for example,
- * we aborted ProcWaitForSignal just before someone did ProcSendSignal).
- * So, loop to wait again if the waitStatus shows we haven't been granted
- * nor denied the lock yet.
+ * If somebody wakes us between LWLockRelease and WaitLatch, the latch
+ * will not wait. But a set latch does not necessarily mean that the lock
+ * is free now, as there are many other sources for latch sets than
+ * somebody releasing the lock.
*
- * We pass interruptOK = true, which eliminates a window in which
- * cancel/die interrupts would be held off undesirably. This is a promise
- * that we don't mind losing control to a cancel/die interrupt here. We
- * don't, because we have no shared-state-change work to do after being
- * granted the lock (the grantor did it all). We do have to worry about
- * canceling the deadlock timeout and updating the locallock table, but if
- * we lose control to an error, LockErrorCleanup will fix that up.
+ * We process interrupts whenever the latch has been set, so cancel/die
+ * interrupts are processed quickly. This means we must not mind losing
+ * control to a cancel/die interrupt here. We don't, because we have no
+ * shared-state-change work to do after being granted the lock (the
+ * grantor did it all). We do have to worry about canceling the deadlock
+ * timeout and updating the locallock table, but if we lose control to an
+ * error, LockErrorCleanup will fix that up.
*/
do
{
- PGSemaphoreLock(&MyProc->sem, true);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ ResetLatch(MyLatch);
+ /* check for deadlocks first, as that's probably log-worthy */
+ if (got_deadlock_timeout)
+ {
+ CheckDeadLock();
+ got_deadlock_timeout = false;
+ }
+ CHECK_FOR_INTERRUPTS();
/*
* waitStatus could change from STATUS_WAITING to something else
* asynchronously. Read it just once per loop to prevent surprising
* behavior (such as missing log messages).
*/
- myWaitStatus = MyProc->waitStatus;
+ myWaitStatus = *((volatile int *) &MyProc->waitStatus);
/*
* If we are not deadlocked, but are waiting on an autovacuum-induced
@@ -1435,7 +1430,7 @@ ProcWakeup(PGPROC *proc, int waitStatus)
proc->waitStatus = waitStatus;
/* And awaken it */
- PGSemaphoreUnlock(&proc->sem);
+ SetLatch(&proc->procLatch);
return retProc;
}
@@ -1502,17 +1497,13 @@ ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
/*
* CheckDeadLock
*
- * We only get to this routine if the DEADLOCK_TIMEOUT fired
- * while waiting for a lock to be released by some other process. Look
- * to see if there's a deadlock; if not, just return and continue waiting.
- * (But signal ProcSleep to log a message, if log_lock_waits is true.)
- * If we have a real deadlock, remove ourselves from the lock's wait queue
- * and signal an error to ProcSleep.
- *
- * NB: this is run inside a signal handler, so be very wary about what is done
- * here or in called routines.
+ * We only get to this routine, if DEADLOCK_TIMEOUT fired while waiting for a
+ * lock to be released by some other process. Check if there's a deadlock; if
+ * not, just return. (But signal ProcSleep to log a message, if
+ * log_lock_waits is true.) If we have a real deadlock, remove ourselves from
+ * the lock's wait queue and signal an error to ProcSleep.
*/
-void
+static void
CheckDeadLock(void)
{
int i;
@@ -1571,12 +1562,6 @@ CheckDeadLock(void)
RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
/*
- * Unlock my semaphore so that the interrupted ProcSleep() call can
- * finish.
- */
- PGSemaphoreUnlock(&MyProc->sem);
-
- /*
* We're done here. Transaction abort caused by the error that
* ProcSleep will raise will cause any other locks we hold to be
* released, thus allowing other processes to wake up; we don't need
@@ -1587,19 +1572,6 @@ CheckDeadLock(void)
* RemoveFromWaitQueue took care of waking up any such processes.
*/
}
- else if (log_lock_waits || deadlock_state == DS_BLOCKED_BY_AUTOVACUUM)
- {
- /*
- * Unlock my semaphore so that the interrupted ProcSleep() call can
- * print the log message (we daren't do it here because we are inside
- * a signal handler). It will then sleep again until someone releases
- * the lock.
- *
- * If blocked by autovacuum, this wakeup will enable ProcSleep to send
- * the canceling signal to the autovacuum worker.
- */
- PGSemaphoreUnlock(&MyProc->sem);
- }
/*
* And release locks. We do this in reverse order for two reasons: (1)
@@ -1613,22 +1585,39 @@ check_done:
LWLockRelease(LockHashPartitionLockByIndex(i));
}
+/*
+ * CheckDeadLockAlert - Handle the expiry of deadlock_timeout.
+ *
+ * NB: Runs inside a signal handler, be careful.
+ */
+void
+CheckDeadLockAlert(void)
+{
+ int save_errno = errno;
+
+ got_deadlock_timeout = true;
+ /*
+ * Have to set the latch again, even if handle_sig_alarm already did. Back
+ * then got_deadlock_timeout wasn't yet set... It's unlikely that this
+ * ever would be a problem, but setting a set latch again is cheap.
+ */
+ SetLatch(MyLatch);
+ errno = save_errno;
+}
/*
* ProcWaitForSignal - wait for a signal from another backend.
*
- * This can share the semaphore normally used for waiting for locks,
- * since a backend could never be waiting for a lock and a signal at
- * the same time. As with locks, it's OK if the signal arrives just
- * before we actually reach the waiting state. Also as with locks,
- * it's necessary that the caller be robust against bogus wakeups:
- * always check that the desired state has occurred, and wait again
- * if not. This copes with possible "leftover" wakeups.
+ * As this uses the generic process latch the caller has to be robust against
+ * unrelated wakeups: Always check that the desired state has occurred, and
+ * wait again if not.
*/
void
ProcWaitForSignal(void)
{
- PGSemaphoreLock(&MyProc->sem, true);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ ResetLatch(MyLatch);
+ CHECK_FOR_INTERRUPTS();
}
/*
@@ -1664,5 +1653,7 @@ ProcSendSignal(int pid)
proc = BackendPidGetProc(pid);
if (proc != NULL)
- PGSemaphoreUnlock(&proc->sem);
+ {
+ SetLatch(&proc->procLatch);
+ }
}
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 66aa7ea61b6..1e646a1360a 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -578,7 +578,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
*/
if (!bootstrap)
{
- RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLock);
+ RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLockAlert);
RegisterTimeout(STATEMENT_TIMEOUT, StatementTimeoutHandler);
RegisterTimeout(LOCK_TIMEOUT, LockTimeoutHandler);
}
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index d194f38b468..e807a2e020d 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -251,7 +251,7 @@ extern void ProcQueueInit(PROC_QUEUE *queue);
extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
-extern void CheckDeadLock(void);
+extern void CheckDeadLockAlert(void);
extern bool IsWaitingForLock(void);
extern void LockErrorCleanup(void);