aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/transam/xact.c38
-rw-r--r--src/backend/postmaster/autovacuum.c3
-rw-r--r--src/backend/storage/lmgr/proc.c7
-rw-r--r--src/backend/tcop/postgres.c19
-rw-r--r--src/backend/utils/misc/timeout.c56
-rw-r--r--src/include/utils/timeout.h1
6 files changed, 117 insertions, 7 deletions
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index bab048d38a1..b467b5c89d1 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -34,6 +34,7 @@
#include "commands/trigger.h"
#include "executor/spi.h"
#include "libpq/be-fsstubs.h"
+#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/walsender.h"
@@ -52,6 +53,7 @@
#include "utils/memutils.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
+#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "pg_trace.h"
@@ -2297,6 +2299,22 @@ AbortTransaction(void)
LockErrorCleanup();
/*
+ * If any timeout events are still active, make sure the timeout interrupt
+ * is scheduled. This covers possible loss of a timeout interrupt due to
+ * longjmp'ing out of the SIGINT handler (see notes in handle_sig_alarm).
+ * We delay this till after LockErrorCleanup so that we don't uselessly
+ * reschedule lock or deadlock check timeouts.
+ */
+ reschedule_timeouts();
+
+ /*
+ * Re-enable signals, in case we got here by longjmp'ing out of a signal
+ * handler. We do this fairly early in the sequence so that the timeout
+ * infrastructure will be functional if needed while aborting.
+ */
+ PG_SETMASK(&UnBlockSig);
+
+ /*
* check the current transaction state
*/
if (s->state != TRANS_INPROGRESS && s->state != TRANS_PREPARE)
@@ -4222,9 +4240,29 @@ AbortSubTransaction(void)
AbortBufferIO();
UnlockBuffers();
+ /*
+ * Also clean up any open wait for lock, since the lock manager will choke
+ * if we try to wait for another lock before doing this.
+ */
LockErrorCleanup();
/*
+ * If any timeout events are still active, make sure the timeout interrupt
+ * is scheduled. This covers possible loss of a timeout interrupt due to
+ * longjmp'ing out of the SIGINT handler (see notes in handle_sig_alarm).
+ * We delay this till after LockErrorCleanup so that we don't uselessly
+ * reschedule lock or deadlock check timeouts.
+ */
+ reschedule_timeouts();
+
+ /*
+ * Re-enable signals, in case we got here by longjmp'ing out of a signal
+ * handler. We do this fairly early in the sequence so that the timeout
+ * infrastructure will be functional if needed while aborting.
+ */
+ PG_SETMASK(&UnBlockSig);
+
+ /*
* check the current transaction state
*/
ShowTransactionState("AbortSubTransaction");
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 8c14d0f4e44..88ecd3834b3 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -493,9 +493,8 @@ AutoVacLauncherMain(int argc, char *argv[])
HOLD_INTERRUPTS();
/* Forget any pending QueryCancel or timeout request */
- QueryCancelPending = false;
disable_all_timeouts(false);
- QueryCancelPending = false; /* again in case timeout occurred */
+ QueryCancelPending = false; /* second to avoid race condition */
/* Report the error to the server log */
EmitErrorReport();
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 222251df659..122afb24a8b 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1266,7 +1266,10 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
} while (myWaitStatus == STATUS_WAITING);
/*
- * Disable the timers, if they are still running
+ * Disable the timers, if they are still running. As in LockErrorCleanup,
+ * we must preserve the LOCK_TIMEOUT indicator flag: if a lock timeout has
+ * already caused QueryCancelPending to become set, we want the cancel to
+ * be reported as a lock timeout, not a user cancel.
*/
if (LockTimeout > 0)
{
@@ -1275,7 +1278,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
timeouts[0].id = DEADLOCK_TIMEOUT;
timeouts[0].keep_indicator = false;
timeouts[1].id = LOCK_TIMEOUT;
- timeouts[1].keep_indicator = false;
+ timeouts[1].keep_indicator = true;
disable_timeouts(timeouts, 2);
}
else
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 3d74654c827..e6428b69f43 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3803,6 +3803,13 @@ PostgresMain(int argc, char *argv[],
* always active, we have at least some chance of recovering from an error
* during error recovery. (If we get into an infinite loop thereby, it
* will soon be stopped by overflow of elog.c's internal state stack.)
+ *
+ * Note that we use sigsetjmp(..., 1), so that this function's signal mask
+ * (to wit, UnBlockSig) will be restored when longjmp'ing to here. This
+ * is essential in case we longjmp'd out of a signal handler on a platform
+ * where that leaves the signal blocked. It's not redundant with the
+ * unblock in AbortTransaction() because the latter is only called if we
+ * were inside a transaction.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
@@ -3823,11 +3830,17 @@ PostgresMain(int argc, char *argv[],
/*
* Forget any pending QueryCancel request, since we're returning to
- * the idle loop anyway, and cancel any active timeout requests.
+ * the idle loop anyway, and cancel any active timeout requests. (In
+ * future we might want to allow some timeout requests to survive, but
+ * at minimum it'd be necessary to do reschedule_timeouts(), in case
+ * we got here because of a query cancel interrupting the SIGALRM
+ * interrupt handler.) Note in particular that we must clear the
+ * statement and lock timeout indicators, to prevent any future plain
+ * query cancels from being misreported as timeouts in case we're
+ * forgetting a timeout cancel.
*/
- QueryCancelPending = false;
disable_all_timeouts(false);
- QueryCancelPending = false; /* again in case timeout occurred */
+ QueryCancelPending = false; /* second to avoid race condition */
/*
* Turn off these interrupts too. This is only needed here and not in
diff --git a/src/backend/utils/misc/timeout.c b/src/backend/utils/misc/timeout.c
index 3c3e41eca91..aae947e601b 100644
--- a/src/backend/utils/misc/timeout.c
+++ b/src/backend/utils/misc/timeout.c
@@ -16,6 +16,7 @@
#include <sys/time.h>
+#include "miscadmin.h"
#include "storage/proc.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
@@ -260,6 +261,23 @@ handle_sig_alarm(SIGNAL_ARGS)
int save_errno = errno;
/*
+ * We may be executing while ImmediateInterruptOK is true (e.g., when
+ * mainline is waiting for a lock). If SIGINT or similar arrives while
+ * this code is running, we'd lose control and perhaps leave our data
+ * structures in an inconsistent state. Hold off interrupts to prevent
+ * that.
+ *
+ * Note: it's possible for a SIGINT to interrupt handle_sig_alarm even
+ * before we reach HOLD_INTERRUPTS(); the net effect would be as if the
+ * SIGALRM event had been silently lost. Therefore error recovery must
+ * include some action that will allow any lost interrupt to be
+ * rescheduled. Disabling some or all timeouts is sufficient, or if
+ * that's not appropriate, reschedule_timeouts() can be called. Also, the
+ * signal blocking hazard described below applies here too.
+ */
+ HOLD_INTERRUPTS();
+
+ /*
* SIGALRM is always cause for waking anything waiting on the process
* latch. Cope with MyProc not being there, as the startup process also
* uses this signal handler.
@@ -311,6 +329,20 @@ handle_sig_alarm(SIGNAL_ARGS)
}
}
+ /*
+ * Re-allow query cancel, and then service any cancel request that arrived
+ * meanwhile (this might in particular include a cancel request fired by
+ * one of the timeout handlers).
+ *
+ * Note: a longjmp from here is safe so far as our own data structures are
+ * concerned; but on platforms that block a signal before calling the
+ * handler and then un-block it on return, longjmping out of the signal
+ * handler leaves SIGALRM still blocked. Error cleanup is responsible for
+ * unblocking any blocked signals.
+ */
+ RESUME_INTERRUPTS();
+ CHECK_FOR_INTERRUPTS();
+
errno = save_errno;
}
@@ -388,6 +420,30 @@ RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
}
/*
+ * Reschedule any pending SIGALRM interrupt.
+ *
+ * This can be used during error recovery in case query cancel resulted in loss
+ * of a SIGALRM event (due to longjmp'ing out of handle_sig_alarm before it
+ * could do anything). But note it's not necessary if any of the public
+ * enable_ or disable_timeout functions are called in the same area, since
+ * those all do schedule_alarm() internally if needed.
+ */
+void
+reschedule_timeouts(void)
+{
+ /* For flexibility, allow this to be called before we're initialized. */
+ if (!all_timeouts_initialized)
+ return;
+
+ /* Disable timeout interrupts for safety. */
+ disable_alarm();
+
+ /* Reschedule the interrupt, if any timeouts remain active. */
+ if (num_active_timeouts > 0)
+ schedule_alarm(GetCurrentTimestamp());
+}
+
+/*
* Enable the specified timeout to fire after the specified delay.
*
* Delay is given in milliseconds.
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index 06bcc5a0f3d..759ee24774b 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -67,6 +67,7 @@ typedef struct
/* timeout setup */
extern void InitializeTimeouts(void);
extern TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler);
+extern void reschedule_timeouts(void);
/* timeout operation */
extern void enable_timeout_after(TimeoutId id, int delay_ms);