about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/postmaster/pgarch.c 91
-rw-r--r-- src/backend/postmaster/postmaster.c 99
2 files changed, 138 insertions, 52 deletions
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 6cb32fcb601..e181950c0fe 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -19,7 +19,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.37 2008/01/01 19:45:51 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.38 2008/01/11 00:54:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -77,12 +77,15 @@
* ----------
*/
static time_t last_pgarch_start_time;
+static time_t last_sigterm_time = 0;
/*
* Flags set by interrupt handlers for later service in the main loop.
*/
static volatile sig_atomic_t got_SIGHUP = false;
+static volatile sig_atomic_t got_SIGTERM = false;
static volatile sig_atomic_t wakened = false;
+static volatile sig_atomic_t ready_to_stop = false;
/* ----------
* Local function forward declarations
@@ -95,7 +98,9 @@ static pid_t pgarch_forkexec(void);
NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]);
static void pgarch_exit(SIGNAL_ARGS);
static void ArchSigHupHandler(SIGNAL_ARGS);
+static void ArchSigTermHandler(SIGNAL_ARGS);
static void pgarch_waken(SIGNAL_ARGS);
+static void pgarch_waken_stop(SIGNAL_ARGS);
static void pgarch_MainLoop(void);
static void pgarch_ArchiverCopyLoop(void);
static bool pgarch_archiveXlog(char *xlog);
@@ -236,16 +241,16 @@ PgArchiverMain(int argc, char *argv[])
/*
* Ignore all signals usually bound to some action in the postmaster,
- * except for SIGHUP, SIGUSR1 and SIGQUIT.
+ * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.
*/
pqsignal(SIGHUP, ArchSigHupHandler);
pqsignal(SIGINT, SIG_IGN);
- pqsignal(SIGTERM, SIG_IGN);
+ pqsignal(SIGTERM, ArchSigTermHandler);
pqsignal(SIGQUIT, pgarch_exit);
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, pgarch_waken);
- pqsignal(SIGUSR2, SIG_IGN);
+ pqsignal(SIGUSR2, pgarch_waken_stop);
pqsignal(SIGCHLD, SIG_DFL);
pqsignal(SIGTTIN, SIG_DFL);
pqsignal(SIGTTOU, SIG_DFL);
@@ -267,28 +272,47 @@ PgArchiverMain(int argc, char *argv[])
static void
pgarch_exit(SIGNAL_ARGS)
{
- /*
- * For now, we just nail the doors shut and get out of town. It might
- * seem cleaner to finish up any pending archive copies, but there's a
- * nontrivial risk that init will kill us partway through.
- */
- exit(0);
+ /* SIGQUIT means curl up and die ... */
+ exit(1);
}
-/* SIGHUP: set flag to re-read config file at next convenient time */
+/* SIGHUP signal handler for archiver process */
static void
ArchSigHupHandler(SIGNAL_ARGS)
{
+ /* set flag to re-read config file at next convenient time */
got_SIGHUP = true;
}
+/* SIGTERM signal handler for archiver process */
+static void
+ArchSigTermHandler(SIGNAL_ARGS)
+{
+ /*
+ * The postmaster never sends us SIGTERM, so we assume that this means
+ * that init is trying to shut down the whole system. If we hang around
+ * too long we'll get SIGKILL'd. Set flag to prevent starting any more
+ * archive commands.
+ */
+ got_SIGTERM = true;
+}
+
/* SIGUSR1 signal handler for archiver process */
static void
pgarch_waken(SIGNAL_ARGS)
{
+ /* set flag that there is work to be done */
wakened = true;
}
+/* SIGUSR2 signal handler for archiver process */
+static void
+pgarch_waken_stop(SIGNAL_ARGS)
+{
+ /* set flag to do a final cycle and shut down afterwards */
+ ready_to_stop = true;
+}
+
/*
* pgarch_MainLoop
*
@@ -298,6 +322,7 @@ static void
pgarch_MainLoop(void)
{
time_t last_copy_time = 0;
+ bool time_to_stop;
/*
* We run the copy loop immediately upon entry, in case there are
@@ -309,6 +334,9 @@ pgarch_MainLoop(void)
do
{
+ /* When we get SIGUSR2, we do one more archive cycle, then exit */
+ time_to_stop = ready_to_stop;
+
/* Check for config update */
if (got_SIGHUP)
{
@@ -316,8 +344,26 @@ pgarch_MainLoop(void)
ProcessConfigFile(PGC_SIGHUP);
}
+ /*
+ * If we've gotten SIGTERM, we normally just sit and do nothing until
+ * SIGUSR2 arrives. However, that means a random SIGTERM would
+ * disable archiving indefinitely, which doesn't seem like a good
+ * idea. If more than 60 seconds pass since SIGTERM, exit anyway,
+ * so that the postmaster can start a new archiver if needed.
+ */
+ if (got_SIGTERM)
+ {
+ time_t curtime = time(NULL);
+
+ if (last_sigterm_time == 0)
+ last_sigterm_time = curtime;
+ else if ((unsigned int) (curtime - last_sigterm_time) >=
+ (unsigned int) 60)
+ break;
+ }
+
/* Do what we're here for */
- if (wakened)
+ if (wakened || time_to_stop)
{
wakened = false;
pgarch_ArchiverCopyLoop();
@@ -334,7 +380,8 @@ pgarch_MainLoop(void)
* sleep into 1-second increments, and check for interrupts after each
* nap.
*/
- while (!(wakened || got_SIGHUP))
+ while (!(wakened || ready_to_stop || got_SIGHUP ||
+ !PostmasterIsAlive(true)))
{
time_t curtime;
@@ -344,7 +391,13 @@ pgarch_MainLoop(void)
(unsigned int) PGARCH_AUTOWAKE_INTERVAL)
wakened = true;
}
- } while (PostmasterIsAlive(true));
+
+ /*
+ * The archiver quits either when the postmaster dies (not expected)
+ * or after completing one more archiving cycle after receiving
+ * SIGUSR2.
+ */
+ } while (PostmasterIsAlive(true) && !time_to_stop);
}
/*
@@ -377,8 +430,14 @@ pgarch_ArchiverCopyLoop(void)
for (;;)
{
- /* Abandon processing if we notice our postmaster has died */
- if (!PostmasterIsAlive(true))
+ /*
+ * Do not initiate any more archive commands after receiving
+ * SIGTERM, nor after the postmaster has died unexpectedly.
+ * The first condition is to try to keep from having init
+ * SIGKILL the command, and the second is to avoid conflicts
+ * with another archiver spawned by a newer postmaster.
+ */
+ if (got_SIGTERM || !PostmasterIsAlive(true))
return;
if (pgarch_archiveXlog(xlog))
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index ba6c9b9183e..fe1ed795f91 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.550 2008/01/01 19:45:51 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.551 2008/01/11 00:54:09 tgl Exp $
*
* NOTES
*
@@ -244,7 +244,7 @@ static bool FatalError = false; /* T if recovering from backend crash */
* Notice that this state variable does not distinguish *why* we entered
* PM_WAIT_BACKENDS or later states --- Shutdown and FatalError must be
* consulted to find that out. FatalError is never true in PM_RUN state, nor
- * in PM_SHUTDOWN state (because we don't enter that state when trying to
+ * in PM_SHUTDOWN states (because we don't enter those states when trying to
* recover from a crash). It can be true in PM_STARTUP state, because we
* don't clear it until we've successfully recovered.
*/
@@ -255,6 +255,7 @@ typedef enum
PM_RUN, /* normal "database is alive" state */
PM_WAIT_BACKENDS, /* waiting for live backends to exit */
PM_SHUTDOWN, /* waiting for bgwriter to do shutdown ckpt */
+ PM_SHUTDOWN_2, /* waiting for archiver to finish */
PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
PM_NO_CHILDREN /* all important children have exited */
} PMState;
@@ -1312,12 +1313,8 @@ ServerLoop(void)
start_autovac_launcher = false; /* signal processed */
}
- /*
- * If we have lost the archiver, try to start a new one. We do this
- * even if we are shutting down, to allow archiver to take care of any
- * remaining WAL files.
- */
- if (XLogArchivingActive() && PgArchPID == 0 && pmState >= PM_RUN)
+ /* If we have lost the archiver, try to start a new one */
+ if (XLogArchivingActive() && PgArchPID == 0 && pmState == PM_RUN)
PgArchPID = pgarch_start();
/* If we have lost the stats collector, try to start a new one */
@@ -2175,12 +2172,31 @@ reaper(SIGNAL_ARGS)
* checkpoint. (If for some reason it didn't, recovery will
* occur on next postmaster start.)
*
- * At this point we should have no normal children left (else
- * we'd not be in PM_SHUTDOWN state) but we might have
- * dead_end children.
+ * At this point we should have no normal backend children
+ * left (else we'd not be in PM_SHUTDOWN state) but we might
+ * have dead_end children to wait for.
+ *
+ * If we have an archiver subprocess, tell it to do a last
+ * archive cycle and quit; otherwise we can go directly to
+ * PM_WAIT_DEAD_END state.
*/
Assert(Shutdown > NoShutdown);
- pmState = PM_WAIT_DEAD_END;
+
+ if (PgArchPID != 0)
+ {
+ /* Waken archiver for the last time */
+ signal_child(PgArchPID, SIGUSR2);
+ pmState = PM_SHUTDOWN_2;
+ }
+ else
+ pmState = PM_WAIT_DEAD_END;
+
+ /*
+ * We can also shut down the stats collector now; there's
+ * nothing left for it to do.
+ */
+ if (PgStatPID != 0)
+ signal_child(PgStatPID, SIGQUIT);
}
else
{
@@ -2227,7 +2243,8 @@ reaper(SIGNAL_ARGS)
/*
* Was it the archiver? If so, just try to start a new one; no need
* to force reset of the rest of the system. (If fail, we'll try
- * again in future cycles of the main loop.)
+ * again in future cycles of the main loop.) But if we were waiting
+ * for it to shut down, advance to the next shutdown step.
*/
if (pid == PgArchPID)
{
@@ -2235,8 +2252,10 @@ reaper(SIGNAL_ARGS)
if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("archiver process"),
pid, exitstatus);
- if (XLogArchivingActive() && pmState >= PM_RUN)
+ if (XLogArchivingActive() && pmState == PM_RUN)
PgArchPID = pgarch_start();
+ else if (pmState == PM_SHUTDOWN_2)
+ pmState = PM_WAIT_DEAD_END;
continue;
}
@@ -2563,6 +2582,11 @@ PostmasterStateMachine(void)
* change causes ServerLoop to stop creating new ones.
*/
pmState = PM_WAIT_DEAD_END;
+
+ /*
+ * We already SIGQUIT'd the archiver and stats processes,
+ * if any, when we entered FatalError state.
+ */
}
else
{
@@ -2591,13 +2615,13 @@ PostmasterStateMachine(void)
*/
FatalError = true;
pmState = PM_WAIT_DEAD_END;
+
+ /* Kill the archiver and stats collector too */
+ if (PgArchPID != 0)
+ signal_child(PgArchPID, SIGQUIT);
+ if (PgStatPID != 0)
+ signal_child(PgStatPID, SIGQUIT);
}
- /* Tell pgarch to shut down too; nothing left for it to do */
- if (PgArchPID != 0)
- signal_child(PgArchPID, SIGQUIT);
- /* Tell pgstat to shut down too; nothing left for it to do */
- if (PgStatPID != 0)
- signal_child(PgStatPID, SIGQUIT);
}
}
}
@@ -2606,16 +2630,26 @@ PostmasterStateMachine(void)
{
/*
* PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
- * (ie, no dead_end children remain).
+ * (ie, no dead_end children remain), and the archiver and stats
+ * collector are gone too.
+ *
+ * The reason we wait for those two is to protect them against a new
+ * postmaster starting conflicting subprocesses; this isn't an
+ * ironclad protection, but it at least helps in the
+ * shutdown-and-immediately-restart scenario. Note that they have
+ * already been sent appropriate shutdown signals, either during a
+ * normal state transition leading up to PM_WAIT_DEAD_END, or during
+ * FatalError processing.
*/
- if (!DLGetHead(BackendList))
+ if (DLGetHead(BackendList) == NULL &&
+ PgArchPID == 0 && PgStatPID == 0)
{
/* These other guys should be dead already */
Assert(StartupPID == 0);
Assert(BgWriterPID == 0);
Assert(WalWriterPID == 0);
Assert(AutoVacPID == 0);
- /* archiver, stats, and syslogger are not considered here */
+ /* syslogger is not considered here */
pmState = PM_NO_CHILDREN;
}
}
@@ -2628,14 +2662,9 @@ PostmasterStateMachine(void)
* we got SIGTERM from init --- there may well not be time for recovery
* before init decides to SIGKILL us.)
*
- * Note: we do not wait around for exit of the archiver or stats
- * processes. They've been sent SIGQUIT by this point (either when we
- * entered PM_SHUTDOWN state, or when we set FatalError, and at least one
- * of those must have happened by now). In any case they contain logic to
- * commit hara-kiri if they notice the postmaster is gone. Since they
- * aren't connected to shared memory, they pose no problem for shutdown.
- * The syslogger is not considered either, since it's intended to survive
- * till the postmaster exits.
+ * Note that the syslogger continues to run. It will exit when it sees
+ * EOF on its input pipe, which happens when there are no more upstream
+ * processes.
*/
if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
{
@@ -2652,10 +2681,8 @@ PostmasterStateMachine(void)
}
/*
- * If we need to recover from a crash, wait for all shmem-connected
- * children to exit, then reset shmem and StartupDataBase. (We can ignore
- * the archiver and stats processes here since they are not connected to
- * shmem.)
+ * If we need to recover from a crash, wait for all non-syslogger
+ * children to exit, then reset shmem and StartupDataBase.
*/
if (FatalError && pmState == PM_NO_CHILDREN)
{
@@ -3782,7 +3809,7 @@ sigusr1_handler(SIGNAL_ARGS)
}
if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
- PgArchPID != 0 && Shutdown <= SmartShutdown)
+ PgArchPID != 0)
{
/*
* Send SIGUSR1 to archiver process, to wake it up and begin archiving