aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2008-11-23 01:40:19 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2008-11-23 01:40:19 +0000
commit: 6f6a6d8b140393c974ec5ae65c6c605e70d08034 (patch)
tree: a64923da56e6eb8e98bb49985f98edd43e00e4b3
parent: 8309d006cbd2cca15a5f1be69644b91f2da5eb9e (diff)
download: postgresql-6f6a6d8b140393c974ec5ae65c6c605e70d08034.tar.gz
download: postgresql-6f6a6d8b140393c974ec5ae65c6c605e70d08034.zip
Teach RequestCheckpoint() to wait and retry a few times if it can't signal
the bgwriter immediately. This covers the case where the bgwriter is still starting up, as seen in a recent buildfarm failure. In future it might also assist with clean recovery after a bgwriter termination and restart --- right now the postmaster treats early bgwriter exit as a system crash, but that might not always be so.
-rw-r--r-- src/backend/postmaster/bgwriter.c 42
1 files changed, 33 insertions, 9 deletions
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 7d25811afd4..03f8f7e30f2 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.53 2008/10/14 08:06:39 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.54 2008/11/23 01:40:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -864,6 +864,7 @@ RequestCheckpoint(int flags)
{
/* use volatile pointer to prevent code rearrangement */
volatile BgWriterShmemStruct *bgs = BgWriterShmem;
+ int ntries;
int old_failed,
old_started;
@@ -905,15 +906,38 @@ RequestCheckpoint(int flags)
SpinLockRelease(&bgs->ckpt_lck);
/*
- * Send signal to request checkpoint. When not waiting, we consider
- * failure to send the signal to be nonfatal.
+ * Send signal to request checkpoint. It's possible that the bgwriter
+ * hasn't started yet, or is in process of restarting, so we will retry
+ * a few times if needed. Also, if not told to wait for the checkpoint
+ * to occur, we consider failure to send the signal to be nonfatal and
+ * merely LOG it.
*/
- if (BgWriterShmem->bgwriter_pid == 0)
- elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
- "could not request checkpoint because bgwriter not running");
- if (kill(BgWriterShmem->bgwriter_pid, SIGINT) != 0)
- elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
- "could not signal for checkpoint: %m");
+ for (ntries = 0; ; ntries++)
+ {
+ if (BgWriterShmem->bgwriter_pid == 0)
+ {
+ if (ntries >= 20) /* max wait 2.0 sec */
+ {
+ elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
+ "could not request checkpoint because bgwriter not running");
+ break;
+ }
+ }
+ else if (kill(BgWriterShmem->bgwriter_pid, SIGINT) != 0)
+ {
+ if (ntries >= 20) /* max wait 2.0 sec */
+ {
+ elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
+ "could not signal for checkpoint: %m");
+ break;
+ }
+ }
+ else
+ break; /* signal sent successfully */
+
+ CHECK_FOR_INTERRUPTS();
+ pg_usleep(100000L); /* wait 0.1 sec, then retry */
+ }
/*
* If requested, wait for completion. We detect completion according to