aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/src/sgml/config.sgml30
-rw-r--r--doc/src/sgml/high-availability.sgml9
-rw-r--r--src/backend/access/transam/xlog.c90
-rw-r--r--src/backend/replication/walsender.c62
-rw-r--r--src/backend/utils/misc/guc.c11
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample1
-rw-r--r--src/include/access/xlog.h4
7 files changed, 174 insertions, 33 deletions
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 81b0ba34457..0ca5e402e0c 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.262 2010/04/03 07:22:53 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.263 2010/04/12 09:52:29 heikki Exp $ -->
<chapter Id="runtime-config">
<title>Server Configuration</title>
@@ -1823,6 +1823,34 @@ archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows
</para>
</listitem>
</varlistentry>
+
+ <varlistentry id="guc-standby-keep-segments" xreflabel="standby_keep_segments">
+ <term><varname>standby_keep_segments</varname> (<type>integer</type>)</term>
+ <indexterm>
+ <primary><varname>standby_keep_segments</> configuration parameter</primary>
+ </indexterm>
+ <listitem>
+ <para>
+ Specifies the number of log file segments kept in the <filename>pg_xlog</>
+ directory, in case a standby server needs to fetch them via streaming
+ replication. Each segment is normally 16 megabytes. If a standby
+ server connected to the primary falls behind more than
+ <varname>standby_keep_segments</> segments, the primary might remove
+ a WAL segment still needed by the standby and the replication
+ connection will be terminated.
+
+ This sets only the minimum number of segments retained for standby
+ purposes; the system might need to retain more segments for WAL
+ archival or to recover from a checkpoint. If <varname>standby_keep_segments</>
+ is zero (the default), the system doesn't keep any extra segments
+ for standby purposes, and the number of old WAL segments available
+ for standbys is determined based only on the location of the previous
+ checkpoint and status of WAL archival.
+ This parameter can only be set in the <filename>postgresql.conf</>
+ file or on the server command line.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</sect2>
<sect2 id="runtime-config-standby">
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index 13b783bc864..cff0339b523 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/high-availability.sgml,v 1.58 2010/04/03 07:22:54 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/high-availability.sgml,v 1.59 2010/04/12 09:52:29 heikki Exp $ -->
<chapter id="high-availability">
<title>High Availability, Load Balancing, and Replication</title>
@@ -732,7 +732,12 @@ trigger_file = '/path/to/trigger_file'
Streaming replication relies on file-based continuous archiving for
making the base backup and for allowing the standby to catch up if it is
disconnected from the primary for long enough for the primary to
- delete old WAL files still required by the standby.
+ delete old WAL files still required by the standby. It is possible
+ to use streaming replication without WAL archiving, but if a standby
+ falls behind too much, the primary will delete old WAL files still
+ needed by the standby, and the standby will have to be manually restored
+ from a base backup. You can control how many old WAL segments the primary
+ retains using the <varname>standby_keep_segments</> setting.
</para>
<para>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 12392f8cfc0..c5b7f7a98ce 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.391 2010/04/07 10:58:49 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.392 2010/04/12 09:52:29 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -66,6 +66,7 @@
/* User-settable parameters */
int CheckPointSegments = 3;
+int StandbySegments = 0;
int XLOGbuffers = 8;
int XLogArchiveTimeout = 0;
bool XLogArchiveMode = false;
@@ -356,6 +357,8 @@ typedef struct XLogCtlData
uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */
TransactionId ckptXid;
XLogRecPtr asyncCommitLSN; /* LSN of newest async commit */
+ uint32 lastRemovedLog; /* latest removed/recycled XLOG segment */
+ uint32 lastRemovedSeg;
/* Protected by WALWriteLock: */
XLogCtlWrite Write;
@@ -3150,6 +3153,22 @@ PreallocXlogFiles(XLogRecPtr endptr)
}
/*
+ * Get the log/seg of the latest removed or recycled WAL segment.
+ * Sets *log and *seg to 0 if no WAL segments have been removed since startup.
+ */
+void
+XLogGetLastRemoved(uint32 *log, uint32 *seg)
+{
+ /* volatile pointer keeps the compiler from moving these reads outside the spinlock */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+
+ SpinLockAcquire(&xlogctl->info_lck);
+ *log = xlogctl->lastRemovedLog;
+ *seg = xlogctl->lastRemovedSeg;
+ SpinLockRelease(&xlogctl->info_lck);
+}
+
+/*
* Recycle or remove all log files older or equal to passed log/seg#
*
* endptr is current (or recent) end of xlog; this is used to determine
@@ -3170,6 +3189,20 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
char newpath[MAXPGPATH];
#endif
struct stat statbuf;
+ /* use volatile pointer to prevent code rearrangement */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+
+ /* Update the last removed location in shared memory first */
+ SpinLockAcquire(&xlogctl->info_lck);
+ if (log > xlogctl->lastRemovedLog ||
+ (log == xlogctl->lastRemovedLog && seg > xlogctl->lastRemovedSeg))
+ {
+ xlogctl->lastRemovedLog = log;
+ xlogctl->lastRemovedSeg = seg;
+ }
+ SpinLockRelease(&xlogctl->info_lck);
+
+ elog(DEBUG1, "removing WAL segments older than %X/%X", log, seg);
/*
* Initialize info about where to try to recycle to. We allow recycling
@@ -7172,36 +7205,51 @@ CreateCheckPoint(int flags)
smgrpostckpt();
/*
- * If there's connected standby servers doing XLOG streaming, don't delete
- * XLOG files that have not been streamed to all of them yet. This does
- * nothing to prevent them from being deleted when the standby is
- * disconnected (e.g because of network problems), but at least it avoids
- * an open replication connection from failing because of that.
+ * Delete old log files (those no longer needed even for previous
+ * checkpoint or the standbys in XLOG streaming).
*/
- if ((_logId || _logSeg) && max_wal_senders > 0)
+ if (_logId || _logSeg)
{
- XLogRecPtr oldest;
- uint32 log;
- uint32 seg;
-
- oldest = GetOldestWALSendPointer();
- if (oldest.xlogid != 0 || oldest.xrecoff != 0)
+ /*
+ * Calculate the last segment that we need to retain because of
+ * standby_keep_segments, by subtracting StandbySegments from the
+ * new checkpoint location.
+ */
+ if (StandbySegments > 0)
{
- XLByteToSeg(oldest, log, seg);
+ uint32 log;
+ uint32 seg;
+ int d_log;
+ int d_seg;
+
+ XLByteToSeg(recptr, log, seg);
+
+ d_seg = StandbySegments % XLogSegsPerFile;
+ d_log = StandbySegments / XLogSegsPerFile;
+ if (seg < d_seg)
+ {
+ d_log += 1;
+ seg = seg - d_seg + XLogSegsPerFile;
+ }
+ else
+ seg = seg - d_seg;
+ /* avoid underflow, don't go below (0,1) */
+ if (log < d_log || (log == d_log && seg == 0))
+ {
+ log = 0;
+ seg = 1;
+ }
+ else
+ log = log - d_log;
+
+ /* don't delete WAL segments newer than the calculated segment */
if (log < _logId || (log == _logId && seg < _logSeg))
{
_logId = log;
_logSeg = seg;
}
}
- }
- /*
- * Delete old log files (those no longer needed even for previous
- * checkpoint or the standbys in XLOG streaming).
- */
- if (_logId || _logSeg)
- {
PrevLogSeg(_logId, _logSeg);
RemoveOldXlogFiles(_logId, _logSeg, recptr);
}
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e04e5ba65ca..aa8fbc1a40b 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -30,7 +30,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.14 2010/04/01 00:43:29 rhaas Exp $
+ * $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.15 2010/04/12 09:52:29 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -508,6 +508,10 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
{
char path[MAXPGPATH];
uint32 startoff;
+ uint32 lastRemovedLog;
+ uint32 lastRemovedSeg;
+ uint32 log;
+ uint32 seg;
while (nbytes > 0)
{
@@ -527,10 +531,27 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
if (sendFile < 0)
- ereport(FATAL, /* XXX: Why FATAL? */
- (errcode_for_file_access(),
- errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
- path, sendId, sendSeg)));
+ {
+ /*
+ * If the file is not found, assume it's because the
+ * standby asked for a too old WAL segment that has already
+ * been removed or recycled.
+ */
+ if (errno == ENOENT)
+ {
+ char filename[MAXFNAMELEN];
+ XLogFileName(filename, ThisTimeLineID, sendId, sendSeg);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("requested WAL segment %s has already been removed",
+ filename)));
+ }
+ else
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
+ path, sendId, sendSeg)));
+ }
sendOff = 0;
}
@@ -538,7 +559,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
if (sendOff != startoff)
{
if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
- ereport(FATAL,
+ ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not seek in log file %u, segment %u to offset %u: %m",
sendId, sendSeg, startoff)));
@@ -553,7 +574,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
readbytes = read(sendFile, buf, segbytes);
if (readbytes <= 0)
- ereport(FATAL,
+ ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read from log file %u, segment %u, offset %u, "
"length %lu: %m",
@@ -566,6 +587,26 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
nbytes -= readbytes;
buf += readbytes;
}
+
+ /*
+ * After reading into the buffer, check that what we read was valid.
+ * We do this after reading, because even though the segment was present
+ * when we opened it, it might get recycled or removed while we read it.
+ * The read() succeeds in that case, but the data we tried to read might
+ * already have been overwritten with new WAL records.
+ */
+ XLogGetLastRemoved(&lastRemovedLog, &lastRemovedSeg);
+ XLByteToPrevSeg(recptr, log, seg);
+ if (log < lastRemovedLog ||
+ (log == lastRemovedLog && seg <= lastRemovedSeg))
+ {
+ char filename[MAXFNAMELEN];
+ XLogFileName(filename, ThisTimeLineID, log, seg);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("requested WAL segment %s has already been removed",
+ filename)));
+ }
}
/*
@@ -802,6 +843,12 @@ WalSndShmemInit(void)
}
/*
+ * This isn't currently used for anything. Monitoring tools might be
+ * interested in it, and we'll need something like this in the
+ * future for synchronous replication.
+ */
+#ifdef NOT_USED
+/*
* Returns the oldest Send position among walsenders. Or InvalidXLogRecPtr
* if none.
*/
@@ -834,3 +881,4 @@ GetOldestWALSendPointer(void)
}
return oldest;
}
+#endif
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 5f8cc494893..9d72a0e5736 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.546 2010/04/01 00:43:29 rhaas Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.547 2010/04/12 09:52:29 heikki Exp $
*
*--------------------------------------------------------------------
*/
@@ -1648,6 +1648,15 @@ static struct config_int ConfigureNamesInt[] =
},
{
+ {"standby_keep_segments", PGC_SIGHUP, WAL_CHECKPOINTS,
+ gettext_noop("Sets the number of WAL files held for standby servers"),
+ NULL
+ },
+ &StandbySegments,
+ 0, 0, INT_MAX, NULL, NULL
+ },
+
+ {
{"checkpoint_segments", PGC_SIGHUP, WAL_CHECKPOINTS,
gettext_noop("Sets the maximum distance in log segments between automatic WAL checkpoints."),
NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 02f1df01038..48c09d14670 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -193,6 +193,7 @@
#max_wal_senders = 0 # max number of walsender processes
#wal_sender_delay = 200ms # 1-10000 milliseconds
+#standby_keep_segments = 0 # in logfile segments, 16MB each; 0 disables
#------------------------------------------------------------------------------
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 9a66e9134d4..de7406a808b 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.105 2010/04/01 00:43:29 rhaas Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.106 2010/04/12 09:52:29 heikki Exp $
*/
#ifndef XLOG_H
#define XLOG_H
@@ -187,6 +187,7 @@ extern XLogRecPtr XactLastRecEnd;
/* these variables are GUC parameters related to XLOG */
extern int CheckPointSegments;
+extern int StandbySegments;
extern int XLOGbuffers;
extern bool XLogArchiveMode;
extern char *XLogArchiveCommand;
@@ -267,6 +268,7 @@ extern int XLogFileInit(uint32 log, uint32 seg,
extern int XLogFileOpen(uint32 log, uint32 seg);
+extern void XLogGetLastRemoved(uint32 *log, uint32 *seg);
extern void XLogSetAsyncCommitLSN(XLogRecPtr record);
extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup);