aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/buffer/bufmgr.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2007-05-02 23:18:03 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2007-05-02 23:18:03 +0000
commit: 8c3cc86e7b688b0efe5ec6ce4f4342c2883b1db5 (patch)
tree: 33cfc93810e9c15988c7dc26f7350a6b9d5d4c53 /src/backend/storage/buffer/bufmgr.c
parent: 8ec943856a4e94637600fa7cad976281ca3f4071 (diff)
download: postgresql-8c3cc86e7b688b0efe5ec6ce4f4342c2883b1db5.tar.gz
download: postgresql-8c3cc86e7b688b0efe5ec6ce4f4342c2883b1db5.zip
During WAL recovery, when reading a page that we intend to overwrite completely
from the WAL data, don't bother to physically read it; just have bufmgr.c return a zeroed-out buffer instead. This speeds recovery significantly, and also avoids unnecessary failures when a page-to-be-overwritten has corrupt page headers on disk. This replaces a former kluge that accomplished the latter by pretending zero_damaged_pages was always ON during WAL recovery. That was OK when the kluge was put in, but it is unsafe when restoring from WAL that was written with full_page_writes off.

Heikki Linnakangas
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r-- src/backend/storage/buffer/bufmgr.c 48
1 file changed, 39 insertions, 9 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 8ce7700c9cb..c3d16a9418f 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.216 2007/03/30 18:34:55 mha Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.217 2007/05/02 23:18:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -17,6 +17,12 @@
* and pin it so that no one can destroy it while this process
* is using it.
*
+ * ReadOrZeroBuffer() -- like ReadBuffer, but if the page is not already in
+ * cache we don't read it, but just return a zeroed-out buffer. Useful
+ * when the caller intends to fill the page from scratch, since this
+ * saves I/O and avoids unnecessary failure if the page-on-disk has
+ * corrupt page headers.
+ *
* ReleaseBuffer() -- unpin a buffer
*
* MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
@@ -87,6 +93,8 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
extern PgStat_MsgBgWriter BgWriterStats;
+static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
+ bool zeroPage);
static bool PinBuffer(volatile BufferDesc *buf);
static void PinBuffer_Locked(volatile BufferDesc *buf);
static void UnpinBuffer(volatile BufferDesc *buf,
@@ -121,6 +129,27 @@ static void AtProcExit_Buffers(int code, Datum arg);
Buffer
ReadBuffer(Relation reln, BlockNumber blockNum)
{
+ return ReadBuffer_common(reln, blockNum, false);
+}
+
+/*
+ * ReadOrZeroBuffer -- like ReadBuffer, but if the page isn't in buffer
+ * cache already, it's filled with zeros instead of reading it from
+ * disk. The caller is expected to overwrite the whole buffer,
+ * so that the current page contents are not interesting.
+ */
+Buffer
+ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
+{
+ return ReadBuffer_common(reln, blockNum, true);
+}
+
+/*
+ * ReadBuffer_common -- common logic for ReadBuffer and ReadOrZeroBuffer
+ */
+static Buffer
+ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
+{
volatile BufferDesc *bufHdr;
Block bufBlock;
bool found;
@@ -253,17 +282,18 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
}
else
{
- smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
+ /*
+ * Read in the page, unless the caller intends to overwrite it
+ * and just wants us to allocate a buffer.
+ */
+ if (zeroPage)
+ MemSet((char *) bufBlock, 0, BLCKSZ);
+ else
+ smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
/* check for garbage data */
if (!PageHeaderIsValid((PageHeader) bufBlock))
{
- /*
- * During WAL recovery, the first access to any data page should
- * overwrite the whole page from the WAL; so a clobbered page
- * header is not reason to fail. Hence, when InRecovery we may
- * always act as though zero_damaged_pages is ON.
- */
- if (zero_damaged_pages || InRecovery)
+ if (zero_damaged_pages)
{
ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),