about | summary | refs | log | tree | commit | diff
path: root/src/backend/access/transam/slru.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/slru.c')
-rw-r--r-- src/backend/access/transam/slru.c 298
1 files changed, 92 insertions, 206 deletions
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index d45a7d9f614..5d51f69a531 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -3,49 +3,6 @@
* slru.c
* Simple LRU buffering for transaction status logfiles
*
- * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.18 2004/07/21 22:31:20 tgl Exp $
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include "access/clog.h"
-#include "access/slru.h"
-#include "access/subtrans.h"
-#include "postmaster/bgwriter.h"
-#include "storage/fd.h"
-#include "storage/lwlock.h"
-#include "storage/shmem.h"
-#include "miscadmin.h"
-
-
-/*
- * Define segment size. A page is the same BLCKSZ as is used everywhere
- * else in Postgres. The segment size can be chosen somewhat arbitrarily;
- * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG
- * or 64K transactions for SUBTRANS.
- *
- * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
- * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where
- * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at
- * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
- * take no explicit notice of that fact in this module, except when comparing
- * segment and page numbers in SimpleLruTruncate (see PagePrecedes()).
- */
-
-#define SLRU_PAGES_PER_SEGMENT 32
-
-
-/*----------
- * Shared-memory data structures for SLRU control
- *
* We use a simple least-recently-used scheme to manage a pool of page
* buffers. Under ordinary circumstances we expect that write
* traffic will occur mostly to the latest page (and to the just-prior
@@ -86,44 +43,46 @@
* to re-dirty a page that is currently being written out. This is handled
* by setting the page's state from WRITE_IN_PROGRESS to DIRTY. The writing
* process must notice this and not mark the page CLEAN when it's done.
- *----------
+ *
+ *
+ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.19 2004/08/23 23:22:44 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
*/
+#include "postgres.h"
-typedef enum
-{
- SLRU_PAGE_EMPTY, /* buffer is not in use */
- SLRU_PAGE_READ_IN_PROGRESS, /* page is being read in */
- SLRU_PAGE_CLEAN, /* page is valid and not dirty */
- SLRU_PAGE_DIRTY, /* page is valid but needs write */
- SLRU_PAGE_WRITE_IN_PROGRESS /* page is being written out */
-} SlruPageStatus;
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
-/*
- * Shared-memory state
- */
-typedef struct SlruSharedData
-{
- LWLockId ControlLock;
+#include "access/slru.h"
+#include "access/xlog.h"
+#include "storage/fd.h"
+#include "storage/shmem.h"
+#include "miscadmin.h"
- /*
- * Info for each buffer slot. Page number is undefined when status is
- * EMPTY. lru_count is essentially the number of page switches since
- * last use of this page; the page with highest lru_count is the best
- * candidate to replace.
- */
- char *page_buffer[NUM_CLOG_BUFFERS];
- SlruPageStatus page_status[NUM_CLOG_BUFFERS];
- int page_number[NUM_CLOG_BUFFERS];
- unsigned int page_lru_count[NUM_CLOG_BUFFERS];
- LWLockId BufferLocks[NUM_CLOG_BUFFERS]; /* Per-buffer I/O locks */
- /*
- * latest_page_number is the page number of the current end of the
- * CLOG; this is not critical data, since we use it only to avoid
- * swapping out the latest page.
- */
- int latest_page_number;
-} SlruSharedData;
+/*
+ * Define segment size. A page is the same BLCKSZ as is used everywhere
+ * else in Postgres. The segment size can be chosen somewhat arbitrarily;
+ * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG
+ * or 64K transactions for SUBTRANS.
+ *
+ * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
+ * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where
+ * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at
+ * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
+ * take no explicit notice of that fact in this module, except when comparing
+ * segment and page numbers in SimpleLruTruncate (see PagePrecedes()).
+ *
+ * Note: this file currently assumes that segment file names will be four
+ * hex digits. This sets a lower bound on the segment size (64K transactions
+ * for 32-bit TransactionIds).
+ */
+#define SLRU_PAGES_PER_SEGMENT 32
#define SlruFileName(ctl, path, seg) \
snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
@@ -138,8 +97,8 @@ typedef struct SlruSharedData
typedef struct SlruFlushData
{
int num_files; /* # files actually open */
- int fd[NUM_CLOG_BUFFERS]; /* their FD's */
- int segno[NUM_CLOG_BUFFERS]; /* their clog seg#s */
+ int fd[NUM_SLRU_BUFFERS]; /* their FD's */
+ int segno[NUM_SLRU_BUFFERS]; /* their log seg#s */
} SlruFlushData;
/*
@@ -149,7 +108,7 @@ typedef struct SlruFlushData
do { \
if ((shared)->page_lru_count[slotno] != 0) { \
int iilru; \
- for (iilru = 0; iilru < NUM_CLOG_BUFFERS; iilru++) \
+ for (iilru = 0; iilru < NUM_SLRU_BUFFERS; iilru++) \
(shared)->page_lru_count[iilru]++; \
(shared)->page_lru_count[slotno] = 0; \
} \
@@ -176,7 +135,6 @@ static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
SlruFlush fdata);
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
static int SlruSelectLRUPage(SlruCtl ctl, int pageno);
-static bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions);
/*
@@ -186,11 +144,12 @@ static bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions);
int
SimpleLruShmemSize(void)
{
- return MAXALIGN(sizeof(SlruSharedData)) + BLCKSZ * NUM_CLOG_BUFFERS;
+ return BUFFERALIGN(sizeof(SlruSharedData)) + BLCKSZ * NUM_SLRU_BUFFERS;
}
void
-SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir)
+SimpleLruInit(SlruCtl ctl, const char *name,
+ LWLockId ctllock, const char *subdir)
{
SlruShared shared;
bool found;
@@ -207,16 +166,16 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir)
memset(shared, 0, sizeof(SlruSharedData));
- shared->ControlLock = LWLockAssign();
+ shared->ControlLock = ctllock;
- bufptr = (char *) shared + MAXALIGN(sizeof(SlruSharedData));
+ bufptr = (char *) shared + BUFFERALIGN(sizeof(SlruSharedData));
- for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
+ for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
{
shared->page_buffer[slotno] = bufptr;
shared->page_status[slotno] = SLRU_PAGE_EMPTY;
shared->page_lru_count[slotno] = 1;
- shared->BufferLocks[slotno] = LWLockAssign();
+ shared->buffer_locks[slotno] = LWLockAssign();
bufptr += BLCKSZ;
}
@@ -225,11 +184,12 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir)
else
Assert(found);
- /* Initialize the unshared control struct */
+ /*
+ * Initialize the unshared control struct, including directory path.
+ * We assume caller set PagePrecedes.
+ */
ctl->shared = shared;
- ctl->ControlLock = shared->ControlLock;
-
- /* Initialize unshared copy of directory path */
+ ctl->do_fsync = true; /* default behavior */
snprintf(ctl->Dir, MAXPGPATH, "%s/%s", DataDir, subdir);
}
@@ -244,8 +204,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir)
int
SimpleLruZeroPage(SlruCtl ctl, int pageno)
{
- int slotno;
SlruShared shared = ctl->shared;
+ int slotno;
/* Find a suitable buffer slot for the page */
slotno = SlruSelectLRUPage(ctl, pageno);
@@ -274,14 +234,13 @@ SimpleLruZeroPage(SlruCtl ctl, int pageno)
* The passed-in xid is used only for error reporting, and may be
* InvalidTransactionId if no specific xid is associated with the action.
*
- * Return value is the shared-buffer address of the page.
+ * Return value is the shared-buffer slot number now holding the page.
* The buffer's LRU access info is updated.
- * If forwrite is true, the buffer is marked as dirty.
*
* Control lock must be held at entry, and will be held at exit.
*/
-char *
-SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
+int
+SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid)
{
SlruShared shared = ctl->shared;
@@ -303,9 +262,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
{
/* otherwise, it's ready to use */
SlruRecentlyUsed(shared, slotno);
- if (forwrite)
- shared->page_status[slotno] = SLRU_PAGE_DIRTY;
- return shared->page_buffer[slotno];
+ return slotno;
}
}
else
@@ -327,7 +284,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
/* Release shared lock, grab per-buffer lock instead */
LWLockRelease(shared->ControlLock);
- LWLockAcquire(shared->BufferLocks[slotno], LW_EXCLUSIVE);
+ LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE);
/*
* Check to see if someone else already did the read, or took the
@@ -336,7 +293,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
if (shared->page_number[slotno] != pageno ||
shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
{
- LWLockRelease(shared->BufferLocks[slotno]);
+ LWLockRelease(shared->buffer_locks[slotno]);
LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
continue;
}
@@ -352,16 +309,14 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
shared->page_status[slotno] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_EMPTY;
- LWLockRelease(shared->BufferLocks[slotno]);
+ LWLockRelease(shared->buffer_locks[slotno]);
/* Now it's okay to ereport if we failed */
if (!ok)
SlruReportIOError(ctl, pageno, xid);
SlruRecentlyUsed(shared, slotno);
- if (forwrite)
- shared->page_status[slotno] = SLRU_PAGE_DIRTY;
- return shared->page_buffer[slotno];
+ return slotno;
}
}
@@ -379,9 +334,9 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
void
SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
{
+ SlruShared shared = ctl->shared;
int pageno;
bool ok;
- SlruShared shared = ctl->shared;
/* Do nothing if page does not need writing */
if (shared->page_status[slotno] != SLRU_PAGE_DIRTY &&
@@ -392,7 +347,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
/* Release shared lock, grab per-buffer lock instead */
LWLockRelease(shared->ControlLock);
- LWLockAcquire(shared->BufferLocks[slotno], LW_EXCLUSIVE);
+ LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE);
/*
* Check to see if someone else already did the write, or took the
@@ -405,7 +360,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
(shared->page_status[slotno] != SLRU_PAGE_DIRTY &&
shared->page_status[slotno] != SLRU_PAGE_WRITE_IN_PROGRESS))
{
- LWLockRelease(shared->BufferLocks[slotno]);
+ LWLockRelease(shared->buffer_locks[slotno]);
LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
return;
}
@@ -447,7 +402,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
if (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
shared->page_status[slotno] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_DIRTY;
- LWLockRelease(shared->BufferLocks[slotno]);
+ LWLockRelease(shared->buffer_locks[slotno]);
/* Now it's okay to ereport if we failed */
if (!ok)
@@ -640,7 +595,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
*/
if (!fdata)
{
- if (pg_fsync(fd))
+ if (ctl->do_fsync && pg_fsync(fd))
{
slru_errcause = SLRU_FSYNC_FAILED;
slru_errno = errno;
@@ -758,7 +713,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
unsigned int bestcount = 0;
/* See if page already has a buffer assigned */
- for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
+ for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
{
if (shared->page_number[slotno] == pageno &&
shared->page_status[slotno] != SLRU_PAGE_EMPTY)
@@ -769,7 +724,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
* If we find any EMPTY slot, just select that one. Else locate
* the least-recently-used slot that isn't the latest page.
*/
- for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
+ for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
{
if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
return slotno;
@@ -795,7 +750,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
*/
if (shared->page_status[bestslot] == SLRU_PAGE_READ_IN_PROGRESS)
(void) SimpleLruReadPage(ctl, shared->page_number[bestslot],
- InvalidTransactionId, false);
+ InvalidTransactionId);
else
SimpleLruWritePage(ctl, bestslot, NULL);
@@ -808,18 +763,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
}
/*
- * This must be called ONCE during postmaster or standalone-backend startup
- */
-void
-SimpleLruSetLatestPage(SlruCtl ctl, int pageno)
-{
- SlruShared shared = ctl->shared;
-
- shared->latest_page_number = pageno;
-}
-
-/*
- * This is called during checkpoint and postmaster/standalone-backend shutdown
+ * Flush dirty pages to disk during checkpoint or database shutdown
*/
void
SimpleLruFlush(SlruCtl ctl, bool checkpoint)
@@ -831,11 +775,14 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint)
int i;
bool ok;
+ /*
+ * Find and write dirty pages
+ */
fdata.num_files = 0;
LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
- for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
+ for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
{
SimpleLruWritePage(ctl, slotno, &fdata);
@@ -857,7 +804,7 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint)
ok = true;
for (i = 0; i < fdata.num_files; i++)
{
- if (pg_fsync(fdata.fd[i]))
+ if (ctl->do_fsync && pg_fsync(fdata.fd[i]))
{
slru_errcause = SLRU_FSYNC_FAILED;
slru_errno = errno;
@@ -879,40 +826,23 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint)
/*
* Remove all segments before the one holding the passed page number
- *
- * When this is called, we know that the database logically contains no
- * reference to transaction IDs older than oldestXact. However, we must
- * not remove any segment until we have performed a checkpoint, to ensure
- * that no such references remain on disk either; else a crash just after
- * the truncation might leave us with a problem. Since CLOG segments hold
- * a large number of transactions, the opportunity to actually remove a
- * segment is fairly rare, and so it seems best not to do the checkpoint
- * unless we have confirmed that there is a removable segment. Therefore
- * we issue the checkpoint command here, not in higher-level code as might
- * seem cleaner.
*/
void
SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
{
- int slotno;
SlruShared shared = ctl->shared;
+ int slotno;
/*
* The cutoff point is the start of the segment containing cutoffPage.
*/
cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
- if (!SlruScanDirectory(ctl, cutoffPage, false))
- return; /* nothing to remove */
-
- /* Perform a CHECKPOINT */
- RequestCheckpoint(true);
-
/*
* Scan shared memory and remove any pages preceding the cutoff page,
- * to ensure we won't rewrite them later. (Any dirty pages should
- * have been flushed already during the checkpoint, we're just being
- * extra careful here.)
+ * to ensure we won't rewrite them later. (Since this is normally
+ * called in or just after a checkpoint, any dirty pages should
+ * have been flushed already ... we're just being extra careful here.)
*/
LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
@@ -933,7 +863,7 @@ restart:;
return;
}
- for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++)
+ for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
{
if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
continue;
@@ -956,7 +886,7 @@ restart:;
*/
if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
(void) SimpleLruReadPage(ctl, shared->page_number[slotno],
- InvalidTransactionId, false);
+ InvalidTransactionId);
else
SimpleLruWritePage(ctl, slotno, NULL);
goto restart;
@@ -969,11 +899,13 @@ restart:;
}
/*
- * SlruTruncate subroutine: scan directory for removable segments.
+ * SimpleLruTruncate subroutine: scan directory for removable segments.
* Actually remove them iff doDeletions is true. Return TRUE iff any
* removable segments were found. Note: no locking is needed.
+ *
+ * This can be called directly from clog.c, for reasons explained there.
*/
-static bool
+bool
SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions)
{
bool found = false;
@@ -983,6 +915,13 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions)
int segpage;
char path[MAXPGPATH];
+ /*
+ * The cutoff point is the start of the segment containing cutoffPage.
+ * (This is redundant when called from SimpleLruTruncate, but not when
+ * called directly from clog.c.)
+ */
+ cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
+
cldir = AllocateDir(ctl->Dir);
if (cldir == NULL)
ereport(ERROR,
@@ -1003,10 +942,9 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions)
found = true;
if (doDeletions)
{
- ereport(LOG,
- (errmsg("removing file \"%s/%s\"",
- ctl->Dir, clde->d_name)));
snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, clde->d_name);
+ ereport(LOG,
+ (errmsg("removing file \"%s\"", path)));
unlink(path);
}
}
@@ -1027,55 +965,3 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions)
return found;
}
-
-/*
- * SLRU resource manager's routines
- */
-void
-slru_redo(XLogRecPtr lsn, XLogRecord *record)
-{
- uint8 info = record->xl_info & ~XLR_INFO_MASK;
- int pageno;
-
- memcpy(&pageno, XLogRecGetData(record), sizeof(int));
-
- switch (info)
- {
- case CLOG_ZEROPAGE:
- clog_zeropage_redo(pageno);
- break;
- case SUBTRANS_ZEROPAGE:
- subtrans_zeropage_redo(pageno);
- break;
- default:
- elog(PANIC, "slru_redo: unknown op code %u", info);
- }
-}
-
-void
-slru_undo(XLogRecPtr lsn, XLogRecord *record)
-{
-}
-
-void
-slru_desc(char *buf, uint8 xl_info, char *rec)
-{
- uint8 info = xl_info & ~XLR_INFO_MASK;
-
- if (info == CLOG_ZEROPAGE)
- {
- int pageno;
-
- memcpy(&pageno, rec, sizeof(int));
- sprintf(buf + strlen(buf), "clog zeropage: %d", pageno);
- }
- else if (info == SUBTRANS_ZEROPAGE)
- {
- int pageno;
-
- memcpy(&pageno, rec, sizeof(int));
- sprintf(buf + strlen(buf), "subtrans zeropage: %d", pageno);
- }
- else
- strcat(buf, "UNKNOWN");
-}