diff options
Diffstat (limited to 'src/backend/access/transam/slru.c')
-rw-r--r-- | src/backend/access/transam/slru.c | 298 |
1 files changed, 92 insertions, 206 deletions
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index d45a7d9f614..5d51f69a531 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -3,49 +3,6 @@ * slru.c * Simple LRU buffering for transaction status logfiles * - * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.18 2004/07/21 22:31:20 tgl Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <fcntl.h> -#include <sys/stat.h> -#include <unistd.h> - -#include "access/clog.h" -#include "access/slru.h" -#include "access/subtrans.h" -#include "postmaster/bgwriter.h" -#include "storage/fd.h" -#include "storage/lwlock.h" -#include "storage/shmem.h" -#include "miscadmin.h" - - -/* - * Define segment size. A page is the same BLCKSZ as is used everywhere - * else in Postgres. The segment size can be chosen somewhat arbitrarily; - * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG - * or 64K transactions for SUBTRANS. - * - * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, - * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where - * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at - * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need - * take no explicit notice of that fact in this module, except when comparing - * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). - */ - -#define SLRU_PAGES_PER_SEGMENT 32 - - -/*---------- - * Shared-memory data structures for SLRU control - * * We use a simple least-recently-used scheme to manage a pool of page * buffers. Under ordinary circumstances we expect that write * traffic will occur mostly to the latest page (and to the just-prior @@ -86,44 +43,46 @@ * to re-dirty a page that is currently being written out. This is handled * by setting the page's state from WRITE_IN_PROGRESS to DIRTY. The writing * process must notice this and not mark the page CLEAN when it's done. - *---------- + * + * + * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.19 2004/08/23 23:22:44 tgl Exp $ + * + *------------------------------------------------------------------------- */ +#include "postgres.h" -typedef enum -{ - SLRU_PAGE_EMPTY, /* buffer is not in use */ - SLRU_PAGE_READ_IN_PROGRESS, /* page is being read in */ - SLRU_PAGE_CLEAN, /* page is valid and not dirty */ - SLRU_PAGE_DIRTY, /* page is valid but needs write */ - SLRU_PAGE_WRITE_IN_PROGRESS /* page is being written out */ -} SlruPageStatus; +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> -/* - * Shared-memory state - */ -typedef struct SlruSharedData -{ - LWLockId ControlLock; +#include "access/slru.h" +#include "access/xlog.h" +#include "storage/fd.h" +#include "storage/shmem.h" +#include "miscadmin.h" - /* - * Info for each buffer slot. Page number is undefined when status is - * EMPTY. lru_count is essentially the number of page switches since - * last use of this page; the page with highest lru_count is the best - * candidate to replace. - */ - char *page_buffer[NUM_CLOG_BUFFERS]; - SlruPageStatus page_status[NUM_CLOG_BUFFERS]; - int page_number[NUM_CLOG_BUFFERS]; - unsigned int page_lru_count[NUM_CLOG_BUFFERS]; - LWLockId BufferLocks[NUM_CLOG_BUFFERS]; /* Per-buffer I/O locks */ - /* - * latest_page_number is the page number of the current end of the - * CLOG; this is not critical data, since we use it only to avoid - * swapping out the latest page. - */ - int latest_page_number; -} SlruSharedData; +/* + * Define segment size. A page is the same BLCKSZ as is used everywhere + * else in Postgres. The segment size can be chosen somewhat arbitrarily; + * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG + * or 64K transactions for SUBTRANS. + * + * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, + * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where + * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at + * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need + * take no explicit notice of that fact in this module, except when comparing + * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). + * + * Note: this file currently assumes that segment file names will be four + * hex digits. This sets a lower bound on the segment size (64K transactions + * for 32-bit TransactionIds). + */ +#define SLRU_PAGES_PER_SEGMENT 32 #define SlruFileName(ctl, path, seg) \ snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg) @@ -138,8 +97,8 @@ typedef struct SlruSharedData typedef struct SlruFlushData { int num_files; /* # files actually open */ - int fd[NUM_CLOG_BUFFERS]; /* their FD's */ - int segno[NUM_CLOG_BUFFERS]; /* their clog seg#s */ + int fd[NUM_SLRU_BUFFERS]; /* their FD's */ + int segno[NUM_SLRU_BUFFERS]; /* their log seg#s */ } SlruFlushData; /* @@ -149,7 +108,7 @@ typedef struct SlruFlushData do { \ if ((shared)->page_lru_count[slotno] != 0) { \ int iilru; \ - for (iilru = 0; iilru < NUM_CLOG_BUFFERS; iilru++) \ + for (iilru = 0; iilru < NUM_SLRU_BUFFERS; iilru++) \ (shared)->page_lru_count[iilru]++; \ (shared)->page_lru_count[slotno] = 0; \ } \ @@ -176,7 +135,6 @@ static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata); static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid); static int SlruSelectLRUPage(SlruCtl ctl, int pageno); -static bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions); /* @@ -186,11 +144,12 @@ static bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions); int SimpleLruShmemSize(void) { - return MAXALIGN(sizeof(SlruSharedData)) + BLCKSZ * NUM_CLOG_BUFFERS; + return BUFFERALIGN(sizeof(SlruSharedData)) + BLCKSZ * NUM_SLRU_BUFFERS; } void -SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir) +SimpleLruInit(SlruCtl ctl, const char *name, + LWLockId ctllock, const char *subdir) { SlruShared shared; bool found; @@ -207,16 +166,16 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir) memset(shared, 0, sizeof(SlruSharedData)); - shared->ControlLock = LWLockAssign(); + shared->ControlLock = ctllock; - bufptr = (char *) shared + MAXALIGN(sizeof(SlruSharedData)); + bufptr = (char *) shared + BUFFERALIGN(sizeof(SlruSharedData)); - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { shared->page_buffer[slotno] = bufptr; shared->page_status[slotno] = SLRU_PAGE_EMPTY; shared->page_lru_count[slotno] = 1; - shared->BufferLocks[slotno] = LWLockAssign(); + shared->buffer_locks[slotno] = LWLockAssign(); bufptr += BLCKSZ; } @@ -225,11 +184,12 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir) else Assert(found); - /* Initialize the unshared control struct */ + /* + * Initialize the unshared control struct, including directory path. + * We assume caller set PagePrecedes. + */ ctl->shared = shared; - ctl->ControlLock = shared->ControlLock; - - /* Initialize unshared copy of directory path */ + ctl->do_fsync = true; /* default behavior */ snprintf(ctl->Dir, MAXPGPATH, "%s/%s", DataDir, subdir); } @@ -244,8 +204,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir) int SimpleLruZeroPage(SlruCtl ctl, int pageno) { - int slotno; SlruShared shared = ctl->shared; + int slotno; /* Find a suitable buffer slot for the page */ slotno = SlruSelectLRUPage(ctl, pageno); @@ -274,14 +234,13 @@ SimpleLruZeroPage(SlruCtl ctl, int pageno) * The passed-in xid is used only for error reporting, and may be * InvalidTransactionId if no specific xid is associated with the action. * - * Return value is the shared-buffer address of the page. + * Return value is the shared-buffer slot number now holding the page. * The buffer's LRU access info is updated. - * If forwrite is true, the buffer is marked as dirty. * * Control lock must be held at entry, and will be held at exit. */ -char * -SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) +int +SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid) { SlruShared shared = ctl->shared; @@ -303,9 +262,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) { /* otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); - if (forwrite) - shared->page_status[slotno] = SLRU_PAGE_DIRTY; - return shared->page_buffer[slotno]; + return slotno; } } else @@ -327,7 +284,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) /* Release shared lock, grab per-buffer lock instead */ LWLockRelease(shared->ControlLock); - LWLockAcquire(shared->BufferLocks[slotno], LW_EXCLUSIVE); + LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); /* * Check to see if someone else already did the read, or took the @@ -336,7 +293,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) if (shared->page_number[slotno] != pageno || shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS) { - LWLockRelease(shared->BufferLocks[slotno]); + LWLockRelease(shared->buffer_locks[slotno]); LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); continue; } @@ -352,16 +309,14 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) shared->page_status[slotno] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_EMPTY; - LWLockRelease(shared->BufferLocks[slotno]); + LWLockRelease(shared->buffer_locks[slotno]); /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, xid); SlruRecentlyUsed(shared, slotno); - if (forwrite) - shared->page_status[slotno] = SLRU_PAGE_DIRTY; - return shared->page_buffer[slotno]; + return slotno; } } @@ -379,9 +334,9 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) void SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) { + SlruShared shared = ctl->shared; int pageno; bool ok; - SlruShared shared = ctl->shared; /* Do nothing if page does not need writing */ if (shared->page_status[slotno] != SLRU_PAGE_DIRTY && @@ -392,7 +347,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) /* Release shared lock, grab per-buffer lock instead */ LWLockRelease(shared->ControlLock); - LWLockAcquire(shared->BufferLocks[slotno], LW_EXCLUSIVE); + LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); /* * Check to see if someone else already did the write, or took the @@ -405,7 +360,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) (shared->page_status[slotno] != SLRU_PAGE_DIRTY && shared->page_status[slotno] != SLRU_PAGE_WRITE_IN_PROGRESS)) { - LWLockRelease(shared->BufferLocks[slotno]); + LWLockRelease(shared->buffer_locks[slotno]); LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); return; } @@ -447,7 +402,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) if (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS) shared->page_status[slotno] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_DIRTY; - LWLockRelease(shared->BufferLocks[slotno]); + LWLockRelease(shared->buffer_locks[slotno]); /* Now it's okay to ereport if we failed */ if (!ok) @@ -640,7 +595,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata) */ if (!fdata) { - if (pg_fsync(fd)) + if (ctl->do_fsync && pg_fsync(fd)) { slru_errcause = SLRU_FSYNC_FAILED; slru_errno = errno; @@ -758,7 +713,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) unsigned int bestcount = 0; /* See if page already has a buffer assigned */ - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { if (shared->page_number[slotno] == pageno && shared->page_status[slotno] != SLRU_PAGE_EMPTY) @@ -769,7 +724,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) * If we find any EMPTY slot, just select that one. Else locate * the least-recently-used slot that isn't the latest page. */ - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) return slotno; @@ -795,7 +750,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) */ if (shared->page_status[bestslot] == SLRU_PAGE_READ_IN_PROGRESS) (void) SimpleLruReadPage(ctl, shared->page_number[bestslot], - InvalidTransactionId, false); + InvalidTransactionId); else SimpleLruWritePage(ctl, bestslot, NULL); @@ -808,18 +763,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) } /* - * This must be called ONCE during postmaster or standalone-backend startup - */ -void -SimpleLruSetLatestPage(SlruCtl ctl, int pageno) -{ - SlruShared shared = ctl->shared; - - shared->latest_page_number = pageno; -} - -/* - * This is called during checkpoint and postmaster/standalone-backend shutdown + * Flush dirty pages to disk during checkpoint or database shutdown */ void SimpleLruFlush(SlruCtl ctl, bool checkpoint) @@ -831,11 +775,14 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint) int i; bool ok; + /* + * Find and write dirty pages + */ fdata.num_files = 0; LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { SimpleLruWritePage(ctl, slotno, &fdata); @@ -857,7 +804,7 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint) ok = true; for (i = 0; i < fdata.num_files; i++) { - if (pg_fsync(fdata.fd[i])) + if (ctl->do_fsync && pg_fsync(fdata.fd[i])) { slru_errcause = SLRU_FSYNC_FAILED; slru_errno = errno; @@ -879,40 +826,23 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint) /* * Remove all segments before the one holding the passed page number - * - * When this is called, we know that the database logically contains no - * reference to transaction IDs older than oldestXact. However, we must - * not remove any segment until we have performed a checkpoint, to ensure - * that no such references remain on disk either; else a crash just after - * the truncation might leave us with a problem. Since CLOG segments hold - * a large number of transactions, the opportunity to actually remove a - * segment is fairly rare, and so it seems best not to do the checkpoint - * unless we have confirmed that there is a removable segment. Therefore - * we issue the checkpoint command here, not in higher-level code as might - * seem cleaner. */ void SimpleLruTruncate(SlruCtl ctl, int cutoffPage) { - int slotno; SlruShared shared = ctl->shared; + int slotno; /* * The cutoff point is the start of the segment containing cutoffPage. */ cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - if (!SlruScanDirectory(ctl, cutoffPage, false)) - return; /* nothing to remove */ - - /* Perform a CHECKPOINT */ - RequestCheckpoint(true); - /* * Scan shared memory and remove any pages preceding the cutoff page, - * to ensure we won't rewrite them later. (Any dirty pages should - * have been flushed already during the checkpoint, we're just being - * extra careful here.) + * to ensure we won't rewrite them later. (Since this is normally + * called in or just after a checkpoint, any dirty pages should + * have been flushed already ... we're just being extra careful here.) */ LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); @@ -933,7 +863,7 @@ restart:; return; } - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) continue; @@ -956,7 +886,7 @@ restart:; */ if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS) (void) SimpleLruReadPage(ctl, shared->page_number[slotno], - InvalidTransactionId, false); + InvalidTransactionId); else SimpleLruWritePage(ctl, slotno, NULL); goto restart; @@ -969,11 +899,13 @@ restart:; } /* - * SlruTruncate subroutine: scan directory for removable segments. + * SimpleLruTruncate subroutine: scan directory for removable segments. * Actually remove them iff doDeletions is true. Return TRUE iff any * removable segments were found. Note: no locking is needed. + * + * This can be called directly from clog.c, for reasons explained there. */ -static bool +bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions) { bool found = false; @@ -983,6 +915,13 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions) int segpage; char path[MAXPGPATH]; + /* + * The cutoff point is the start of the segment containing cutoffPage. + * (This is redundant when called from SimpleLruTruncate, but not when + * called directly from clog.c.) + */ + cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; + cldir = AllocateDir(ctl->Dir); if (cldir == NULL) ereport(ERROR, @@ -1003,10 +942,9 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions) found = true; if (doDeletions) { - ereport(LOG, - (errmsg("removing file \"%s/%s\"", - ctl->Dir, clde->d_name))); snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, clde->d_name); + ereport(LOG, + (errmsg("removing file \"%s\"", path))); unlink(path); } } @@ -1027,55 +965,3 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions) return found; } - -/* - * SLRU resource manager's routines - */ -void -slru_redo(XLogRecPtr lsn, XLogRecord *record) -{ - uint8 info = record->xl_info & ~XLR_INFO_MASK; - int pageno; - - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); - - switch (info) - { - case CLOG_ZEROPAGE: - clog_zeropage_redo(pageno); - break; - case SUBTRANS_ZEROPAGE: - subtrans_zeropage_redo(pageno); - break; - default: - elog(PANIC, "slru_redo: unknown op code %u", info); - } -} - -void -slru_undo(XLogRecPtr lsn, XLogRecord *record) -{ -} - -void -slru_desc(char *buf, uint8 xl_info, char *rec) -{ - uint8 info = xl_info & ~XLR_INFO_MASK; - - if (info == CLOG_ZEROPAGE) - { - int pageno; - - memcpy(&pageno, rec, sizeof(int)); - sprintf(buf + strlen(buf), "clog zeropage: %d", pageno); - } - else if (info == SUBTRANS_ZEROPAGE) - { - int pageno; - - memcpy(&pageno, rec, sizeof(int)); - sprintf(buf + strlen(buf), "subtrans zeropage: %d", pageno); - } - else - strcat(buf, "UNKNOWN"); -} |