about | summary | refs | log | tree | commit | diff
path: root/src/backend/access/transam/xlogutils.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/xlogutils.c')
-rw-r--r-- src/backend/access/transam/xlogutils.c 304
1 files changed, 85 insertions, 219 deletions
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index a73bdb52a46..19758700e7b 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.54 2008/06/08 22:00:47 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.55 2008/06/12 09:12:30 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -190,6 +190,9 @@ XLogCheckInvalidPages(void)
if (foundone)
elog(PANIC, "WAL contains references to invalid pages");
+
+ hash_destroy(invalid_page_tab);
+ invalid_page_tab = NULL;
}
@@ -218,27 +221,40 @@ XLogCheckInvalidPages(void)
* at the end of WAL replay.)
*/
Buffer
-XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
+XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
{
- BlockNumber lastblock = RelationGetNumberOfBlocks(reln);
+ BlockNumber lastblock;
Buffer buffer;
+ SMgrRelation smgr;
Assert(blkno != P_NEW);
+ /* Open the relation at smgr level */
+ smgr = smgropen(rnode);
+
+ /*
+ * Create the target file if it doesn't already exist. This lets us cope
+ * if the replay sequence contains writes to a relation that is later
+ * deleted. (The original coding of this routine would instead suppress
+ * the writes, but that seems like it risks losing valuable data if the
+ * filesystem loses an inode during a crash. Better to write the data
+ * until we are actually told to delete the file.)
+ */
+ smgrcreate(smgr, false, true);
+
+ lastblock = smgrnblocks(smgr);
+
if (blkno < lastblock)
{
/* page exists in file */
- if (init)
- buffer = ReadOrZeroBuffer(reln, blkno);
- else
- buffer = ReadBuffer(reln, blkno);
+ buffer = ReadBufferWithoutRelcache(rnode, false, blkno, init);
}
else
{
/* hm, page doesn't exist in file */
if (!init)
{
- log_invalid_page(reln->rd_node, blkno, false);
+ log_invalid_page(rnode, blkno, false);
return InvalidBuffer;
}
/* OK to extend the file */
@@ -249,7 +265,7 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
{
if (buffer != InvalidBuffer)
ReleaseBuffer(buffer);
- buffer = ReadBuffer(reln, P_NEW);
+ buffer = ReadBufferWithoutRelcache(rnode, false, P_NEW, false);
lastblock++;
}
Assert(BufferGetBlockNumber(buffer) == blkno);
@@ -265,7 +281,7 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
if (PageIsNew((PageHeader) page))
{
UnlockReleaseBuffer(buffer);
- log_invalid_page(reln->rd_node, blkno, true);
+ log_invalid_page(rnode, blkno, true);
return InvalidBuffer;
}
}
@@ -275,226 +291,81 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
/*
- * Lightweight "Relation" cache --- this substitutes for the normal relcache
- * during XLOG replay.
+ * Struct actually returned by CreateFakeRelcacheEntry, though the declared
+ * return type is Relation.
*/
-
-typedef struct XLogRelDesc
-{
- RelationData reldata;
- struct XLogRelDesc *lessRecently;
- struct XLogRelDesc *moreRecently;
-} XLogRelDesc;
-
-typedef struct XLogRelCacheEntry
+typedef struct
{
- RelFileNode rnode;
- XLogRelDesc *rdesc;
-} XLogRelCacheEntry;
+ RelationData reldata; /* Note: this must be first */
+ FormData_pg_class pgc;
+} FakeRelCacheEntryData;
-static HTAB *_xlrelcache;
-static XLogRelDesc *_xlrelarr = NULL;
-static Form_pg_class _xlpgcarr = NULL;
-static int _xlast = 0;
-static int _xlcnt = 0;
+typedef FakeRelCacheEntryData *FakeRelCacheEntry;
-#define _XLOG_RELCACHESIZE 512
-
-static void
-_xl_init_rel_cache(void)
-{
- HASHCTL ctl;
-
- _xlcnt = _XLOG_RELCACHESIZE;
- _xlast = 0;
- _xlrelarr = (XLogRelDesc *) malloc(sizeof(XLogRelDesc) * _xlcnt);
- memset(_xlrelarr, 0, sizeof(XLogRelDesc) * _xlcnt);
- _xlpgcarr = (Form_pg_class) malloc(sizeof(FormData_pg_class) * _xlcnt);
- memset(_xlpgcarr, 0, sizeof(FormData_pg_class) * _xlcnt);
-
- _xlrelarr[0].moreRecently = &(_xlrelarr[0]);
- _xlrelarr[0].lessRecently = &(_xlrelarr[0]);
-
- memset(&ctl, 0, sizeof(ctl));
- ctl.keysize = sizeof(RelFileNode);
- ctl.entrysize = sizeof(XLogRelCacheEntry);
- ctl.hash = tag_hash;
-
- _xlrelcache = hash_create("XLOG relcache", _XLOG_RELCACHESIZE,
- &ctl, HASH_ELEM | HASH_FUNCTION);
-}
-
-static void
-_xl_remove_hash_entry(XLogRelDesc *rdesc)
-{
- Form_pg_class tpgc = rdesc->reldata.rd_rel;
- XLogRelCacheEntry *hentry;
-
- rdesc->lessRecently->moreRecently = rdesc->moreRecently;
- rdesc->moreRecently->lessRecently = rdesc->lessRecently;
-
- hentry = (XLogRelCacheEntry *) hash_search(_xlrelcache,
- (void *) &(rdesc->reldata.rd_node), HASH_REMOVE, NULL);
- if (hentry == NULL)
- elog(PANIC, "_xl_remove_hash_entry: file was not found in cache");
-
- RelationCloseSmgr(&(rdesc->reldata));
-
- memset(rdesc, 0, sizeof(XLogRelDesc));
- memset(tpgc, 0, sizeof(FormData_pg_class));
- rdesc->reldata.rd_rel = tpgc;
-}
-
-static XLogRelDesc *
-_xl_new_reldesc(void)
-{
- XLogRelDesc *res;
-
- _xlast++;
- if (_xlast < _xlcnt)
- {
- _xlrelarr[_xlast].reldata.rd_rel = &(_xlpgcarr[_xlast]);
- return &(_xlrelarr[_xlast]);
- }
-
- /* reuse */
- res = _xlrelarr[0].moreRecently;
-
- _xl_remove_hash_entry(res);
-
- _xlast--;
- return res;
-}
-
-
-void
-XLogInitRelationCache(void)
-{
- _xl_init_rel_cache();
- invalid_page_tab = NULL;
-}
-
-void
-XLogCloseRelationCache(void)
+/*
+ * Create a fake relation cache entry for a physical relation
+ *
+ * It's often convenient to use the same functions in XLOG replay as in the
+ * main codepath, but those functions typically work with a relcache entry.
+ * We don't have a working relation cache during XLOG replay, but this
+ * function can be used to create a fake relcache entry instead. Only the
+ * fields related to physical storage, like rd_rel, are initialized, so the
+ * fake entry is only usable in low-level operations like ReadBuffer().
+ *
+ * Caller must free the returned entry with FreeFakeRelcacheEntry().
+ */
+Relation
+CreateFakeRelcacheEntry(RelFileNode rnode)
{
- HASH_SEQ_STATUS status;
- XLogRelCacheEntry *hentry;
+ FakeRelCacheEntry fakeentry;
+ Relation rel;
- if (!_xlrelarr)
- return;
+ /* Allocate the Relation struct and all related space in one block. */
+ fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
+ rel = (Relation) fakeentry;
- hash_seq_init(&status, _xlrelcache);
+ rel->rd_rel = &fakeentry->pgc;
+ rel->rd_node = rnode;
- while ((hentry = (XLogRelCacheEntry *) hash_seq_search(&status)) != NULL)
- _xl_remove_hash_entry(hentry->rdesc);
+ /* We don't know the name of the relation; use relfilenode instead */
+ sprintf(RelationGetRelationName(rel), "%u", rnode.relNode);
- hash_destroy(_xlrelcache);
+ /*
+ * We set up the lockRelId in case anything tries to lock the dummy
+ * relation. Note that this is fairly bogus since relNode may be
+ * different from the relation's OID. It shouldn't really matter
+ * though, since we are presumably running by ourselves and can't have
+ * any lock conflicts ...
+ */
+ rel->rd_lockInfo.lockRelId.dbId = rnode.dbNode;
+ rel->rd_lockInfo.lockRelId.relId = rnode.relNode;
- free(_xlrelarr);
- free(_xlpgcarr);
+ rel->rd_targblock = InvalidBlockNumber;
+ rel->rd_smgr = NULL;
- _xlrelarr = NULL;
+ return rel;
}
/*
- * Open a relation during XLOG replay
- *
- * Note: this once had an API that allowed NULL return on failure, but it
- * no longer does; any failure results in elog().
+ * Free a fake relation cache entry.
*/
-Relation
-XLogOpenRelation(RelFileNode rnode)
+void
+FreeFakeRelcacheEntry(Relation fakerel)
{
- XLogRelDesc *res;
- XLogRelCacheEntry *hentry;
- bool found;
-
- hentry = (XLogRelCacheEntry *)
- hash_search(_xlrelcache, (void *) &rnode, HASH_FIND, NULL);
-
- if (hentry)
- {
- res = hentry->rdesc;
-
- res->lessRecently->moreRecently = res->moreRecently;
- res->moreRecently->lessRecently = res->lessRecently;
- }
- else
- {
- res = _xl_new_reldesc();
-
- sprintf(RelationGetRelationName(&(res->reldata)), "%u", rnode.relNode);
-
- res->reldata.rd_node = rnode;
-
- /*
- * We set up the lockRelId in case anything tries to lock the dummy
- * relation. Note that this is fairly bogus since relNode may be
- * different from the relation's OID. It shouldn't really matter
- * though, since we are presumably running by ourselves and can't have
- * any lock conflicts ...
- */
- res->reldata.rd_lockInfo.lockRelId.dbId = rnode.dbNode;
- res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;
-
- hentry = (XLogRelCacheEntry *)
- hash_search(_xlrelcache, (void *) &rnode, HASH_ENTER, &found);
-
- if (found)
- elog(PANIC, "xlog relation already present on insert into cache");
-
- hentry->rdesc = res;
-
- res->reldata.rd_targblock = InvalidBlockNumber;
- res->reldata.rd_smgr = NULL;
- RelationOpenSmgr(&(res->reldata));
-
- /*
- * Create the target file if it doesn't already exist. This lets us
- * cope if the replay sequence contains writes to a relation that is
- * later deleted. (The original coding of this routine would instead
- * return NULL, causing the writes to be suppressed. But that seems
- * like it risks losing valuable data if the filesystem loses an inode
- * during a crash. Better to write the data until we are actually
- * told to delete the file.)
- */
- smgrcreate(res->reldata.rd_smgr, res->reldata.rd_istemp, true);
- }
-
- res->moreRecently = &(_xlrelarr[0]);
- res->lessRecently = _xlrelarr[0].lessRecently;
- _xlrelarr[0].lessRecently = res;
- res->lessRecently->moreRecently = res;
-
- return &(res->reldata);
+ pfree(fakerel);
}
/*
* Drop a relation during XLOG replay
*
- * This is called when the relation is about to be deleted; we need to ensure
- * that there is no dangling smgr reference in the xlog relation cache.
- *
- * Currently, we don't bother to physically remove the relation from the
- * cache, we just let it age out normally.
- *
- * This also takes care of removing any open "invalid-page" records for
- * the relation.
+ * This is called when the relation is about to be deleted; we need to remove
+ * any open "invalid-page" records for the relation.
*/
void
XLogDropRelation(RelFileNode rnode)
{
- XLogRelCacheEntry *hentry;
-
- hentry = (XLogRelCacheEntry *)
- hash_search(_xlrelcache, (void *) &rnode, HASH_FIND, NULL);
-
- if (hentry)
- {
- XLogRelDesc *rdesc = hentry->rdesc;
-
- RelationCloseSmgr(&(rdesc->reldata));
- }
+ /* Tell smgr to forget about this relation as well */
+ smgrclosenode(rnode);
forget_invalid_pages(rnode, 0);
}
@@ -507,18 +378,14 @@ XLogDropRelation(RelFileNode rnode)
void
XLogDropDatabase(Oid dbid)
{
- HASH_SEQ_STATUS status;
- XLogRelCacheEntry *hentry;
-
- hash_seq_init(&status, _xlrelcache);
-
- while ((hentry = (XLogRelCacheEntry *) hash_seq_search(&status)) != NULL)
- {
- XLogRelDesc *rdesc = hentry->rdesc;
-
- if (hentry->rnode.dbNode == dbid)
- RelationCloseSmgr(&(rdesc->reldata));
- }
+ /*
+ * This is unnecessarily heavy-handed, as it will close SMgrRelation
+ * objects for other databases as well. DROP DATABASE occurs seldom
+ * enough that it's not worth introducing a variant of smgrclose for
+ * just this purpose. XXX: Or should we rather leave the smgr entries
+ * dangling?
+ */
+ smgrcloseall();
forget_invalid_pages_db(dbid);
}
@@ -526,8 +393,7 @@ XLogDropDatabase(Oid dbid)
/*
* Truncate a relation during XLOG replay
*
- * We don't need to do anything to the fake relcache, but we do need to
- * clean up any open "invalid-page" records for the dropped pages.
+ * We need to clean up any open "invalid-page" records for the dropped pages.
*/
void
XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks)