about summary refs log tree commit diff
path: root/src/backend/storage/buffer/bufmgr.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r-- src/backend/storage/buffer/bufmgr.c 253
1 files changed, 119 insertions, 134 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 4a9ddc32432..d9447ac394d 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.172 2004/07/01 00:50:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.173 2004/07/17 03:28:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -45,8 +45,8 @@
#include "storage/bufpage.h"
#include "storage/proc.h"
#include "storage/smgr.h"
-#include "utils/memutils.h"
#include "utils/relcache.h"
+#include "utils/resowner.h"
#include "pgstat.h"
@@ -65,13 +65,9 @@ long NDirectFileRead; /* some I/O's are direct file access.
* bypass bufmgr */
long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */
-/* List of upper-level-transaction buffer refcount arrays */
-static List *upperRefCounts = NIL;
-
-static void PinBuffer(BufferDesc *buf);
-static void UnpinBuffer(BufferDesc *buf);
-static void BufferFixLeak(Buffer bufnum, int32 shouldBe, bool emitWarning);
+static void PinBuffer(BufferDesc *buf, bool fixOwner);
+static void UnpinBuffer(BufferDesc *buf, bool fixOwner);
static void WaitIO(BufferDesc *buf);
static void StartBufferIO(BufferDesc *buf, bool forInput);
static void TerminateBufferIO(BufferDesc *buf, int err_flag);
@@ -103,6 +99,7 @@ static void write_buffer(Buffer buffer, bool unpin);
Buffer
ReadBuffer(Relation reln, BlockNumber blockNum)
{
+ ResourceOwnerEnlargeBuffers(CurrentResourceOwner); /* ReadBufferInternal requires its caller to have done this */
return ReadBufferInternal(reln, blockNum, false); /* false: we do not hold the bufmgr lock */
}
@@ -111,6 +108,8 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
*
* bufferLockHeld: if true, caller already acquired the bufmgr lock.
* (This is assumed never to be true if dealing with a local buffer!)
+ *
+ * The caller must have done ResourceOwnerEnlargeBuffers(CurrentResourceOwner)
*/
static Buffer
ReadBufferInternal(Relation reln, BlockNumber blockNum,
@@ -287,7 +286,7 @@ BufferAlloc(Relation reln,
*/
*foundPtr = TRUE;
- PinBuffer(buf);
+ PinBuffer(buf, true);
if (!(buf->flags & BM_VALID))
{
@@ -337,6 +336,9 @@ BufferAlloc(Relation reln,
buf->refcount = 1;
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
+ ResourceOwnerRememberBuffer(CurrentResourceOwner,
+ BufferDescriptorGetBuffer(buf));
+
if ((buf->flags & BM_VALID) &&
(buf->flags & BM_DIRTY || buf->cntxDirty))
{
@@ -382,7 +384,7 @@ BufferAlloc(Relation reln,
* buffer we were planning to use.
*/
TerminateBufferIO(buf, 0);
- UnpinBuffer(buf);
+ UnpinBuffer(buf, true);
buf = buf2;
@@ -390,7 +392,7 @@ BufferAlloc(Relation reln,
*foundPtr = TRUE;
- PinBuffer(buf);
+ PinBuffer(buf, true);
if (!(buf->flags & BM_VALID))
{
@@ -425,7 +427,7 @@ BufferAlloc(Relation reln,
if (buf->refcount > 1 || buf->flags & BM_DIRTY || buf->cntxDirty)
{
TerminateBufferIO(buf, 0);
- UnpinBuffer(buf);
+ UnpinBuffer(buf, true);
inProgress = FALSE;
buf = NULL;
}
@@ -497,7 +499,7 @@ write_buffer(Buffer buffer, bool release)
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
if (release)
- UnpinBuffer(bufHdr);
+ UnpinBuffer(bufHdr, true);
LWLockRelease(BufMgrLock);
}
@@ -561,6 +563,8 @@ ReleaseAndReadBuffer(Buffer buffer,
if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
return buffer;
+ ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
+ /* owner now has a free slot, so no need for Enlarge() */
LocalRefCount[-buffer - 1]--;
}
else
@@ -570,16 +574,20 @@ ReleaseAndReadBuffer(Buffer buffer,
if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
return buffer;
+ ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
+ /* owner now has a free slot, so no need for Enlarge() */
if (PrivateRefCount[buffer - 1] > 1)
PrivateRefCount[buffer - 1]--;
else
{
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- UnpinBuffer(bufHdr);
+ UnpinBuffer(bufHdr, false);
return ReadBufferInternal(relation, blockNum, true);
}
}
}
+ else
+ ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
return ReadBufferInternal(relation, blockNum, false);
}
@@ -589,9 +597,12 @@ ReleaseAndReadBuffer(Buffer buffer,
*
* This should be applied only to shared buffers, never local ones.
* Bufmgr lock must be held by caller.
+ *
+ * Most but not all callers want CurrentResourceOwner to be adjusted.
+ * Note that ResourceOwnerEnlargeBuffers must have been done already.
*/
static void
-PinBuffer(BufferDesc *buf)
+PinBuffer(BufferDesc *buf, bool fixOwner)
{
int b = BufferDescriptorGetBuffer(buf) - 1;
@@ -599,6 +610,9 @@ PinBuffer(BufferDesc *buf)
buf->refcount++;
PrivateRefCount[b]++;
Assert(PrivateRefCount[b] > 0);
+ if (fixOwner)
+ ResourceOwnerRememberBuffer(CurrentResourceOwner,
+ BufferDescriptorGetBuffer(buf));
}
/*
@@ -606,12 +620,18 @@ PinBuffer(BufferDesc *buf)
*
* This should be applied only to shared buffers, never local ones.
* Bufmgr lock must be held by caller.
+ *
+ * Most but not all callers want CurrentResourceOwner to be adjusted.
*/
static void
-UnpinBuffer(BufferDesc *buf)
+UnpinBuffer(BufferDesc *buf, bool fixOwner)
{
int b = BufferDescriptorGetBuffer(buf) - 1;
+ if (fixOwner)
+ ResourceOwnerForgetBuffer(CurrentResourceOwner,
+ BufferDescriptorGetBuffer(buf));
+
Assert(buf->refcount > 0);
Assert(PrivateRefCount[b] > 0);
PrivateRefCount[b]--;
@@ -677,6 +697,9 @@ BufferSync(int percent, int maxpages)
if (maxpages > 0 && num_buffer_dirty > maxpages)
num_buffer_dirty = maxpages;
+ /* Make sure we can handle the pin inside the loop */
+ ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+
/*
* Loop over buffers to be written. Note the BufMgrLock is held at
* loop top, but is released and reacquired within FlushBuffer,
@@ -724,13 +747,13 @@ BufferSync(int percent, int maxpages)
* buffer now and set IO state for it *before* acquiring shlock to
* avoid conflicts with FlushRelationBuffers.
*/
- PinBuffer(bufHdr);
+ PinBuffer(bufHdr, true);
StartBufferIO(bufHdr, false);
FlushBuffer(bufHdr, NULL);
TerminateBufferIO(bufHdr, 0);
- UnpinBuffer(bufHdr);
+ UnpinBuffer(bufHdr, true);
}
LWLockRelease(BufMgrLock);
@@ -831,102 +854,32 @@ AtEOXact_Buffers(bool isCommit)
for (i = 0; i < NBuffers; i++)
{
if (PrivateRefCount[i] != 0)
- BufferFixLeak(i, 0, isCommit);
- }
-
- AtEOXact_LocalBuffers(isCommit);
-}
-
-/*
- * During subtransaction start, save buffer reference counts.
- */
-void
-AtSubStart_Buffers(void)
-{
- int32 *copyRefCounts;
- Size rcSize;
- MemoryContext old_cxt;
-
- /* this is probably the active context already, but be safe */
- old_cxt = MemoryContextSwitchTo(CurTransactionContext);
-
- /*
- * We need to copy the current state of PrivateRefCount[]. In the typical
- * scenario, few if any of the entries will be nonzero, and we could save
- * space by storing only the nonzero ones. However, copying the whole
- * thing is lots simpler and faster both here and in AtEOSubXact_Buffers,
- * so it seems best to waste the space.
- */
- rcSize = NBuffers * sizeof(int32);
- copyRefCounts = (int32 *) palloc(rcSize);
- memcpy(copyRefCounts, PrivateRefCount, rcSize);
-
- /* Attach to list */
- upperRefCounts = lcons(copyRefCounts, upperRefCounts);
-
- MemoryContextSwitchTo(old_cxt);
-}
-
-/*
- * AtEOSubXact_Buffers
- *
- * At subtransaction end, we restore the saved counts. If committing, we
- * complain if the refcounts don't match; if aborting, just restore silently.
- */
-void
-AtEOSubXact_Buffers(bool isCommit)
-{
- int32 *oldRefCounts;
- int i;
-
- oldRefCounts = (int32 *) linitial(upperRefCounts);
- upperRefCounts = list_delete_first(upperRefCounts);
+ {
+ BufferDesc *buf = &(BufferDescriptors[i]);
+
+ if (isCommit)
+ elog(WARNING,
+ "buffer refcount leak: [%03d] "
+ "(rel=%u/%u/%u, blockNum=%u, flags=0x%x, refcount=%u %d)",
+ i,
+ buf->tag.rnode.spcNode, buf->tag.rnode.dbNode,
+ buf->tag.rnode.relNode,
+ buf->tag.blockNum, buf->flags,
+ buf->refcount, PrivateRefCount[i]);
- for (i = 0; i < NBuffers; i++)
- {
- if (PrivateRefCount[i] != oldRefCounts[i])
- BufferFixLeak(i, oldRefCounts[i], isCommit);
+ /*
+ * We don't worry about updating the ResourceOwner structures;
+ * resowner.c will clear them for itself.
+ */
+ PrivateRefCount[i] = 1; /* make sure we release shared pin */
+ LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
+ UnpinBuffer(buf, false);
+ LWLockRelease(BufMgrLock);
+ Assert(PrivateRefCount[i] == 0);
+ }
}
- pfree(oldRefCounts);
-}
-
-/*
- * Fix a buffer refcount leak.
- *
- * The caller does not hold the BufMgrLock.
- */
-static void
-BufferFixLeak(Buffer bufnum, int32 shouldBe, bool emitWarning)
-{
- BufferDesc *buf = &(BufferDescriptors[bufnum]);
-
- if (emitWarning)
- elog(WARNING,
- "buffer refcount leak: [%03d] (rel=%u/%u/%u, blockNum=%u, flags=0x%x, refcount=%u %d, should be=%d)",
- bufnum,
- buf->tag.rnode.spcNode, buf->tag.rnode.dbNode,
- buf->tag.rnode.relNode,
- buf->tag.blockNum, buf->flags,
- buf->refcount, PrivateRefCount[bufnum], shouldBe);
-
- /* If it's less, we're in a heap o' trouble */
- if (PrivateRefCount[bufnum] <= shouldBe)
- elog(FATAL, "buffer refcount was decreased by subtransaction");
-
- if (shouldBe > 0)
- {
- /* We still keep the shared-memory pin */
- PrivateRefCount[bufnum] = shouldBe;
- }
- else
- {
- PrivateRefCount[bufnum] = 1; /* make sure we release shared pin */
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- UnpinBuffer(buf);
- LWLockRelease(BufMgrLock);
- Assert(PrivateRefCount[bufnum] == 0);
- }
+ AtEOXact_LocalBuffers(isCommit);
}
/*
@@ -1172,9 +1125,15 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
bufHdr->tag.blockNum >= firstDelBlock)
{
+ if (LocalRefCount[i] != 0)
+ elog(FATAL, "block %u of %u/%u/%u is still referenced (local %u)",
+ bufHdr->tag.blockNum,
+ bufHdr->tag.rnode.spcNode,
+ bufHdr->tag.rnode.dbNode,
+ bufHdr->tag.rnode.relNode,
+ LocalRefCount[i]);
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
- LocalRefCount[i] = 0;
bufHdr->tag.rnode.relNode = InvalidOid;
}
}
@@ -1205,28 +1164,21 @@ recheck:
*/
goto recheck;
}
- /* Now we can do what we came for */
- bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
- bufHdr->cntxDirty = false;
/*
- * Release any refcount we may have. If someone else has a
- * pin on the buffer, we got trouble.
+ * There should be no pin on the buffer.
*/
if (bufHdr->refcount != 0)
- {
- /* the sole pin should be ours */
- if (bufHdr->refcount != 1 || PrivateRefCount[i - 1] == 0)
- elog(FATAL, "block %u of %u/%u/%u is still referenced (private %d, global %u)",
- bufHdr->tag.blockNum,
- bufHdr->tag.rnode.spcNode,
- bufHdr->tag.rnode.dbNode,
- bufHdr->tag.rnode.relNode,
- PrivateRefCount[i - 1], bufHdr->refcount);
- /* Make sure it will be released */
- PrivateRefCount[i - 1] = 1;
- UnpinBuffer(bufHdr);
- }
+ elog(FATAL, "block %u of %u/%u/%u is still referenced (private %d, global %u)",
+ bufHdr->tag.blockNum,
+ bufHdr->tag.rnode.spcNode,
+ bufHdr->tag.rnode.dbNode,
+ bufHdr->tag.rnode.relNode,
+ PrivateRefCount[i - 1], bufHdr->refcount);
+
+ /* Now we can do what we came for */
+ bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
+ bufHdr->cntxDirty = false;
/*
* And mark the buffer as no longer occupied by this rel.
@@ -1353,7 +1305,7 @@ PrintPinnedBufs(void)
for (i = 0; i < NBuffers; ++i, ++buf)
{
if (PrivateRefCount[i] > 0)
- elog(WARNING,
+ elog(NOTICE,
"[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u/%u, "
"blockNum=%u, flags=0x%x, refcount=%u %d)",
i, buf->freeNext, buf->freePrev,
@@ -1456,6 +1408,9 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
return;
}
+ /* Make sure we can handle the pin inside the loop */
+ ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
for (i = 0; i < NBuffers; i++)
@@ -1466,7 +1421,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
if ((bufHdr->flags & BM_VALID) &&
(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
{
- PinBuffer(bufHdr);
+ PinBuffer(bufHdr, true);
/* Someone else might be flushing buffer */
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr);
@@ -1479,7 +1434,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
TerminateBufferIO(bufHdr, 0);
}
- UnpinBuffer(bufHdr);
+ UnpinBuffer(bufHdr, true);
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
elog(ERROR, "FlushRelationBuffers(\"%s\", %u): block %u was re-dirtied",
RelationGetRelationName(rel), firstDelBlock,
@@ -1507,6 +1462,8 @@ ReleaseBuffer(Buffer buffer)
{
BufferDesc *bufHdr;
+ ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
+
if (BufferIsLocal(buffer))
{
Assert(LocalRefCount[-buffer - 1] > 0);
@@ -1526,11 +1483,39 @@ ReleaseBuffer(Buffer buffer)
else
{
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- UnpinBuffer(bufHdr);
+ UnpinBuffer(bufHdr, false);
LWLockRelease(BufMgrLock);
}
}
+/*
+ * IncrBufferRefCount
+ * Increment the pin count on a buffer that we have *already* pinned
+ * at least once.
+ *
+ * This function cannot be used on a buffer we do not have pinned,
+ * because it doesn't change the shared buffer state. Therefore the
+ * Assert checks are for refcount > 0. Someone got this wrong once...
+ */
+void
+IncrBufferRefCount(Buffer buffer)
+{
+ ResourceOwnerEnlargeBuffers(CurrentResourceOwner); /* done before Remember, as PinBuffer's contract also demands; presumably guarantees a free slot -- confirm in resowner.c */
+ ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer); /* record the additional pin with the current owner */
+ if (BufferIsLocal(buffer))
+ {
+ Assert(buffer >= -NLocBuffer); /* local buffers are indexed as -buffer - 1 below */
+ Assert(LocalRefCount[-buffer - 1] > 0); /* must already be pinned by us */
+ LocalRefCount[-buffer - 1]++;
+ }
+ else
+ {
+ Assert(!BAD_BUFFER_ID(buffer));
+ Assert(PrivateRefCount[buffer - 1] > 0); /* must already be pinned by us */
+ PrivateRefCount[buffer - 1]++; /* only the PrivateRefCount entry changes; buf->refcount is not touched here */
+ }
+}
+
#ifdef NOT_USED
void
IncrBufferRefCount_Debug(char *file, int line, Buffer buffer)