aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2005-05-07 21:32:24 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2005-05-07 21:32:24 +0000
commit30f540be4381d8cd4e45f8393264e1c214d0ddd7 (patch)
tree411bb8bb7c0361842f2b305135bb0b44cbeb2ed2 /src/backend/access
parentb72e5fa17b7aef18c744060ede3b1afdf4021dbf (diff)
downloadpostgresql-30f540be4381d8cd4e45f8393264e1c214d0ddd7.tar.gz
postgresql-30f540be4381d8cd4e45f8393264e1c214d0ddd7.zip
Repair very-low-probability race condition between relation extension
and VACUUM: in the interval between adding a new page to the relation and formatting it, it was possible for VACUUM to come along and decide it should format the page too. Though not harmful in itself, this would cause data loss if a third transaction were able to insert tuples into the vacuumed page before the original extender got control back.
Diffstat (limited to 'src/backend/access')
-rw-r--r--src/backend/access/heap/hio.c24
-rw-r--r--src/backend/access/nbtree/nbtpage.c15
-rw-r--r--src/backend/access/nbtree/nbtree.c26
3 files changed, 49 insertions, 16 deletions
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index db0bda6699b..583bb209336 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.55 2005/04/29 22:28:23 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.56 2005/05/07 21:32:23 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -242,13 +242,6 @@ RelationGetBufferForTuple(Relation relation, Size len,
buffer = ReadBuffer(relation, P_NEW);
/*
- * Release the file-extension lock; it's now OK for someone else to
- * extend the relation some more.
- */
- if (needLock)
- UnlockRelationForExtension(relation, ExclusiveLock);
-
- /*
* We can be certain that locking the otherBuffer first is OK, since
* it must have a lower page number.
*/
@@ -256,9 +249,22 @@ RelationGetBufferForTuple(Relation relation, Size len,
LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
/*
- * We need to initialize the empty new page.
+ * Now acquire lock on the new page.
*/
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /*
+ * Release the file-extension lock; it's now OK for someone else to
+ * extend the relation some more. Note that we cannot release this
+ * lock before we have buffer lock on the new page, or we risk a
+ * race condition against vacuumlazy.c --- see comments therein.
+ */
+ if (needLock)
+ UnlockRelationForExtension(relation, ExclusiveLock);
+
+ /*
+ * We need to initialize the empty new page.
+ */
pageHeader = (Page) BufferGetPage(buffer);
Assert(PageIsNew((PageHeader) pageHeader));
PageInit(pageHeader, BufferGetPageSize(buffer), 0);
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 222ed764357..ea023253189 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.83 2005/04/29 22:28:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.84 2005/05/07 21:32:23 tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -491,18 +491,21 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
buf = ReadBuffer(rel, P_NEW);
+ /* Acquire buffer lock on new page */
+ LockBuffer(buf, BT_WRITE);
+
/*
- * Release the file-extension lock; it's now OK for someone else
- * to extend the relation some more.
+ * Release the file-extension lock; it's now OK for someone else to
+ * extend the relation some more. Note that we cannot release this
+ * lock before we have buffer lock on the new page, or we risk a
+ * race condition against btvacuumcleanup --- see comments therein.
*/
if (needLock)
UnlockRelationForExtension(rel, ExclusiveLock);
- /* Acquire appropriate buffer lock on new page */
- LockBuffer(buf, access);
-
/* Initialize the new page before returning it */
page = BufferGetPage(buf);
+ Assert(PageIsNew((PageHeader) page));
_bt_pageinit(page, BufferGetPageSize(buf));
}
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 010af6dd5cd..b57ab37a5b7 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.128 2005/05/06 17:24:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.129 2005/05/07 21:32:23 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -739,11 +739,35 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
BlockNumber pages_deleted = 0;
MemoryContext mycontext;
MemoryContext oldcontext;
+ bool needLock;
Assert(stats != NULL);
+ /*
+ * First find out the number of pages in the index. We must acquire
+ * the relation-extension lock while doing this to avoid a race
+ * condition: if someone else is extending the relation, there is
+ * a window where bufmgr/smgr have created a new all-zero page but
+ * it hasn't yet been write-locked by _bt_getbuf(). If we manage to
+ * scan such a page here, we'll improperly assume it can be recycled.
+ * Taking the lock synchronizes things enough to prevent a problem:
+ * either num_pages won't include the new page, or _bt_getbuf already
+ * has write lock on the buffer and it will be fully initialized before
+ * we can examine it. (See also vacuumlazy.c, which has the same issue.)
+ *
+ * We can skip locking for new or temp relations,
+ * however, since no one else could be accessing them.
+ */
+ needLock = !RELATION_IS_LOCAL(rel);
+
+ if (needLock)
+ LockRelationForExtension(rel, ExclusiveLock);
+
num_pages = RelationGetNumberOfBlocks(rel);
+ if (needLock)
+ UnlockRelationForExtension(rel, ExclusiveLock);
+
/* No point in remembering more than MaxFSMPages pages */
maxFreePages = MaxFSMPages;
if ((BlockNumber) maxFreePages > num_pages)