about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2004-08-11 04:08:02 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2004-08-11 04:08:02 +0000
commit d508e0ddd23a35b9cb30ec1a769de40e98d5561f (patch)
tree 1880529858bde1f01601074dd3767dc6879cb562
parent a1dd58e50990b5a1740371b16a1bd1ccb0721b3e (diff)
download postgresql-d508e0ddd23a35b9cb30ec1a769de40e98d5561f.tar.gz
postgresql-d508e0ddd23a35b9cb30ec1a769de40e98d5561f.zip
Fix failure to guarantee that a checkpoint will write out pg_clog updates
for transaction commits that occurred just before the checkpoint. This is an EXTREMELY serious bug --- kudos to Satoshi Okada for creating a reproducible test case to prove its existence.
-rw-r--r-- src/backend/access/transam/xact.c 36
-rw-r--r-- src/backend/access/transam/xlog.c 18
-rw-r--r-- src/include/storage/lwlock.h 3
3 files changed, 47 insertions, 10 deletions
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index e6767d4c4b1..3409de03a56 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.156 2003/10/16 16:50:41 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.156.2.1 2004/08/11 04:08:00 tgl Exp $
*
* NOTES
* Transaction aborts can now occur two ways:
@@ -503,6 +503,7 @@ RecordTransactionCommit(void)
if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate)
{
TransactionId xid = GetCurrentTransactionId();
+ bool madeTCentries;
XLogRecPtr recptr;
/* Tell bufmgr and smgr to prepare for commit */
@@ -511,12 +512,29 @@ RecordTransactionCommit(void)
START_CRIT_SECTION();
/*
- * We only need to log the commit in xlog if the transaction made
- * any transaction-controlled XLOG entries. (Otherwise, its XID
- * appears nowhere in permanent storage, so no one else will ever
- * care if it committed.)
+ * If our transaction made any transaction-controlled XLOG entries,
+ * we need to lock out checkpoint start between writing our XLOG
+ * record and updating pg_clog. Otherwise it is possible for the
+ * checkpoint to set REDO after the XLOG record but fail to flush the
+ * pg_clog update to disk, leading to loss of the transaction commit
+ * if we crash a little later. Slightly klugy fix for problem
+ * discovered 2004-08-10.
+ *
+ * (If it made no transaction-controlled XLOG entries, its XID
+ * appears nowhere in permanent storage, so no one else will ever care
+ * if it committed; so it doesn't matter if we lose the commit flag.)
+ *
+ * Note we only need a shared lock.
*/
- if (MyLastRecPtr.xrecoff != 0)
+ madeTCentries = (MyLastRecPtr.xrecoff != 0);
+ if (madeTCentries)
+ LWLockAcquire(CheckpointStartLock, LW_SHARED);
+
+ /*
+ * We only need to log the commit in XLOG if the transaction made
+ * any transaction-controlled XLOG entries.
+ */
+ if (madeTCentries)
{
/* Need to emit a commit record */
XLogRecData rdata;
@@ -585,6 +603,10 @@ RecordTransactionCommit(void)
if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate)
TransactionIdCommit(xid);
+ /* Unlock checkpoint lock if we acquired it */
+ if (madeTCentries)
+ LWLockRelease(CheckpointStartLock);
+
END_CRIT_SECTION();
}
@@ -698,6 +720,8 @@ RecordTransactionAbort(void)
* care if it committed.) We do not flush XLOG to disk in any
* case, since the default assumption after a crash would be that
* we aborted, anyway.
+ * For the same reason, we don't need to worry about interlocking
+ * against checkpoint start.
*/
if (MyLastRecPtr.xrecoff != 0)
{
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8eb154f7bab..a8acf758a8c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.125.2.1 2004/02/23 23:03:43 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.125.2.2 2004/08/11 04:08:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -3159,6 +3159,15 @@ CreateCheckPoint(bool shutdown, bool force)
checkPoint.ThisStartUpID = ThisStartUpID;
checkPoint.time = time(NULL);
+ /*
+ * We must hold CheckpointStartLock while determining the checkpoint
+ * REDO pointer. This ensures that any concurrent transaction commits
+ * will be either not yet logged, or logged and recorded in pg_clog.
+ * See notes in RecordTransactionCommit().
+ */
+ LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE);
+
+ /* And we need WALInsertLock too */
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
/*
@@ -3191,6 +3200,7 @@ CreateCheckPoint(bool shutdown, bool force)
ControlFile->checkPointCopy.redo.xrecoff)
{
LWLockRelease(WALInsertLock);
+ LWLockRelease(CheckpointStartLock);
LWLockRelease(CheckpointLock);
END_CRIT_SECTION();
return;
@@ -3258,11 +3268,13 @@ CreateCheckPoint(bool shutdown, bool force)
#endif
/*
- * Now we can release insert lock, allowing other xacts to proceed
- * even while we are flushing disk buffers.
+ * Now we can release insert lock and checkpoint start lock, allowing
+ * other xacts to proceed even while we are flushing disk buffers.
*/
LWLockRelease(WALInsertLock);
+ LWLockRelease(CheckpointStartLock);
+
/*
* Get the other info we need for the checkpoint record.
*/
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 7b2a4c92154..6f61eb20fdf 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: lwlock.h,v 1.8 2003/08/04 02:40:14 momjian Exp $
+ * $Id: lwlock.h,v 1.8.4.1 2004/08/11 04:08:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -37,6 +37,7 @@ typedef enum LWLockId
WALWriteLock,
ControlFileLock,
CheckpointLock,
+ CheckpointStartLock,
RelCacheInitLock,
NumFixedLWLocks, /* must be last except for