diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2004-08-11 04:07:16 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2004-08-11 04:07:16 +0000 |
commit | 3fdf649f4fc8a21ba4cec1db7f3fe7bb1105b00c (patch) | |
tree | 4e121f5f8cbe96cce5c5c14b5a1f1af24848a9ea /src | |
parent | bc8a1fc282005f6da9ff7650eadc65b160077e43 (diff) | |
download | postgresql-3fdf649f4fc8a21ba4cec1db7f3fe7bb1105b00c.tar.gz postgresql-3fdf649f4fc8a21ba4cec1db7f3fe7bb1105b00c.zip |
Fix failure to guarantee that a checkpoint will write out pg_clog updates
for transaction commits that occurred just before the checkpoint. This is
an EXTREMELY serious bug --- kudos to Satoshi Okada for creating a
reproducible test case to prove its existence.
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/access/transam/xact.c | 29 | ||||
-rw-r--r-- | src/backend/access/transam/xlog.c | 21 | ||||
-rw-r--r-- | src/include/storage/lwlock.h | 3 |
3 files changed, 45 insertions, 8 deletions
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 4794c761c3a..594a2fcca1a 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.177 2004/08/03 15:57:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.178 2004/08/11 04:07:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -574,13 +574,28 @@ RecordTransactionCommit(void) START_CRIT_SECTION(); /* - * We only need to log the commit in XLOG if the transaction made - * any transaction-controlled XLOG entries or will delete files. + * If our transaction made any transaction-controlled XLOG entries, + * we need to lock out checkpoint start between writing our XLOG + * record and updating pg_clog. Otherwise it is possible for the + * checkpoint to set REDO after the XLOG record but fail to flush the + * pg_clog update to disk, leading to loss of the transaction commit + * if we crash a little later. Slightly klugy fix for problem + * discovered 2004-08-10. + * * (If it made no transaction-controlled XLOG entries, its XID * appears nowhere in permanent storage, so no one else will ever care - * if it committed.) + * if it committed; so it doesn't matter if we lose the commit flag.) + * + * Note we only need a shared lock. */ madeTCentries = (MyLastRecPtr.xrecoff != 0); + if (madeTCentries) + LWLockAcquire(CheckpointStartLock, LW_SHARED); + + /* + * We only need to log the commit in XLOG if the transaction made + * any transaction-controlled XLOG entries or will delete files. + */ if (madeTCentries || nrels > 0) { XLogRecData rdata[3]; @@ -668,6 +683,10 @@ RecordTransactionCommit(void) TransactionIdCommitTree(nchildren, children); } + /* Unlock checkpoint lock if we acquired it */ + if (madeTCentries) + LWLockRelease(CheckpointStartLock); + END_CRIT_SECTION(); } @@ -850,6 +869,8 @@ RecordTransactionAbort(void) * * We do not flush XLOG to disk unless deleting files, since the * default assumption after a crash would be that we aborted, anyway. + * For the same reason, we don't need to worry about interlocking + * against checkpoint start. */ if (MyLastRecPtr.xrecoff != 0 || nrels > 0) { diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 28fb4c733a6..32ade5d7590 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.158 2004/08/09 16:26:01 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.159 2004/08/11 04:07:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -4699,6 +4699,15 @@ CreateCheckPoint(bool shutdown, bool force) checkPoint.ThisTimeLineID = ThisTimeLineID; checkPoint.time = time(NULL); + /* + * We must hold CheckpointStartLock while determining the checkpoint + * REDO pointer. This ensures that any concurrent transaction commits + * will be either not yet logged, or logged and recorded in pg_clog. + * See notes in RecordTransactionCommit(). + */ + LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE); + + /* And we need WALInsertLock too */ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); /* @@ -4731,6 +4740,7 @@ CreateCheckPoint(bool shutdown, bool force) ControlFile->checkPointCopy.redo.xrecoff) { LWLockRelease(WALInsertLock); + LWLockRelease(CheckpointStartLock); LWLockRelease(CheckpointLock); END_CRIT_SECTION(); return; @@ -4789,6 +4799,9 @@ CreateCheckPoint(bool shutdown, bool force) * GetSnapshotData needs to get XidGenLock while holding SInvalLock, * so there's a risk of deadlock. Need to find a better solution. See * pgsql-hackers discussion of 17-Dec-01. + * + * XXX actually, the whole UNDO code is dead code and unlikely to ever + * be revived, so the lack of a good solution here is not troubling. */ #ifdef NOT_USED checkPoint.undo = GetUndoRecPtr(); @@ -4798,11 +4811,13 @@ CreateCheckPoint(bool shutdown, bool force) #endif /* - * Now we can release insert lock, allowing other xacts to proceed - * even while we are flushing disk buffers. + * Now we can release insert lock and checkpoint start lock, allowing + * other xacts to proceed even while we are flushing disk buffers. */ LWLockRelease(WALInsertLock); + LWLockRelease(CheckpointStartLock); + /* * Get the other info we need for the checkpoint record. */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index b1f6fc95103..7b08231e510 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.12 2004/06/11 16:43:24 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.13 2004/08/11 04:07:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,6 +36,7 @@ typedef enum LWLockId WALWriteLock, ControlFileLock, CheckpointLock, + CheckpointStartLock, RelCacheInitLock, BgWriterCommLock, |