aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/transam/twophase.c24
-rw-r--r--src/backend/access/transam/xact.c48
-rw-r--r--src/backend/access/transam/xlog.c53
-rw-r--r--src/backend/storage/ipc/procarray.c94
-rw-r--r--src/backend/storage/lmgr/proc.c4
-rw-r--r--src/include/storage/lwlock.h3
-rw-r--r--src/include/storage/proc.h4
-rw-r--r--src/include/storage/procarray.h5
8 files changed, 182 insertions, 53 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 55f6b88bcd0..7d6680485b0 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.28 2007/02/13 19:39:42 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.29 2007/04/03 16:34:35 tgl Exp $
*
* NOTES
* Each global transaction is associated with a global transaction
@@ -279,6 +279,7 @@ MarkAsPreparing(TransactionId xid, const char *gid,
gxact->proc.pid = 0;
gxact->proc.databaseId = databaseid;
gxact->proc.roleId = owner;
+ gxact->proc.inCommit = false;
gxact->proc.inVacuum = false;
gxact->proc.isAutovacuum = false;
gxact->proc.lwWaiting = false;
@@ -934,18 +935,18 @@ EndPrepare(GlobalTransaction gxact)
* odds of a PANIC actually occurring should be very tiny given that we
* were able to write the bogus CRC above.
*
- * We have to lock out checkpoint start here, too; otherwise a checkpoint
+ * We have to set inCommit here, too; otherwise a checkpoint
* starting immediately after the WAL record is inserted could complete
* without fsync'ing our state file. (This is essentially the same kind
* of race condition as the COMMIT-to-clog-write case that
- * RecordTransactionCommit uses CheckpointStartLock for; see notes there.)
+ * RecordTransactionCommit uses inCommit for; see notes there.)
*
* We save the PREPARE record's location in the gxact for later use by
* CheckPointTwoPhase.
*/
START_CRIT_SECTION();
- LWLockAcquire(CheckpointStartLock, LW_SHARED);
+ MyProc->inCommit = true;
gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE,
records.head);
@@ -982,10 +983,11 @@ EndPrepare(GlobalTransaction gxact)
MarkAsPrepared(gxact);
/*
- * Now we can release the checkpoint start lock: a checkpoint starting
- * after this will certainly see the gxact as a candidate for fsyncing.
+ * Now we can mark ourselves as out of the commit critical section:
+ * a checkpoint starting after this will certainly see the gxact as a
+ * candidate for fsyncing.
*/
- LWLockRelease(CheckpointStartLock);
+ MyProc->inCommit = false;
END_CRIT_SECTION();
@@ -1649,7 +1651,7 @@ RecoverPreparedTransactions(void)
* RecordTransactionCommitPrepared
*
* This is basically the same as RecordTransactionCommit: in particular,
- * we must take the CheckpointStartLock to avoid a race condition.
+ * we must set the inCommit flag to avoid a race condition.
*
* We know the transaction made at least one XLOG entry (its PREPARE),
* so it is never possible to optimize out the commit record.
@@ -1669,7 +1671,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
START_CRIT_SECTION();
/* See notes in RecordTransactionCommit */
- LWLockAcquire(CheckpointStartLock, LW_SHARED);
+ MyProc->inCommit = true;
/* Emit the XLOG commit record */
xlrec.xid = xid;
@@ -1713,8 +1715,8 @@ RecordTransactionCommitPrepared(TransactionId xid,
/* to avoid race conditions, the parent must commit first */
TransactionIdCommitTree(nchildren, children);
- /* Checkpoint is allowed again */
- LWLockRelease(CheckpointStartLock);
+ /* Checkpoint can proceed now */
+ MyProc->inCommit = false;
END_CRIT_SECTION();
}
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index f8058a8f5e9..d47dae0c412 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.238 2007/03/22 19:55:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.239 2007/04/03 16:34:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -717,34 +717,36 @@ RecordTransactionCommit(void)
START_CRIT_SECTION();
/*
- * If our transaction made any transaction-controlled XLOG entries, we
- * need to lock out checkpoint start between writing our XLOG record
- * and updating pg_clog. Otherwise it is possible for the checkpoint
- * to set REDO after the XLOG record but fail to flush the pg_clog
- * update to disk, leading to loss of the transaction commit if we
- * crash a little later. Slightly klugy fix for problem discovered
- * 2004-08-10.
- *
- * (If it made no transaction-controlled XLOG entries, its XID appears
- * nowhere in permanent storage, so no one else will ever care if it
- * committed; so it doesn't matter if we lose the commit flag.)
- *
- * Note we only need a shared lock.
- */
- madeTCentries = (MyLastRecPtr.xrecoff != 0);
- if (madeTCentries)
- LWLockAcquire(CheckpointStartLock, LW_SHARED);
-
- /*
* We only need to log the commit in XLOG if the transaction made any
* transaction-controlled XLOG entries or will delete files.
*/
+ madeTCentries = (MyLastRecPtr.xrecoff != 0);
if (madeTCentries || nrels > 0)
{
XLogRecData rdata[3];
int lastrdata = 0;
xl_xact_commit xlrec;
+ /*
+ * Mark ourselves as within our "commit critical section". This
+ * forces any concurrent checkpoint to wait until we've updated
+ * pg_clog. Without this, it is possible for the checkpoint to
+ * set REDO after the XLOG record but fail to flush the pg_clog
+ * update to disk, leading to loss of the transaction commit if
+ * the system crashes a little later.
+ *
+ * Note: we could, but don't bother to, set this flag in
+ * RecordTransactionAbort. That's because loss of a transaction
+ * abort is noncritical; the presumption would be that it aborted,
+ * anyway.
+ *
+ * It's safe to change the inCommit flag of our own backend
+ * without holding the ProcArrayLock, since we're the only one
+ * modifying it. This makes checkpoint's determination of which
+ * xacts are inCommit a bit fuzzy, but it doesn't matter.
+ */
+ MyProc->inCommit = true;
+
xlrec.xtime = time(NULL);
xlrec.nrels = nrels;
xlrec.nsubxacts = nchildren;
@@ -825,9 +827,8 @@ RecordTransactionCommit(void)
TransactionIdCommitTree(nchildren, children);
}
- /* Unlock checkpoint lock if we acquired it */
- if (madeTCentries)
- LWLockRelease(CheckpointStartLock);
+ /* Checkpoint can proceed now */
+ MyProc->inCommit = false;
END_CRIT_SECTION();
}
@@ -1961,6 +1962,7 @@ AbortTransaction(void)
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
MyProc->inVacuum = false; /* must be cleared with xid/xmin */
+ MyProc->inCommit = false; /* be sure this gets cleared */
/* Clear the subtransaction-XID cache too while holding the lock */
MyProc->subxids.nxids = 0;
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f3e02dd6316..f70a3d5dd0e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.266 2007/04/03 04:14:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.267 2007/04/03 16:34:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -5366,6 +5366,8 @@ CreateCheckPoint(bool shutdown, bool force)
int nsegsadded = 0;
int nsegsremoved = 0;
int nsegsrecycled = 0;
+ TransactionId *inCommitXids;
+ int nInCommit;
/*
* Acquire CheckpointLock to ensure only one checkpoint happens at a time.
@@ -5392,14 +5394,9 @@ CreateCheckPoint(bool shutdown, bool force)
checkPoint.time = time(NULL);
/*
- * We must hold CheckpointStartLock while determining the checkpoint REDO
- * pointer. This ensures that any concurrent transaction commits will be
- * either not yet logged, or logged and recorded in pg_clog. See notes in
- * RecordTransactionCommit().
+ * We must hold WALInsertLock while examining insert state to determine
+ * the checkpoint REDO pointer.
*/
- LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE);
-
- /* And we need WALInsertLock too */
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
/*
@@ -5431,7 +5428,6 @@ CreateCheckPoint(bool shutdown, bool force)
ControlFile->checkPointCopy.redo.xrecoff)
{
LWLockRelease(WALInsertLock);
- LWLockRelease(CheckpointStartLock);
LWLockRelease(CheckpointLock);
END_CRIT_SECTION();
return;
@@ -5476,18 +5472,49 @@ CreateCheckPoint(bool shutdown, bool force)
}
/*
- * Now we can release insert lock and checkpoint start lock, allowing
- * other xacts to proceed even while we are flushing disk buffers.
+ * Now we can release WAL insert lock, allowing other xacts to proceed
+ * while we are flushing disk buffers.
*/
LWLockRelease(WALInsertLock);
- LWLockRelease(CheckpointStartLock);
-
if (!shutdown)
ereport(DEBUG2,
(errmsg("checkpoint starting")));
/*
+ * Before flushing data, we must wait for any transactions that are
+ * currently in their commit critical sections. If an xact inserted its
+ * commit record into XLOG just before the REDO point, then a crash
+ * restart from the REDO point would not replay that record, which means
+ * that our flushing had better include the xact's update of pg_clog. So
+ * we wait till he's out of his commit critical section before proceeding.
+ * See notes in RecordTransactionCommit().
+ *
+ * Because we've already released WALInsertLock, this test is a bit fuzzy:
+ * it is possible that we will wait for xacts we didn't really need to
+ * wait for. But the delay should be short and it seems better to make
+ * checkpoint take a bit longer than to hold locks longer than necessary.
+ * (In fact, the whole reason we have this issue is that xact.c does
+ * commit record XLOG insertion and clog update as two separate steps
+ * protected by different locks, but again that seems best on grounds
+ * of minimizing lock contention.)
+ *
+ * A transaction that has not yet set inCommit when we look cannot be
+ * at risk, since he's not inserted his commit record yet; and one that's
+ * already cleared it is not at risk either, since he's done fixing clog
+ * and we will correctly flush the update below. So we cannot miss any
+ * xacts we need to wait for.
+ */
+ nInCommit = GetTransactionsInCommit(&inCommitXids);
+ if (nInCommit > 0)
+ {
+ do {
+ pg_usleep(10000L); /* wait for 10 msec */
+ } while (HaveTransactionsInCommit(inCommitXids, nInCommit));
+ }
+ pfree(inCommitXids);
+
+ /*
* Get the other info we need for the checkpoint record.
*/
LWLockAcquire(XidGenLock, LW_SHARED);
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 12e61c89263..22d031f9eb5 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -23,7 +23,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.23 2007/03/25 19:45:14 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.24 2007/04/03 16:34:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -740,6 +740,98 @@ restart:
}
/*
+ * GetTransactionsInCommit -- Get the XIDs of transactions that are committing
+ *
+ * Constructs an array of XIDs of transactions that are currently in commit
+ * critical sections, as shown by having inCommit set in their PGPROC entries.
+ *
+ * *xids_p is set to a palloc'd array that should be freed by the caller.
+ * The return value is the number of valid entries.
+ *
+ * Note that because backends set or clear inCommit without holding any lock,
+ * the result is somewhat indeterminate, but we don't really care. Even in
+ * a multiprocessor with delayed writes to shared memory, it should be certain
+ * that setting of inCommit will propagate to shared memory when the backend
+ * takes the WALInsertLock, so we cannot fail to see an xact as inCommit if
+ * it's already inserted its commit record. Whether it takes a little while
+ * for clearing of inCommit to propagate is unimportant for correctness.
+ */
+int
+GetTransactionsInCommit(TransactionId **xids_p)
+{
+ ProcArrayStruct *arrayP = procArray;
+ TransactionId *xids;
+ int nxids;
+ int index;
+
+ xids = (TransactionId *) palloc(arrayP->maxProcs * sizeof(TransactionId));
+ nxids = 0;
+
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+ for (index = 0; index < arrayP->numProcs; index++)
+ {
+ PGPROC *proc = arrayP->procs[index];
+ /* Fetch xid just once - see GetNewTransactionId */
+ TransactionId pxid = proc->xid;
+
+ if (proc->inCommit && TransactionIdIsValid(pxid))
+ xids[nxids++] = pxid;
+ }
+
+ LWLockRelease(ProcArrayLock);
+
+ *xids_p = xids;
+ return nxids;
+}
+
+/*
+ * HaveTransactionsInCommit -- Are any of the specified XIDs in commit?
+ *
+ * This is used with the results of GetTransactionsInCommit to see if any
+ * of the specified XIDs are still in their commit critical sections.
+ *
+ * Note: this is O(N^2) in the number of xacts that are/were in commit, but
+ * those numbers should be small enough for it not to be a problem.
+ */
+bool
+HaveTransactionsInCommit(TransactionId *xids, int nxids)
+{
+ bool result = false;
+ ProcArrayStruct *arrayP = procArray;
+ int index;
+
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+ for (index = 0; index < arrayP->numProcs; index++)
+ {
+ PGPROC *proc = arrayP->procs[index];
+ /* Fetch xid just once - see GetNewTransactionId */
+ TransactionId pxid = proc->xid;
+
+ if (proc->inCommit && TransactionIdIsValid(pxid))
+ {
+ int i;
+
+ for (i = 0; i < nxids; i++)
+ {
+ if (xids[i] == pxid)
+ {
+ result = true;
+ break;
+ }
+ }
+ if (result)
+ break;
+ }
+ }
+
+ LWLockRelease(ProcArrayLock);
+
+ return result;
+}
+
+/*
* BackendPidGetProc -- get a backend's PGPROC given its PID
*
* Returns NULL if not found. Note that it is up to the caller to be
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 87a310aabe7..2691a505826 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.186 2007/03/07 13:35:03 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.187 2007/04/03 16:34:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -259,6 +259,7 @@ InitProcess(void)
/* databaseId and roleId will be filled in later */
MyProc->databaseId = InvalidOid;
MyProc->roleId = InvalidOid;
+ MyProc->inCommit = false;
MyProc->inVacuum = false;
MyProc->isAutovacuum = IsAutoVacuumWorkerProcess();
MyProc->lwWaiting = false;
@@ -392,6 +393,7 @@ InitAuxiliaryProcess(void)
MyProc->xmin = InvalidTransactionId;
MyProc->databaseId = InvalidOid;
MyProc->roleId = InvalidOid;
+ MyProc->inCommit = false;
MyProc->inVacuum = false;
MyProc->isAutovacuum = IsAutoVacuumLauncherProcess(); /* is this needed? */
MyProc->lwWaiting = false;
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index b4503f92135..c47256a1599 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.34 2007/02/15 23:23:23 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.35 2007/04/03 16:34:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -49,7 +49,6 @@ typedef enum LWLockId
WALWriteLock,
ControlFileLock,
CheckpointLock,
- CheckpointStartLock,
CLogControlLock,
SubtransControlLock,
MultiXactGenLock,
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 2b20eda828d..772cf52cdf9 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.96 2007/03/07 13:35:03 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.97 2007/04/03 16:34:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -74,6 +74,8 @@ struct PGPROC
Oid databaseId; /* OID of database this backend is using */
Oid roleId; /* OID of role using this backend */
+ bool inCommit; /* true if within commit critical section */
+
bool inVacuum; /* true if current xact is a LAZY VACUUM */
bool isAutovacuum; /* true if it's autovacuum */
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index 93e82e43680..cd2df663809 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.12 2007/01/16 13:28:57 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.13 2007/04/03 16:34:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -26,6 +26,9 @@ extern bool TransactionIdIsInProgress(TransactionId xid);
extern bool TransactionIdIsActive(TransactionId xid);
extern TransactionId GetOldestXmin(bool allDbs, bool ignoreVacuum);
+extern int GetTransactionsInCommit(TransactionId **xids_p);
+extern bool HaveTransactionsInCommit(TransactionId *xids, int nxids);
+
extern PGPROC *BackendPidGetProc(int pid);
extern int BackendXidGetPid(TransactionId xid);
extern bool IsBackendPid(int pid);