aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2017-03-23 14:08:23 -0400
committerRobert Haas <rhaas@postgresql.org>2017-03-23 14:26:31 -0400
commitea42cc18c35381f639d45628d792e790ff39e271 (patch)
tree223916f28189cb8fb5b40793ff563928795f62ca /src/backend/access
parent50c956add83963d7bbb367dd0b879fccddebd623 (diff)
downloadpostgresql-ea42cc18c35381f639d45628d792e790ff39e271.tar.gz
postgresql-ea42cc18c35381f639d45628d792e790ff39e271.zip
Track the oldest XID that can be safely looked up in CLOG.
This provides infrastructure for looking up arbitrary, user-supplied XIDs without a risk of scary-looking failures from within the clog module. Normally, the oldest XID that can be safely looked up in CLOG is the same as the oldest XID that can be reused without causing wraparound, and the latter is already tracked. However, while truncation is in progress, the values are different, so we must keep track of them separately. Craig Ringer, reviewed by Simon Riggs and by me. Discussion: http://postgr.es/m/CAMsr+YHQiWNEi0daCTboS40T+V5s_+dst3PYv_8v2wNVH+Xx4g@mail.gmail.com
Diffstat (limited to 'src/backend/access')
-rw-r--r--src/backend/access/rmgrdesc/clogdesc.c12
-rw-r--r--src/backend/access/transam/clog.c46
-rw-r--r--src/backend/access/transam/transam.c4
-rw-r--r--src/backend/access/transam/varsup.c23
-rw-r--r--src/backend/access/transam/xlog.c11
5 files changed, 81 insertions, 15 deletions
diff --git a/src/backend/access/rmgrdesc/clogdesc.c b/src/backend/access/rmgrdesc/clogdesc.c
index 352de48dbe4..ef268c5ab30 100644
--- a/src/backend/access/rmgrdesc/clogdesc.c
+++ b/src/backend/access/rmgrdesc/clogdesc.c
@@ -23,12 +23,20 @@ clog_desc(StringInfo buf, XLogReaderState *record)
char *rec = XLogRecGetData(record);
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
- if (info == CLOG_ZEROPAGE || info == CLOG_TRUNCATE)
+ if (info == CLOG_ZEROPAGE)
{
int pageno;
memcpy(&pageno, rec, sizeof(int));
- appendStringInfo(buf, "%d", pageno);
+ appendStringInfo(buf, "page %d", pageno);
+ }
+ else if (info == CLOG_TRUNCATE)
+ {
+ xl_clog_truncate xlrec;
+
+ memcpy(&xlrec, rec, sizeof(xl_clog_truncate));
+ appendStringInfo(buf, "page %d; oldestXact %u",
+ xlrec.pageno, xlrec.oldestXact);
}
}
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 5b1d13dac1b..2d335109303 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -83,7 +83,8 @@ static SlruCtlData ClogCtlData;
static int ZeroCLOGPage(int pageno, bool writeXlog);
static bool CLOGPagePrecedes(int page1, int page2);
static void WriteZeroPageXlogRec(int pageno);
-static void WriteTruncateXlogRec(int pageno);
+static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact,
+ Oid oldestXidDb);
static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
XLogRecPtr lsn, int pageno);
@@ -640,7 +641,7 @@ ExtendCLOG(TransactionId newestXact)
* the XLOG flush unless we have confirmed that there is a removable segment.
*/
void
-TruncateCLOG(TransactionId oldestXact)
+TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
{
int cutoffPage;
@@ -654,8 +655,26 @@ TruncateCLOG(TransactionId oldestXact)
if (!SlruScanDirectory(ClogCtl, SlruScanDirCbReportPresence, &cutoffPage))
return; /* nothing to remove */
- /* Write XLOG record and flush XLOG to disk */
- WriteTruncateXlogRec(cutoffPage);
+ /*
+ * Advance oldestClogXid before truncating clog, so concurrent xact status
+ * lookups can ensure they don't attempt to access truncated-away clog.
+ *
+ * It's only necessary to do this if we will actually truncate away clog
+ * pages.
+ */
+ AdvanceOldestClogXid(oldestXact);
+
+ /* vac_truncate_clog already advanced oldestXid */
+ Assert(TransactionIdPrecedesOrEquals(oldestXact,
+ ShmemVariableCache->oldestXid));
+
+ /*
+ * Write XLOG record and flush XLOG to disk. We record the oldest xid we're
+ * keeping information about here so we can ensure that it's always ahead
+ * of clog truncation in case we crash, and so a standby finds out the new
+ * valid xid before the next checkpoint.
+ */
+ WriteTruncateXlogRec(cutoffPage, oldestXact, oldestxid_datoid);
/* Now we can remove the old CLOG segment(s) */
SimpleLruTruncate(ClogCtl, cutoffPage);
@@ -704,12 +723,17 @@ WriteZeroPageXlogRec(int pageno)
* in TruncateCLOG().
*/
static void
-WriteTruncateXlogRec(int pageno)
+WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXactDb)
{
XLogRecPtr recptr;
+ xl_clog_truncate xlrec;
+
+ xlrec.pageno = pageno;
+ xlrec.oldestXact = oldestXact;
+ xlrec.oldestXactDb = oldestXactDb;
XLogBeginInsert();
- XLogRegisterData((char *) (&pageno), sizeof(int));
+ XLogRegisterData((char *) (&xlrec), sizeof(xl_clog_truncate));
recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE);
XLogFlush(recptr);
}
@@ -742,17 +766,19 @@ clog_redo(XLogReaderState *record)
}
else if (info == CLOG_TRUNCATE)
{
- int pageno;
+ xl_clog_truncate xlrec;
- memcpy(&pageno, XLogRecGetData(record), sizeof(int));
+ memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_clog_truncate));
/*
* During XLOG replay, latest_page_number isn't set up yet; insert a
* suitable value to bypass the sanity test in SimpleLruTruncate.
*/
- ClogCtl->shared->latest_page_number = pageno;
+ ClogCtl->shared->latest_page_number = xlrec.pageno;
+
+ AdvanceOldestClogXid(xlrec.oldestXact);
- SimpleLruTruncate(ClogCtl, pageno);
+ SimpleLruTruncate(ClogCtl, xlrec.pageno);
}
else
elog(PANIC, "clog_redo: unknown op code %u", info);
diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c
index b91a259e806..562b53be9aa 100644
--- a/src/backend/access/transam/transam.c
+++ b/src/backend/access/transam/transam.c
@@ -119,7 +119,7 @@ TransactionLogFetch(TransactionId transactionId)
* True iff transaction associated with the identifier did commit.
*
* Note:
- * Assumes transaction identifier is valid.
+ * Assumes transaction identifier is valid and exists in clog.
*/
bool /* true if given transaction committed */
TransactionIdDidCommit(TransactionId transactionId)
@@ -175,7 +175,7 @@ TransactionIdDidCommit(TransactionId transactionId)
* True iff transaction associated with the identifier did abort.
*
* Note:
- * Assumes transaction identifier is valid.
+ * Assumes transaction identifier is valid and exists in clog.
*/
bool /* true if given transaction aborted */
TransactionIdDidAbort(TransactionId transactionId)
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index 42fc351f7bf..5efbfbd3d61 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -259,7 +259,28 @@ ReadNewTransactionId(void)
}
/*
- * Determine the last safe XID to allocate given the currently oldest
+ * Advance the cluster-wide value for the oldest valid clog entry.
+ *
+ * We must acquire CLogTruncationLock to advance the oldestClogXid. It's not
+ * necessary to hold the lock during the actual clog truncation, only when we
+ * advance the limit, as code looking up arbitrary xids is required to hold
+ * CLogTruncationLock from when it tests oldestClogXid through to when it
+ * completes the clog lookup.
+ */
+void
+AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
+{
+ LWLockAcquire(CLogTruncationLock, LW_EXCLUSIVE);
+ if (TransactionIdPrecedes(ShmemVariableCache->oldestClogXid,
+ oldest_datfrozenxid))
+ {
+ ShmemVariableCache->oldestClogXid = oldest_datfrozenxid;
+ }
+ LWLockRelease(CLogTruncationLock);
+}
+
+/*
+ * Determine the last safe XID to allocate using the currently oldest
* datfrozenxid (ie, the oldest XID that might exist in any database
* of our cluster), and the OID of the (or a) database with that value.
*/
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index de1937e013d..b99ded5df67 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5016,6 +5016,7 @@ BootStrapXLOG(void)
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
+ AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
SetCommitTsLimit(InvalidTransactionId, InvalidTransactionId);
@@ -6622,6 +6623,7 @@ StartupXLOG(void)
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
+ AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
SetCommitTsLimit(checkPoint.oldestCommitTsXid,
@@ -8687,6 +8689,11 @@ CreateCheckPoint(int flags)
/*
* Get the other info we need for the checkpoint record.
+ *
+ * We don't need to save oldestClogXid in the checkpoint, it only matters
+ * for the short period in which clog is being truncated, and if we crash
+ * during that we'll redo the clog truncation and fix up oldestClogXid
+ * there.
*/
LWLockAcquire(XidGenLock, LW_SHARED);
checkPoint.nextXid = ShmemVariableCache->nextXid;
@@ -9616,6 +9623,10 @@ xlog_redo(XLogReaderState *record)
MultiXactAdvanceOldest(checkPoint.oldestMulti,
checkPoint.oldestMultiDB);
+ /*
+ * No need to set oldestClogXid here as well; it'll be set when we
+ * redo an xl_clog_truncate if it changed since initialization.
+ */
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
/*