aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
author: Robert Haas <rhaas@postgresql.org> 2022-03-24 14:32:06 -0400
committer: Robert Haas <rhaas@postgresql.org> 2022-03-24 14:52:28 -0400
commit: 412ad7a55639516f284cd0ef9757d6ae5c7abd43 (patch)
tree: a37a03184c7cea9fb8b031644a5d4117cd78c66a /src/backend/access
parent: 86459b3296803cfa4d3e53c0fc8763412c71b6d0 (diff)
download: postgresql-412ad7a55639516f284cd0ef9757d6ae5c7abd43.tar.gz
postgresql-412ad7a55639516f284cd0ef9757d6ae5c7abd43.zip
Fix possible recovery trouble if TRUNCATE overlaps a checkpoint.
If TRUNCATE causes some buffers to be invalidated and thus the checkpoint does not flush them, TRUNCATE must also ensure that the corresponding files are truncated on disk. Otherwise, a replay from the checkpoint might find that the buffers exist but have the wrong contents, which may cause replay to fail.

Report by Teja Mupparti. Patch by Kyotaro Horiguchi, per a design suggestion from Heikki Linnakangas, with some changes to the comments by me. Review of this and a prior patch that approached the issue differently by Heikki Linnakangas, Andres Freund, Álvaro Herrera, Masahiko Sawada, and Tom Lane.

Discussion: http://postgr.es/m/BYAPR06MB6373BF50B469CA393C614257ABF00@BYAPR06MB6373.namprd06.prod.outlook.com
Diffstat (limited to 'src/backend/access')
-rw-r--r-- src/backend/access/transam/multixact.c 6
-rw-r--r-- src/backend/access/transam/twophase.c 12
-rw-r--r-- src/backend/access/transam/xact.c 5
-rw-r--r-- src/backend/access/transam/xlog.c 16
-rw-r--r-- src/backend/access/transam/xloginsert.c 2
5 files changed, 28 insertions, 13 deletions
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 6a70d497380..9f65c600d02 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -3088,8 +3088,8 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
* crash/basebackup, even though the state of the data directory would
* require it.
*/
- Assert(!MyProc->delayChkpt);
- MyProc->delayChkpt = true;
+ Assert((MyProc->delayChkpt & DELAY_CHKPT_START) == 0);
+ MyProc->delayChkpt |= DELAY_CHKPT_START;
/* WAL log truncation */
WriteMTruncateXlogRec(newOldestMultiDB,
@@ -3115,7 +3115,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
/* Then offsets */
PerformOffsetsTruncation(oldestMulti, newOldestMulti);
- MyProc->delayChkpt = false;
+ MyProc->delayChkpt &= ~DELAY_CHKPT_START;
END_CRIT_SECTION();
LWLockRelease(MultiXactTruncationLock);
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 874c8ed1256..4dc8ccc12b9 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -475,7 +475,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
}
proc->xid = xid;
Assert(proc->xmin == InvalidTransactionId);
- proc->delayChkpt = false;
+ proc->delayChkpt = 0;
proc->statusFlags = 0;
proc->pid = 0;
proc->databaseId = databaseid;
@@ -1164,7 +1164,8 @@ EndPrepare(GlobalTransaction gxact)
START_CRIT_SECTION();
- MyProc->delayChkpt = true;
+ Assert((MyProc->delayChkpt & DELAY_CHKPT_START) == 0);
+ MyProc->delayChkpt |= DELAY_CHKPT_START;
XLogBeginInsert();
for (record = records.head; record != NULL; record = record->next)
@@ -1207,7 +1208,7 @@ EndPrepare(GlobalTransaction gxact)
* checkpoint starting after this will certainly see the gxact as a
* candidate for fsyncing.
*/
- MyProc->delayChkpt = false;
+ MyProc->delayChkpt &= ~DELAY_CHKPT_START;
/*
* Remember that we have this GlobalTransaction entry locked for us. If
@@ -2266,7 +2267,8 @@ RecordTransactionCommitPrepared(TransactionId xid,
START_CRIT_SECTION();
/* See notes in RecordTransactionCommit */
- MyProc->delayChkpt = true;
+ Assert((MyProc->delayChkpt & DELAY_CHKPT_START) == 0);
+ MyProc->delayChkpt |= DELAY_CHKPT_START;
/*
* Emit the XLOG commit record. Note that we mark 2PC commits as
@@ -2314,7 +2316,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
TransactionIdCommitTree(xid, nchildren, children);
/* Checkpoint can proceed now */
- MyProc->delayChkpt = false;
+ MyProc->delayChkpt &= ~DELAY_CHKPT_START;
END_CRIT_SECTION();
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 8964ddf3ebf..3596a7d7345 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1387,8 +1387,9 @@ RecordTransactionCommit(void)
* This makes checkpoint's determination of which xacts are delayChkpt
* a bit fuzzy, but it doesn't matter.
*/
+ Assert((MyProc->delayChkpt & DELAY_CHKPT_START) == 0);
START_CRIT_SECTION();
- MyProc->delayChkpt = true;
+ MyProc->delayChkpt |= DELAY_CHKPT_START;
SetCurrentTransactionStopTimestamp();
@@ -1489,7 +1490,7 @@ RecordTransactionCommit(void)
*/
if (markXidCommitted)
{
- MyProc->delayChkpt = false;
+ MyProc->delayChkpt &= ~DELAY_CHKPT_START;
END_CRIT_SECTION();
}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 4ac3871c74f..17a56152f17 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6517,18 +6517,30 @@ CreateCheckPoint(int flags)
* and we will correctly flush the update below. So we cannot miss any
* xacts we need to wait for.
*/
- vxids = GetVirtualXIDsDelayingChkpt(&nvxids);
+ vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
if (nvxids > 0)
{
do
{
pg_usleep(10000L); /* wait for 10 msec */
- } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids));
+ } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
+ DELAY_CHKPT_START));
}
pfree(vxids);
CheckPointGuts(checkPoint.redo, flags);
+ vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
+ if (nvxids > 0)
+ {
+ do
+ {
+ pg_usleep(10000L); /* wait for 10 msec */
+ } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
+ DELAY_CHKPT_COMPLETE));
+ }
+ pfree(vxids);
+
/*
* Take a snapshot of running transactions and write this to WAL. This
* allows us to reconstruct the state of running transactions during
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index f4eb54b63c4..462e23503e2 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -1011,7 +1011,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
/*
* Ensure no checkpoint can change our view of RedoRecPtr.
*/
- Assert(MyProc->delayChkpt);
+ Assert((MyProc->delayChkpt & DELAY_CHKPT_START) != 0);
/*
* Update RedoRecPtr so that we can make the right decision