diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2024-06-27 21:06:32 +0300 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2024-06-27 21:10:27 +0300 |
commit | b5b418b689ec5d3807b0eea34e58166191e18499 (patch) | |
tree | eb995c4aca40965c596f30212d3806e1689856f1 /src/backend | |
parent | e00a9a4c7bcae0f1b798b567bb37841ae7c6ff7d (diff) | |
download | postgresql-b5b418b689ec5d3807b0eea34e58166191e18499.tar.gz postgresql-b5b418b689ec5d3807b0eea34e58166191e18499.zip |
Fix MVCC bug with prepared xact with subxacts on standby
We did not recover the subtransaction IDs of prepared transactions
when starting a hot standby from a shutdown checkpoint. As a result,
such subtransactions were considered as aborted, rather than
in-progress. That would lead to hint bits being set incorrectly, and
the subtransactions suddenly becoming visible to old snapshots when
the prepared transaction was committed.
To fix, update pg_subtrans with prepared transactions's subxids when
starting hot standby from a shutdown checkpoint. The snapshots taken
from that state need to be marked as "suboverflowed", so that we also
check the pg_subtrans.
Backport to all supported versions.
Discussion: https://www.postgresql.org/message-id/6b852e98-2d49-4ca1-9e95-db419a2696e0@iki.fi
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/access/transam/twophase.c | 7 | ||||
-rw-r--r-- | src/backend/access/transam/xlog.c | 14 | ||||
-rw-r--r-- | src/backend/storage/ipc/procarray.c | 18 | ||||
-rw-r--r-- | src/backend/storage/ipc/standby.c | 6 |
4 files changed, 29 insertions, 16 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index c6af8cfd7e2..f6166816f58 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -2011,9 +2011,8 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) * This is never called at the end of recovery - we use * RecoverPreparedTransactions() at that point. * - * The lack of calls to SubTransSetParent() calls here is by design; - * those calls are made by RecoverPreparedTransactions() at the end of recovery - * for those xacts that need this. + * This updates pg_subtrans, so that any subtransactions will be correctly + * seen as in-progress in snapshots taken during recovery. */ void StandbyRecoverPreparedTransactions(void) @@ -2033,7 +2032,7 @@ StandbyRecoverPreparedTransactions(void) buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, - gxact->ondisk, false, false); + gxact->ondisk, true, false); if (buf != NULL) pfree(buf); } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 7aaba6203ae..a19ba7167fd 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5420,6 +5420,9 @@ StartupXLOG(void) RunningTransactionsData running; TransactionId latestCompletedXid; + /* Update pg_subtrans entries for any prepared transactions */ + StandbyRecoverPreparedTransactions(); + /* * Construct a RunningTransactions snapshot representing a * shut down server, with only prepared transactions still @@ -5428,7 +5431,7 @@ StartupXLOG(void) */ running.xcnt = nxids; running.subxcnt = 0; - running.subxid_overflow = false; + running.subxid_status = SUBXIDS_IN_SUBTRANS; running.nextXid = XidFromFullTransactionId(checkPoint.nextXid); running.oldestRunningXid = oldestActiveXID; latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid); @@ -5438,8 +5441,6 @@ StartupXLOG(void) running.xids = xids; ProcArrayApplyRecoveryInfo(&running); - - StandbyRecoverPreparedTransactions(); } } @@ -7808,6 +7809,9 @@ xlog_redo(XLogReaderState *record) oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids); + /* Update pg_subtrans entries for any prepared transactions */ + StandbyRecoverPreparedTransactions(); + /* * Construct a RunningTransactions snapshot representing a shut * down server, with only prepared transactions still alive. We're @@ -7816,7 +7820,7 @@ xlog_redo(XLogReaderState *record) */ running.xcnt = nxids; running.subxcnt = 0; - running.subxid_overflow = false; + running.subxid_status = SUBXIDS_IN_SUBTRANS; running.nextXid = XidFromFullTransactionId(checkPoint.nextXid); running.oldestRunningXid = oldestActiveXID; latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid); @@ -7826,8 +7830,6 @@ xlog_redo(XLogReaderState *record) running.xids = xids; ProcArrayApplyRecoveryInfo(&running); - - StandbyRecoverPreparedTransactions(); } /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index c5188fbdcfb..316b4fa7197 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -1097,7 +1097,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) * If the snapshot isn't overflowed or if its empty we can reset our * pending state and use this snapshot instead. */ - if (!running->subxid_overflow || running->xcnt == 0) + if (running->subxid_status != SUBXIDS_MISSING || running->xcnt == 0) { /* * If we have already collected known assigned xids, we need to @@ -1249,7 +1249,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) * missing, so conservatively assume the last one is latestObservedXid. * ---------- */ - if (running->subxid_overflow) + if (running->subxid_status == SUBXIDS_MISSING) { standbyState = STANDBY_SNAPSHOT_PENDING; @@ -1261,6 +1261,18 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) standbyState = STANDBY_SNAPSHOT_READY; standbySnapshotPendingXmin = InvalidTransactionId; + + /* + * If the 'xids' array didn't include all subtransactions, we have to + * mark any snapshots taken as overflowed. + */ + if (running->subxid_status == SUBXIDS_IN_SUBTRANS) + procArray->lastOverflowedXid = latestObservedXid; + else + { + Assert(running->subxid_status == SUBXIDS_IN_ARRAY); + procArray->lastOverflowedXid = InvalidTransactionId; + } } /* @@ -2842,7 +2854,7 @@ GetRunningTransactionData(void) CurrentRunningXacts->xcnt = count - subcount; CurrentRunningXacts->subxcnt = subcount; - CurrentRunningXacts->subxid_overflow = suboverflowed; + CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY; CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextXid); CurrentRunningXacts->oldestRunningXid = oldestRunningXid; CurrentRunningXacts->latestCompletedXid = latestCompletedXid; diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 4c06741a69f..3bdc5f7fb6c 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -1186,7 +1186,7 @@ standby_redo(XLogReaderState *record) running.xcnt = xlrec->xcnt; running.subxcnt = xlrec->subxcnt; - running.subxid_overflow = xlrec->subxid_overflow; + running.subxid_status = xlrec->subxid_overflow ? SUBXIDS_MISSING : SUBXIDS_IN_ARRAY; running.nextXid = xlrec->nextXid; running.latestCompletedXid = xlrec->latestCompletedXid; running.oldestRunningXid = xlrec->oldestRunningXid; @@ -1351,7 +1351,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) xlrec.xcnt = CurrRunningXacts->xcnt; xlrec.subxcnt = CurrRunningXacts->subxcnt; - xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow; + xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY); xlrec.nextXid = CurrRunningXacts->nextXid; xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid; xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid; @@ -1368,7 +1368,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS); - if (CurrRunningXacts->subxid_overflow) + if (xlrec.subxid_overflow) elog(trace_recovery(DEBUG2), "snapshot of %d running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)", CurrRunningXacts->xcnt, |