aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorMichael Paquier <michael@paquier.xyz>2023-07-18 13:44:34 +0900
committerMichael Paquier <michael@paquier.xyz>2023-07-18 13:44:34 +0900
commitd2ee542a2bbec269115f7b1b4eb7ea59f879dc58 (patch)
treee66f560c0259e174b30cc4e02f5ee2b5e008a0e5 /src/backend
parent7d27493b74c5893fb50b82266d019964493318d9 (diff)
downloadpostgresql-d2ee542a2bbec269115f7b1b4eb7ea59f879dc58.tar.gz
postgresql-d2ee542a2bbec269115f7b1b4eb7ea59f879dc58.zip
Fix recovery of 2PC transaction during crash recovery
A crash in the middle of a checkpoint with some two-phase state data already flushed to disk by this checkpoint could cause a follow-up crash recovery to recover the same transaction twice: once from what has been found in pg_twophase/ at the beginning of recovery, and a second time when replaying its corresponding record. This would lead to FATAL failures in the startup process during recovery, where the same transaction would have a state recovered twice instead of once:

LOG:  recovering prepared transaction 731 from shared memory
LOG:  recovering prepared transaction 731 from shared memory
FATAL:  lock ExclusiveLock on object 731/0/0 is already held

This issue is fixed by skipping the addition of any 2PC state coming from a record whose equivalent 2PC state file has already been loaded in TwoPhaseState at the beginning of recovery by restoreTwoPhaseData(), which is OK as long as the system has not reached a consistent state. The timing required to get a messed-up recovery processing is very racy, and this would be very unlikely to happen. The thread that has reported the issue has demonstrated the bug using injection points to force a PANIC in the middle of a checkpoint.

Issue introduced in 728bd99, so backpatch all the way down.

Reported-by: "suyu.cmj" <mengjuan.cmj@alibaba-inc.com>
Author: "suyu.cmj" <mengjuan.cmj@alibaba-inc.com>
Author: Michael Paquier
Discussion: https://postgr.es/m/109e6994-b971-48cb-84f6-829646f18b4c.mengjuan.cmj@alibaba-inc.com
Backpatch-through: 11
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/transam/twophase.c33
1 file changed, 33 insertions, 0 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 50dbc50e1b6..e3707f7d5e3 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -2494,6 +2494,39 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
* that it got added in the redo phase
*/
+ /*
+ * In the event of a crash while a checkpoint was running, it may be
+ * possible that some two-phase data found its way to disk while its
+ * corresponding record needs to be replayed in the follow-up recovery.
+ * As the 2PC data was on disk, it has already been restored at the
+ * beginning of recovery with restoreTwoPhaseData(), so skip this record
+ * to avoid duplicates in TwoPhaseState. If a consistent state has been
+ * reached, the record is added to TwoPhaseState and it should have no
+ * corresponding file in pg_twophase.
+ */
+ if (!XLogRecPtrIsInvalid(start_lsn))
+ {
+ char path[MAXPGPATH];
+
+ TwoPhaseFilePath(path, hdr->xid);
+
+ if (access(path, F_OK) == 0)
+ {
+ ereport(reachedConsistency ? ERROR : WARNING,
+ (errmsg("could not recover two-phase state file for transaction %u",
+ hdr->xid),
+ errdetail("Two-phase state file has been found in WAL record %X/%X, but this transaction has already been restored from disk.",
+ (uint32) (start_lsn >> 32),
+ (uint32) start_lsn)));
+ return;
+ }
+
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not access file \"%s\": %m", path)));
+ }
+
/* Get a free gxact from the freelist */
if (TwoPhaseState->freeGXacts == NULL)
ereport(ERROR,