aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAmit Kapila <akapila@postgresql.org>2025-02-27 09:47:04 +0530
committerAmit Kapila <akapila@postgresql.org>2025-02-27 09:47:04 +0530
commit8709dccc793da0c0c6619cafa182c8e67a871154 (patch)
treef982be6fa381f4bf4609f467b43daa167619fd25
parent845511a72ad01838c9e1766e031d2862cd021801 (diff)
downloadpostgresql-8709dccc793da0c0c6619cafa182c8e67a871154.tar.gz
postgresql-8709dccc793da0c0c6619cafa182c8e67a871154.zip
Fix the race condition in ReplicationSlotAcquire().
After commit f41d8468dd, a process could acquire and use a replication slot that had just been invalidated, leading to failures while accessing WAL. To ensure that we don't accidentally start using invalid slots, we must perform the invalidation check after acquiring the slot or under the spinlock where we associate the slot with a particular process. We choose the earlier method to keep the code simple. Reported-by: Hou Zhijie <houzj.fnst@fujitsu.com> Author: Nisha Moond <nisha.moond412@gmail.com> Reviewed-by: Hou Zhijie <houzj.fnst@fujitsu.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Discussion: https://postgr.es/m/CABdArM7J-LbGoMPGUPiFiLOyB_TZ5+YaZb=HMES0mQqzVTn8Gg@mail.gmail.com
-rw-r--r--src/backend/replication/slot.c32
1 files changed, 16 insertions, 16 deletions
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index d089085b491..719e531eb90 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -580,19 +580,6 @@ retry:
name)));
}
- /* Invalid slots can't be modified or used before accessing the WAL. */
- if (error_if_invalid && s->data.invalidated != RS_INVAL_NONE)
- {
- LWLockRelease(ReplicationSlotControlLock);
-
- ereport(ERROR,
- errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("can no longer access replication slot \"%s\"",
- NameStr(s->data.name)),
- errdetail("This replication slot has been invalidated due to \"%s\".",
- GetSlotInvalidationCauseName(s->data.invalidated)));
- }
-
/*
* This is the slot we want; check if it's active under some other
* process. In single user mode, we don't need this check.
@@ -650,13 +637,26 @@ retry:
else if (!nowait)
ConditionVariableCancelSleep(); /* no sleep needed after all */
- /* Let everybody know we've modified this slot */
- ConditionVariableBroadcast(&s->active_cv);
-
/* We made this slot active, so it's ours now. */
MyReplicationSlot = s;
/*
+ * We need to check for invalidation after making the slot ours to avoid
+ * the possible race condition with the checkpointer that can otherwise
+ * invalidate the slot immediately after the check.
+ */
+ if (error_if_invalid && s->data.invalidated != RS_INVAL_NONE)
+ ereport(ERROR,
+ errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("can no longer access replication slot \"%s\"",
+ NameStr(s->data.name)),
+ errdetail("This replication slot has been invalidated due to \"%s\".",
+ GetSlotInvalidationCauseName(s->data.invalidated)));
+
+ /* Let everybody know we've modified this slot */
+ ConditionVariableBroadcast(&s->active_cv);
+
+ /*
* The call to pgstat_acquire_replslot() protects against stats for a
* different slot, from before a restart or such, being present during
* pgstat_report_replslot().