aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2018-01-09 12:09:30 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2018-01-09 12:09:30 -0500
commit8a906204aec44de6d8a1514082870f25085d9431 (patch)
treead6bb3d927755ff7541830de42b3baa0fe764564
parent13db3b936359eebf02a768db3a1959af880b6cc6 (diff)
downloadpostgresql-8a906204aec44de6d8a1514082870f25085d9431.tar.gz
postgresql-8a906204aec44de6d8a1514082870f25085d9431.zip
Fix race condition during replication origin drop.
replorigin_drop() misunderstood the API for condition variables: it had ConditionVariablePrepareToSleep and ConditionVariableCancelSleep inside its test-and-sleep loop, rather than outside the loop as intended. The net effect is a narrow race-condition window wherein, if the process using a replication slot releases it immediately after replorigin_drop() releases the ReplicationOriginLock, replorigin_drop() would get into the condition variable's wait list too late and then wait indefinitely for a signal that won't come. Because there's a different CV for each replication slot, we can't just move the ConditionVariablePrepareToSleep call to above the test-and-sleep loop. What we can do, in the wake of commit 13db3b936, is drop the ConditionVariablePrepareToSleep call entirely. This fix depends on that commit because (at least in principle) the slot matching the target replication origin might move around, so that once in a blue moon successive loop iterations might involve different CVs. We can now cope with such a scenario, at the cost of an extra trip through the retry loop. (There are ways we could fix this bug without depending on that commit, but they're all a lot more complicated than this way.) While at it, upgrade the rather skimpy comments in this function. Back-patch to v10 where this code came in. Discussion: https://postgr.es/m/19947.1515455433@sss.pgh.pa.us
-rw-r--r--src/backend/replication/logical/origin.c29
1 files changed, 23 insertions, 6 deletions
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 9c30baf544d..9a20042a3c0 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -339,20 +339,26 @@ replorigin_drop(RepOriginId roident, bool nowait)
Assert(IsTransactionState());
+ /*
+ * To interlock against concurrent drops, we hold ExclusiveLock on
+ * pg_replication_origin throughout this funcion.
+ */
rel = heap_open(ReplicationOriginRelationId, ExclusiveLock);
+ /*
+ * First, clean up the slot state info, if there is any matching slot.
+ */
restart:
tuple = NULL;
- /* cleanup the slot state info */
LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
for (i = 0; i < max_replication_slots; i++)
{
ReplicationState *state = &replication_states[i];
- /* found our slot */
if (state->roident == roident)
{
+ /* found our slot, is it busy? */
if (state->acquired_by != 0)
{
ConditionVariable *cv;
@@ -363,16 +369,23 @@ restart:
errmsg("could not drop replication origin with OID %d, in use by PID %d",
state->roident,
state->acquired_by)));
+
+ /*
+ * We must wait and then retry. Since we don't know which CV
+ * to wait on until here, we can't readily use
+ * ConditionVariablePrepareToSleep (calling it here would be
+ * wrong, since we could miss the signal if we did so); just
+ * use ConditionVariableSleep directly.
+ */
cv = &state->origin_cv;
LWLockRelease(ReplicationOriginLock);
- ConditionVariablePrepareToSleep(cv);
+
ConditionVariableSleep(cv, WAIT_EVENT_REPLICATION_ORIGIN_DROP);
- ConditionVariableCancelSleep();
goto restart;
}
- /* first WAL log */
+ /* first make a WAL log entry */
{
xl_replorigin_drop xlrec;
@@ -382,7 +395,7 @@ restart:
XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP);
}
- /* then reset the in-memory entry */
+ /* then clear the in-memory slot */
state->roident = InvalidRepOriginId;
state->remote_lsn = InvalidXLogRecPtr;
state->local_lsn = InvalidXLogRecPtr;
@@ -390,7 +403,11 @@ restart:
}
}
LWLockRelease(ReplicationOriginLock);
+ ConditionVariableCancelSleep();
+ /*
+ * Now, we can delete the catalog entry.
+ */
tuple = SearchSysCache1(REPLORIGIDENT, ObjectIdGetDatum(roident));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for replication origin with oid %u",