diff options
author | Andres Freund <andres@anarazel.de> | 2022-02-25 16:58:48 -0800 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2022-02-25 17:04:39 -0800 |
commit | fe0972ee5e6f8a663c5cf3f24ef98987c503da95 (patch) | |
tree | 6e631076d93f18709b1a73e7a9eb719bbcff955d /src/backend | |
parent | 638300fef541fb9393caa1ee8821a639816301d1 (diff) | |
download | postgresql-fe0972ee5e6f8a663c5cf3f24ef98987c503da95.tar.gz postgresql-fe0972ee5e6f8a663c5cf3f24ef98987c503da95.zip |
Add further debug info to help debug 019_replslot_limit.pl failures.
See also afdeff10526. Failures after that commit provided a few more hints,
but not yet enough to understand what's going on.
In 019_replslot_limit.pl, shut down nodes in fast mode instead of immediate mode
if we observe the failure mode. That should tell us whether the failures we're
observing are just a timing issue under high load. PGCTLTIMEOUT should prevent
buildfarm animals from hanging endlessly.
Also add a bit more logging to replication slot drop and ShutdownPostgres().
Discussion: https://postgr.es/m/20220225192941.hqnvefgdzaro6gzg@alap3.anarazel.de
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/replication/slot.c | 13 | ||||
-rw-r--r-- | src/backend/storage/lmgr/lwlock.c | 7 | ||||
-rw-r--r-- | src/backend/utils/init/postinit.c | 17 |
3 files changed, 37 insertions, 0 deletions
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 3d39fddaaef..f238a392ae7 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -569,6 +569,10 @@ restart: if (!s->in_use) continue; + /* unlocked read of active_pid is ok for debugging purposes */ + elog(DEBUG3, "temporary replication slot cleanup: %d in use, active_pid: %d", + i, s->active_pid); + SpinLockAcquire(&s->mutex); if (s->active_pid == MyProcPid) { @@ -629,6 +633,9 @@ ReplicationSlotDropPtr(ReplicationSlot *slot) char path[MAXPGPATH]; char tmppath[MAXPGPATH]; + /* temp debugging aid to analyze 019_replslot_limit failures */ + elog(DEBUG3, "replication slot drop: %s: begin", NameStr(slot->data.name)); + /* * If some other backend ran this code concurrently with us, we might try * to delete a slot with a certain name while someone else was trying to @@ -679,6 +686,9 @@ ReplicationSlotDropPtr(ReplicationSlot *slot) path, tmppath))); } + elog(DEBUG3, "replication slot drop: %s: removed on-disk", + NameStr(slot->data.name)); + /* * The slot is definitely gone. Lock out concurrent scans of the array * long enough to kill it. It's OK to clear the active PID here without @@ -734,6 +744,9 @@ ReplicationSlotDropPtr(ReplicationSlot *slot) * a slot while we're still cleaning up the detritus of the old one. 
*/ LWLockRelease(ReplicationSlotAllocationLock); + + elog(DEBUG3, "replication slot drop: %s: done", + NameStr(slot->data.name)); } /* diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 7b0dea4abec..8f7f1b2f7c3 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -1945,3 +1945,10 @@ LWLockHeldByMeInMode(LWLock *l, LWLockMode mode) } return false; } + +/* temp debugging aid to analyze 019_replslot_limit failures */ +int +LWLockHeldCount(void) +{ + return num_held_lwlocks; +} diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index a29fa0b3e6a..86d193c89fc 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -1262,6 +1262,23 @@ ShutdownPostgres(int code, Datum arg) * them explicitly. */ LockReleaseAll(USER_LOCKMETHOD, true); + + /* + * temp debugging aid to analyze 019_replslot_limit failures + * + * If an error were thrown outside of a transaction nothing up to now + * would have released lwlocks. We probably will add an + * LWLockReleaseAll(). But for now make it easier to understand such cases + * by warning if any lwlocks are held. + */ +#ifdef USE_ASSERT_CHECKING + { + int held_lwlocks = LWLockHeldCount(); + if (held_lwlocks) + elog(WARNING, "holding %d lwlocks at the end of ShutdownPostgres()", + held_lwlocks); + } +#endif } |