diff options
author | Amit Kapila <akapila@postgresql.org> | 2021-04-27 09:09:11 +0530 |
---|---|---|
committer | Amit Kapila <akapila@postgresql.org> | 2021-04-27 09:09:11 +0530 |
commit | 3fa17d37716f978f80dfcdab4e7c73f3a24e7a48 (patch) | |
tree | 43a865e413ebd2852e418535b8fbbdb4a83d6d78 /src/backend/replication | |
parent | e7eea52b2d61917fbbdac7f3f895e4ef636e935b (diff) | |
download | postgresql-3fa17d37716f978f80dfcdab4e7c73f3a24e7a48.tar.gz postgresql-3fa17d37716f978f80dfcdab4e7c73f3a24e7a48.zip |
Use HTAB for replication slot statistics.
Previously, we used to use the array of size max_replication_slots to
store stats for replication slots. But that had two problems in the cases
where a message for dropping a slot gets lost: 1) the stats for the new
slot are not recorded if the array is full and 2) writing beyond the end
of the array if the user reduces the max_replication_slots.
This commit uses HTAB for replication slot statistics, resolving both
problems. Now, pgstat_vacuum_stat() search for all the dead replication
slots in stats hashtable and tell the collector to remove them. To avoid
showing the stats for the already-dropped slots, pg_stat_replication_slots
view searches slot stats by the slot name taken from pg_replication_slots.
Also, we send a message for creating a slot at slot creation, initializing
the stats. This reduces the possibility that the stats are accumulated
into the old slot stats when a message for dropping a slot gets lost.
Reported-by: Andres Freund
Author: Sawada Masahiko, test case by Vignesh C
Reviewed-by: Amit Kapila, Vignesh C, Dilip Kumar
Discussion: https://postgr.es/m/20210319185247.ldebgpdaxsowiflw@alap3.anarazel.de
Diffstat (limited to 'src/backend/replication')
-rw-r--r-- | src/backend/replication/logical/logical.c | 2 | ||||
-rw-r--r-- | src/backend/replication/slot.c | 28 |
2 files changed, 16 insertions, 14 deletions
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c index 35b0c676412..00543ede45a 100644 --- a/src/backend/replication/logical/logical.c +++ b/src/backend/replication/logical/logical.c @@ -1773,7 +1773,7 @@ void UpdateDecodingStats(LogicalDecodingContext *ctx) { ReorderBuffer *rb = ctx->reorder; - PgStat_ReplSlotStats repSlotStat; + PgStat_StatReplSlotEntry repSlotStat; /* Nothing to do if we don't have any replication stats to be sent. */ if (rb->spillBytes <= 0 && rb->streamBytes <= 0 && rb->totalBytes <= 0) diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index f61b163f78d..cf261e200e4 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -328,12 +328,7 @@ ReplicationSlotCreate(const char *name, bool db_specific, * ReplicationSlotAllocationLock. */ if (SlotIsLogical(slot)) - { - PgStat_ReplSlotStats repSlotStat; - MemSet(&repSlotStat, 0, sizeof(PgStat_ReplSlotStats)); - namestrcpy(&repSlotStat.slotname, NameStr(slot->data.name)); - pgstat_report_replslot(&repSlotStat); - } + pgstat_report_replslot_create(NameStr(slot->data.name)); /* * Now that the slot has been marked as in_use and active, it's safe to @@ -349,17 +344,15 @@ ReplicationSlotCreate(const char *name, bool db_specific, * Search for the named replication slot. * * Return the replication slot if found, otherwise NULL. - * - * The caller must hold ReplicationSlotControlLock in shared mode. */ ReplicationSlot * -SearchNamedReplicationSlot(const char *name) +SearchNamedReplicationSlot(const char *name, bool need_lock) { int i; - ReplicationSlot *slot = NULL; + ReplicationSlot *slot = NULL; - Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock, - LW_SHARED)); + if (need_lock) + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); for (i = 0; i < max_replication_slots; i++) { @@ -372,6 +365,9 @@ SearchNamedReplicationSlot(const char *name) } } + if (need_lock) + LWLockRelease(ReplicationSlotControlLock); + return slot; } @@ -416,7 +412,7 @@ retry: * Search for the slot with the specified name if the slot to acquire is * not given. If the slot is not found, we either return -1 or error out. */ - s = slot ? slot : SearchNamedReplicationSlot(name); + s = slot ? slot : SearchNamedReplicationSlot(name, false); if (s == NULL || !s->in_use) { LWLockRelease(ReplicationSlotControlLock); @@ -713,6 +709,12 @@ ReplicationSlotDropPtr(ReplicationSlot *slot) * reduce that possibility. If the messages reached in reverse, we would * lose one statistics update message. But the next update message will * create the statistics for the replication slot. + * + * XXX In case, the messages for creation and drop slot of the same name + * get lost and create happens before (auto)vacuum cleans up the dead + * slot, the stats will be accumulated into the old slot. One can imagine + * having OIDs for each slot to avoid the accumulation of stats but that + * doesn't seem worth doing as in practice this won't happen frequently. */ if (SlotIsLogical(slot)) pgstat_report_replslot_drop(NameStr(slot->data.name)); |