aboutsummaryrefslogtreecommitdiff
path: root/src/backend/replication
diff options
context:
space:
mode:
authorAmit Kapila <akapila@postgresql.org>2021-04-27 09:09:11 +0530
committerAmit Kapila <akapila@postgresql.org>2021-04-27 09:09:11 +0530
commit3fa17d37716f978f80dfcdab4e7c73f3a24e7a48 (patch)
tree43a865e413ebd2852e418535b8fbbdb4a83d6d78 /src/backend/replication
parente7eea52b2d61917fbbdac7f3f895e4ef636e935b (diff)
downloadpostgresql-3fa17d37716f978f80dfcdab4e7c73f3a24e7a48.tar.gz
postgresql-3fa17d37716f978f80dfcdab4e7c73f3a24e7a48.zip
Use HTAB for replication slot statistics.
Previously, we used to use the array of size max_replication_slots to store stats for replication slots. But that had two problems in the cases where a message for dropping a slot gets lost: 1) the stats for the new slot are not recorded if the array is full and 2) writing beyond the end of the array if the user reduces the max_replication_slots. This commit uses HTAB for replication slot statistics, resolving both problems. Now, pgstat_vacuum_stat() search for all the dead replication slots in stats hashtable and tell the collector to remove them. To avoid showing the stats for the already-dropped slots, pg_stat_replication_slots view searches slot stats by the slot name taken from pg_replication_slots. Also, we send a message for creating a slot at slot creation, initializing the stats. This reduces the possibility that the stats are accumulated into the old slot stats when a message for dropping a slot gets lost. Reported-by: Andres Freund Author: Sawada Masahiko, test case by Vignesh C Reviewed-by: Amit Kapila, Vignesh C, Dilip Kumar Discussion: https://postgr.es/m/20210319185247.ldebgpdaxsowiflw@alap3.anarazel.de
Diffstat (limited to 'src/backend/replication')
-rw-r--r--src/backend/replication/logical/logical.c2
-rw-r--r--src/backend/replication/slot.c28
2 files changed, 16 insertions, 14 deletions
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index 35b0c676412..00543ede45a 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -1773,7 +1773,7 @@ void
UpdateDecodingStats(LogicalDecodingContext *ctx)
{
ReorderBuffer *rb = ctx->reorder;
- PgStat_ReplSlotStats repSlotStat;
+ PgStat_StatReplSlotEntry repSlotStat;
/* Nothing to do if we don't have any replication stats to be sent. */
if (rb->spillBytes <= 0 && rb->streamBytes <= 0 && rb->totalBytes <= 0)
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index f61b163f78d..cf261e200e4 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -328,12 +328,7 @@ ReplicationSlotCreate(const char *name, bool db_specific,
* ReplicationSlotAllocationLock.
*/
if (SlotIsLogical(slot))
- {
- PgStat_ReplSlotStats repSlotStat;
- MemSet(&repSlotStat, 0, sizeof(PgStat_ReplSlotStats));
- namestrcpy(&repSlotStat.slotname, NameStr(slot->data.name));
- pgstat_report_replslot(&repSlotStat);
- }
+ pgstat_report_replslot_create(NameStr(slot->data.name));
/*
* Now that the slot has been marked as in_use and active, it's safe to
@@ -349,17 +344,15 @@ ReplicationSlotCreate(const char *name, bool db_specific,
* Search for the named replication slot.
*
* Return the replication slot if found, otherwise NULL.
- *
- * The caller must hold ReplicationSlotControlLock in shared mode.
*/
ReplicationSlot *
-SearchNamedReplicationSlot(const char *name)
+SearchNamedReplicationSlot(const char *name, bool need_lock)
{
int i;
- ReplicationSlot *slot = NULL;
+ ReplicationSlot *slot = NULL;
- Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock,
- LW_SHARED));
+ if (need_lock)
+ LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
for (i = 0; i < max_replication_slots; i++)
{
@@ -372,6 +365,9 @@ SearchNamedReplicationSlot(const char *name)
}
}
+ if (need_lock)
+ LWLockRelease(ReplicationSlotControlLock);
+
return slot;
}
@@ -416,7 +412,7 @@ retry:
* Search for the slot with the specified name if the slot to acquire is
* not given. If the slot is not found, we either return -1 or error out.
*/
- s = slot ? slot : SearchNamedReplicationSlot(name);
+ s = slot ? slot : SearchNamedReplicationSlot(name, false);
if (s == NULL || !s->in_use)
{
LWLockRelease(ReplicationSlotControlLock);
@@ -713,6 +709,12 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
* reduce that possibility. If the messages reached in reverse, we would
* lose one statistics update message. But the next update message will
* create the statistics for the replication slot.
+ *
+ * XXX In case, the messages for creation and drop slot of the same name
+ * get lost and create happens before (auto)vacuum cleans up the dead
+ * slot, the stats will be accumulated into the old slot. One can imagine
+ * having OIDs for each slot to avoid the accumulation of stats but that
+ * doesn't seem worth doing as in practice this won't happen frequently.
*/
if (SlotIsLogical(slot))
pgstat_report_replslot_drop(NameStr(slot->data.name));