diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2022-09-29 13:27:48 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2022-09-29 13:27:48 -0400 |
commit | 551aa6b7b9f3b78e7b6cbab1fea70947fa7f4966 (patch) | |
tree | e1ce6e1dfede8d6f24f56053a955e16ce24ea607 /src | |
parent | d7e39d72ca1c6f188b400d7d58813ff5b5b79064 (diff) | |
download | postgresql-551aa6b7b9f3b78e7b6cbab1fea70947fa7f4966.tar.gz postgresql-551aa6b7b9f3b78e7b6cbab1fea70947fa7f4966.zip |
Improve wording of log messages triggered by max_slot_wal_keep_size.
The one about "terminating process to release replication slot" told
you nothing about why that was happening. The one about "invalidating
slot because its restart_lsn exceeds max_slot_wal_keep_size" told you
what was happening, but violated our message style guideline about
keeping the primary message short. Add DETAIL/HINT lines to carry
the appropriate detail and make the two cases more uniform.
While here, fix bogus test logic in 019_replslot_limit.pl: if it timed
out without seeing the expected log message, no test failure would be
reported. This is flat broken since commit 549ec201d removed the test
counts; even before that it was horribly bad style, since you'd only
get told that not all tests had been run.
Kyotaro Horiguchi, reviewed by Bertrand Drouvot; test fixes by me
Discussion: https://postgr.es/m/20211214.130456.2233153190058148084.horikyota.ntt@gmail.com
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/replication/slot.c | 17 | ||||
-rw-r--r-- | src/test/recovery/t/019_replslot_limit.pl | 15 |
2 files changed, 21 insertions, 11 deletions
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 0bd00311888..e9328961dd3 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1293,8 +1293,12 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN, if (last_signaled_pid != active_pid) { ereport(LOG, - (errmsg("terminating process %d to release replication slot \"%s\"", - active_pid, NameStr(slotname)))); + errmsg("terminating process %d to release replication slot \"%s\"", + active_pid, NameStr(slotname)), + errdetail("The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.", + LSN_FORMAT_ARGS(restart_lsn), + (unsigned long long) (oldestLSN - restart_lsn)), + errhint("You might need to increase max_slot_wal_keep_size.")); (void) kill(active_pid, SIGTERM); last_signaled_pid = active_pid; @@ -1331,9 +1335,12 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN, ReplicationSlotRelease(); ereport(LOG, - (errmsg("invalidating slot \"%s\" because its restart_lsn %X/%X exceeds max_slot_wal_keep_size", - NameStr(slotname), - LSN_FORMAT_ARGS(restart_lsn)))); + errmsg("invalidating obsolete replication slot \"%s\"", + NameStr(slotname)), + errdetail("The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.", + LSN_FORMAT_ARGS(restart_lsn), + (unsigned long long) (oldestLSN - restart_lsn)), + errhint("You might need to increase max_slot_wal_keep_size.")); /* done with this slot for now */ break; diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl index ce8d36b4c6c..b8341fbb66f 100644 --- a/src/test/recovery/t/019_replslot_limit.pl +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -185,8 +185,7 @@ my $invalidated = 0; for (my $i = 0; $i < 10000; $i++) { if (find_in_log( - $node_primary, - "invalidating slot \"rep1\" because its restart_lsn [0-9A-F/]+ exceeds max_slot_wal_keep_size", + $node_primary, 'invalidating obsolete replication slot "rep1"', $logstart)) { $invalidated = 1; @@ -379,6 +378,7 @@ $logstart = get_log_size($node_primary3); kill 'STOP', $senderpid, $receiverpid; advance_wal($node_primary3, 2); +my $msg_logged = 0; my $max_attempts = $PostgreSQL::Test::Utils::timeout_default; while ($max_attempts-- >= 0) { @@ -387,11 +387,12 @@ while ($max_attempts-- >= 0) "terminating process $senderpid to release replication slot \"rep3\"", $logstart)) { - ok(1, "walsender termination logged"); + $msg_logged = 1; last; } sleep 1; } +ok($msg_logged, "walsender termination logged"); # Now let the walsender continue; slot should be killed now. # (Must not let walreceiver run yet; otherwise the standby could start another @@ -402,18 +403,20 @@ $node_primary3->poll_query_until('postgres', "lost") or die "timed out waiting for slot to be lost"; +$msg_logged = 0; $max_attempts = $PostgreSQL::Test::Utils::timeout_default; while ($max_attempts-- >= 0) { if (find_in_log( - $node_primary3, - 'invalidating slot "rep3" because its restart_lsn', $logstart)) + $node_primary3, 'invalidating obsolete replication slot "rep3"', + $logstart)) { - ok(1, "slot invalidation logged"); + $msg_logged = 1; last; } sleep 1; } +ok($msg_logged, "slot invalidation logged"); # Now let the walreceiver continue, so that the node can be stopped cleanly kill 'CONT', $receiverpid; |