diff options
author | Fujii Masao <fujii@postgresql.org> | 2021-01-13 22:59:17 +0900 |
---|---|---|
committer | Fujii Masao <fujii@postgresql.org> | 2021-01-13 22:59:17 +0900 |
commit | 39b03690b529935a3c33024ee68f08e2d347cf4f (patch) | |
tree | 535e89716c5be4aebec5debb1db1e37e6f0d43c3 /src | |
parent | 6ecaaf810b8b9e3132f00549e630400f38a7ca37 (diff) | |
download | postgresql-39b03690b529935a3c33024ee68f08e2d347cf4f.tar.gz postgresql-39b03690b529935a3c33024ee68f08e2d347cf4f.zip |
Log long wait time on recovery conflict when it's resolved.
This is a follow-up of the work done in commit 0650ff2303. This commit
extends log_recovery_conflict_waits so that a log message is produced
also when recovery conflict has already been resolved after deadlock_timeout
passes, i.e., when the startup process finishes waiting for recovery
conflict after deadlock_timeout. This is useful in investigating how long
recovery conflicts prevented the recovery from applying WAL.
Author: Fujii Masao
Reviewed-by: Kyotaro Horiguchi, Bertrand Drouvot
Discussion: https://postgr.es/m/9a60178c-a853-1440-2cdc-c3af916cff59@amazon.com
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 12 | ||||
-rw-r--r-- | src/backend/storage/ipc/standby.c | 47 | ||||
-rw-r--r-- | src/backend/storage/lmgr/proc.c | 12 | ||||
-rw-r--r-- | src/include/storage/standby.h | 3 |
4 files changed, 60 insertions, 14 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index c46b8abad12..561c212092f 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -4034,6 +4034,16 @@ LockBufferForCleanup(Buffer buffer) /* Successfully acquired exclusive lock with pincount 1 */ UnlockBufHdr(bufHdr, buf_state); + /* + * Emit the log message if recovery conflict on buffer pin was + * resolved but the startup process waited longer than + * deadlock_timeout for it. + */ + if (logged_recovery_conflict) + LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + waitStart, GetCurrentTimestamp(), + NULL, false); + /* Report change to non-waiting status */ if (new_status) { @@ -4088,7 +4098,7 @@ LockBufferForCleanup(Buffer buffer) DeadlockTimeout)) { LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, - waitStart, now, NULL); + waitStart, now, NULL, true); logged_recovery_conflict = true; } } diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index d4b0f65ba20..39a30c00f7a 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -226,11 +226,14 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info) * wait_start is the timestamp when the caller started to wait. * now is the timestamp when this function has been called. * wait_list is the list of virtual transaction ids assigned to - * conflicting processes. + * conflicting processes. still_waiting indicates whether + * the startup process is still waiting for the recovery conflict + * to be resolved or not. */ void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, - TimestampTz now, VirtualTransactionId *wait_list) + TimestampTz now, VirtualTransactionId *wait_list, + bool still_waiting) { long secs; int usecs; @@ -238,6 +241,12 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, StringInfoData buf; int nprocs = 0; + /* + * There must be no conflicting processes when the recovery conflict has + * already been resolved. + */ + Assert(still_waiting || wait_list == NULL); + TimestampDifference(wait_start, now, &secs, &usecs); msecs = secs * 1000 + usecs / 1000; usecs = usecs % 1000; @@ -275,12 +284,21 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, * conflicting backends in a detail message. Note that if all the backends * in the list are not active, no detail message is logged. */ - ereport(LOG, - errmsg("recovery still waiting after %ld.%03d ms: %s", - msecs, usecs, _(get_recovery_conflict_desc(reason))), - nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.", - "Conflicting processes: %s.", - nprocs, buf.data) : 0); + if (still_waiting) + { + ereport(LOG, + errmsg("recovery still waiting after %ld.%03d ms: %s", + msecs, usecs, _(get_recovery_conflict_desc(reason))), + nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.", + "Conflicting processes: %s.", + nprocs, buf.data) : 0); + } + else + { + ereport(LOG, + errmsg("recovery finished waiting after %ld.%03d ms: %s", + msecs, usecs, _(get_recovery_conflict_desc(reason)))); + } if (nprocs > 0) pfree(buf.data); @@ -375,13 +393,12 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, /* * Emit the log message if the startup process is waiting - * longer than deadlock_timeout for recovery conflict on - * buffer pin. + * longer than deadlock_timeout for recovery conflict. */ if (maybe_log_conflict && TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout)) { - LogRecoveryConflict(reason, waitStart, now, waitlist); + LogRecoveryConflict(reason, waitStart, now, waitlist, true); logged_recovery_conflict = true; } } @@ -391,6 +408,14 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, waitlist++; } + /* + * Emit the log message if recovery conflict was resolved but the startup + * process waited longer than deadlock_timeout for it. + */ + if (logged_recovery_conflict) + LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(), + NULL, false); + /* Reset ps display if we changed it */ if (new_status) { diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index db0cfaa3600..c87ffc65491 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -1322,7 +1322,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) * longer than deadlock_timeout. */ LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK, - standbyWaitStart, now, cnt > 0 ? vxids : NULL); + standbyWaitStart, now, + cnt > 0 ? vxids : NULL, true); logged_recovery_conflict = true; } } @@ -1608,6 +1609,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) } /* + * Emit the log message if recovery conflict on lock was resolved but the + * startup process waited longer than deadlock_timeout for it. + */ + if (InHotStandby && logged_recovery_conflict) + LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK, + standbyWaitStart, GetCurrentTimestamp(), + NULL, false); + + /* * Re-acquire the lock table's partition lock. We have to do this to hold * off cancel/die interrupts before we can mess with lockAwaited (else we * might have a missed or duplicated locallock update). diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index a0f3e0bdf09..2b1f340b82b 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -40,7 +40,8 @@ extern void StandbyDeadLockHandler(void); extern void StandbyTimeoutHandler(void); extern void StandbyLockTimeoutHandler(void); extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, - TimestampTz cur_ts, VirtualTransactionId *wait_list); + TimestampTz cur_ts, VirtualTransactionId *wait_list, + bool still_waiting); /* * Standby Rmgr (RM_STANDBY_ID) |