diff options
author | Thomas Munro <tmunro@postgresql.org> | 2022-03-16 13:37:58 +1300 |
---|---|---|
committer | Thomas Munro <tmunro@postgresql.org> | 2022-03-16 13:57:07 +1300 |
commit | 78c0f85e435bf327dcf2c1aded47b51263b32b82 (patch) | |
tree | 0257c2bb22c24f32aa7f54585725bd66bc0d0637 | |
parent | d9f7ad54e552262ee0090e88d5abd3e04fcdeac8 (diff) | |
download | postgresql-78c0f85e435bf327dcf2c1aded47b51263b32b82.tar.gz postgresql-78c0f85e435bf327dcf2c1aded47b51263b32b82.zip |
Wake up for latches in CheckpointWriteDelay().
The checkpointer shouldn't ignore its latch. Other backends may be
waiting for it to drain the request queue. Hopefully real systems don't
have a full queue often, but the condition is reached easily when
shared_buffers is small.
This involves defining a new wait event, which will appear in the
pg_stat_activity view often due to spread checkpoints.
Back-patch only to 14. Even though the problem exists in earlier
branches too, it's hard to hit there. In 14 we stopped using signal
handlers for latches on Linux, *BSD and macOS, which were previously
hiding this problem by interrupting the sleep (though not reliably, as
the signal could arrive before the sleep begins; precisely the problem
latches address).
Reported-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20220226213942.nb7uvb2pamyu26dj%40alap3.anarazel.de
-rw-r--r-- | doc/src/sgml/monitoring.sgml | 4 | ||||
-rw-r--r-- | src/backend/postmaster/checkpointer.c | 8 | ||||
-rw-r--r-- | src/backend/utils/activity/wait_event.c | 3 | ||||
-rw-r--r-- | src/include/utils/wait_event.h | 3 |
4 files changed, 16 insertions, 2 deletions
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 15e51f92680..b8ffc210a4a 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -2224,6 +2224,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser <entry>Waiting during base backup when throttling activity.</entry> </row> <row> + <entry><literal>CheckpointWriteDelay</literal></entry> + <entry>Waiting between writes while performing a checkpoint.</entry> + </row> + <row> <entry><literal>PgSleep</literal></entry> <entry>Waiting due to a call to <function>pg_sleep</function> or a sibling function.</entry> diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 75a95f3de7a..86996750dcd 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -490,6 +490,9 @@ CheckpointerMain(void) } ckpt_active = false; + + /* We may have received an interrupt during the checkpoint. */ + HandleCheckpointerInterrupts(); } /* Check for archive_timeout and switch xlog files if necessary. */ @@ -732,7 +735,10 @@ CheckpointWriteDelay(int flags, double progress) * Checkpointer and bgwriter are no longer related so take the Big * Sleep. 
*/ - pg_usleep(100000L); + WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, + 100, + WAIT_EVENT_CHECKPOINT_WRITE_DELAY); + ResetLatch(MyLatch); } else if (--absorb_counter <= 0) { diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c index 6baf67740c7..affbcf25db6 100644 --- a/src/backend/utils/activity/wait_event.c +++ b/src/backend/utils/activity/wait_event.c @@ -473,6 +473,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w) case WAIT_EVENT_BASE_BACKUP_THROTTLE: event_name = "BaseBackupThrottle"; break; + case WAIT_EVENT_CHECKPOINT_WRITE_DELAY: + event_name = "CheckpointWriteDelay"; + break; case WAIT_EVENT_PG_SLEEP: event_name = "PgSleep"; break; diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index 6c6ec2e7118..1fb6f640138 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -140,7 +140,8 @@ typedef enum WAIT_EVENT_PG_SLEEP, WAIT_EVENT_RECOVERY_APPLY_DELAY, WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL, - WAIT_EVENT_VACUUM_DELAY + WAIT_EVENT_VACUUM_DELAY, + WAIT_EVENT_CHECKPOINT_WRITE_DELAY } WaitEventTimeout; /* ---------- |