aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Thomas Munro <tmunro@postgresql.org> 2022-03-16 13:37:58 +1300
committer: Thomas Munro <tmunro@postgresql.org> 2022-03-16 13:57:07 +1300
commit: 78c0f85e435bf327dcf2c1aded47b51263b32b82 (patch)
tree: 0257c2bb22c24f32aa7f54585725bd66bc0d0637
parent: d9f7ad54e552262ee0090e88d5abd3e04fcdeac8 (diff)
download: postgresql-78c0f85e435bf327dcf2c1aded47b51263b32b82.tar.gz
download: postgresql-78c0f85e435bf327dcf2c1aded47b51263b32b82.zip
Wake up for latches in CheckpointWriteDelay().
The checkpointer shouldn't ignore its latch. Other backends may be waiting for it to drain the request queue. Hopefully real systems don't have a full queue often, but the condition is reached easily when shared_buffers is small.

This involves defining a new wait event, which will appear in the pg_stat_activity view often due to spread checkpoints.

Back-patch only to 14. Even though the problem exists in earlier branches too, it's hard to hit there. In 14 we stopped using signal handlers for latches on Linux, *BSD and macOS, which were previously hiding this problem by interrupting the sleep (though not reliably, as the signal could arrive before the sleep begins; precisely the problem latches address).

Reported-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20220226213942.nb7uvb2pamyu26dj%40alap3.anarazel.de
-rw-r--r-- doc/src/sgml/monitoring.sgml 4
-rw-r--r-- src/backend/postmaster/checkpointer.c 8
-rw-r--r-- src/backend/utils/activity/wait_event.c 3
-rw-r--r-- src/include/utils/wait_event.h 3
4 files changed, 16 insertions, 2 deletions
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 15e51f92680..b8ffc210a4a 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2224,6 +2224,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
<entry>Waiting during base backup when throttling activity.</entry>
</row>
<row>
+ <entry><literal>CheckpointWriteDelay</literal></entry>
+ <entry>Waiting between writes while performing a checkpoint.</entry>
+ </row>
+ <row>
<entry><literal>PgSleep</literal></entry>
<entry>Waiting due to a call to <function>pg_sleep</function> or
a sibling function.</entry>
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 75a95f3de7a..86996750dcd 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -490,6 +490,9 @@ CheckpointerMain(void)
}
ckpt_active = false;
+
+ /* We may have received an interrupt during the checkpoint. */
+ HandleCheckpointerInterrupts();
}
/* Check for archive_timeout and switch xlog files if necessary. */
@@ -732,7 +735,10 @@ CheckpointWriteDelay(int flags, double progress)
* Checkpointer and bgwriter are no longer related so take the Big
* Sleep.
*/
- pg_usleep(100000L);
+ WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | WL_TIMEOUT,
+ 100,
+ WAIT_EVENT_CHECKPOINT_WRITE_DELAY);
+ ResetLatch(MyLatch);
}
else if (--absorb_counter <= 0)
{
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index 6baf67740c7..affbcf25db6 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -473,6 +473,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w)
case WAIT_EVENT_BASE_BACKUP_THROTTLE:
event_name = "BaseBackupThrottle";
break;
+ case WAIT_EVENT_CHECKPOINT_WRITE_DELAY:
+ event_name = "CheckpointWriteDelay";
+ break;
case WAIT_EVENT_PG_SLEEP:
event_name = "PgSleep";
break;
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 6c6ec2e7118..1fb6f640138 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -140,7 +140,8 @@ typedef enum
WAIT_EVENT_PG_SLEEP,
WAIT_EVENT_RECOVERY_APPLY_DELAY,
WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL,
- WAIT_EVENT_VACUUM_DELAY
+ WAIT_EVENT_VACUUM_DELAY,
+ WAIT_EVENT_CHECKPOINT_WRITE_DELAY
} WaitEventTimeout;
/* ----------