aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/buffer/buf_init.c
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2016-02-19 12:17:51 -0800
committerAndres Freund <andres@anarazel.de>2016-03-10 17:05:09 -0800
commit9cd00c457e6a1ebb984167ac556a9961812a683c (patch)
tree1d2857a1ef71d9031651a665b2c754a89d813718 /src/backend/storage/buffer/buf_init.c
parent428b1d6b29ca599c5700d4bc4f4ce4c5880369bf (diff)
downloadpostgresql-9cd00c457e6a1ebb984167ac556a9961812a683c.tar.gz
postgresql-9cd00c457e6a1ebb984167ac556a9961812a683c.zip
Checkpoint sorting and balancing.
Up to now checkpoints were written in the order they're in the BufferDescriptors. That's nearly random in a lot of cases, which performs badly on rotating media, but even on SSDs it causes slowdowns. To avoid that, sort checkpoints before writing them out. We currently sort by tablespace, relfilenode, fork and block number. One of the major reasons that previously wasn't done, was fear of imbalance between tablespaces. To address that balance writes between tablespaces. The other prime concern was that the relatively large allocation to sort the buffers in might fail, preventing checkpoints from happening. Thus pre-allocate the required memory in shared memory, at server startup. This particularly makes it more efficient to have checkpoint flushing enabled, because that'll often result in a lot of writes that can be coalesced into one flush. Discussion: alpine.DEB.2.10.1506011320000.28433@sto Author: Fabien Coelho and Andres Freund
Diffstat (limited to 'src/backend/storage/buffer/buf_init.c')
-rw-r--r--src/backend/storage/buffer/buf_init.c22
1 files changed, 19 insertions, 3 deletions
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index e10071d9c0e..bfa37f1c66b 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -24,6 +24,7 @@ LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
LWLockTranche BufferIOLWLockTranche;
LWLockTranche BufferContentLWLockTranche;
WritebackContext BackendWritebackContext;
+CkptSortItem *CkptBufferIds;
/*
@@ -70,7 +71,8 @@ InitBufferPool(void)
{
bool foundBufs,
foundDescs,
- foundIOLocks;
+ foundIOLocks,
+ foundBufCkpt;
/* Align descriptors to a cacheline boundary. */
BufferDescriptors = (BufferDescPadded *)
@@ -104,10 +106,21 @@ InitBufferPool(void)
LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT,
&BufferContentLWLockTranche);
- if (foundDescs || foundBufs || foundIOLocks)
+ /*
+ * The array used to sort to-be-checkpointed buffer ids is located in
+ * shared memory, to avoid having to allocate significant amounts of
+ * memory at runtime. As that'd be in the middle of a checkpoint, or when
+ * the checkpointer is restarted, memory allocation failures would be
+ * painful.
+ */
+ CkptBufferIds = (CkptSortItem *)
+ ShmemInitStruct("Checkpoint BufferIds",
+ NBuffers * sizeof(CkptSortItem), &foundBufCkpt);
+
+ if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt)
{
/* should find all of these, or none of them */
- Assert(foundDescs && foundBufs && foundIOLocks);
+ Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt);
/* note: this path is only taken in EXEC_BACKEND case */
}
else
@@ -190,5 +203,8 @@ BufferShmemSize(void)
/* to allow aligning the above */
size = add_size(size, PG_CACHE_LINE_SIZE);
+ /* size of checkpoint sort array in bufmgr.c */
+ size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));
+
return size;
}