aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
author Thomas Munro <tmunro@postgresql.org> 2024-04-03 00:03:08 +1300
committer Thomas Munro <tmunro@postgresql.org> 2024-04-03 00:23:20 +1300
commit 210622c60e1a9db2e2730140b8106ab57d259d15 (patch)
tree 9c8de4c53e6cd36fd48ac078d45037e5e8623e23 /src/include
parent 13b3b62746ec8bd9c8e3f0bc23862f1172996333 (diff)
download postgresql-210622c60e1a9db2e2730140b8106ab57d259d15.tar.gz
postgresql-210622c60e1a9db2e2730140b8106ab57d259d15.zip
Provide vectored variant of ReadBuffer().
Break ReadBuffer() up into two steps. StartReadBuffers() and WaitReadBuffers() give us two main advantages:

1. Multiple consecutive blocks can be read with one system call.
2. Advice (hints of future reads) can optionally be issued to the kernel ahead of time.

The traditional ReadBuffer() function is now implemented in terms of those functions, to avoid duplication.

A new GUC io_combine_limit is defined, and the functions for limiting per-backend pin counts are made into public APIs. Those are provided for use by callers of StartReadBuffers(), when deciding how many buffers to read at once. The following commit will add a higher level mechanism for doing that automatically with a practical interface.

With some more infrastructure in later work, StartReadBuffers() could be extended to start real asynchronous I/O instead of just issuing advice and leaving WaitReadBuffers() to do the work synchronously.

Author: Thomas Munro <thomas.munro@gmail.com>
Author: Andres Freund <andres@anarazel.de> (some optimization tweaks)
Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Reviewed-by: Dilip Kumar <dilipbalaut@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Tested-by: Tomas Vondra <tomas.vondra@enterprisedb.com>
Discussion: https://postgr.es/m/CA+hUKGJkOiOCa+mag4BF+zHo7qo=o9CFheB8=g6uT5TUm2gkvA@mail.gmail.com
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/storage/bufmgr.h 55
1 files changed, 55 insertions, 0 deletions
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index d51d46d3353..f380f9d9a6c 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -14,6 +14,7 @@
#ifndef BUFMGR_H
#define BUFMGR_H
+#include "port/pg_iovec.h"
#include "storage/block.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
@@ -106,6 +107,41 @@ typedef struct BufferManagerRelation
#define BMR_REL(p_rel) ((BufferManagerRelation){.rel = p_rel})
#define BMR_SMGR(p_smgr, p_relpersistence) ((BufferManagerRelation){.smgr = p_smgr, .relpersistence = p_relpersistence})
+typedef enum ReadBuffersFlags
+{
+ /* Zero out the page if reading it fails. */
+ READ_BUFFERS_ZERO_ON_ERROR = (1 << 0),
+
+ /* Call smgrprefetch() if I/O is necessary. */
+ READ_BUFFERS_ISSUE_ADVICE = (1 << 1),
+} ReadBuffersFlags;
+
+struct ReadBuffersOperation
+{
+ /*
+ * The following members should be set by the caller. If only smgr is
+ * provided without rel, then smgr_persistence can be set to override the
+ * default assumption of RELPERSISTENCE_PERMANENT.
+ */
+ Relation rel;
+ struct SMgrRelationData *smgr;
+ char smgr_persistence;
+ ForkNumber forknum;
+ BufferAccessStrategy strategy;
+
+ /*
+ * The following members are private state for communication between
+ * StartReadBuffers() and WaitReadBuffers(), initialized only if an
+ * actual read is required, and should not be modified.
+ */
+ Buffer *buffers;
+ BlockNumber blocknum;
+ int flags;
+ int16 nblocks;
+ int16 io_buffers_len;
+};
+
+typedef struct ReadBuffersOperation ReadBuffersOperation;
/* forward declared, to avoid having to expose buf_internals.h here */
struct WritebackContext;
@@ -133,6 +169,10 @@ extern PGDLLIMPORT bool track_io_timing;
extern PGDLLIMPORT int effective_io_concurrency;
extern PGDLLIMPORT int maintenance_io_concurrency;
+#define MAX_IO_COMBINE_LIMIT PG_IOV_MAX
+#define DEFAULT_IO_COMBINE_LIMIT Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ)
+extern PGDLLIMPORT int io_combine_limit;
+
extern PGDLLIMPORT int checkpoint_flush_after;
extern PGDLLIMPORT int backend_flush_after;
extern PGDLLIMPORT int bgwriter_flush_after;
@@ -177,6 +217,18 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode, BufferAccessStrategy strategy,
bool permanent);
+
+extern bool StartReadBuffer(ReadBuffersOperation *operation,
+ Buffer *buffer,
+ BlockNumber blocknum,
+ int flags);
+extern bool StartReadBuffers(ReadBuffersOperation *operation,
+ Buffer *buffers,
+ BlockNumber blocknum,
+ int *nblocks,
+ int flags);
+extern void WaitReadBuffers(ReadBuffersOperation *operation);
+
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern bool BufferIsExclusiveLocked(Buffer buffer);
@@ -250,6 +302,9 @@ extern bool HoldingBufferPinThatDelaysRecovery(void);
extern bool BgBufferSync(struct WritebackContext *wb_context);
+extern void LimitAdditionalPins(uint32 *additional_pins);
+extern void LimitAdditionalLocalPins(uint32 *additional_pins);
+
/* in buf_init.c */
extern void InitBufferPool(void);
extern Size BufferShmemSize(void);