diff options
author | Thomas Munro <tmunro@postgresql.org> | 2024-04-03 00:03:08 +1300 |
---|---|---|
committer | Thomas Munro <tmunro@postgresql.org> | 2024-04-03 00:23:20 +1300 |
commit | 210622c60e1a9db2e2730140b8106ab57d259d15 (patch) | |
tree | 9c8de4c53e6cd36fd48ac078d45037e5e8623e23 /src/include | |
parent | 13b3b62746ec8bd9c8e3f0bc23862f1172996333 (diff) | |
download | postgresql-210622c60e1a9db2e2730140b8106ab57d259d15.tar.gz postgresql-210622c60e1a9db2e2730140b8106ab57d259d15.zip |
Provide vectored variant of ReadBuffer().
Break ReadBuffer() up into two steps. StartReadBuffers() and
WaitReadBuffers() give us two main advantages:
1. Multiple consecutive blocks can be read with one system call.
2. Advice (hints of future reads) can optionally be issued to the
kernel ahead of time.
The traditional ReadBuffer() function is now implemented in terms of
those functions, to avoid duplication.
A new GUC io_combine_limit is defined, and the functions for limiting
per-backend pin counts are made into public APIs. Those are provided
for use by callers of StartReadBuffers(), when deciding how many buffers
to read at once. The following commit will add a higher level mechanism
for doing that automatically with a practical interface.
With some more infrastructure in later work, StartReadBuffers() could
be extended to start real asynchronous I/O instead of just issuing
advice and leaving WaitReadBuffers() to do the work synchronously.
Author: Thomas Munro <thomas.munro@gmail.com>
Author: Andres Freund <andres@anarazel.de> (some optimization tweaks)
Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Reviewed-by: Dilip Kumar <dilipbalaut@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Tested-by: Tomas Vondra <tomas.vondra@enterprisedb.com>
Discussion: https://postgr.es/m/CA+hUKGJkOiOCa+mag4BF+zHo7qo=o9CFheB8=g6uT5TUm2gkvA@mail.gmail.com
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/storage/bufmgr.h | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index d51d46d3353..f380f9d9a6c 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -14,6 +14,7 @@ #ifndef BUFMGR_H #define BUFMGR_H +#include "port/pg_iovec.h" #include "storage/block.h" #include "storage/buf.h" #include "storage/bufpage.h" @@ -106,6 +107,41 @@ typedef struct BufferManagerRelation #define BMR_REL(p_rel) ((BufferManagerRelation){.rel = p_rel}) #define BMR_SMGR(p_smgr, p_relpersistence) ((BufferManagerRelation){.smgr = p_smgr, .relpersistence = p_relpersistence}) +typedef enum ReadBuffersFlags +{ + /* Zero out page if reading fails. */ + READ_BUFFERS_ZERO_ON_ERROR = (1 << 0), + + /* Call smgrprefetch() if I/O necessary. */ + READ_BUFFERS_ISSUE_ADVICE = (1 << 1), +} ReadBuffersFlags; + +struct ReadBuffersOperation +{ + /* + * The following members should be set by the caller. If only smgr is + * provided without rel, then smgr_persistence can be set to override the + * default assumption of RELPERSISTENCE_PERMANENT. + */ + Relation rel; + struct SMgrRelationData *smgr; + char smgr_persistence; + ForkNumber forknum; + BufferAccessStrategy strategy; + + /* + * The following private members are private state for communication + * between StartReadBuffers() and WaitReadBuffers(), initialized only if + * an actual read is required, and should not be modified. + */ + Buffer *buffers; + BlockNumber blocknum; + int flags; + int16 nblocks; + int16 io_buffers_len; +}; + +typedef struct ReadBuffersOperation ReadBuffersOperation; /* forward declared, to avoid having to expose buf_internals.h here */ struct WritebackContext; @@ -133,6 +169,10 @@ extern PGDLLIMPORT bool track_io_timing; extern PGDLLIMPORT int effective_io_concurrency; extern PGDLLIMPORT int maintenance_io_concurrency; +#define MAX_IO_COMBINE_LIMIT PG_IOV_MAX +#define DEFAULT_IO_COMBINE_LIMIT Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ) +extern PGDLLIMPORT int io_combine_limit; + extern PGDLLIMPORT int checkpoint_flush_after; extern PGDLLIMPORT int backend_flush_after; extern PGDLLIMPORT int bgwriter_flush_after; @@ -177,6 +217,18 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent); + +extern bool StartReadBuffer(ReadBuffersOperation *operation, + Buffer *buffer, + BlockNumber blocknum, + int flags); +extern bool StartReadBuffers(ReadBuffersOperation *operation, + Buffer *buffers, + BlockNumber blocknum, + int *nblocks, + int flags); +extern void WaitReadBuffers(ReadBuffersOperation *operation); + extern void ReleaseBuffer(Buffer buffer); extern void UnlockReleaseBuffer(Buffer buffer); extern bool BufferIsExclusiveLocked(Buffer buffer); @@ -250,6 +302,9 @@ extern bool HoldingBufferPinThatDelaysRecovery(void); extern bool BgBufferSync(struct WritebackContext *wb_context); +extern void LimitAdditionalPins(uint32 *additional_pins); +extern void LimitAdditionalLocalPins(uint32 *additional_pins); + /* in buf_init.c */ extern void InitBufferPool(void); extern Size BufferShmemSize(void); |