diff options
author | Andres Freund <andres@anarazel.de> | 2025-03-30 17:28:03 -0400 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2025-03-30 17:28:03 -0400 |
commit | 047cba7fa0f8c6930b0dd1d93d98c736ef1e4a5a (patch) | |
tree | 29feb0de9ab74851134110b04486700444fa3852 /src/include/storage | |
parent | ef64fe26bad92a7b8425767cdbbe8b946d4637f0 (diff) | |
download | postgresql-047cba7fa0f8c6930b0dd1d93d98c736ef1e4a5a.tar.gz postgresql-047cba7fa0f8c6930b0dd1d93d98c736ef1e4a5a.zip |
bufmgr: Implement AIO read support
This commit implements the infrastructure to perform asynchronous reads into
the buffer pool.
To do so, it:
- Adds readv AIO callbacks for shared and local buffers
Note that shared buffer completions may be run in a different backend than
the one that started the IO.
- Adds an AIO wait reference to BufferDesc, to allow backends to wait for
in-progress asynchronous IOs
- Adapts StartBufferIO(), WaitIO(), TerminateBufferIO(), and their localbuf.c
equivalents, to be able to deal with AIO
- Moves the code to handle BM_PIN_COUNT_WAITER into a helper function, as it
now also needs to be called on IO completion
As of this commit, nothing issues AIO on shared/local buffers. A future commit
will update StartReadBuffers() to do so.
Buffer reads executed through this infrastructure will report invalid page /
checksum errors / warnings differently than before:
In the error case the error message will cover all the blocks that were
included in the read, rather than just reporting the first invalid
block. If more than one block is invalid, the error will include information
about the range of the read, the first invalid block and the number of invalid
pages, with a HINT towards the server log for per-block details.
For the warning case (i.e. zero_damaged_pages) we would previously emit one
warning message for each buffer in a multi-block read. Now there is only a
single warning message for the entire read, again referring to the server log
for more details in case of multiple checksum failures within a single larger
read.
Reviewed-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Discussion: https://postgr.es/m/uvrtrknj4kdytuboidbhwclo4gxhswwcpgadptsjvjqcluzmah%40brqs62irg4dt
Discussion: https://postgr.es/m/20210223100344.llw5an2aklengrmn@alap3.anarazel.de
Discussion: https://postgr.es/m/stj36ea6yyhoxtqkhpieia2z4krnam7qyetc57rfezgk4zgapf@gcnactj4z56m
Diffstat (limited to 'src/include/storage')
-rw-r--r-- | src/include/storage/aio.h | 6 | ||||
-rw-r--r-- | src/include/storage/buf_internals.h | 7 | ||||
-rw-r--r-- | src/include/storage/bufmgr.h | 6 | ||||
-rw-r--r-- | src/include/storage/bufpage.h | 1 |
4 files changed, 17 insertions, 3 deletions
diff --git a/src/include/storage/aio.h b/src/include/storage/aio.h
index 4ab4b05145a..9fe9d9ad9fa 100644
--- a/src/include/storage/aio.h
+++ b/src/include/storage/aio.h
@@ -194,9 +194,13 @@ typedef enum PgAioHandleCallbackID
 	PGAIO_HCB_INVALID = 0,
 
 	PGAIO_HCB_MD_READV,
+
+	PGAIO_HCB_SHARED_BUFFER_READV,
+
+	PGAIO_HCB_LOCAL_BUFFER_READV,
 } PgAioHandleCallbackID;
 
-#define PGAIO_HCB_MAX PGAIO_HCB_MD_READV
+#define PGAIO_HCB_MAX PGAIO_HCB_LOCAL_BUFFER_READV
 StaticAssertDecl(PGAIO_HCB_MAX <= (1 << PGAIO_RESULT_ID_BITS),
 				 "PGAIO_HCB_MAX is too big for PGAIO_RESULT_ID_BITS");
 
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 9327f60c44c..72b36a4af26 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -17,6 +17,7 @@
 
 #include "pgstat.h"
 #include "port/atomics.h"
+#include "storage/aio_types.h"
 #include "storage/buf.h"
 #include "storage/bufmgr.h"
 #include "storage/condition_variable.h"
@@ -264,6 +265,8 @@ typedef struct BufferDesc
 
 	int wait_backend_pgprocno; /* backend of pin-count waiter */
 	int freeNext; /* link in freelist chain */
+
+	PgAioWaitRef io_wref; /* set iff AIO is in progress */
 	LWLock content_lock; /* to lock access to buffer contents */
 } BufferDesc;
 
@@ -472,8 +475,8 @@ extern BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr,
 										  uint32 *extended_by);
 extern void MarkLocalBufferDirty(Buffer buffer);
 extern void TerminateLocalBufferIO(BufferDesc *bufHdr, bool clear_dirty,
-								   uint32 set_flag_bits);
-extern bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput);
+								   uint32 set_flag_bits, bool release_aio);
+extern bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput, bool nowait);
 extern void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln);
 extern void DropRelationLocalBuffers(RelFileLocator rlocator,
 									 ForkNumber forkNum,
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 538b890a51d..11f8508a90b 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -15,6 +15,7 @@
 #define BUFMGR_H
 
 #include "port/pg_iovec.h"
+#include "storage/aio_types.h"
 #include "storage/block.h"
 #include "storage/buf.h"
 #include "storage/bufpage.h"
@@ -111,6 +112,8 @@ typedef struct BufferManagerRelation
 #define READ_BUFFERS_ZERO_ON_ERROR (1 << 0)
 /* Call smgrprefetch() if I/O necessary. */
 #define READ_BUFFERS_ISSUE_ADVICE (1 << 1)
+/* Don't treat page as invalid due to checksum failures. */
+#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES (1 << 2)
 
 struct ReadBuffersOperation
 {
@@ -170,6 +173,9 @@ extern PGDLLIMPORT int checkpoint_flush_after;
 extern PGDLLIMPORT int backend_flush_after;
 extern PGDLLIMPORT int bgwriter_flush_after;
 
+extern const PgAioHandleCallbacks aio_shared_buffer_readv_cb;
+extern const PgAioHandleCallbacks aio_local_buffer_readv_cb;
+
 /* in buf_init.c */
 extern PGDLLIMPORT char *BufferBlocks;
 
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 26d0a551fc9..aeb67c498c5 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -467,6 +467,7 @@ do { \
 
 /* flags for PageIsVerified() */
 #define PIV_LOG_WARNING (1 << 0)
+#define PIV_LOG_LOG (1 << 1)
 #define PIV_IGNORE_CHECKSUM_FAILURE (1 << 2)
 
 #define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \