aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorThomas Munro <tmunro@postgresql.org>2023-12-12 11:56:11 +1300
committerThomas Munro <tmunro@postgresql.org>2023-12-12 13:12:43 +1300
commit871fe4917e1e92304bdcc2ab779de7416492c6de (patch)
tree3a914d81d73f25cdc1b5058996fda6a01f8c4ce0 /src/backend
parent0c6be59f5e34f44b20d9ec3ffb58c1d9a0bf985e (diff)
downloadpostgresql-871fe4917e1e92304bdcc2ab779de7416492c6de.tar.gz
postgresql-871fe4917e1e92304bdcc2ab779de7416492c6de.zip
Provide vectored variants of FileRead() and FileWrite().
FileReadV() and FileWriteV() adapt pg_preadv() and pg_pwritev() for fd.c's virtual file descriptors. The simple FileRead() and FileWrite() functions are now implemented in terms of the vectored functions, to avoid code duplication, and they are converted back to the corresponding simple system calls further down (commit 15c9ac36). Later work will make more interesting multi-iovec calls. The traditional behavior of reporting a "fake" ENOSPC error is simplified. It's now always set for non-failing writes, for the benefit of callers that expect to log a meaningful "%m" if they determine that the write was short. (Perhaps we should consider getting rid of that expectation one day.) Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Discussion: https://postgr.es/m/CA+hUKGJkOiOCa+mag4BF+zHo7qo=o9CFheB8=g6uT5TUm2gkvA@mail.gmail.com
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/storage/file/fd.c43
1 files changed, 25 insertions, 18 deletions
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a185fb3d08c..fbffdd72f0b 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -2110,18 +2110,18 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
}
int
-FileRead(File file, void *buffer, size_t amount, off_t offset,
- uint32 wait_event_info)
+FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset,
+ uint32 wait_event_info)
{
int returnCode;
Vfd *vfdP;
Assert(FileIsValid(file));
- DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %zu %p",
+ DO_DB(elog(LOG, "FileReadV: %d (%s) " INT64_FORMAT " %d",
file, VfdCache[file].fileName,
(int64) offset,
- amount, buffer));
+ iovcnt));
returnCode = FileAccess(file);
if (returnCode < 0)
@@ -2131,7 +2131,7 @@ FileRead(File file, void *buffer, size_t amount, off_t offset,
retry:
pgstat_report_wait_start(wait_event_info);
- returnCode = pg_pread(vfdP->fd, buffer, amount, offset);
+ returnCode = pg_preadv(vfdP->fd, iov, iovcnt, offset);
pgstat_report_wait_end();
if (returnCode < 0)
@@ -2166,18 +2166,18 @@ retry:
}
int
-FileWrite(File file, const void *buffer, size_t amount, off_t offset,
- uint32 wait_event_info)
+FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset,
+ uint32 wait_event_info)
{
int returnCode;
Vfd *vfdP;
Assert(FileIsValid(file));
- DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %zu %p",
+ DO_DB(elog(LOG, "FileWriteV: %d (%s) " INT64_FORMAT " %d",
file, VfdCache[file].fileName,
(int64) offset,
- amount, buffer));
+ iovcnt));
returnCode = FileAccess(file);
if (returnCode < 0)
@@ -2195,7 +2195,10 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
*/
if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
{
- off_t past_write = offset + amount;
+ off_t past_write = offset;
+
+ for (int i = 0; i < iovcnt; ++i)
+ past_write += iov[i].iov_len;
if (past_write > vfdP->fileSize)
{
@@ -2211,23 +2214,27 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
}
retry:
- errno = 0;
pgstat_report_wait_start(wait_event_info);
- returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset);
+ returnCode = pg_pwritev(vfdP->fd, iov, iovcnt, offset);
pgstat_report_wait_end();
- /* if write didn't set errno, assume problem is no disk space */
- if (returnCode != amount && errno == 0)
- errno = ENOSPC;
-
if (returnCode >= 0)
{
/*
+ * Some callers expect short writes to set errno, and traditionally we
+ * have assumed that they imply disk space shortage. We don't want to
+ * waste CPU cycles adding up the total size here, so we'll just set
+ * it for all successful writes in case such a caller determines that
+ * the write was short and ereports "%m".
+ */
+ errno = ENOSPC;
+
+ /*
* Maintain fileSize and temporary_files_size if it's a temp file.
*/
if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
{
- off_t past_write = offset + amount;
+ off_t past_write = offset + returnCode;
if (past_write > vfdP->fileSize)
{
@@ -2239,7 +2246,7 @@ retry:
else
{
/*
- * See comments in FileRead()
+ * See comments in FileReadV()
*/
#ifdef WIN32
DWORD error = GetLastError();