aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas Munro <tmunro@postgresql.org>2021-01-11 14:37:13 +1300
committerThomas Munro <tmunro@postgresql.org>2021-01-11 15:24:38 +1300
commit13a021f3e8c99915b3cc0cb2021a948d9c71ff32 (patch)
treef532b74cfecef4bb150d2853509f105c3fe05e21 /src
parent01334c92fa09dc496a444a4f206854ef37247258 (diff)
downloadpostgresql-13a021f3e8c99915b3cc0cb2021a948d9c71ff32.tar.gz
postgresql-13a021f3e8c99915b3cc0cb2021a948d9c71ff32.zip
Provide pg_preadv() and pg_pwritev().
Provide synchronous vectored file I/O routines. These map to preadv() and pwritev(), with fallback implementations for systems that don't have them. Also provide a wrapper pg_pwritev_with_retry() that automatically retries on short writes. Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/CA%2BhUKGJA%2Bu-220VONeoREBXJ9P3S94Y7J%2BkqCnTYmahvZJwM%3Dg%40mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/include/pg_config.h.in15
-rw-r--r--src/include/port.h2
-rw-r--r--src/include/port/pg_iovec.h59
-rw-r--r--src/port/Makefile2
-rw-r--r--src/port/pread.c43
-rw-r--r--src/port/pwrite.c107
-rw-r--r--src/tools/msvc/Solution.pm5
7 files changed, 229 insertions, 4 deletions
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index ddaa9e8e182..f4d9f3b408d 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -412,6 +412,9 @@
/* Define to 1 if you have the `pread' function. */
#undef HAVE_PREAD
+/* Define to 1 if you have the `preadv' function. */
+#undef HAVE_PREADV
+
/* Define to 1 if you have the `pstat' function. */
#undef HAVE_PSTAT
@@ -430,6 +433,9 @@
/* Define to 1 if you have the `pwrite' function. */
#undef HAVE_PWRITE
+/* Define to 1 if you have the `pwritev' function. */
+#undef HAVE_PWRITEV
+
/* Define to 1 if you have the `random' function. */
#undef HAVE_RANDOM
@@ -445,6 +451,9 @@
/* Define to 1 if you have the `readlink' function. */
#undef HAVE_READLINK
+/* Define to 1 if you have the `readv' function. */
+#undef HAVE_READV
+
/* Define to 1 if you have the global variable
'rl_completion_append_character'. */
#undef HAVE_RL_COMPLETION_APPEND_CHARACTER
@@ -629,6 +638,9 @@
/* Define to 1 if you have the <sys/ucred.h> header file. */
#undef HAVE_SYS_UCRED_H
+/* Define to 1 if you have the <sys/uio.h> header file. */
+#undef HAVE_SYS_UIO_H
+
/* Define to 1 if you have the <sys/un.h> header file. */
#undef HAVE_SYS_UN_H
@@ -683,6 +695,9 @@
/* Define to 1 if you have the <winldap.h> header file. */
#undef HAVE_WINLDAP_H
+/* Define to 1 if you have the `writev' function. */
+#undef HAVE_WRITEV
+
/* Define to 1 if you have the `X509_get_signature_nid' function. */
#undef HAVE_X509_GET_SIGNATURE_NID
diff --git a/src/include/port.h b/src/include/port.h
index 3e9d4fcd376..6486db9fdde 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -431,6 +431,8 @@ extern ssize_t pg_pread(int fd, void *buf, size_t nbyte, off_t offset);
extern ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset);
#endif
+/* For pg_pwritev() and pg_preadv(), see port/pg_iovec.h. */
+
#if !HAVE_DECL_STRLCAT
extern size_t strlcat(char *dst, const char *src, size_t siz);
#endif
diff --git a/src/include/port/pg_iovec.h b/src/include/port/pg_iovec.h
new file mode 100644
index 00000000000..335f35bf0eb
--- /dev/null
+++ b/src/include/port/pg_iovec.h
@@ -0,0 +1,59 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_iovec.h
+ *	  Header for the vectored I/O functions in src/port/p{read,write}.c.
+ *
+ * Declares pg_preadv(), pg_pwritev() and pg_pwritev_with_retry(), and
+ * supplies struct iovec and IOV_MAX on platforms that don't define them.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/port/pg_iovec.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_IOVEC_H
+#define PG_IOVEC_H
+
+#include <limits.h>
+
+#ifdef HAVE_SYS_UIO_H
+#include <sys/uio.h>
+#else
+/* <sys/uio.h> is missing, so define our own POSIX-compatible iovec struct. */
+struct iovec
+{
+	void	   *iov_base;		/* start of the buffer */
+	size_t		iov_len;		/* size of the buffer in bytes */
+};
+#endif
+
+/*
+ * If <limits.h> didn't define IOV_MAX, define our own.  POSIX requires at
+ * least 16.
+ */
+#ifndef IOV_MAX
+#define IOV_MAX 16
+#endif
+
+/*
+ * Define a reasonable maximum that is safe to use on the stack.  This is
+ * also the largest iovcnt that pg_pwritev_with_retry() accepts.
+ */
+#define PG_IOV_MAX Min(IOV_MAX, 32)
+
+/*
+ * pg_preadv: vectored read at a given file offset.
+ *
+ * Where the platform has preadv(), pg_preadv is simply another name for it.
+ * Otherwise src/port/pread.c provides a replacement which, unlike the POSIX
+ * function, changes the current file position.
+ */
+#ifdef HAVE_PREADV
+#define pg_preadv preadv
+#else
+extern ssize_t pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
+#endif
+
+/*
+ * pg_pwritev: vectored write at a given file offset.
+ *
+ * Where the platform has pwritev(), pg_pwritev is simply another name for
+ * it.  Otherwise src/port/pwrite.c provides a replacement which, unlike the
+ * POSIX function, changes the current file position.
+ */
+#ifdef HAVE_PWRITEV
+#define pg_pwritev pwritev
+#else
+extern ssize_t pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset);
+#endif
+
+/*
+ * pg_pwritev_with_retry: like pg_pwritev(), but retries on short writes
+ * until everything has been written.  Returns the total number of bytes
+ * written, or -1 on error (with errno set to EINVAL if iovcnt is larger
+ * than PG_IOV_MAX).
+ */
+extern ssize_t pg_pwritev_with_retry(int fd,
+									 const struct iovec *iov,
+									 int iovcnt,
+									 off_t offset);
+
+#endif							/* PG_IOVEC_H */
diff --git a/src/port/Makefile b/src/port/Makefile
index e41b005c4f1..bc4923ce840 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -53,6 +53,8 @@ OBJS = \
pgstrcasecmp.o \
pgstrsignal.o \
pqsignal.o \
+ pread.o \
+ pwrite.o \
qsort.o \
qsort_arg.o \
quotes.o \
diff --git a/src/port/pread.c b/src/port/pread.c
index 486f07a7dff..a5ae2759fa0 100644
--- a/src/port/pread.c
+++ b/src/port/pread.c
@@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* pread.c
- * Implementation of pread(2) for platforms that lack one.
+ * Implementation of pread[v](2) for platforms that lack one.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
@@ -9,7 +9,8 @@
* src/port/pread.c
*
* Note that this implementation changes the current file position, unlike
- * the POSIX function, so we use the name pg_pread().
+ * the POSIX function, so we use the name pg_pread(). Likewise for the
+ * iovec version.
*
*-------------------------------------------------------------------------
*/
@@ -23,6 +24,9 @@
#include <unistd.h>
#endif
+#include "port/pg_iovec.h"
+
+#ifndef HAVE_PREAD
ssize_t
pg_pread(int fd, void *buf, size_t size, off_t offset)
{
@@ -56,3 +60,38 @@ pg_pread(int fd, void *buf, size_t size, off_t offset)
return read(fd, buf, size);
#endif
}
+#endif
+
+#ifndef HAVE_PREADV
+/*
+ * pg_preadv
+ *		Replacement for preadv(2) on platforms that lack it.
+ *
+ * Reads from fd, starting at the given offset, into the iovcnt buffers
+ * described by iov.  Returns the number of bytes read, or -1 on failure.
+ * Unlike the POSIX function, this may change the current file position.
+ */
+ssize_t
+pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+#ifdef HAVE_READV
+	/* A single buffer needs no vectoring; hand it straight to pg_pread(). */
+	if (iovcnt == 1)
+		return pg_pread(fd, iov[0].iov_base, iov[0].iov_len, offset);
+
+	/* Otherwise seek to the requested offset and do an ordinary readv(). */
+	if (lseek(fd, offset, SEEK_SET) < 0)
+		return -1;
+	return readv(fd, iov, iovcnt);
+#else
+	ssize_t		total = 0;
+	int			i = 0;
+
+	/* No readv() either: issue one pg_pread() per buffer, in order. */
+	while (i < iovcnt)
+	{
+		ssize_t		nread = pg_pread(fd, iov[i].iov_base, iov[i].iov_len, offset);
+
+		/*
+		 * A failure on the first buffer is reported as an error; a failure
+		 * on a later one returns the bytes accumulated so far.
+		 */
+		if (nread < 0)
+			return (i == 0) ? -1 : total;
+
+		total += nread;
+		offset += nread;
+
+		/* A short read ends the transfer. */
+		if (nread < iov[i].iov_len)
+			break;
+
+		++i;
+	}
+	return total;
+#endif
+}
+#endif
diff --git a/src/port/pwrite.c b/src/port/pwrite.c
index 282b27115e5..e029f44bc0c 100644
--- a/src/port/pwrite.c
+++ b/src/port/pwrite.c
@@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* pwrite.c
- * Implementation of pwrite(2) for platforms that lack one.
+ * Implementation of pwrite[v](2) for platforms that lack one.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
@@ -9,7 +9,8 @@
* src/port/pwrite.c
*
* Note that this implementation changes the current file position, unlike
- * the POSIX function, so we use the name pg_pwrite().
+ * the POSIX function, so we use the name pg_pwrite(). Likewise for the
+ * iovec version.
*
*-------------------------------------------------------------------------
*/
@@ -23,6 +24,9 @@
#include <unistd.h>
#endif
+#include "port/pg_iovec.h"
+
+#ifndef HAVE_PWRITE
ssize_t
pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
{
@@ -53,3 +57,102 @@ pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
return write(fd, buf, size);
#endif
}
+#endif
+
+#ifndef HAVE_PWRITEV
+/*
+ * pg_pwritev
+ *		Replacement for pwritev(2) on platforms that lack it.
+ *
+ * Writes the iovcnt buffers described by iov to fd, starting at the given
+ * offset.  Returns the number of bytes written, or -1 on failure.  Unlike
+ * the POSIX function, this may change the current file position.
+ */
+ssize_t
+pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+#ifdef HAVE_WRITEV
+	/* A single buffer needs no vectoring; hand it straight to pg_pwrite(). */
+	if (iovcnt == 1)
+		return pg_pwrite(fd, iov[0].iov_base, iov[0].iov_len, offset);
+
+	/* Otherwise seek to the requested offset and do an ordinary writev(). */
+	if (lseek(fd, offset, SEEK_SET) < 0)
+		return -1;
+	return writev(fd, iov, iovcnt);
+#else
+	ssize_t		total = 0;
+	int			i = 0;
+
+	/* No writev() either: issue one pg_pwrite() per buffer, in order. */
+	while (i < iovcnt)
+	{
+		ssize_t		nwritten = pg_pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset);
+
+		/*
+		 * A failure on the first buffer is reported as an error; a failure
+		 * on a later one returns the bytes accumulated so far.
+		 */
+		if (nwritten < 0)
+			return (i == 0) ? -1 : total;
+
+		total += nwritten;
+		offset += nwritten;
+
+		/* A short write ends the transfer. */
+		if (nwritten < iov[i].iov_len)
+			break;
+
+		++i;
+	}
+	return total;
+#endif
+}
+#endif
+
+/*
+ * pg_pwritev_with_retry
+ *		Convenience wrapper for pg_pwritev() that retries on partial write.
+ *
+ * Keeps calling pg_pwritev() until every byte described by iov has been
+ * written at the given offset.  Returns the total number of bytes written.
+ * Returns -1 if iovcnt exceeds PG_IOV_MAX (errno is set to EINVAL) or if
+ * pg_pwritev() fails.  If an error is returned, it is unspecified how much
+ * has been written.
+ *
+ * The caller's iovec array is never modified; adjustments needed for a
+ * retry are made in a local copy.
+ *
+ * Sanity checks here use Assert() rather than elog(): src/port code is also
+ * linked into frontend programs, where elog() is not available.
+ */
+ssize_t
+pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+	struct iovec iov_copy[PG_IOV_MAX];
+	ssize_t		sum = 0;
+	ssize_t		part;
+
+	/* We'd better have space to make a copy, in case we need to retry. */
+	if (iovcnt > PG_IOV_MAX)
+	{
+		errno = EINVAL;
+		return -1;
+	}
+
+	for (;;)
+	{
+		/* Write as much as we can. */
+		part = pg_pwritev(fd, iov, iovcnt, offset);
+		if (part < 0)
+			return -1;
+
+#ifdef SIMULATE_SHORT_WRITE
+		part = Min(part, 4096);
+#endif
+
+		/* Count our progress. */
+		sum += part;
+		offset += part;
+
+		/* Step over iovecs that are done. */
+		while (iovcnt > 0 && iov->iov_len <= part)
+		{
+			part -= iov->iov_len;
+			++iov;
+			--iovcnt;
+		}
+
+		/* Are they all done? */
+		if (iovcnt == 0)
+		{
+			/* We don't expect the kernel to write more than requested. */
+			Assert(part == 0);
+			break;
+		}
+
+		/*
+		 * The leading iovec must be only partly written.  Check this before
+		 * the memmove() below: on a second or later retry, iov points into
+		 * iov_copy itself, so the move may overwrite the element iov refers
+		 * to.
+		 */
+		Assert(iovcnt > 0);
+		Assert(iov->iov_len > part);
+
+		/*
+		 * Move whatever's left to the front of our mutable copy and adjust
+		 * the leading iovec.  memmove() is required because source and
+		 * destination can overlap.
+		 */
+		memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
+		iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
+		iov_copy[0].iov_len -= part;
+		iov = iov_copy;
+	}
+
+	return sum;
+}
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index 95d4e826b1d..59a42bea97a 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -329,17 +329,20 @@ sub GenerateFiles
HAVE_PPC_LWARX_MUTEX_HINT => undef,
HAVE_PPOLL => undef,
HAVE_PREAD => undef,
+ HAVE_PREADV => undef,
HAVE_PSTAT => undef,
HAVE_PS_STRINGS => undef,
HAVE_PTHREAD => undef,
HAVE_PTHREAD_IS_THREADED_NP => undef,
HAVE_PTHREAD_PRIO_INHERIT => undef,
HAVE_PWRITE => undef,
+ HAVE_PWRITEV => undef,
HAVE_RANDOM => undef,
HAVE_READLINE_H => undef,
HAVE_READLINE_HISTORY_H => undef,
HAVE_READLINE_READLINE_H => undef,
HAVE_READLINK => undef,
+ HAVE_READV => undef,
HAVE_RL_COMPLETION_APPEND_CHARACTER => undef,
HAVE_RL_COMPLETION_MATCHES => undef,
HAVE_RL_COMPLETION_SUPPRESS_QUOTE => undef,
@@ -400,6 +403,7 @@ sub GenerateFiles
HAVE_SYS_TAS_H => undef,
HAVE_SYS_TYPES_H => 1,
HAVE_SYS_UCRED_H => undef,
+ HAVE_SYS_UIO_H => undef,
HAVE_SYS_UN_H => undef,
HAVE_TERMIOS_H => undef,
HAVE_TYPEOF => undef,
@@ -418,6 +422,7 @@ sub GenerateFiles
HAVE_WINLDAP_H => undef,
HAVE_WCSTOMBS_L => 1,
HAVE_WCTYPE_H => 1,
+ HAVE_WRITEV => undef,
HAVE_X509_GET_SIGNATURE_NID => 1,
HAVE_X86_64_POPCNTQ => undef,
HAVE__BOOL => undef,