aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/bin/Makefile1
-rw-r--r--src/bin/pg_test_fsync/.gitignore1
-rw-r--r--src/bin/pg_test_fsync/Makefile27
-rw-r--r--src/bin/pg_test_fsync/pg_test_fsync.c603
-rw-r--r--src/tools/msvc/Mkvcbuild.pm8
5 files changed, 636 insertions, 4 deletions
diff --git a/src/bin/Makefile b/src/bin/Makefile
index cc78798fba7..06a0ab75b6b 100644
--- a/src/bin/Makefile
+++ b/src/bin/Makefile
@@ -23,6 +23,7 @@ SUBDIRS = \
pg_dump \
pg_resetxlog \
pg_rewind \
+ pg_test_fsync \
pg_upgrade \
pgbench \
psql \
diff --git a/src/bin/pg_test_fsync/.gitignore b/src/bin/pg_test_fsync/.gitignore
new file mode 100644
index 00000000000..f3b59324985
--- /dev/null
+++ b/src/bin/pg_test_fsync/.gitignore
@@ -0,0 +1 @@
+/pg_test_fsync
diff --git a/src/bin/pg_test_fsync/Makefile b/src/bin/pg_test_fsync/Makefile
new file mode 100644
index 00000000000..90496df566d
--- /dev/null
+++ b/src/bin/pg_test_fsync/Makefile
@@ -0,0 +1,27 @@
+# src/bin/pg_test_fsync/Makefile
+
+# Description and icon settings for the program.
+PGFILEDESC = "pg_test_fsync - test various disk sync methods"
+PGAPPICON = win32
+
+# Location of this directory and of the build-tree root; the shared make
+# definitions are pulled in from Makefile.global below.
+subdir = src/bin/pg_test_fsync
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = pg_test_fsync.o $(WIN32RES)
+
+all: pg_test_fsync
+
+# Link the program from $(OBJS).  submake-libpgport is an order-only
+# prerequisite (it follows the "|"), so it is not part of $^.
+pg_test_fsync: $(OBJS) | submake-libpgport
+ $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+
+# Copy the built program into $(DESTDIR)$(bindir), creating it first.
+install: all installdirs
+ $(INSTALL_PROGRAM) pg_test_fsync$(X) '$(DESTDIR)$(bindir)/pg_test_fsync$(X)'
+
+installdirs:
+ $(MKDIR_P) '$(DESTDIR)$(bindir)'
+
+# Remove the installed program.
+uninstall:
+ rm -f '$(DESTDIR)$(bindir)/pg_test_fsync$(X)'
+
+# Remove the built program and its object files.
+clean distclean maintainer-clean:
+ rm -f pg_test_fsync$(X) $(OBJS)
diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c
new file mode 100644
index 00000000000..c8427623d20
--- /dev/null
+++ b/src/bin/pg_test_fsync/pg_test_fsync.c
@@ -0,0 +1,603 @@
+/*
+ * pg_test_fsync.c
+ * tests all supported fsync() methods
+ */
+
+#include "postgres_fe.h"
+
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include "getopt_long.h"
+#include "access/xlogdefs.h"
+
+
+/*
+ * put the temp files in the local directory
+ * unless the user specifies otherwise
+ */
+#define FSYNC_FILENAME "./pg_test_fsync.out"
+
+#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
+
+/* printf formats shared by the per-test result lines */
+#define LABEL_FORMAT " %-30s"
+#define NA_FORMAT "%20s"
+#define OPS_FORMAT "%13.3f ops/sec %6.0f usecs/op"
+#define USECS_SEC 1000000
+
+/*
+ * These are macros to avoid timing the function call overhead.
+ *
+ * START_TIMER clears alarm_triggered, arranges for it to be set again after
+ * secs_per_test seconds, and records the start time in start_t.
+ */
+#ifndef WIN32
+#define START_TIMER \
+do { \
+	alarm_triggered = false; \
+	alarm(secs_per_test); \
+	gettimeofday(&start_t, NULL); \
+} while (0)
+#else
+/*
+ * WIN32 doesn't support alarm, so we create a thread and sleep there.
+ * CreateThread() reports failure by returning NULL (not
+ * INVALID_HANDLE_VALUE), so NULL is the value to test for.
+ */
+#define START_TIMER \
+do { \
+	alarm_triggered = false; \
+	if (CreateThread(NULL, 0, process_alarm, NULL, 0, NULL) == NULL) \
+	{ \
+		fprintf(stderr, "Cannot create thread for alarm\n"); \
+		exit(1); \
+	} \
+	gettimeofday(&start_t, NULL); \
+} while (0)
+#endif
+
+/*
+ * STOP_TIMER records the stop time in stop_t and prints the result line for
+ * the "ops" operations counted by the enclosing test loop.  The caller must
+ * have a local variable named "ops" in scope.
+ */
+#define STOP_TIMER \
+do { \
+	gettimeofday(&stop_t, NULL); \
+	print_elapse(start_t, stop_t, ops); \
+} while (0)
+
+
+/* program name as returned by get_progname(), used in messages */
+static const char *progname;
+
+/* duration of each timed test, in seconds (option -s) */
+static int	secs_per_test = 5;
+
+/* set once the test file has been created, so cleanup knows to remove it */
+static int	needs_unlink = 0;
+
+/*
+ * full_buf is the raw data buffer; buf points into it at an
+ * XLOG_BLCKSZ-aligned address (see prepare_buf).  filename is the test
+ * file path (option -f).
+ */
+static char full_buf[XLOG_SEG_SIZE],
+		   *buf,
+		   *filename = FSYNC_FILENAME;
+
+/* start/stop timestamps written by START_TIMER / STOP_TIMER */
+static struct timeval start_t,
+			stop_t;
+
+/*
+ * Set by process_alarm() when the current test's time is up and polled by
+ * the test loops.  Because it is written from a signal handler (or, on
+ * WIN32, from the sleeper thread) while the main flow reads it, it must be
+ * volatile sig_atomic_t so the loops re-read it on every iteration.
+ */
+static volatile sig_atomic_t alarm_triggered = false;
+
+
+static void handle_args(int argc, char *argv[]);
+static void prepare_buf(void);
+static void test_open(void);
+static void test_non_sync(void);
+static void test_sync(int writes_per_op);
+static void test_open_syncs(void);
+static void test_open_sync(const char *msg, int writes_size);
+static void test_file_descriptor_sync(void);
+
+#ifndef WIN32
+static void process_alarm(int sig);
+#else
+static DWORD WINAPI process_alarm(LPVOID param);
+#endif
+static void signal_cleanup(int sig);
+
+#ifdef HAVE_FSYNC_WRITETHROUGH
+static int	pg_fsync_writethrough(int fd);
+#endif
+static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
+static void die(const char *str);
+
+
+/*
+ * Program entry point: parse options, install signal handlers, create the
+ * test file, run every group of timing tests, then remove the test file.
+ */
+int
+main(int argc, char *argv[])
+{
+	progname = get_progname(argv[0]);
+	handle_args(argc, argv);
+
+	/*
+	 * An interrupted run must not leave the test file behind, so the
+	 * termination signals all go to signal_cleanup().
+	 */
+	pqsignal(SIGINT, signal_cleanup);
+	pqsignal(SIGTERM, signal_cleanup);
+#ifndef WIN32
+	/* SIGALRM ends each timed test where alarm() is used */
+	pqsignal(SIGALRM, process_alarm);
+#endif
+#ifdef SIGHUP
+	/* SIGHUP is not defined on win32 */
+	pqsignal(SIGHUP, signal_cleanup);
+#endif
+
+	prepare_buf();
+	test_open();
+
+	test_sync(1);				/* one XLOG_BLCKSZ write per operation */
+	test_sync(2);				/* two XLOG_BLCKSZ writes per operation */
+	test_open_syncs();
+	test_file_descriptor_sync();
+	test_non_sync();
+
+	unlink(filename);
+
+	return 0;
+}
+
+/*
+ * Parse the command line and print the run's settings.
+ *
+ * Recognized options:
+ *	-f / --filename FILENAME		path of the test file (copied into filename)
+ *	-s / --secs-per-test SECS		duration of each test, a positive integer
+ *
+ * --help/-? and --version/-V are honored only as the first argument and
+ * exit(0) after printing.  Any invalid option, invalid option value, or
+ * stray non-option argument prints a diagnostic on stderr and exit(1)s.
+ */
+static void
+handle_args(int argc, char *argv[])
+{
+	static struct option long_options[] = {
+		{"filename", required_argument, NULL, 'f'},
+		{"secs-per-test", required_argument, NULL, 's'},
+		{NULL, 0, NULL, 0}
+	};
+
+	int			option;			/* Command line option */
+	int			optindex = 0;	/* used by getopt_long */
+	long		secs;			/* parsed -s value before range check */
+	char	   *endptr;			/* first unparsed character of -s value */
+
+	if (argc > 1)
+	{
+		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
+		{
+			printf("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n", progname);
+			exit(0);
+		}
+		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
+		{
+			puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
+			exit(0);
+		}
+	}
+
+	while ((option = getopt_long(argc, argv, "f:s:",
+								 long_options, &optindex)) != -1)
+	{
+		switch (option)
+		{
+			case 'f':
+				filename = strdup(optarg);
+				if (filename == NULL)
+				{
+					fprintf(stderr, "%s: out of memory\n", progname);
+					exit(1);
+				}
+				break;
+
+			case 's':
+
+				/*
+				 * Reject anything that is not a positive integer fitting in
+				 * an int.  In particular a value of 0 must not get through:
+				 * alarm(0) never fires, so the test loops would never end.
+				 */
+				errno = 0;
+				secs = strtol(optarg, &endptr, 10);
+				if (endptr == optarg || *endptr != '\0' || errno != 0 ||
+					secs <= 0 || secs != (long) (int) secs)
+				{
+					fprintf(stderr,
+							"%s: invalid argument for option %s: \"%s\"\n",
+							progname, "--secs-per-test", optarg);
+					fprintf(stderr, "Try \"%s --help\" for more information.\n",
+							progname);
+					exit(1);
+				}
+				secs_per_test = (int) secs;
+				break;
+
+			default:
+				fprintf(stderr, "Try \"%s --help\" for more information.\n",
+						progname);
+				exit(1);
+				break;
+		}
+	}
+
+	if (argc > optind)
+	{
+		fprintf(stderr,
+				"%s: too many command-line arguments (first is \"%s\")\n",
+				progname, argv[optind]);
+		fprintf(stderr, "Try \"%s --help\" for more information.\n",
+				progname);
+		exit(1);
+	}
+
+	printf("%d seconds per test\n", secs_per_test);
+#if PG_O_DIRECT != 0
+	printf("O_DIRECT supported on this platform for open_datasync and open_sync.\n");
+#else
+	printf("Direct I/O is not supported on this platform.\n");
+#endif
+}
+
+/*
+ * Fill full_buf with random() data and point buf at an address inside it
+ * aligned (via TYPEALIGN) for XLOG_BLCKSZ.
+ */
+static void
+prepare_buf(void)
+{
+	int			i = 0;
+
+	/* every byte of the buffer gets its own random() value */
+	while (i < XLOG_SEG_SIZE)
+	{
+		full_buf[i] = random();
+		i++;
+	}
+
+	buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
+}
+
+/*
+ * Create (or open) the test file, fill it with XLOG_SEG_SIZE bytes from
+ * full_buf, and fsync it.  Any failure is fatal via die().
+ */
+static void
+test_open(void)
+{
+	int			fd;
+
+	/* verify that the target file can be opened at all */
+	fd = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+	if (fd == -1)
+		die("could not open output file");
+
+	/* from now on signal_cleanup() has a file to remove */
+	needs_unlink = 1;
+
+	if (write(fd, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE)
+		die("write failed");
+
+	/* flush now so leftover dirty buffers don't distort the timed tests */
+	if (fsync(fd) != 0)
+		die("fsync failed");
+
+	close(fd);
+}
+
+/*
+ * Time each method of forcing writes to disk, printing one result line per
+ * method.
+ *
+ * writes_per_op is the number of XLOG_BLCKSZ-sized writes issued per
+ * operation; the heading says "one" when it is 1 and "two" otherwise.
+ * Each sub-test repeats its operation until the alarm fires, seeking back to
+ * the start of the file after every operation.
+ *
+ * A method that is not compiled in prints "n/a".  For open_datasync and
+ * open_sync, an open() failure prints "n/a*" and causes a footnote about
+ * direct I/O to be printed at the end.  Any other failure, including a
+ * failed sync call, is fatal via die().
+ */
+static void
+test_sync(int writes_per_op)
+{
+	int			tmpfile,
+				ops,
+				writes;
+	bool		fs_warning = false;
+
+	if (writes_per_op == 1)
+		printf("\nCompare file sync methods using one %dkB write:\n", XLOG_BLCKSZ_K);
+	else
+		printf("\nCompare file sync methods using two %dkB writes:\n", XLOG_BLCKSZ_K);
+	printf("(in wal_sync_method preference order, except fdatasync is Linux's default)\n");
+
+	/*
+	 * Test open_datasync if available
+	 */
+	printf(LABEL_FORMAT, "open_datasync");
+	fflush(stdout);
+
+#ifdef OPEN_DATASYNC_FLAG
+	if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
+	{
+		printf(NA_FORMAT, "n/a*\n");
+		fs_warning = true;
+	}
+	else
+	{
+		START_TIMER;
+		for (ops = 0; alarm_triggered == false; ops++)
+		{
+			for (writes = 0; writes < writes_per_op; writes++)
+				if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+					die("write failed");
+			if (lseek(tmpfile, 0, SEEK_SET) == -1)
+				die("seek failed");
+		}
+		STOP_TIMER;
+		close(tmpfile);
+	}
+#else
+	printf(NA_FORMAT, "n/a\n");
+#endif
+
+	/*
+	 * Test fdatasync if available
+	 */
+	printf(LABEL_FORMAT, "fdatasync");
+	fflush(stdout);
+
+#ifdef HAVE_FDATASYNC
+	if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
+		die("could not open output file");
+	START_TIMER;
+	for (ops = 0; alarm_triggered == false; ops++)
+	{
+		for (writes = 0; writes < writes_per_op; writes++)
+			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+				die("write failed");
+		/* a failed sync must not be reported as a valid timing */
+		if (fdatasync(tmpfile) != 0)
+			die("fdatasync failed");
+		if (lseek(tmpfile, 0, SEEK_SET) == -1)
+			die("seek failed");
+	}
+	STOP_TIMER;
+	close(tmpfile);
+#else
+	printf(NA_FORMAT, "n/a\n");
+#endif
+
+	/*
+	 * Test fsync
+	 */
+	printf(LABEL_FORMAT, "fsync");
+	fflush(stdout);
+
+	if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
+		die("could not open output file");
+	START_TIMER;
+	for (ops = 0; alarm_triggered == false; ops++)
+	{
+		for (writes = 0; writes < writes_per_op; writes++)
+			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+				die("write failed");
+		if (fsync(tmpfile) != 0)
+			die("fsync failed");
+		if (lseek(tmpfile, 0, SEEK_SET) == -1)
+			die("seek failed");
+	}
+	STOP_TIMER;
+	close(tmpfile);
+
+	/*
+	 * If fsync_writethrough is available, test as well
+	 */
+	printf(LABEL_FORMAT, "fsync_writethrough");
+	fflush(stdout);
+
+#ifdef HAVE_FSYNC_WRITETHROUGH
+	if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
+		die("could not open output file");
+	START_TIMER;
+	for (ops = 0; alarm_triggered == false; ops++)
+	{
+		for (writes = 0; writes < writes_per_op; writes++)
+			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+				die("write failed");
+		if (pg_fsync_writethrough(tmpfile) != 0)
+			die("fsync failed");
+		if (lseek(tmpfile, 0, SEEK_SET) == -1)
+			die("seek failed");
+	}
+	STOP_TIMER;
+	close(tmpfile);
+#else
+	printf(NA_FORMAT, "n/a\n");
+#endif
+
+	/*
+	 * Test open_sync if available
+	 */
+	printf(LABEL_FORMAT, "open_sync");
+	fflush(stdout);
+
+#ifdef OPEN_SYNC_FLAG
+	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
+	{
+		printf(NA_FORMAT, "n/a*\n");
+		fs_warning = true;
+	}
+	else
+	{
+		START_TIMER;
+		for (ops = 0; alarm_triggered == false; ops++)
+		{
+			for (writes = 0; writes < writes_per_op; writes++)
+				if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
+					/*
+					 * This can generate write failures if the filesystem has
+					 * a large block size, e.g. 4k, and there is no support
+					 * for O_DIRECT writes smaller than the file system block
+					 * size, e.g. XFS.
+					 */
+					die("write failed");
+			if (lseek(tmpfile, 0, SEEK_SET) == -1)
+				die("seek failed");
+		}
+		STOP_TIMER;
+		close(tmpfile);
+	}
+#else
+	printf(NA_FORMAT, "n/a\n");
+#endif
+
+	/* explain the "n/a*" entries, if any were printed above */
+	if (fs_warning)
+	{
+		printf("* This file system and its mount options do not support direct\n");
+		printf("I/O, e.g. ext4 in journaled mode.\n");
+	}
+}
+
+/*
+ * Run test_open_sync() once for each way of splitting 16kB into equal
+ * writes, from a single 16kB write down to sixteen 1kB writes.
+ */
+static void
+test_open_syncs(void)
+{
+	/* result-line label and write size in kB, in the order they are run */
+	static const struct
+	{
+		const char *label;
+		int			size_kb;
+	}			cases[] = {
+		{" 1 * 16kB open_sync write", 16},
+		{" 2 * 8kB open_sync writes", 8},
+		{" 4 * 4kB open_sync writes", 4},
+		{" 8 * 2kB open_sync writes", 2},
+		{"16 * 1kB open_sync writes", 1}
+	};
+	int			i;
+
+	printf("\nCompare open_sync with different write sizes:\n");
+	printf("(This is designed to compare the cost of writing 16kB in different write\n"
+		   "open_sync sizes.)\n");
+
+	for (i = 0; i < (int) (sizeof(cases) / sizeof(cases[0])); i++)
+		test_open_sync(cases[i].label, cases[i].size_kb);
+}
+
+/*
+ * Time open_sync writes of a given size.
+ *
+ * msg is the label printed at the start of the result line; writes_size is
+ * the size of each write in kB.  One operation is 16 / writes_size writes
+ * followed by a seek back to the start of the file.  Prints "n/a" when
+ * OPEN_SYNC_FLAG is not defined and "n/a*" when the open() fails.
+ */
+static void
+test_open_sync(const char *msg, int writes_size)
+{
+#ifdef OPEN_SYNC_FLAG
+	int			fd;
+	int			ops;
+	int			n;
+#endif
+
+	printf(LABEL_FORMAT, msg);
+	fflush(stdout);
+
+#ifndef OPEN_SYNC_FLAG
+	printf(NA_FORMAT, "n/a\n");
+#else
+	fd = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0);
+	if (fd == -1)
+	{
+		printf(NA_FORMAT, "n/a*\n");
+		return;
+	}
+
+	START_TIMER;
+	ops = 0;
+	while (alarm_triggered == false)
+	{
+		for (n = 0; n < 16 / writes_size; n++)
+		{
+			if (write(fd, buf, writes_size * 1024) != writes_size * 1024)
+				die("write failed");
+		}
+		if (lseek(fd, 0, SEEK_SET) == -1)
+			die("seek failed");
+		ops++;
+	}
+	STOP_TIMER;
+	close(fd);
+#endif
+}
+
+/*
+ * Compare fsync() on the descriptor that performed the write with fsync()
+ * on a separately opened descriptor for the same file, printing one result
+ * line for each variant.
+ */
+static void
+test_file_descriptor_sync(void)
+{
+	int			ops;
+	int			fd;
+
+	/*
+	 * The question being answered is whether fsync can flush data that was
+	 * written through a different descriptor for the same file, which is
+	 * what happens when several processes fsync the same file.  Possibly
+	 * this should be done with writethrough on platforms which support it.
+	 */
+	printf("\nTest if fsync on non-write file descriptor is honored:\n");
+	printf("(If the times are similar, fsync() can sync data written on a different\n"
+		   "descriptor.)\n");
+
+	/*
+	 * Baseline: write and fsync on the same descriptor, then close.
+	 */
+	printf(LABEL_FORMAT, "write, fsync, close");
+	fflush(stdout);
+
+	START_TIMER;
+	ops = 0;
+	while (alarm_triggered == false)
+	{
+		fd = open(filename, O_RDWR, 0);
+		if (fd == -1)
+			die("could not open output file");
+		if (write(fd, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+			die("write failed");
+		if (fsync(fd) != 0)
+			die("fsync failed");
+		close(fd);
+
+		/*
+		 * An extra open/close pair keeps the number of opens equal to that
+		 * of the second variant below.
+		 */
+		fd = open(filename, O_RDWR, 0);
+		if (fd == -1)
+			die("could not open output file");
+		close(fd);
+
+		ops++;
+	}
+	STOP_TIMER;
+
+	/*
+	 * Variant: write and close, then reopen and fsync on the new
+	 * descriptor, as when one process fsyncs another's writes.
+	 */
+	printf(LABEL_FORMAT, "write, close, fsync");
+	fflush(stdout);
+
+	START_TIMER;
+	ops = 0;
+	while (alarm_triggered == false)
+	{
+		fd = open(filename, O_RDWR, 0);
+		if (fd == -1)
+			die("could not open output file");
+		if (write(fd, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+			die("write failed");
+		close(fd);
+
+		/* second descriptor, used only for the fsync */
+		fd = open(filename, O_RDWR, 0);
+		if (fd == -1)
+			die("could not open output file");
+		if (fsync(fd) != 0)
+			die("fsync failed");
+		close(fd);
+
+		ops++;
+	}
+	STOP_TIMER;
+}
+
+/*
+ * Time plain XLOG_BLCKSZ writes with no sync call at all; each operation
+ * is an open, one write and a close.
+ */
+static void
+test_non_sync(void)
+{
+	int			ops;
+	int			fd;
+
+	printf("\nNon-sync'ed %dkB writes:\n", XLOG_BLCKSZ_K);
+	printf(LABEL_FORMAT, "write");
+	fflush(stdout);
+
+	START_TIMER;
+	ops = 0;
+	while (alarm_triggered == false)
+	{
+		fd = open(filename, O_RDWR, 0);
+		if (fd == -1)
+			die("could not open output file");
+		if (write(fd, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+			die("write failed");
+		close(fd);
+		ops++;
+	}
+	STOP_TIMER;
+}
+
+/*
+ * Signal handler for the termination signals installed in main(): remove
+ * the test file if one was created, end the current output line, and
+ * terminate with the signal number as exit status.
+ *
+ * Everything called here has to be async-signal-safe, because the signal
+ * may arrive while the main flow is inside printf() or another stdio call.
+ * That rules out puts() and exit(); write() and _exit() are used instead.
+ * As a consequence, output still sitting in stdio buffers is not flushed.
+ */
+static void
+signal_cleanup(int signum)
+{
+	int			rc;
+
+	/* Delete the file if it exists. Ignore errors */
+	if (needs_unlink)
+		unlink(filename);
+	/* Finish incomplete line on stdout (file descriptor 1) */
+	rc = write(1, "\n", 1);
+	(void) rc;					/* nothing useful can be done on failure */
+	_exit(signum);
+}
+
+#ifdef HAVE_FSYNC_WRITETHROUGH
+
+/*
+ * Write-through sync of fd: _commit() on WIN32, fcntl(F_FULLFSYNC) where
+ * that is defined.  Returns 0 on success and -1 on failure; when neither
+ * mechanism exists it fails with errno set to ENOSYS.
+ */
+static int
+pg_fsync_writethrough(int fd)
+{
+#if defined(WIN32)
+	return _commit(fd);
+#elif defined(F_FULLFSYNC)
+	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
+		return -1;
+	return 0;
+#else
+	errno = ENOSYS;
+	return -1;
+#endif
+}
+#endif
+
+/*
+ * Print the result of one timed test using OPS_FORMAT: operations per
+ * second, and average microseconds per operation.
+ *
+ * start_t and stop_t bracket the test; ops is the number of operations
+ * completed between them.
+ */
+static void
+print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
+{
+	double		total_time;
+	double		per_second;
+	double		avg_op_time_us;
+
+	/* elapsed wall-clock time in seconds, including the fractional part */
+	total_time = (stop_t.tv_sec - start_t.tv_sec) +
+		(stop_t.tv_usec - start_t.tv_usec) * 0.000001;
+
+	per_second = ops / total_time;
+	avg_op_time_us = (total_time / ops) * USECS_SEC;
+
+	printf(OPS_FORMAT "\n", per_second, avg_op_time_us);
+}
+
+/*
+ * Mark the current timed test as finished by setting alarm_triggered.
+ */
+#ifdef WIN32
+static DWORD WINAPI
+process_alarm(LPVOID param)
+{
+	/*
+	 * There is no alarm() on WIN32; this runs in the thread started by
+	 * START_TIMER and simply sleeps for the test duration first.
+	 */
+	Sleep(secs_per_test * 1000);
+	alarm_triggered = true;
+	ExitThread(0);
+}
+#else
+/* installed as the SIGALRM handler in main() */
+static void
+process_alarm(int sig)
+{
+	alarm_triggered = true;
+}
+#endif
+
+/*
+ * Report a fatal error on stderr as "str: <strerror(errno)>" and exit with
+ * status 1.
+ */
+static void
+die(const char *str)
+{
+	const char *reason = strerror(errno);
+
+	fprintf(stderr, "%s: %s\n", str, reason);
+	exit(1);
+}
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 986f3b3794c..5b86c805ae6 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -35,13 +35,13 @@ my @contrib_uselibpq =
my @contrib_uselibpgport = (
'oid2name',
'pg_standby',
- 'pg_test_fsync', 'pg_test_timing',
+ 'pg_test_timing',
'pg_xlogdump',
'vacuumlo');
my @contrib_uselibpgcommon = (
'oid2name',
'pg_standby',
- 'pg_test_fsync', 'pg_test_timing',
+ 'pg_test_timing',
'pg_xlogdump',
'vacuumlo');
my $contrib_extralibs = undef;
@@ -55,8 +55,8 @@ my @contrib_excludes = ('pgcrypto', 'commit_ts', 'intagg', 'sepgsql');
# Set of variables for frontend modules
my $frontend_defines = { 'initdb' => 'FRONTEND' };
my @frontend_uselibpq = ('pg_ctl', 'pg_upgrade', 'pgbench', 'psql');
-my @frontend_uselibpgport = ( 'pg_archivecleanup', 'pg_upgrade', 'pgbench' );
-my @frontend_uselibpgcommon = ( 'pg_archivecleanup', 'pg_upgrade', 'pgbench' );
+my @frontend_uselibpgport = ( 'pg_archivecleanup', 'pg_test_fsync', 'pg_upgrade', 'pgbench' );
+my @frontend_uselibpgcommon = ( 'pg_archivecleanup', 'pg_test_fsync', 'pg_upgrade', 'pgbench' );
my $frontend_extralibs = {
'initdb' => ['ws2_32.lib'],
'pg_restore' => ['ws2_32.lib'],