diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/bin/Makefile | 1 | ||||
-rw-r--r-- | src/bin/pg_test_fsync/.gitignore | 1 | ||||
-rw-r--r-- | src/bin/pg_test_fsync/Makefile | 27 | ||||
-rw-r--r-- | src/bin/pg_test_fsync/pg_test_fsync.c | 603 | ||||
-rw-r--r-- | src/tools/msvc/Mkvcbuild.pm | 8 |
5 files changed, 636 insertions, 4 deletions
diff --git a/src/bin/Makefile b/src/bin/Makefile index cc78798fba7..06a0ab75b6b 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -23,6 +23,7 @@ SUBDIRS = \ pg_dump \ pg_resetxlog \ pg_rewind \ + pg_test_fsync \ pg_upgrade \ pgbench \ psql \ diff --git a/src/bin/pg_test_fsync/.gitignore b/src/bin/pg_test_fsync/.gitignore new file mode 100644 index 00000000000..f3b59324985 --- /dev/null +++ b/src/bin/pg_test_fsync/.gitignore @@ -0,0 +1 @@ +/pg_test_fsync diff --git a/src/bin/pg_test_fsync/Makefile b/src/bin/pg_test_fsync/Makefile new file mode 100644 index 00000000000..90496df566d --- /dev/null +++ b/src/bin/pg_test_fsync/Makefile @@ -0,0 +1,27 @@ +# src/bin/pg_test_fsync/Makefile + +PGFILEDESC = "pg_test_fsync - test various disk sync methods" +PGAPPICON = win32 + +subdir = src/bin/pg_test_fsync +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = pg_test_fsync.o $(WIN32RES) + +all: pg_test_fsync + +pg_test_fsync: $(OBJS) | submake-libpgport + $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) + +install: all installdirs + $(INSTALL_PROGRAM) pg_test_fsync$(X) '$(DESTDIR)$(bindir)/pg_test_fsync$(X)' + +installdirs: + $(MKDIR_P) '$(DESTDIR)$(bindir)' + +uninstall: + rm -f '$(DESTDIR)$(bindir)/pg_test_fsync$(X)' + +clean distclean maintainer-clean: + rm -f pg_test_fsync$(X) $(OBJS) diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c new file mode 100644 index 00000000000..c8427623d20 --- /dev/null +++ b/src/bin/pg_test_fsync/pg_test_fsync.c @@ -0,0 +1,603 @@ +/* + * pg_test_fsync.c + * tests all supported fsync() methods + */ + +#include "postgres_fe.h" + +#include <sys/stat.h> +#include <sys/time.h> +#include <time.h> +#include <unistd.h> +#include <signal.h> + +#include "getopt_long.h" +#include "access/xlogdefs.h" + + +/* + * put the temp files in the local directory + * unless the user specifies otherwise + */ +#define FSYNC_FILENAME "./pg_test_fsync.out" + +#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024) + +#define LABEL_FORMAT " %-30s" +#define NA_FORMAT "%20s" +#define OPS_FORMAT "%13.3f ops/sec %6.0f usecs/op" +#define USECS_SEC 1000000 + +/* These are macros to avoid timing the function call overhead. */ +#ifndef WIN32 +#define START_TIMER \ +do { \ + alarm_triggered = false; \ + alarm(secs_per_test); \ + gettimeofday(&start_t, NULL); \ +} while (0) +#else +/* WIN32 doesn't support alarm, so we create a thread and sleep there */ +#define START_TIMER \ +do { \ + alarm_triggered = false; \ + if (CreateThread(NULL, 0, process_alarm, NULL, 0, NULL) == \ + INVALID_HANDLE_VALUE) \ + { \ + fprintf(stderr, "Cannot create thread for alarm\n"); \ + exit(1); \ + } \ + gettimeofday(&start_t, NULL); \ +} while (0) +#endif + +#define STOP_TIMER \ +do { \ + gettimeofday(&stop_t, NULL); \ + print_elapse(start_t, stop_t, ops); \ +} while (0) + + +static const char *progname; + +static int secs_per_test = 5; +static int needs_unlink = 0; +static char full_buf[XLOG_SEG_SIZE], + *buf, + *filename = FSYNC_FILENAME; +static struct timeval start_t, + stop_t; +static bool alarm_triggered = false; + + +static void handle_args(int argc, char *argv[]); +static void prepare_buf(void); +static void test_open(void); +static void test_non_sync(void); +static void test_sync(int writes_per_op); +static void test_open_syncs(void); +static void test_open_sync(const char *msg, int writes_size); +static void test_file_descriptor_sync(void); + +#ifndef WIN32 +static void process_alarm(int sig); +#else +static DWORD WINAPI process_alarm(LPVOID param); +#endif +static void signal_cleanup(int sig); + +#ifdef HAVE_FSYNC_WRITETHROUGH +static int pg_fsync_writethrough(int fd); +#endif +static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops); +static void die(const char *str); + + +int +main(int argc, char *argv[]) +{ + progname = get_progname(argv[0]); + + handle_args(argc, argv); + + /* Prevent leaving behind the test file */ + pqsignal(SIGINT, signal_cleanup); + pqsignal(SIGTERM, signal_cleanup); +#ifndef WIN32 + pqsignal(SIGALRM, process_alarm); +#endif +#ifdef SIGHUP + /* Not defined on win32 */ + pqsignal(SIGHUP, signal_cleanup); +#endif + + prepare_buf(); + + test_open(); + + /* Test using 1 XLOG_BLCKSZ write */ + test_sync(1); + + /* Test using 2 XLOG_BLCKSZ writes */ + test_sync(2); + + test_open_syncs(); + + test_file_descriptor_sync(); + + test_non_sync(); + + unlink(filename); + + return 0; +} + +static void +handle_args(int argc, char *argv[]) +{ + static struct option long_options[] = { + {"filename", required_argument, NULL, 'f'}, + {"secs-per-test", required_argument, NULL, 's'}, + {NULL, 0, NULL, 0} + }; + + int option; /* Command line option */ + int optindex = 0; /* used by getopt_long */ + + if (argc > 1) + { + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) + { + printf("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n", progname); + exit(0); + } + if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) + { + puts("pg_test_fsync (PostgreSQL) " PG_VERSION); + exit(0); + } + } + + while ((option = getopt_long(argc, argv, "f:s:", + long_options, &optindex)) != -1) + { + switch (option) + { + case 'f': + filename = strdup(optarg); + break; + + case 's': + secs_per_test = atoi(optarg); + break; + + default: + fprintf(stderr, "Try \"%s --help\" for more information.\n", + progname); + exit(1); + break; + } + } + + if (argc > optind) + { + fprintf(stderr, + "%s: too many command-line arguments (first is \"%s\")\n", + progname, argv[optind]); + fprintf(stderr, "Try \"%s --help\" for more information.\n", + progname); + exit(1); + } + + printf("%d seconds per test\n", secs_per_test); +#if PG_O_DIRECT != 0 + printf("O_DIRECT supported on this platform for open_datasync and open_sync.\n"); +#else + printf("Direct I/O is not supported on this platform.\n"); +#endif +} + +static void +prepare_buf(void) +{ + int ops; + + /* write random data into buffer */ + for (ops = 0; ops < XLOG_SEG_SIZE; ops++) + full_buf[ops] = random(); + + buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf); +} + +static void +test_open(void) +{ + int tmpfile; + + /* + * test if we can open the target file + */ + if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1) + die("could not open output file"); + needs_unlink = 1; + if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE) + die("write failed"); + + /* fsync now so that dirty buffers don't skew later tests */ + if (fsync(tmpfile) != 0) + die("fsync failed"); + + close(tmpfile); +} + +static void +test_sync(int writes_per_op) +{ + int tmpfile, + ops, + writes; + bool fs_warning = false; + + if (writes_per_op == 1) + printf("\nCompare file sync methods using one %dkB write:\n", XLOG_BLCKSZ_K); + else + printf("\nCompare file sync methods using two %dkB writes:\n", XLOG_BLCKSZ_K); + printf("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"); + + /* + * Test open_datasync if available + */ + printf(LABEL_FORMAT, "open_datasync"); + fflush(stdout); + +#ifdef OPEN_DATASYNC_FLAG + if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1) + { + printf(NA_FORMAT, "n/a*\n"); + fs_warning = true; + } + else + { + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + STOP_TIMER; + close(tmpfile); + } +#else + printf(NA_FORMAT, "n/a\n"); +#endif + +/* + * Test fdatasync if available + */ + printf(LABEL_FORMAT, "fdatasync"); + fflush(stdout); + +#ifdef HAVE_FDATASYNC + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("could not open output file"); + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + fdatasync(tmpfile); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + STOP_TIMER; + close(tmpfile); +#else + printf(NA_FORMAT, "n/a\n"); +#endif + +/* + * Test fsync + */ + printf(LABEL_FORMAT, "fsync"); + fflush(stdout); + + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("could not open output file"); + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + if (fsync(tmpfile) != 0) + die("fsync failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + STOP_TIMER; + close(tmpfile); + +/* + * If fsync_writethrough is available, test as well + */ + printf(LABEL_FORMAT, "fsync_writethrough"); + fflush(stdout); + +#ifdef HAVE_FSYNC_WRITETHROUGH + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("could not open output file"); + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + if (pg_fsync_writethrough(tmpfile) != 0) + die("fsync failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + STOP_TIMER; + close(tmpfile); +#else + printf(NA_FORMAT, "n/a\n"); +#endif + +/* + * Test open_sync if available + */ + printf(LABEL_FORMAT, "open_sync"); + fflush(stdout); + +#ifdef OPEN_SYNC_FLAG + if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1) + { + printf(NA_FORMAT, "n/a*\n"); + fs_warning = true; + } + else + { + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + + /* + * This can generate write failures if the filesystem has + * a large block size, e.g. 4k, and there is no support + * for O_DIRECT writes smaller than the file system block + * size, e.g. XFS. + */ + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + STOP_TIMER; + close(tmpfile); + } +#else + printf(NA_FORMAT, "n/a\n"); +#endif + + if (fs_warning) + { + printf("* This file system and its mount options do not support direct\n"); + printf("I/O, e.g. ext4 in journaled mode.\n"); + } +} + +static void +test_open_syncs(void) +{ + printf("\nCompare open_sync with different write sizes:\n"); + printf("(This is designed to compare the cost of writing 16kB in different write\n" + "open_sync sizes.)\n"); + + test_open_sync(" 1 * 16kB open_sync write", 16); + test_open_sync(" 2 * 8kB open_sync writes", 8); + test_open_sync(" 4 * 4kB open_sync writes", 4); + test_open_sync(" 8 * 2kB open_sync writes", 2); + test_open_sync("16 * 1kB open_sync writes", 1); +} + +/* + * Test open_sync with different size files + */ +static void +test_open_sync(const char *msg, int writes_size) +{ +#ifdef OPEN_SYNC_FLAG + int tmpfile, + ops, + writes; +#endif + + printf(LABEL_FORMAT, msg); + fflush(stdout); + +#ifdef OPEN_SYNC_FLAG + if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1) + printf(NA_FORMAT, "n/a*\n"); + else + { + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < 16 / writes_size; writes++) + if (write(tmpfile, buf, writes_size * 1024) != + writes_size * 1024) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + STOP_TIMER; + close(tmpfile); + } +#else + printf(NA_FORMAT, "n/a\n"); +#endif +} + +static void +test_file_descriptor_sync(void) +{ + int tmpfile, + ops; + + /* + * Test whether fsync can sync data written on a different descriptor for + * the same file. This checks the efficiency of multi-process fsyncs + * against the same file. Possibly this should be done with writethrough + * on platforms which support it. + */ + printf("\nTest if fsync on non-write file descriptor is honored:\n"); + printf("(If the times are similar, fsync() can sync data written on a different\n" + "descriptor.)\n"); + + /* + * first write, fsync and close, which is the normal behavior without + * multiple descriptors + */ + printf(LABEL_FORMAT, "write, fsync, close"); + fflush(stdout); + + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("could not open output file"); + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + if (fsync(tmpfile) != 0) + die("fsync failed"); + close(tmpfile); + + /* + * open and close the file again to be consistent with the following + * test + */ + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("could not open output file"); + close(tmpfile); + } + STOP_TIMER; + + /* + * Now open, write, close, open again and fsync This simulates processes + * fsyncing each other's writes. + */ + printf(LABEL_FORMAT, "write, close, fsync"); + fflush(stdout); + + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("could not open output file"); + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + close(tmpfile); + /* reopen file */ + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("could not open output file"); + if (fsync(tmpfile) != 0) + die("fsync failed"); + close(tmpfile); + } + STOP_TIMER; +} + +static void +test_non_sync(void) +{ + int tmpfile, + ops; + + /* + * Test a simple write without fsync + */ + printf("\nNon-sync'ed %dkB writes:\n", XLOG_BLCKSZ_K); + printf(LABEL_FORMAT, "write"); + fflush(stdout); + + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("could not open output file"); + if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + close(tmpfile); + } + STOP_TIMER; +} + +static void +signal_cleanup(int signum) +{ + /* Delete the file if it exists. Ignore errors */ + if (needs_unlink) + unlink(filename); + /* Finish incomplete line on stdout */ + puts(""); + exit(signum); +} + +#ifdef HAVE_FSYNC_WRITETHROUGH + +static int +pg_fsync_writethrough(int fd) +{ +#ifdef WIN32 + return _commit(fd); +#elif defined(F_FULLFSYNC) + return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0; +#else + errno = ENOSYS; + return -1; +#endif +} +#endif + +/* + * print out the writes per second for tests + */ +static void +print_elapse(struct timeval start_t, struct timeval stop_t, int ops) +{ + double total_time = (stop_t.tv_sec - start_t.tv_sec) + + (stop_t.tv_usec - start_t.tv_usec) * 0.000001; + double per_second = ops / total_time; + double avg_op_time_us = (total_time / ops) * USECS_SEC; + + printf(OPS_FORMAT "\n", per_second, avg_op_time_us); +} + +#ifndef WIN32 +static void +process_alarm(int sig) +{ + alarm_triggered = true; +} +#else +static DWORD WINAPI +process_alarm(LPVOID param) +{ + /* WIN32 doesn't support alarm, so we create a thread and sleep here */ + Sleep(secs_per_test * 1000); + alarm_triggered = true; + ExitThread(0); +} +#endif + +static void +die(const char *str) +{ + fprintf(stderr, "%s: %s\n", str, strerror(errno)); + exit(1); +} diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 986f3b3794c..5b86c805ae6 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -35,13 +35,13 @@ my @contrib_uselibpq = my @contrib_uselibpgport = ( 'oid2name', 'pg_standby', - 'pg_test_fsync', 'pg_test_timing', + 'pg_test_timing', 'pg_xlogdump', 'vacuumlo'); my @contrib_uselibpgcommon = ( 'oid2name', 'pg_standby', - 'pg_test_fsync', 'pg_test_timing', + 'pg_test_timing', 'pg_xlogdump', 'vacuumlo'); my $contrib_extralibs = undef; @@ -55,8 +55,8 @@ my @contrib_excludes = ('pgcrypto', 'commit_ts', 'intagg', 'sepgsql'); # Set of variables for frontend modules my $frontend_defines = { 'initdb' => 'FRONTEND' }; my @frontend_uselibpq = ('pg_ctl', 'pg_upgrade', 'pgbench', 'psql'); -my @frontend_uselibpgport = ( 'pg_archivecleanup', 'pg_upgrade', 'pgbench' ); -my @frontend_uselibpgcommon = ( 'pg_archivecleanup', 'pg_upgrade', 'pgbench' ); +my @frontend_uselibpgport = ( 'pg_archivecleanup', 'pg_test_fsync', 'pg_upgrade', 'pgbench' ); +my @frontend_uselibpgcommon = ( 'pg_archivecleanup', 'pg_test_fsync', 'pg_upgrade', 'pgbench' ); my $frontend_extralibs = { 'initdb' => ['ws2_32.lib'], 'pg_restore' => ['ws2_32.lib'], |