aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/xlog.c
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2015-05-04 14:13:53 -0400
committerRobert Haas <rhaas@postgresql.org>2015-05-04 14:13:53 -0400
commit2ce439f3379aed857517c8ce207485655000fc8e (patch)
tree196ccf42f41939b101aa6d4587ca93c8060b51a2 /src/backend/access/transam/xlog.c
parentec3d976bce7e322c29f1007d19b63b7a3a1a6ee4 (diff)
downloadpostgresql-2ce439f3379aed857517c8ce207485655000fc8e.tar.gz
postgresql-2ce439f3379aed857517c8ce207485655000fc8e.zip
Recursively fsync() the data directory after a crash.
Otherwise, if there's another crash, some writes from after the first crash might make it to disk while writes from before the crash fail to make it to disk. This could lead to data corruption. Back-patch to all supported versions. Abhijit Menon-Sen, reviewed by Andres Freund and slightly revised by me.
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r--src/backend/access/transam/xlog.c42
1 files changed, 42 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6cf441534cc..92822a192b0 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -845,6 +845,8 @@ static void WALInsertLockAcquireExclusive(void);
static void WALInsertLockRelease(void);
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
+static void fsync_pgdata(char *datadir);
+
/*
* Insert an XLOG record represented by an already-constructed chain of data
* chunks. This is a low-level routine; to construct the WAL record header
@@ -5910,6 +5912,18 @@ StartupXLOG(void)
(errmsg("database system was interrupted; last known up at %s",
str_time(ControlFile->time))));
+ /*
+ * If we previously crashed, there might be data which we had written,
+ * intending to fsync it, but which we had not actually fsync'd yet.
+ * Therefore, a power failure in the near future might cause earlier
+ * unflushed writes to be lost, even though more recent data written to
+ * disk from here on would be persisted. To avoid that, fsync the entire
+ * data directory.
+ */
+ if (ControlFile->state != DB_SHUTDOWNED &&
+ ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
+ fsync_pgdata(data_directory);
+
/* This is just to allow attaching to startup process with a debugger */
#ifdef XLOG_REPLAY_DELAY
if (ControlFile->state != DB_SHUTDOWNED)
@@ -11123,3 +11137,31 @@ SetWalWriterSleeping(bool sleeping)
XLogCtl->WalWriterSleeping = sleeping;
SpinLockRelease(&XLogCtl->info_lck);
}
+
+/*
+ * Issue fsync recursively on PGDATA and all its contents.
+ */
+static void
+fsync_pgdata(char *datadir)
+{
+ if (!enableFsync)
+ return;
+
+ /*
+ * If possible, hint to the kernel that we're soon going to fsync
+ * the data directory and its contents.
+ */
+#if defined(HAVE_SYNC_FILE_RANGE) || \
+ (defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED))
+ walkdir(datadir, pre_sync_fname);
+#endif
+
+ /*
+ * Now we do the fsync()s in the same order.
+ *
+ * It's important to fsync the destination directory itself as individual
+ * file fsyncs don't guarantee that the directory entry for the file is
+ * synced.
+ */
+ walkdir(datadir, fsync_fname);
+}