aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Paquier <michael@paquier.xyz>2022-04-28 10:11:45 +0900
committerMichael Paquier <michael@paquier.xyz>2022-04-28 10:11:45 +0900
commitccfbd9287d70038518bdd3e85d7f5fd3dd1bb880 (patch)
treec3d991db878972389cb5a460bc5319dfc7c57ecb
parent755df30e48b0a9ff8428f4c1ccb468dac29fc320 (diff)
downloadpostgresql-ccfbd9287d70038518bdd3e85d7f5fd3dd1bb880.tar.gz
postgresql-ccfbd9287d70038518bdd3e85d7f5fd3dd1bb880.zip
Replace existing durable_rename_excl() calls with durable_rename()
durable_rename_excl() attempts to avoid overwriting any existing files by using link() and unlink(), falling back to rename() on some platforms (e.g., Windows where link() followed by unlink() is not concurrent-safe, see 909b449). Most callers of durable_rename_excl() use it just in case there is an existing file, but it happens that for all of them we never expect a target file to exist (WAL segment recycling, creation of timeline history file and basic_archive). basic_archive used durable_rename_excl() to avoid overwriting an archive concurrently created by another server. Now, there is a stat() call to avoid overwriting an existing archive a couple of lines above, so note that this change opens a small TOCTOU window in this module between the stat() call and durable_rename(). Furthermore, as mentioned in the top comment of durable_rename_excl(), this routine can result in multiple hard links to the same file and data corruption, with two or more links to the same file in pg_wal/ if a crash happens before the unlink() call during WAL recycling. Specifically, this would produce links to the same file for the current WAL file and the next one because the half-recycled WAL file was re-recycled during crash recovery of a follow-up cluster restart. This change replaces all calls to durable_rename_excl() with durable_rename(). This removes the protection against accidentally overwriting an existing file, but some platforms are already living without it, and all those code paths never expect an existing file (a couple of assertions are added to check after that, in case). This is a bug fix, but knowing the unlikeliness of the problem involving one of more crashes at an exceptionally bad moment, no backpatch is done. This could be revisited in the future. Author: Nathan Bossart Reviewed-by: Robert Haas, Kyotaro Horiguchi, Michael Paquier Discussion: https://postgr.es/m/20220407182954.GA1231544@nathanxps13
-rw-r--r--contrib/basic_archive/basic_archive.c5
-rw-r--r--src/backend/access/transam/timeline.c18
-rw-r--r--src/backend/access/transam/xlog.c10
3 files changed, 13 insertions, 20 deletions
diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c
index e7efbfb9c34..ed33854c578 100644
--- a/contrib/basic_archive/basic_archive.c
+++ b/contrib/basic_archive/basic_archive.c
@@ -281,9 +281,10 @@ basic_archive_file_internal(const char *file, const char *path)
/*
* Sync the temporary file to disk and move it to its final destination.
- * This will fail if destination already exists.
+ * Note that this will overwrite any existing file, but this is only
+ * possible if someone else created the file since the stat() above.
*/
- (void) durable_rename_excl(temp, destination, ERROR);
+ (void) durable_rename(temp, destination, ERROR);
ereport(DEBUG1,
(errmsg("archived \"%s\" via basic_archive", file)));
diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
index be21968293c..c9344e5e8b4 100644
--- a/src/backend/access/transam/timeline.c
+++ b/src/backend/access/transam/timeline.c
@@ -439,14 +439,11 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
/*
* Now move the completed history file into place with its final name.
+ * The target file should not exist.
*/
TLHistoryFilePath(path, newTLI);
-
- /*
- * Perform the rename using link if available, paranoidly trying to avoid
- * overwriting an existing file (there shouldn't be one).
- */
- durable_rename_excl(tmppath, path, ERROR);
+ Assert(access(path, F_OK) != 0 && errno == ENOENT);
+ durable_rename(tmppath, path, ERROR);
/* The history file can be archived immediately. */
if (XLogArchivingActive())
@@ -517,14 +514,11 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
/*
* Now move the completed history file into place with its final name.
+ * The target file should not exist.
*/
TLHistoryFilePath(path, tli);
-
- /*
- * Perform the rename using link if available, paranoidly trying to avoid
- * overwriting an existing file (there shouldn't be one).
- */
- durable_rename_excl(tmppath, path, ERROR);
+ Assert(access(path, F_OK) != 0 && errno == ENOENT);
+ durable_rename(tmppath, path, ERROR);
}
/*
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 61cda56c6ff..45c84e3959c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3323,14 +3323,12 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
}
}
- /*
- * Perform the rename using link if available, paranoidly trying to avoid
- * overwriting an existing file (there shouldn't be one).
- */
- if (durable_rename_excl(tmppath, path, LOG) != 0)
+ /* The target file should not exist */
+ Assert(access(path, F_OK) != 0 && errno == ENOENT);
+ if (durable_rename(tmppath, path, LOG) != 0)
{
LWLockRelease(ControlFileLock);
- /* durable_rename_excl already emitted log message */
+ /* durable_rename already emitted log message */
return false;
}