aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Munro <tmunro@postgresql.org>2023-10-16 17:10:13 +1300
committerThomas Munro <tmunro@postgresql.org>2023-10-16 17:23:02 +1300
commit5e39884d322a7dd1dd595fbaaeb1f88c8907d3a6 (patch)
tree9f373c0eb65249a79869b6b5ecbf91eabf75a5dc
parent606be8a35d9726b6c0468428b6262e58d9e308af (diff)
downloadpostgresql-5e39884d322a7dd1dd595fbaaeb1f88c8907d3a6.tar.gz
postgresql-5e39884d322a7dd1dd595fbaaeb1f88c8907d3a6.zip
Try to handle torn reads of pg_control in frontend.
Some of our src/bin tools read the control file without any kind of interlocking against concurrent writes from the server. At least ext4 and ntfs can expose partially modified contents when you do that. For now, we'll try to tolerate this by retrying up to 10 times if the checksum doesn't match, until we get two reads in a row with the same bad checksum. This is not guaranteed to reach the right conclusion, but it seems very likely to. Thanks to Tom Lane for this suggestion. Various ideas for interlocking or atomicity were considered too complicated, unportable or expensive given the lack of field reports, but remain open for future reconsideration. Back-patch as far as 12. It doesn't seem like a good idea to put a heuristic change for a very rare problem into the final release of 11. Reviewed-by: Anton A. Melnikov <aamelnikov@inbox.ru> Reviewed-by: David Steele <david@pgmasters.net> Reviewed-by: Michael Paquier <michael@paquier.xyz> Discussion: https://postgr.es/m/20221123014224.xisi44byq3cf5psi%40awork3.anarazel.de
-rw-r--r--src/common/controldata_utils.c30
1 files changed, 30 insertions, 0 deletions
diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c
index 4c0da6e124e..2331319df22 100644
--- a/src/common/controldata_utils.c
+++ b/src/common/controldata_utils.c
@@ -56,12 +56,22 @@ get_controlfile(const char *DataDir, bool *crc_ok_p)
char ControlFilePath[MAXPGPATH];
pg_crc32c crc;
int r;
+#ifdef FRONTEND
+ pg_crc32c last_crc;
+ int retries = 0;
+#endif
AssertArg(crc_ok_p);
ControlFile = palloc(sizeof(ControlFileData));
snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
+#ifdef FRONTEND
+ INIT_CRC32C(last_crc);
+
+retry:
+#endif
+
#ifndef FRONTEND
if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY)) == -1)
ereport(ERROR,
@@ -117,6 +127,26 @@ get_controlfile(const char *DataDir, bool *crc_ok_p)
*crc_ok_p = EQ_CRC32C(crc, ControlFile->crc);
+#ifdef FRONTEND
+
+ /*
+ * If the server was writing at the same time, it is possible that we read
+ * partially updated contents on some systems. If the CRC doesn't match,
+ * retry a limited number of times until we compute the same bad CRC twice
+ * in a row with a short sleep in between. Then the failure is unlikely
+ * to be due to a concurrent write.
+ */
+ if (!*crc_ok_p &&
+ (retries == 0 || !EQ_CRC32C(crc, last_crc)) &&
+ retries < 10)
+ {
+ retries++;
+ last_crc = crc;
+ pg_usleep(10000);
+ goto retry;
+ }
+#endif
+
/* Make sure the control file is valid byte order. */
if (ControlFile->pg_control_version % 65536 == 0 &&
ControlFile->pg_control_version / 65536 != 0)