aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2011-08-16 13:12:03 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2011-08-16 13:12:03 -0400
commit45476031a54c12bebf2f209ea2b6fb3bfc905193 (patch)
treeb85d6590e81df75b7842f6a7cd960de43c3f19a9
parent0615782b23d3e4a9d9acece919bd77f919928953 (diff)
downloadpostgresql-45476031a54c12bebf2f209ea2b6fb3bfc905193.tar.gz
postgresql-45476031a54c12bebf2f209ea2b6fb3bfc905193.zip
Fix race condition in relcache init file invalidation.
The previous code tried to synchronize by unlinking the init file twice, but that doesn't actually work: it leaves a window wherein a third process could read the already-stale init file but miss the SI messages that would tell it the data is stale. The result would be bizarre failures in catalog accesses, typically "could not read block 0 in file ..." later during startup. Instead, hold RelCacheInitLock across both the unlink and the sending of the SI messages. This is more straightforward, and might even be a bit faster since only one unlink call is needed. This has been wrong since it was put in (in 2002!), so back-patch to all supported releases.
-rw-r--r--src/backend/access/transam/twophase.c4
-rw-r--r--src/backend/utils/cache/inval.c33
-rw-r--r--src/backend/utils/cache/relcache.c66
-rw-r--r--src/include/utils/relcache.h3
4 files changed, 57 insertions, 49 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 281268120ef..54176ee9df9 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1356,10 +1356,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
* after we send the SI messages. See AtEOXact_Inval()
*/
if (hdr->initfileinval)
- RelationCacheInitFileInvalidate(true);
+ RelationCacheInitFilePreInvalidate();
SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);
if (hdr->initfileinval)
- RelationCacheInitFileInvalidate(false);
+ RelationCacheInitFilePostInvalidate();
/* And now do the callbacks */
if (isCommit)
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index a5580bd92fb..4249bd33765 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -854,24 +854,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
return numSharedInvalidMessagesArray;
}
-#define RecoveryRelationCacheInitFileInvalidate(dbo, tbo, tf) \
-{ \
- DatabasePath = GetDatabasePath(dbo, tbo); \
- elog(trace_recovery(DEBUG4), "removing relcache init file in %s", DatabasePath); \
- RelationCacheInitFileInvalidate(tf); \
- pfree(DatabasePath); \
-}
-
/*
* ProcessCommittedInvalidationMessages is executed by xact_redo_commit()
* to process invalidation messages added to commit records.
*
* Relcache init file invalidation requires processing both
* before and after we send the SI messages. See AtEOXact_Inval()
- *
- * We deliberately avoid SetDatabasePath() since it is intended to be used
- * only once by normal backends, so we set DatabasePath directly then
- * pfree after use. See RecoveryRelationCacheInitFileInvalidate() macro.
*/
void
ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
@@ -885,12 +873,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
(RelcacheInitFileInval ? " and relcache file invalidation" : ""));
if (RelcacheInitFileInval)
- RecoveryRelationCacheInitFileInvalidate(dbid, tsid, true);
+ {
+ /*
+ * RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
+ * but we should not use SetDatabasePath during recovery, since it is
+ * intended to be used only once by normal backends. Hence, a quick
+ * hack: set DatabasePath directly then unset after use.
+ */
+ DatabasePath = GetDatabasePath(dbid, tsid);
+ elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"",
+ DatabasePath);
+ RelationCacheInitFilePreInvalidate();
+ pfree(DatabasePath);
+ DatabasePath = NULL;
+ }
SendSharedInvalidMessages(msgs, nmsgs);
if (RelcacheInitFileInval)
- RecoveryRelationCacheInitFileInvalidate(dbid, tsid, false);
+ RelationCacheInitFilePostInvalidate();
}
/*
@@ -931,7 +932,7 @@ AtEOXact_Inval(bool isCommit)
* unless we committed.
*/
if (transInvalInfo->RelcacheInitFileInval)
- RelationCacheInitFileInvalidate(true);
+ RelationCacheInitFilePreInvalidate();
AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
&transInvalInfo->CurrentCmdInvalidMsgs);
@@ -940,7 +941,7 @@ AtEOXact_Inval(bool isCommit)
SendSharedInvalidMessages);
if (transInvalInfo->RelcacheInitFileInval)
- RelationCacheInitFileInvalidate(false);
+ RelationCacheInitFilePostInvalidate();
}
else if (transInvalInfo != NULL)
{
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 1a400a0a576..bc52c56021c 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -4377,8 +4377,8 @@ write_relcache_init_file(bool shared)
* updated by SI message processing, but we can't be sure whether what we
* wrote out was up-to-date.)
*
- * This mustn't run concurrently with RelationCacheInitFileInvalidate, so
- * grab a serialization lock for the duration.
+ * This mustn't run concurrently with the code that unlinks an init file
+ * and sends SI messages, so grab a serialization lock for the duration.
*/
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
@@ -4442,19 +4442,22 @@ RelationIdIsInInitFile(Oid relationId)
* changed one or more of the relation cache entries that are kept in the
* local init file.
*
- * We actually need to remove the init file twice: once just before sending
- * the SI messages that include relcache inval for such relations, and once
- * just after sending them. The unlink before ensures that a backend that's
- * currently starting cannot read the now-obsolete init file and then miss
- * the SI messages that will force it to update its relcache entries. (This
- * works because the backend startup sequence gets into the PGPROC array before
- * trying to load the init file.) The unlink after is to synchronize with a
- * backend that may currently be trying to write an init file based on data
- * that we've just rendered invalid. Such a backend will see the SI messages,
- * but we can't leave the init file sitting around to fool later backends.
+ * To be safe against concurrent inspection or rewriting of the init file,
+ * we must take RelCacheInitLock, then remove the old init file, then send
+ * the SI messages that include relcache inval for such relations, and then
+ * release RelCacheInitLock. This serializes the whole affair against
+ * write_relcache_init_file, so that we can be sure that any other process
+ * that's concurrently trying to create a new init file won't move an
+ * already-stale version into place after we unlink. Also, because we unlink
+ * before sending the SI messages, a backend that's currently starting cannot
+ * read the now-obsolete init file and then miss the SI messages that will
+ * force it to update its relcache entries. (This works because the backend
+ * startup sequence gets into the sinval array before trying to load the init
+ * file.)
*
- * Ignore any failure to unlink the file, since it might not be there if
- * no backend has been started since the last removal.
+ * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
+ * then release the lock in RelationCacheInitFilePostInvalidate. Caller must
+ * send any pending SI messages between those calls.
*
* Notice this deals only with the local init file, not the shared init file.
* The reason is that there can never be a "significant" change to the
@@ -4464,34 +4467,37 @@ RelationIdIsInInitFile(Oid relationId)
* be invalid enough to make it necessary to remove it.
*/
void
-RelationCacheInitFileInvalidate(bool beforeSend)
+RelationCacheInitFilePreInvalidate(void)
{
char initfilename[MAXPGPATH];
snprintf(initfilename, sizeof(initfilename), "%s/%s",
DatabasePath, RELCACHE_INIT_FILENAME);
- if (beforeSend)
- {
- /* no interlock needed here */
- unlink(initfilename);
- }
- else
+ LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
+
+ if (unlink(initfilename) < 0)
{
/*
- * We need to interlock this against write_relcache_init_file, to
- * guard against possibility that someone renames a new-but-
- * already-obsolete init file into place just after we unlink. With
- * the interlock, it's certain that write_relcache_init_file will
- * notice our SI inval message before renaming into place, or else
- * that we will execute second and successfully unlink the file.
+ * The file might not be there if no backend has been started since
+ * the last removal. But complain about failures other than ENOENT.
+ * Fortunately, it's not too late to abort the transaction if we
+ * can't get rid of the would-be-obsolete init file.
*/
- LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
- unlink(initfilename);
- LWLockRelease(RelCacheInitLock);
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove cache file \"%s\": %m",
+ initfilename)));
}
}
+void
+RelationCacheInitFilePostInvalidate(void)
+{
+ LWLockRelease(RelCacheInitLock);
+}
+
/*
* Remove the init files during postmaster startup.
*
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 1f4def56845..5de9f359ef2 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -97,7 +97,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
* Routines to help manage rebuilding of relcache init files
*/
extern bool RelationIdIsInInitFile(Oid relationId);
-extern void RelationCacheInitFileInvalidate(bool beforeSend);
+extern void RelationCacheInitFilePreInvalidate(void);
+extern void RelationCacheInitFilePostInvalidate(void);
extern void RelationCacheInitFileRemove(void);
/* should be used only by relcache.c and catcache.c */