aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Noah Misch <noah@leadboat.com> 2024-10-25 06:51:06 -0700
committer: Noah Misch <noah@leadboat.com> 2024-10-25 06:51:07 -0700
commit: dca68242a81b39672557b77c72ce15790bf0f7fe (patch)
tree: 02bae5e98a8c549d938b12f08b51d212098ec7b2 /src
parent: ad24b75659100df76bf998956c59481c4262b3ec (diff)
download: postgresql-dca68242a81b39672557b77c72ce15790bf0f7fe.tar.gz
download: postgresql-dca68242a81b39672557b77c72ce15790bf0f7fe.zip
At end of recovery, reset all sinval-managed caches.
An inplace update's invalidation messages are part of its transaction's commit record. However, the update survives even if its transaction aborts or we stop recovery before replaying its transaction commit. After recovery, a backend that started in recovery could update the row without incorporating the inplace update. That could result in a table with an index, yet relhasindex=f. That is a source of index corruption. This bulk invalidation avoids the functional consequences. A future change can fix the !RecoveryInProgress() scenario without changing the WAL format. Back-patch to v17 - v12 (all supported versions). v18 will instead add invalidations to WAL. Discussion: https://postgr.es/m/20240618152349.7f.nmisch@google.com
Diffstat (limited to 'src')
-rw-r--r-- src/backend/access/transam/xlog.c | 25
-rw-r--r-- src/backend/storage/ipc/sinvaladt.c | 41
-rw-r--r-- src/include/storage/sinvaladt.h | 1
3 files changed, 67 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7654ad76de1..f1a795bba9f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -70,6 +70,7 @@
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
+#include "storage/sinvaladt.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "storage/sync.h"
@@ -8020,6 +8021,30 @@ StartupXLOG(void)
}
/*
+ * Invalidate all sinval-managed caches before READ WRITE transactions
+ * begin. The xl_heap_inplace WAL record doesn't store sufficient data
+ * for invalidations. The commit record, if any, has the invalidations.
+ * However, the inplace update is permanent, whether or not we reach a
+ * commit record. Fortunately, read-only transactions tolerate caches not
+ * reflecting the latest inplace updates. Read-only transactions
+ * experience the notable inplace updates as follows:
+ *
+ * - relhasindex=true affects readers only after the CREATE INDEX
+ * transaction commit makes an index fully available to them.
+ *
+ * - datconnlimit=DATCONNLIMIT_INVALID_DB affects readers only at
+ * InitPostgres() time, and that read does not use a cache.
+ *
+ * - relfrozenxid, datfrozenxid, relminmxid, and datminmxid have no effect
+ * on readers.
+ *
+ * Hence, hot standby queries (all READ ONLY) function correctly without
+ * the missing invalidations. This avoided changing the WAL format in
+ * back branches.
+ */
+ SIResetAll();
+
+ /*
* Preallocate additional log files, if wanted.
*/
PreallocXlogFiles(EndOfLog);
diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c
index 946bd8e3cb5..f0e2fb96ea3 100644
--- a/src/backend/storage/ipc/sinvaladt.c
+++ b/src/backend/storage/ipc/sinvaladt.c
@@ -748,6 +748,47 @@ SICleanupQueue(bool callerHasWriteLock, int minFree)
}
}
+/*
+ * SIResetAll
+ * Mark all active backends as "reset"
+ *
+ * Use this when we don't know what needs to be invalidated. It's a
+ * cluster-wide InvalidateSystemCaches(). This was a back-branch-only remedy
+ * to avoid a WAL format change. Takes no arguments and returns nothing;
+ * both sinval LWLocks are held exclusively while shared state is updated.
+ * The implementation is like SICleanupQueue(false, MAXNUMMESSAGES + 1), with
+ * one addition. SICleanupQueue() assumes minFree << MAXNUMMESSAGES, so it
+ * assumes hasMessages==true for any backend it resets. We're resetting even
+ * fully-caught-up backends, so we set hasMessages.
+ */
+void
+SIResetAll(void)
+{
+ SISeg *segP = shmInvalBuffer;
+ int i;
+
+ LWLockAcquire(SInvalWriteLock, LW_EXCLUSIVE); /* write lock first ... */
+ LWLockAcquire(SInvalReadLock, LW_EXCLUSIVE); /* ... then read lock; released in reverse order below */
+
+ for (i = 0; i < segP->lastBackend; i++) /* visit every procState slot below lastBackend */
+ {
+ ProcState *stateP = &segP->procState[i];
+
+ if (stateP->procPid == 0 || stateP->sendOnly) /* skip slots with no PID and send-only entries */
+ continue;
+
+ /* Consuming the reset will update "nextMsgNum" and "signaled". */
+ stateP->resetState = true;
+ stateP->hasMessages = true; /* set unconditionally, even for a fully-caught-up backend (see header) */
+ }
+
+ segP->minMsgNum = segP->maxMsgNum; /* min == max afterwards; NOTE(review): presumably leaves the queue empty - confirm against SICleanupQueue() */
+ segP->nextThreshold = CLEANUP_MIN; /* NOTE(review): presumably restarts the cleanup threshold at its minimum - confirm */
+
+ LWLockRelease(SInvalReadLock);
+ LWLockRelease(SInvalWriteLock);
+}
+
/*
* GetNextLocalTransactionId --- allocate a new LocalTransactionId
diff --git a/src/include/storage/sinvaladt.h b/src/include/storage/sinvaladt.h
index 14148bf8201..07d229a5cb2 100644
--- a/src/include/storage/sinvaladt.h
+++ b/src/include/storage/sinvaladt.h
@@ -37,6 +37,7 @@ extern void BackendIdGetTransactionIds(int backendID, TransactionId *xid, Transa
extern void SIInsertDataEntries(const SharedInvalidationMessage *data, int n);
extern int SIGetDataEntries(SharedInvalidationMessage *data, int datasize);
extern void SICleanupQueue(bool callerHasWriteLock, int minFree);
+extern void SIResetAll(void);
extern LocalTransactionId GetNextLocalTransactionId(void);