author     | Noah Misch <noah@leadboat.com> | 2024-10-25 06:51:02 -0700
committer  | Noah Misch <noah@leadboat.com> | 2024-10-25 06:51:02 -0700
commit     | 243e9b40f1b2dd09d6e5bf91ebf6e822a2cd3704 (patch)
tree       | d092b3c3b261da64a5f17a35b87d67626f9591f7 /src/backend/access
parent     | 0fe173680e148984a150326b80c322a91ffa899d (diff)
For inplace update, send nontransactional invalidations.

The inplace update survives ROLLBACK. The inval didn't, so another
backend's DDL could then update the row without incorporating the
inplace update. In the test this fixes, a mix of CREATE INDEX and ALTER
TABLE resulted in a table with an index, yet relhasindex=f. That is a
source of index corruption. Back-patch to v12 (all supported versions).

The back branch versions don't change WAL, because those branches just
added end-of-recovery SIResetAll(). All branches change the ABI of
extern function PrepareToInvalidateCacheTuple(). No PGXN extension
calls that, and there's no apparent use case in extensions.

Reviewed by Nitin Motiani and (in earlier versions) Andres Freund.

Discussion: https://postgr.es/m/20240523000548.58.nmisch@google.com
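
For illustration, a deliberately simplified two-backend model (plain C, not PostgreSQL code; all names invented for this sketch) of the failure mode described above: the inplace change to the shared row persists across ROLLBACK, but when the accompanying invalidation is queued transactionally it is discarded with the ROLLBACK, so the other backend's ordinary row update writes back its stale cached copy.

#include <stdbool.h>
#include <stdio.h>

/* One shared catalog row "on disk", plus backend B's private cached copy. */
static bool disk_relhasindex;
static bool b_cached_relhasindex;
static bool b_cache_valid;

/* Backend A: inplace update inside a transaction that then rolls back. */
static void
backend_a_inplace_update(bool nontransactional_inval)
{
    disk_relhasindex = true;    /* the inplace update itself survives ROLLBACK */
    if (nontransactional_inval)
        b_cache_valid = false;  /* sent immediately, so it reaches backend B */
    /* a transactional inval would be discarded right here by the ROLLBACK */
}

/* Backend B: DDL that rewrites the row from its (possibly stale) cache. */
static void
backend_b_ddl(void)
{
    if (!b_cache_valid)
        b_cached_relhasindex = disk_relhasindex;    /* refresh from disk */
    disk_relhasindex = b_cached_relhasindex;        /* ordinary row update */
}

static void
run(bool nontransactional_inval, const char *label)
{
    disk_relhasindex = false;
    b_cached_relhasindex = false;   /* B cached the row before A's update */
    b_cache_valid = true;
    backend_a_inplace_update(nontransactional_inval);
    backend_b_ddl();
    printf("%s: inplace update %s\n", label,
           disk_relhasindex ? "preserved" : "lost");
}

int
main(void)
{
    run(false, "transactional inval (old)");    /* prints "lost" */
    run(true, "nontransactional inval (new)");  /* prints "preserved" */
    return 0;
}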
Diffstat (limited to 'src/backend/access')
-rw-r--r--  src/backend/access/heap/heapam.c           | 60
-rw-r--r--  src/backend/access/heap/heapam_xlog.c      |  6
-rw-r--r--  src/backend/access/rmgrdesc/heapdesc.c     |  4
-rw-r--r--  src/backend/access/rmgrdesc/standbydesc.c  |  6
-rw-r--r--  src/backend/access/transam/xact.c          | 26
5 files changed, 81 insertions, 21 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 82a0492aac5..3a13671a1ef 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6326,6 +6326,9 @@ heap_inplace_update_and_unlock(Relation relation,
     HeapTupleHeader htup = oldtup->t_data;
     uint32      oldlen;
     uint32      newlen;
+    int         nmsgs = 0;
+    SharedInvalidationMessage *invalMessages = NULL;
+    bool        RelcacheInitFileInval = false;
 
     Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
     oldlen = oldtup->t_len - htup->t_hoff;
@@ -6333,6 +6336,29 @@ heap_inplace_update_and_unlock(Relation relation,
     if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
         elog(ERROR, "wrong tuple length");
 
+    /*
+     * Construct shared cache inval if necessary. Note that because we only
+     * pass the new version of the tuple, this mustn't be used for any
+     * operations that could change catcache lookup keys. But we aren't
+     * bothering with index updates either, so that's true a fortiori.
+     */
+    CacheInvalidateHeapTupleInplace(relation, tuple, NULL);
+
+    /* Like RecordTransactionCommit(), log only if needed */
+    if (XLogStandbyInfoActive())
+        nmsgs = inplaceGetInvalidationMessages(&invalMessages,
+                                               &RelcacheInitFileInval);
+
+    /*
+     * Unlink relcache init files as needed. If unlinking, acquire
+     * RelCacheInitLock until after associated invalidations. By doing this
+     * in advance, if we checkpoint and then crash between inplace
+     * XLogInsert() and inval, we don't rely on StartupXLOG() ->
+     * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
+     * neglect to PANIC on EIO.
+     */
+    PreInplace_Inval();
+
     /* NO EREPORT(ERROR) from here till changes are logged */
     START_CRIT_SECTION();
@@ -6362,9 +6388,16 @@ heap_inplace_update_and_unlock(Relation relation,
         XLogRecPtr  recptr;
 
         xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
+        xlrec.dbId = MyDatabaseId;
+        xlrec.tsId = MyDatabaseTableSpace;
+        xlrec.relcacheInitFileInval = RelcacheInitFileInval;
+        xlrec.nmsgs = nmsgs;
 
         XLogBeginInsert();
-        XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
+        XLogRegisterData((char *) &xlrec, MinSizeOfHeapInplace);
+        if (nmsgs != 0)
+            XLogRegisterData((char *) invalMessages,
+                             nmsgs * sizeof(SharedInvalidationMessage));
         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
         XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
 
@@ -6376,17 +6409,28 @@ heap_inplace_update_and_unlock(Relation relation,
         PageSetLSN(BufferGetPage(buffer), recptr);
     }
 
+    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+    /*
+     * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
+     * do this before UnlockTuple().
+     *
+     * If we're mutating a tuple visible only to this transaction, there's an
+     * equivalent transactional inval from the action that created the tuple,
+     * and this inval is superfluous.
+     */
+    AtInplace_Inval();
+
     END_CRIT_SECTION();
+    UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
 
-    heap_inplace_unlock(relation, oldtup, buffer);
+    AcceptInvalidationMessages();   /* local processing of just-sent inval */
 
     /*
-     * Send out shared cache inval if necessary. Note that because we only
-     * pass the new version of the tuple, this mustn't be used for any
-     * operations that could change catcache lookup keys. But we aren't
-     * bothering with index updates either, so that's true a fortiori.
-     *
-     * XXX ROLLBACK discards the invalidation. See test inplace-inval.spec.
+     * Queue a transactional inval. The immediate invalidation we just sent
+     * is the only one known to be necessary. To reduce risk from the
+     * transition to immediate invalidation, continue sending a transactional
+     * invalidation like we've long done. Third-party code might rely on it.
      */
     if (!IsBootstrapProcessingMode())
         CacheInvalidateHeapTuple(relation, tuple, NULL);
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 6dae7233ecb..c5208f3df61 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -1170,6 +1170,12 @@ heap_xlog_inplace(XLogReaderState *record)
     }
     if (BufferIsValid(buffer))
         UnlockReleaseBuffer(buffer);
+
+    ProcessCommittedInvalidationMessages(xlrec->msgs,
+                                         xlrec->nmsgs,
+                                         xlrec->relcacheInitFileInval,
+                                         xlrec->dbId,
+                                         xlrec->tsId);
 }
 
 void
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index 5f5673e088b..f31cc3a4df4 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -16,6 +16,7 @@
 
 #include "access/heapam_xlog.h"
 #include "access/rmgrdesc_utils.h"
+#include "storage/standbydefs.h"
 
 /*
  * NOTE: "keyname" argument cannot have trailing spaces or punctuation
@@ -253,6 +254,9 @@ heap_desc(StringInfo buf, XLogReaderState *record)
         xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
 
         appendStringInfo(buf, "off: %u", xlrec->offnum);
+        standby_desc_invalidations(buf, xlrec->nmsgs, xlrec->msgs,
+                                   xlrec->dbId, xlrec->tsId,
+                                   xlrec->relcacheInitFileInval);
     }
 }
diff --git a/src/backend/access/rmgrdesc/standbydesc.c b/src/backend/access/rmgrdesc/standbydesc.c
index 25f870b187e..32e509a4006 100644
--- a/src/backend/access/rmgrdesc/standbydesc.c
+++ b/src/backend/access/rmgrdesc/standbydesc.c
@@ -96,11 +96,7 @@ standby_identify(uint8 info)
     return id;
 }
 
-/*
- * This routine is used by both standby_desc and xact_desc, because
- * transaction commits and XLOG_INVALIDATIONS messages contain invalidations;
- * it seems pointless to duplicate the code.
- */
+/* also used by non-standby records having analogous invalidation fields */
 void
 standby_desc_invalidations(StringInfo buf,
                            int nmsgs, SharedInvalidationMessage *msgs,
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index d8f6c658420..004f7e10e55 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1369,14 +1369,24 @@ RecordTransactionCommit(void)
 
         /*
          * Transactions without an assigned xid can contain invalidation
-         * messages (e.g. explicit relcache invalidations or catcache
-         * invalidations for inplace updates); standbys need to process those.
-         * We can't emit a commit record without an xid, and we don't want to
-         * force assigning an xid, because that'd be problematic for e.g.
-         * vacuum. Hence we emit a bespoke record for the invalidations. We
-         * don't want to use that in case a commit record is emitted, so they
-         * happen synchronously with commits (besides not wanting to emit more
-         * WAL records).
+         * messages. While inplace updates do this, this is not known to be
+         * necessary; see comment at inplace CacheInvalidateHeapTuple().
+         * Extensions might still rely on this capability, and standbys may
+         * need to process those invals. We can't emit a commit record
+         * without an xid, and we don't want to force assigning an xid,
+         * because that'd be problematic for e.g. vacuum. Hence we emit a
+         * bespoke record for the invalidations. We don't want to use that in
+         * case a commit record is emitted, so they happen synchronously with
+         * commits (besides not wanting to emit more WAL records).
+         *
+         * XXX Every known use of this capability is a defect. Since an XID
+         * isn't controlling visibility of the change that prompted invals,
+         * other sessions need the inval even if this transaction aborts.
+         *
+         * ON COMMIT DELETE ROWS does a nontransactional index_build(), which
+         * queues a relcache inval, including in transactions without an xid
+         * that had read the (empty) table. Standbys don't need any ON COMMIT
+         * DELETE ROWS invals, but we've not done the work to withhold them.
          */
         if (nmsgs != 0)
         {
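
The matching WAL record layout change lives in src/include/access/heapam_xlog.h and src/include/storage/sinval.h, outside this diffstat. As a rough self-contained model inferred from the fields the hunks above read and write (stub types; not the authoritative definitions), the inplace record now carries its invalidation messages inline after a fixed-size header:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint16_t OffsetNumber;  /* stub */
typedef uint32_t Oid;           /* stub */

typedef struct SharedInvalidationMessage
{
    char        padding[16];    /* stub; the real type is a union in sinval.h */
} SharedInvalidationMessage;

/* Model of the record: fixed header followed by nmsgs messages. */
typedef struct xl_heap_inplace_model
{
    OffsetNumber offnum;                /* updated tuple's offset on page */
    Oid          dbId;                  /* MyDatabaseId */
    Oid          tsId;                  /* MyDatabaseTableSpace */
    bool         relcacheInitFileInval; /* unlink relcache init files? */
    int          nmsgs;                 /* number of shared inval messages */
    SharedInvalidationMessage msgs[];   /* nmsgs entries follow */
} xl_heap_inplace_model;

/* Counterpart of MinSizeOfHeapInplace: record size with no trailing msgs. */
#define MIN_SIZE_OF_HEAP_INPLACE_MODEL \
    (offsetof(xl_heap_inplace_model, nmsgs) + sizeof(int))

int
main(void)
{
    int     nmsgs = 3;

    printf("fixed part: %zu bytes; with %d msgs: %zu bytes\n",
           (size_t) MIN_SIZE_OF_HEAP_INPLACE_MODEL, nmsgs,
           MIN_SIZE_OF_HEAP_INPLACE_MODEL +
           nmsgs * sizeof(SharedInvalidationMessage));
    return 0;
}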