diff options
author:    Noah Misch <noah@leadboat.com>  2024-11-02 09:04:59 -0700
committer: Noah Misch <noah@leadboat.com>  2024-11-02 09:05:05 -0700
commit:    9a1c73636d608f12cc0545e21d0d964a5f7bf3de (patch)
tree:      af1d6f041684a0b966c4b336c2209a5887337303
parent:    b165e7106086a66fa621d5ed7d8f773da0f9d07d (diff)
download:  postgresql-9a1c73636d608f12cc0545e21d0d964a5f7bf3de.tar.gz
           postgresql-9a1c73636d608f12cc0545e21d0d964a5f7bf3de.zip
Revert "WAL-log inplace update before revealing it to other sessions."
This reverts commit bfd5c6e279c8e1702eea882439dc7ebdf4d4b3a5 (v17) and
counterparts in each other non-master branch. This unblocks reverting a
commit on which it depends.
Discussion: https://postgr.es/m/10ec0bc3-5933-1189-6bb8-5dec4114558e@gmail.com
 src/backend/access/heap/README.tuplock  |  4 +-
 src/backend/access/heap/heapam.c        | 58 ++++++-----------------
 src/backend/access/transam/xloginsert.c |  2 +
 3 files changed, 18 insertions(+), 46 deletions(-)
diff --git a/src/backend/access/heap/README.tuplock b/src/backend/access/heap/README.tuplock
index 31c52ad28f9..818cd7f9806 100644
--- a/src/backend/access/heap/README.tuplock
+++ b/src/backend/access/heap/README.tuplock
@@ -203,4 +203,6 @@
 Inplace updates create an exception to the rule that tuple data won't change
 under a reader holding a pin.  A reader of a heap_fetch() result tuple may
 witness a torn read.  Current inplace-updated fields are aligned and are no
 wider than four bytes, and current readers don't need consistency across
-fields.  Hence, they get by with just fetching each field once.
+fields.  Hence, they get by with just fetching each field once.  XXX such a
+caller may also read a value that has not reached WAL; see
+systable_inplace_update_finish().
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index e3eaf11b1a7..26af7db4eed 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6380,8 +6380,6 @@ heap_inplace_update_and_unlock(Relation relation,
 	HeapTupleHeader htup = oldtup->t_data;
 	uint32		oldlen;
 	uint32		newlen;
-	char	   *dst;
-	char	   *src;
 
 	Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
 	oldlen = oldtup->t_len - htup->t_hoff;
@@ -6389,9 +6387,6 @@ heap_inplace_update_and_unlock(Relation relation,
 	if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
 		elog(ERROR, "wrong tuple length");
 
-	dst = (char *) htup + htup->t_hoff;
-	src = (char *) tuple->t_data + tuple->t_data->t_hoff;
-
 	/*
 	 * Construct shared cache inval if necessary.  Note that because we only
 	 * pass the new version of the tuple, this mustn't be used for any
@@ -6410,15 +6405,15 @@ heap_inplace_update_and_unlock(Relation relation,
 	 */
 	PreInplace_Inval();
 
+	/* NO EREPORT(ERROR) from here till changes are logged */
+	START_CRIT_SECTION();
+
+	memcpy((char *) htup + htup->t_hoff,
+		   (char *) tuple->t_data + tuple->t_data->t_hoff,
+		   newlen);
+
 	/*----------
-	 * NO EREPORT(ERROR) from here till changes are complete
-	 *
-	 * Our buffer lock won't stop a reader having already pinned and checked
-	 * visibility for this tuple.  Hence, we write WAL first, then mutate the
-	 * buffer.  Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
-	 * checkpoint delay makes that acceptable.  With the usual order of
-	 * changes, a crash after memcpy() and before XLogInsert() could allow
-	 * datfrozenxid to overtake relfrozenxid:
+	 * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
 	 *
 	 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
 	 * ["R" is a VACUUM tbl]
@@ -6428,28 +6423,14 @@ heap_inplace_update_and_unlock(Relation relation,
 	 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
 	 * [crash]
 	 * [recovery restores datfrozenxid w/o relfrozenxid]
-	 *
-	 * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
-	 * the buffer to the stack before logging.  Here, that facilitates a FPI
-	 * of the post-mutation block before we accept other sessions seeing it.
 	 */
-	Assert(!MyProc->delayChkpt);
-	START_CRIT_SECTION();
-	MyProc->delayChkpt = true;
+
+	MarkBufferDirty(buffer);
 
 	/* XLOG stuff */
 	if (RelationNeedsWAL(relation))
 	{
 		xl_heap_inplace xlrec;
-		PGAlignedBlock copied_buffer;
-		char	   *origdata = (char *) BufferGetBlock(buffer);
-		Page		page = BufferGetPage(buffer);
-		uint16		lower = ((PageHeader) page)->pd_lower;
-		uint16		upper = ((PageHeader) page)->pd_upper;
-		uintptr_t	dst_offset_in_block;
-		RelFileNode rnode;
-		ForkNumber	forkno;
-		BlockNumber blkno;
 		XLogRecPtr	recptr;
 
 		xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
@@ -6457,28 +6438,16 @@ heap_inplace_update_and_unlock(Relation relation,
 		XLogBeginInsert();
 		XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
 
-		/* register block matching what buffer will look like after changes */
-		memcpy(copied_buffer.data, origdata, lower);
-		memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
-		dst_offset_in_block = dst - origdata;
-		memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
-		BufferGetTag(buffer, &rnode, &forkno, &blkno);
-		Assert(forkno == MAIN_FORKNUM);
-		XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer.data,
-						  REGBUF_STANDARD);
-		XLogRegisterBufData(0, src, newlen);
+		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+		XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
 
 		/* inplace updates aren't decoded atm, don't log the origin */
 
 		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
 
-		PageSetLSN(page, recptr);
+		PageSetLSN(BufferGetPage(buffer), recptr);
 	}
 
-	memcpy(dst, src, newlen);
-
-	MarkBufferDirty(buffer);
-
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 	/*
@@ -6491,7 +6460,6 @@ heap_inplace_update_and_unlock(Relation relation,
 	 */
 	AtInplace_Inval();
 
-	MyProc->delayChkpt = false;
 	END_CRIT_SECTION();
 
 	UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index da4c28f7597..134c78f12b5 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -275,6 +275,8 @@ XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
 {
 	registered_buffer *regbuf;
 
+	/* This is currently only used to WAL-log a full-page image of a page */
+	Assert(flags & REGBUF_FORCE_IMAGE);
 	Assert(begininsert_called);
 
 	if (block_id >= max_registered_block_id)