aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNoah Misch <noah@leadboat.com>2024-11-02 09:04:59 -0700
committerNoah Misch <noah@leadboat.com>2024-11-02 09:05:05 -0700
commit9a1c73636d608f12cc0545e21d0d964a5f7bf3de (patch)
treeaf1d6f041684a0b966c4b336c2209a5887337303 /src
parentb165e7106086a66fa621d5ed7d8f773da0f9d07d (diff)
downloadpostgresql-9a1c73636d608f12cc0545e21d0d964a5f7bf3de.tar.gz
postgresql-9a1c73636d608f12cc0545e21d0d964a5f7bf3de.zip
Revert "WAL-log inplace update before revealing it to other sessions."
This reverts commit bfd5c6e279c8e1702eea882439dc7ebdf4d4b3a5 (v17) and counterparts in each other non-master branch. This unblocks reverting a commit on which it depends. Discussion: https://postgr.es/m/10ec0bc3-5933-1189-6bb8-5dec4114558e@gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/heap/README.tuplock4
-rw-r--r--src/backend/access/heap/heapam.c58
-rw-r--r--src/backend/access/transam/xloginsert.c2
3 files changed, 18 insertions, 46 deletions
diff --git a/src/backend/access/heap/README.tuplock b/src/backend/access/heap/README.tuplock
index 31c52ad28f9..818cd7f9806 100644
--- a/src/backend/access/heap/README.tuplock
+++ b/src/backend/access/heap/README.tuplock
@@ -203,4 +203,6 @@ Inplace updates create an exception to the rule that tuple data won't change
under a reader holding a pin. A reader of a heap_fetch() result tuple may
witness a torn read. Current inplace-updated fields are aligned and are no
wider than four bytes, and current readers don't need consistency across
-fields. Hence, they get by with just fetching each field once.
+fields. Hence, they get by with just fetching each field once. XXX such a
+caller may also read a value that has not reached WAL; see
+systable_inplace_update_finish().
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index e3eaf11b1a7..26af7db4eed 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6380,8 +6380,6 @@ heap_inplace_update_and_unlock(Relation relation,
HeapTupleHeader htup = oldtup->t_data;
uint32 oldlen;
uint32 newlen;
- char *dst;
- char *src;
Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
oldlen = oldtup->t_len - htup->t_hoff;
@@ -6389,9 +6387,6 @@ heap_inplace_update_and_unlock(Relation relation,
if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
elog(ERROR, "wrong tuple length");
- dst = (char *) htup + htup->t_hoff;
- src = (char *) tuple->t_data + tuple->t_data->t_hoff;
-
/*
* Construct shared cache inval if necessary. Note that because we only
* pass the new version of the tuple, this mustn't be used for any
@@ -6410,15 +6405,15 @@ heap_inplace_update_and_unlock(Relation relation,
*/
PreInplace_Inval();
+ /* NO EREPORT(ERROR) from here till changes are logged */
+ START_CRIT_SECTION();
+
+ memcpy((char *) htup + htup->t_hoff,
+ (char *) tuple->t_data + tuple->t_data->t_hoff,
+ newlen);
+
/*----------
- * NO EREPORT(ERROR) from here till changes are complete
- *
- * Our buffer lock won't stop a reader having already pinned and checked
- * visibility for this tuple. Hence, we write WAL first, then mutate the
- * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
- * checkpoint delay makes that acceptable. With the usual order of
- * changes, a crash after memcpy() and before XLogInsert() could allow
- * datfrozenxid to overtake relfrozenxid:
+ * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
*
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
* ["R" is a VACUUM tbl]
@@ -6428,28 +6423,14 @@ heap_inplace_update_and_unlock(Relation relation,
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
* [crash]
* [recovery restores datfrozenxid w/o relfrozenxid]
- *
- * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
- * the buffer to the stack before logging. Here, that facilitates a FPI
- * of the post-mutation block before we accept other sessions seeing it.
*/
- Assert(!MyProc->delayChkpt);
- START_CRIT_SECTION();
- MyProc->delayChkpt = true;
+
+ MarkBufferDirty(buffer);
/* XLOG stuff */
if (RelationNeedsWAL(relation))
{
xl_heap_inplace xlrec;
- PGAlignedBlock copied_buffer;
- char *origdata = (char *) BufferGetBlock(buffer);
- Page page = BufferGetPage(buffer);
- uint16 lower = ((PageHeader) page)->pd_lower;
- uint16 upper = ((PageHeader) page)->pd_upper;
- uintptr_t dst_offset_in_block;
- RelFileNode rnode;
- ForkNumber forkno;
- BlockNumber blkno;
XLogRecPtr recptr;
xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
@@ -6457,28 +6438,16 @@ heap_inplace_update_and_unlock(Relation relation,
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
- /* register block matching what buffer will look like after changes */
- memcpy(copied_buffer.data, origdata, lower);
- memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
- dst_offset_in_block = dst - origdata;
- memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
- BufferGetTag(buffer, &rnode, &forkno, &blkno);
- Assert(forkno == MAIN_FORKNUM);
- XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer.data,
- REGBUF_STANDARD);
- XLogRegisterBufData(0, src, newlen);
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
/* inplace updates aren't decoded atm, don't log the origin */
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
- PageSetLSN(page, recptr);
+ PageSetLSN(BufferGetPage(buffer), recptr);
}
- memcpy(dst, src, newlen);
-
- MarkBufferDirty(buffer);
-
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
/*
@@ -6491,7 +6460,6 @@ heap_inplace_update_and_unlock(Relation relation,
*/
AtInplace_Inval();
- MyProc->delayChkpt = false;
END_CRIT_SECTION();
UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index da4c28f7597..134c78f12b5 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -275,6 +275,8 @@ XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
{
registered_buffer *regbuf;
+ /* This is currently only used to WAL-log a full-page image of a page */
+ Assert(flags & REGBUF_FORCE_IMAGE);
Assert(begininsert_called);
if (block_id >= max_registered_block_id)