Use the buffer cache when initializing an unlogged index.

Some of the ambuildempty functions used smgrwrite() directly, followed by smgrimmedsync(). A few small problems with that: Firstly, one is supposed to use smgrextend() when extending a relation, not smgrwrite(). It doesn't make much difference in production builds. smgrextend() updates the relation size cache, so you miss that, but that's harmless because we never use the cached relation size of an init fork. But if you compile with CHECK_WRITE_VS_EXTEND, you get an assertion failure. Secondly, the smgrwrite() calls were performed before WAL-logging, so the page image written to disk had 0/0 as the LSN, not the LSN of the WAL record. That's also harmless in practice, but seems sloppy. Thirdly, it's better to use the buffer cache, because then you don't need to smgrimmedsync() the relation to disk, which adds latency. Bypassing the cache makes sense for bulk operations like index creation, but not when you're just initializing an empty index. Creation of unlogged tables is hardly performance bottleneck in any real world applications, but nevertheless. Backpatch to v16, but no further. These issues should be harmless in practice, so better to not rock the boat in older branches. Reviewed-by: Robert Haas Discussion: https://www.postgresql.org/message-id/6e5bbc08-cdfc-b2b3-9e23-1a914b9850a9@iki.fi
author: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2023-08-23 17:21:31 +0300
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2023-08-23 17:21:31 +0300
commit: ccadf73163ca88bdaa74b8223d4dde05d17f550b (patch)
tree: c62a538b03de3b3e07ed2f904f142e2e4ba7b18d /src
parent: ee99330a0b39c47a7ac44c6c8fdd74e3da841ba5 (diff)
download: postgresql-ccadf73163ca88bdaa74b8223d4dde05d17f550b.tar.gz
postgresql-ccadf73163ca88bdaa74b8223d4dde05d17f550b.zip
2 files changed, 53 insertions, 59 deletions
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 4553aaee531..ad07b80f758 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -151,31 +151,32 @@ bthandler(PG_FUNCTION_ARGS)
 void
 btbuildempty(Relation index)
 {
+	Buffer		metabuf;
 	Page		metapage;
 
-	/* Construct metapage. */
-	metapage = (Page) palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
-	_bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
-
 	/*
-	 * Write the page and log it.  It might seem that an immediate sync would
-	 * be sufficient to guarantee that the file exists on disk, but recovery
-	 * itself might remove it while replaying, for example, an
-	 * XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record.  Therefore, we need
-	 * this even when wal_level=minimal.
+	 * Initalize the metapage.
+	 *
+	 * Regular index build bypasses the buffer manager and uses smgr functions
+	 * directly, with an smgrimmedsync() call at the end.  That makes sense
+	 * when the index is large, but for an empty index, it's better to use the
+	 * buffer cache to avoid the smgrimmedsync().
 	 */
-	PageSetChecksumInplace(metapage, BTREE_METAPAGE);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BTREE_METAPAGE,
-			  metapage, true);
-	log_newpage(&RelationGetSmgr(index)->smgr_rlocator.locator, INIT_FORKNUM,
-				BTREE_METAPAGE, metapage, true);
+	metabuf = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
+	_bt_lockbuf(index, metabuf, BT_WRITE);
 
-	/*
-	 * An immediate sync is required even if we xlog'd the page, because the
-	 * write did not go through shared_buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	START_CRIT_SECTION();
+
+	metapage = BufferGetPage(metabuf);
+	_bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
+	MarkBufferDirty(metabuf);
+	log_newpage_buffer(metabuf, true);
+
+	END_CRIT_SECTION();
+
+	_bt_unlockbuf(index, metabuf);
+	ReleaseBuffer(metabuf);
 }
 
 /*
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 72d2e1551cd..4443f1918df 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -155,49 +155,42 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 void
 spgbuildempty(Relation index)
 {
-	Page		page;
-
-	/* Construct metapage. */
-	page = (Page) palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
-	SpGistInitMetapage(page);
+	Buffer		metabuffer,
+				rootbuffer,
+				nullbuffer;
 
 	/*
-	 * Write the page and log it unconditionally.  This is important
-	 * particularly for indexes created on tablespaces and databases whose
-	 * creation happened after the last redo pointer as recovery removes any
-	 * of their existing content when the corresponding create records are
-	 * replayed.
+	 * Initialize the meta page and root pages
 	 */
-	PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
-			  page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
-				SPGIST_METAPAGE_BLKNO, page, true);
-
-	/* Likewise for the root page. */
-	SpGistInitPage(page, SPGIST_LEAF);
-
-	PageSetChecksumInplace(page, SPGIST_ROOT_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_ROOT_BLKNO,
-			  page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
-				SPGIST_ROOT_BLKNO, page, true);
-
-	/* Likewise for the null-tuples root page. */
-	SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
-
-	PageSetChecksumInplace(page, SPGIST_NULL_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_NULL_BLKNO,
-			  page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
-				SPGIST_NULL_BLKNO, page, true);
+	metabuffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE);
+	rootbuffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(rootbuffer, BUFFER_LOCK_EXCLUSIVE);
+	nullbuffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(nullbuffer, BUFFER_LOCK_EXCLUSIVE);
 
-	/*
-	 * An immediate sync is required even if we xlog'd the pages, because the
-	 * writes did not go through shared buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	Assert(BufferGetBlockNumber(metabuffer) == SPGIST_METAPAGE_BLKNO);
+	Assert(BufferGetBlockNumber(rootbuffer) == SPGIST_ROOT_BLKNO);
+	Assert(BufferGetBlockNumber(nullbuffer) == SPGIST_NULL_BLKNO);
+
+	START_CRIT_SECTION();
+
+	SpGistInitMetapage(BufferGetPage(metabuffer));
+	MarkBufferDirty(metabuffer);
+	SpGistInitBuffer(rootbuffer, SPGIST_LEAF);
+	MarkBufferDirty(rootbuffer);
+	SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
+	MarkBufferDirty(nullbuffer);
+
+	log_newpage_buffer(metabuffer, true);
+	log_newpage_buffer(rootbuffer, true);
+	log_newpage_buffer(nullbuffer, true);
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(metabuffer);
+	UnlockReleaseBuffer(rootbuffer);
+	UnlockReleaseBuffer(nullbuffer);
 }
 
 /*
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>	2023-08-23 17:21:31 +0300
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>	2023-08-23 17:21:31 +0300
commit	ccadf73163ca88bdaa74b8223d4dde05d17f550b (patch)
tree	c62a538b03de3b3e07ed2f904f142e2e4ba7b18d /src
parent	ee99330a0b39c47a7ac44c6c8fdd74e3da841ba5 (diff)
download	postgresql-ccadf73163ca88bdaa74b8223d4dde05d17f550b.tar.gz postgresql-ccadf73163ca88bdaa74b8223d4dde05d17f550b.zip