bufmgr: Improve stats when a buffer is read in concurrently

Previously we would have the following inaccuracies when a backend tried to read in a buffer, but that buffer was read in concurrently by another backend: - the read IO was double-counted in the global buffer access stats (pgBufferUsage) - the buffer hit was not accounted for in: - global buffer access statistics - pg_stat_io - relation level IO stats - vacuum cost balancing While trying to read in a buffer that is concurrently read in by another backend is not a common occurrence, it's also not that rare, e.g. due to concurrent sequential scans on the same relation. This scenario has become more likely in PG 17, due to the introducing of read streams, which can pin multiple buffers before calling StartBufferIO() for all the buffers. This behaviour has historically grown, but there doesn't seem to be any reason to continue with the wrong accounting. Reviewed-by: Melanie Plageman <melanieplageman@gmail.com> Discussion: https://postgr.es/m/CAAKRu_Zk-B08AzPsO-6680LUHLOCGaNJYofaxTFseLa=OepV1g@mail.gmail.com
author: Andres Freund <andres@anarazel.de> 2025-03-20 19:58:22 -0400
committer: Andres Freund <andres@anarazel.de> 2025-03-20 19:58:22 -0400
commit: 202b12774d092baf7e5d3309aa3cb0cd1e8a606b (patch)
tree: 1fc7df79bb13ed7c77ac86765fb962d4935fa5e7
parent: 12604593e9f3b3460f7359a39d25731aff6beb88 (diff)
download: postgresql-202b12774d092baf7e5d3309aa3cb0cd1e8a606b.tar.gz
postgresql-202b12774d092baf7e5d3309aa3cb0cd1e8a606b.zip
1 files changed, 22 insertions, 15 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index d04afa5ab9c..14fc1bd1248 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1436,19 +1436,6 @@ WaitReadBuffers(ReadBuffersOperation *operation)
 		io_object = IOOBJECT_RELATION;
 	}
 
-	/*
-	 * We count all these blocks as read by this backend.  This is traditional
-	 * behavior, but might turn out to be not true if we find that someone
-	 * else has beaten us and completed the read of some of these blocks.  In
-	 * that case the system globally double-counts, but we traditionally don't
-	 * count this as a "hit", and we don't have a separate counter for "miss,
-	 * but another backend completed the read".
-	 */
-	if (persistence == RELPERSISTENCE_TEMP)
-		pgBufferUsage.local_blks_read += nblocks;
-	else
-		pgBufferUsage.shared_blks_read += nblocks;
-
 	for (int i = 0; i < nblocks; ++i)
 	{
 		int			io_buffers_len;
@@ -1466,8 +1453,9 @@ WaitReadBuffers(ReadBuffersOperation *operation)
 		if (!WaitReadBuffersCanStartIO(buffers[i], false))
 		{
 			/*
-			 * Report this as a 'hit' for this backend, even though it must
-			 * have started out as a miss in PinBufferForBlock().
+			 * Report and track this as a 'hit' for this backend, even though
+			 * it must have started out as a miss in PinBufferForBlock(). The
+			 * other backend will track this as a 'read'.
 			 */
 			TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + i,
 											  operation->smgr->smgr_rlocator.locator.spcOid,
@@ -1475,6 +1463,20 @@ WaitReadBuffers(ReadBuffersOperation *operation)
 											  operation->smgr->smgr_rlocator.locator.relNumber,
 											  operation->smgr->smgr_rlocator.backend,
 											  true);
+
+			if (persistence == RELPERSISTENCE_TEMP)
+				pgBufferUsage.local_blks_hit += 1;
+			else
+				pgBufferUsage.shared_blks_hit += 1;
+
+			if (operation->rel)
+				pgstat_count_buffer_hit(operation->rel);
+
+			pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1, 0);
+
+			if (VacuumCostActive)
+				VacuumCostBalance += VacuumCostPageHit;
+
 			continue;
 		}
 
@@ -1560,6 +1562,11 @@ WaitReadBuffers(ReadBuffersOperation *operation)
 											  false);
 		}
 
+		if (persistence == RELPERSISTENCE_TEMP)
+			pgBufferUsage.local_blks_read += io_buffers_len;
+		else
+			pgBufferUsage.shared_blks_read += io_buffers_len;
+
 		if (VacuumCostActive)
 			VacuumCostBalance += VacuumCostPageMiss * io_buffers_len;
 	}
author	Andres Freund <andres@anarazel.de>	2025-03-20 19:58:22 -0400
committer	Andres Freund <andres@anarazel.de>	2025-03-20 19:58:22 -0400
commit	202b12774d092baf7e5d3309aa3cb0cd1e8a606b (patch)
tree	1fc7df79bb13ed7c77ac86765fb962d4935fa5e7
parent	12604593e9f3b3460f7359a39d25731aff6beb88 (diff)
download	postgresql-202b12774d092baf7e5d3309aa3cb0cd1e8a606b.tar.gz postgresql-202b12774d092baf7e5d3309aa3cb0cd1e8a606b.zip