Diffstat (limited to 'src/backend/storage')
-rw-r--r--  src/backend/storage/aio/aio.c              140
-rw-r--r--  src/backend/storage/aio/aio_callback.c       7
-rw-r--r--  src/backend/storage/aio/aio_io.c             4
-rw-r--r--  src/backend/storage/aio/method_io_uring.c    8
-rw-r--r--  src/backend/storage/aio/method_worker.c      7
-rw-r--r--  src/backend/storage/buffer/bufmgr.c         10
-rw-r--r--  src/backend/storage/buffer/localbuf.c       23
-rw-r--r--  src/backend/storage/file/fd.c               19
-rw-r--r--  src/backend/storage/ipc/dsm_registry.c     265
-rw-r--r--  src/backend/storage/ipc/ipci.c               3
-rw-r--r--  src/backend/storage/ipc/procsignal.c         3
-rw-r--r--  src/backend/storage/ipc/shmem.c              4
-rw-r--r--  src/backend/storage/lmgr/lmgr.c              6
-rw-r--r--  src/backend/storage/lmgr/lock.c              2
-rw-r--r--  src/backend/storage/lmgr/lwlock.c            2
-rw-r--r--  src/backend/storage/lmgr/proc.c              1
16 files changed, 426 insertions, 78 deletions
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index ebb5a771bfd..3643f27ad6e 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -184,6 +184,8 @@ pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
PgAioHandle *
pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
{
+ PgAioHandle *ioh = NULL;
+
if (pgaio_my_backend->num_staged_ios >= PGAIO_SUBMIT_BATCH_SIZE)
{
Assert(pgaio_my_backend->num_staged_ios == PGAIO_SUBMIT_BATCH_SIZE);
@@ -193,10 +195,17 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
if (pgaio_my_backend->handed_out_io)
elog(ERROR, "API violation: Only one IO can be handed out");
+ /*
+ * Probably not needed today, as interrupts should not process this IO,
+ * but hold them anyway so that every change of handle state happens
+ * with interrupts held.
+ */
+ HOLD_INTERRUPTS();
+
if (!dclist_is_empty(&pgaio_my_backend->idle_ios))
{
dlist_node *ion = dclist_pop_head_node(&pgaio_my_backend->idle_ios);
- PgAioHandle *ioh = dclist_container(PgAioHandle, node, ion);
+
+ ioh = dclist_container(PgAioHandle, node, ion);
Assert(ioh->state == PGAIO_HS_IDLE);
Assert(ioh->owner_procno == MyProcNumber);
@@ -212,11 +221,11 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
ioh->report_return = ret;
ret->result.status = PGAIO_RS_UNKNOWN;
}
-
- return ioh;
}
- return NULL;
+ RESUME_INTERRUPTS();
+
+ return ioh;
}
/*
@@ -233,6 +242,12 @@ pgaio_io_release(PgAioHandle *ioh)
Assert(ioh->resowner);
pgaio_my_backend->handed_out_io = NULL;
+
+ /*
+ * Note that no interrupts are processed between the handed_out_io
+ * check and the call to reclaim - that's important as otherwise an
+ * interrupt could have already reclaimed the handle.
+ */
pgaio_io_reclaim(ioh);
}
else
@@ -251,6 +266,12 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
Assert(ioh->resowner);
+ /*
+ * Otherwise an interrupt, in the middle of releasing the IO, could end up
+ * trying to wait for the IO, leading to state confusion.
+ */
+ HOLD_INTERRUPTS();
+
ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node);
ioh->resowner = NULL;
@@ -291,6 +312,8 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
*/
if (ioh->report_return)
ioh->report_return = NULL;
+
+ RESUME_INTERRUPTS();
}
/*
@@ -359,6 +382,13 @@ pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
static inline void
pgaio_io_update_state(PgAioHandle *ioh, PgAioHandleState new_state)
{
+ /*
+ * All callers need to have held interrupts in some form; otherwise
+ * interrupt processing could wait for the IO to complete while it is in
+ * an intermediate state.
+ */
+ Assert(!INTERRUPTS_CAN_BE_PROCESSED());
+
pgaio_debug_io(DEBUG5, ioh,
"updating state to %s",
pgaio_io_state_get_name(new_state));
@@ -396,6 +426,13 @@ pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
Assert(pgaio_my_backend->handed_out_io == ioh);
Assert(pgaio_io_has_target(ioh));
+ /*
+ * Otherwise an interrupt, in the middle of staging and possibly executing
+ * the IO, could end up trying to wait for the IO, leading to state
+ * confusion.
+ */
+ HOLD_INTERRUPTS();
+
ioh->op = op;
ioh->result = 0;
@@ -435,6 +472,8 @@ pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
pgaio_io_prepare_submit(ioh);
pgaio_io_perform_synchronously(ioh);
}
+
+ RESUME_INTERRUPTS();
}
bool
@@ -517,6 +556,13 @@ bool
pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state)
{
*state = ioh->state;
+
+ /*
+ * Ensure that we don't see an earlier state of the handle than ioh->state
+ * due to compiler or CPU reordering. This protects both ->generation as
+ * directly used here, and other fields in the handle accessed in the
+ * caller if the handle was not reused.
+ */
pg_read_barrier();
return ioh->generation != ref_generation;
@@ -544,8 +590,8 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
&& state != PGAIO_HS_COMPLETED_SHARED
&& state != PGAIO_HS_COMPLETED_LOCAL)
{
- elog(PANIC, "waiting for own IO in wrong state: %d",
- state);
+ elog(PANIC, "waiting for own IO %d in wrong state: %s",
+ pgaio_io_get_id(ioh), pgaio_io_get_state_name(ioh));
}
}
@@ -599,7 +645,13 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
case PGAIO_HS_COMPLETED_SHARED:
case PGAIO_HS_COMPLETED_LOCAL:
- /* see above */
+
+ /*
+ * Note that no interrupts are processed between
+ * pgaio_io_was_recycled() and this check - that's important
+ * as otherwise an interrupt could have already reclaimed the
+ * handle.
+ */
if (am_owner)
pgaio_io_reclaim(ioh);
return;
@@ -610,6 +662,11 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
/*
* Make IO handle ready to be reused after IO has completed or after the
* handle has been released without being used.
+ *
+ * Note that callers need to be careful to call this only in the right
+ * state, and to ensure that no interrupts are processed between the state
+ * check and the call to pgaio_io_reclaim(). Otherwise interrupt processing
+ * could already have reclaimed the handle.
*/
static void
pgaio_io_reclaim(PgAioHandle *ioh)
@@ -618,6 +675,9 @@ pgaio_io_reclaim(PgAioHandle *ioh)
Assert(ioh->owner_procno == MyProcNumber);
Assert(ioh->state != PGAIO_HS_IDLE);
+ /* see comment in function header */
+ HOLD_INTERRUPTS();
+
/*
* It's a bit ugly, but right now the easiest place to put the execution
* of local completion callbacks is this function, as we need to execute
@@ -685,6 +745,8 @@ pgaio_io_reclaim(PgAioHandle *ioh)
* efficient in cases where only a few IOs are used.
*/
dclist_push_head(&pgaio_my_backend->idle_ios, &ioh->node);
+
+ RESUME_INTERRUPTS();
}
/*
@@ -697,10 +759,10 @@ pgaio_io_wait_for_free(void)
{
int reclaimed = 0;
- pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %d in-flight, %d idle IOs",
+ pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u in-flight, %u idle IOs",
pgaio_my_backend->num_staged_ios,
dclist_count(&pgaio_my_backend->in_flight_ios),
- dclist_is_empty(&pgaio_my_backend->idle_ios));
+ dclist_count(&pgaio_my_backend->idle_ios));
/*
* First check if any of our IOs actually have completed - when using
@@ -714,6 +776,16 @@ pgaio_io_wait_for_free(void)
if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
{
+ /*
+ * Note that no interrupts are processed between the state check
+ * and the call to reclaim - that's important as otherwise an
+ * interrupt could have already reclaimed the handle.
+ *
+ * We also need to ensure that there's no reordering here; in the more
+ * common paths, where we wait for the IO, that is taken care of by
+ * pgaio_io_was_recycled().
+ */
+ pg_read_barrier();
pgaio_io_reclaim(ioh);
reclaimed++;
}
@@ -730,13 +802,17 @@ pgaio_io_wait_for_free(void)
if (pgaio_my_backend->num_staged_ios > 0)
pgaio_submit_staged();
+ /* possibly some IOs finished during submission */
+ if (!dclist_is_empty(&pgaio_my_backend->idle_ios))
+ return;
+
if (dclist_count(&pgaio_my_backend->in_flight_ios) == 0)
ereport(ERROR,
errmsg_internal("no free IOs despite no in-flight IOs"),
- errdetail_internal("%d pending, %d in-flight, %d idle IOs",
+ errdetail_internal("%d pending, %u in-flight, %u idle IOs",
pgaio_my_backend->num_staged_ios,
dclist_count(&pgaio_my_backend->in_flight_ios),
- dclist_is_empty(&pgaio_my_backend->idle_ios)));
+ dclist_count(&pgaio_my_backend->idle_ios)));
/*
* Wait for the oldest in-flight IO to complete.
@@ -747,6 +823,7 @@ pgaio_io_wait_for_free(void)
{
PgAioHandle *ioh = dclist_head_element(PgAioHandle, node,
&pgaio_my_backend->in_flight_ios);
+ uint64 generation = ioh->generation;
switch (ioh->state)
{
@@ -763,20 +840,36 @@ pgaio_io_wait_for_free(void)
case PGAIO_HS_COMPLETED_IO:
case PGAIO_HS_SUBMITTED:
pgaio_debug_io(DEBUG2, ioh,
- "waiting for free io with %d in flight",
+ "waiting for free io with %u in flight",
dclist_count(&pgaio_my_backend->in_flight_ios));
/*
* In a more general case this would be racy, because the
* generation could increase after we read ioh->state above.
* But we are only looking at IOs by the current backend and
- * the IO can only be recycled by this backend.
+ * the IO can only be recycled by this backend. Even this is
+ * only OK because we get the handle's generation before
+ * potentially processing interrupts, e.g. as part of
+ * pgaio_debug_io().
*/
- pgaio_io_wait(ioh, ioh->generation);
+ pgaio_io_wait(ioh, generation);
break;
case PGAIO_HS_COMPLETED_SHARED:
- /* it's possible that another backend just finished this IO */
+
+ /*
+ * It's possible that another backend just finished this IO.
+ *
+ * Note that no interrupts are processed between the state
+ * check and the call to reclaim - that's important as
+ * otherwise an interrupt could have already reclaimed the
+ * handle.
+ *
+ * We also need to ensure that there's no reordering here; in the
+ * more common paths, where we wait for the IO, that is taken care
+ * of by pgaio_io_was_recycled().
+ */
+ pg_read_barrier();
pgaio_io_reclaim(ioh);
break;
}
@@ -926,6 +1019,11 @@ pgaio_wref_check_done(PgAioWaitRef *iow)
if (state == PGAIO_HS_COMPLETED_SHARED ||
state == PGAIO_HS_COMPLETED_LOCAL)
{
+ /*
+ * Note that no interrupts are processed between
+ * pgaio_io_was_recycled() and this check - that's important as
+ * otherwise an interrupt could have already reclaimed the handle.
+ */
if (am_owner)
pgaio_io_reclaim(ioh);
return true;
@@ -1153,11 +1251,14 @@ pgaio_closing_fd(int fd)
{
dlist_iter iter;
PgAioHandle *ioh = NULL;
+ uint64 generation;
dclist_foreach(iter, &pgaio_my_backend->in_flight_ios)
{
ioh = dclist_container(PgAioHandle, node, iter.cur);
+ generation = ioh->generation;
+
if (pgaio_io_uses_fd(ioh, fd))
break;
else
@@ -1168,11 +1269,11 @@ pgaio_closing_fd(int fd)
break;
pgaio_debug_io(DEBUG2, ioh,
- "waiting for IO before FD %d gets closed, %d in-flight IOs",
+ "waiting for IO before FD %d gets closed, %u in-flight IOs",
fd, dclist_count(&pgaio_my_backend->in_flight_ios));
/* see comment in pgaio_io_wait_for_free() about raciness */
- pgaio_io_wait(ioh, ioh->generation);
+ pgaio_io_wait(ioh, generation);
}
}
}
@@ -1201,13 +1302,14 @@ pgaio_shutdown(int code, Datum arg)
while (!dclist_is_empty(&pgaio_my_backend->in_flight_ios))
{
PgAioHandle *ioh = dclist_head_element(PgAioHandle, node, &pgaio_my_backend->in_flight_ios);
+ uint64 generation = ioh->generation;
pgaio_debug_io(DEBUG2, ioh,
- "waiting for IO to complete during shutdown, %d in-flight IOs",
+ "waiting for IO to complete during shutdown, %u in-flight IOs",
dclist_count(&pgaio_my_backend->in_flight_ios));
/* see comment in pgaio_io_wait_for_free() about raciness */
- pgaio_io_wait(ioh, ioh->generation);
+ pgaio_io_wait(ioh, generation);
}
pgaio_my_backend = NULL;
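
The common thread in the aio.c changes above is a check-then-act discipline: any code that inspects a handle's state and then reclaims or waits on it must keep interrupts held across both steps, because interrupt processing can itself wait on or reclaim AIO handles. Below is a minimal standalone sketch of that discipline; HOLD_INTERRUPTS(), RESUME_INTERRUPTS() and INTERRUPTS_CAN_BE_PROCESSED() mirror the backend macros but are re-declared here so the example compiles on its own, and the Handle type is a stand-in, not PgAioHandle.

#include <assert.h>
#include <stdio.h>

static volatile int InterruptHoldoffCount = 0;

#define HOLD_INTERRUPTS()	(InterruptHoldoffCount++)
#define RESUME_INTERRUPTS() \
	(assert(InterruptHoldoffCount > 0), InterruptHoldoffCount--)
#define INTERRUPTS_CAN_BE_PROCESSED() (InterruptHoldoffCount == 0)

typedef enum
{
	HS_IDLE,
	HS_COMPLETED_SHARED
} HandleState;

typedef struct
{
	HandleState state;
} Handle;

static void
reclaim(Handle *h)
{
	/* mirrors the new assertion in pgaio_io_update_state() */
	assert(!INTERRUPTS_CAN_BE_PROCESSED());
	h->state = HS_IDLE;
}

int
main(void)
{
	Handle		h = {.state = HS_COMPLETED_SHARED};

	/*
	 * No interrupt may be processed between the state check and the
	 * reclaim; otherwise an interrupt could reclaim the handle first and
	 * we would operate on a recycled handle.
	 */
	HOLD_INTERRUPTS();
	if (h.state == HS_COMPLETED_SHARED)
		reclaim(&h);
	RESUME_INTERRUPTS();

	printf("state=%d, holdoff=%d\n", (int) h.state, InterruptHoldoffCount);
	return 0;
}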
diff --git a/src/backend/storage/aio/aio_callback.c b/src/backend/storage/aio/aio_callback.c
index 0ad9795bb7e..03c9bba0802 100644
--- a/src/backend/storage/aio/aio_callback.c
+++ b/src/backend/storage/aio/aio_callback.c
@@ -256,6 +256,9 @@ pgaio_io_call_complete_shared(PgAioHandle *ioh)
pgaio_result_status_string(result.status),
result.id, result.error_data, result.result);
result = ce->cb->complete_shared(ioh, result, cb_data);
+
+ /* the callback should never transition to unknown */
+ Assert(result.status != PGAIO_RS_UNKNOWN);
}
ioh->distilled_result = result;
@@ -290,6 +293,7 @@ pgaio_io_call_complete_local(PgAioHandle *ioh)
/* start with distilled result from shared callback */
result = ioh->distilled_result;
+ Assert(result.status != PGAIO_RS_UNKNOWN);
for (int i = ioh->num_callbacks; i > 0; i--)
{
@@ -306,6 +310,9 @@ pgaio_io_call_complete_local(PgAioHandle *ioh)
pgaio_result_status_string(result.status),
result.id, result.error_data, result.result);
result = ce->cb->complete_local(ioh, result, cb_data);
+
+ /* the callback should never transition to unknown */
+ Assert(result.status != PGAIO_RS_UNKNOWN);
}
/*
diff --git a/src/backend/storage/aio/aio_io.c b/src/backend/storage/aio/aio_io.c
index 00e176135a6..520b5077df2 100644
--- a/src/backend/storage/aio/aio_io.c
+++ b/src/backend/storage/aio/aio_io.c
@@ -181,9 +181,9 @@ pgaio_io_get_op_name(PgAioHandle *ioh)
case PGAIO_OP_INVALID:
return "invalid";
case PGAIO_OP_READV:
- return "read";
+ return "readv";
case PGAIO_OP_WRITEV:
- return "write";
+ return "writev";
}
return NULL; /* silence compiler */
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index c719ba2727a..b78048328e1 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -126,7 +126,7 @@ pgaio_uring_shmem_size(void)
static void
pgaio_uring_shmem_init(bool first_time)
{
- int TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
+ int TotalProcs = pgaio_uring_procs();
bool found;
pgaio_uring_contexts = (PgAioUringContext *)
@@ -400,9 +400,9 @@ pgaio_uring_wait_one(PgAioHandle *ioh, uint64 ref_generation)
while (true)
{
pgaio_debug_io(DEBUG3, ioh,
- "wait_one io_gen: %llu, ref_gen: %llu, cycle %d",
- (long long unsigned) ioh->generation,
- (long long unsigned) ref_generation,
+ "wait_one io_gen: %" PRIu64 ", ref_gen: %" PRIu64 ", cycle %d",
+ ioh->generation,
+ ref_generation,
waited);
if (pgaio_io_was_recycled(ioh, ref_generation, &state) ||
diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c
index 743cccc2acd..36be179678d 100644
--- a/src/backend/storage/aio/method_worker.c
+++ b/src/backend/storage/aio/method_worker.c
@@ -461,7 +461,12 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
int nwakeups = 0;
int worker;
- /* Try to get a job to do. */
+ /*
+ * Try to get a job to do.
+ *
+ * The lwlock acquisition also provides the necessary memory barrier
+ * to ensure that we don't see outdated data in the handle.
+ */
LWLockAcquire(AioWorkerSubmissionQueueLock, LW_EXCLUSIVE);
if ((io_index = pgaio_worker_submission_queue_consume()) == UINT32_MAX)
{
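
The worker-side comment above and the pg_read_barrier() calls added in aio.c are two halves of the same ordering protocol: a waiter must load the handle's state before its generation, and the recycler must publish a new generation before reusing the handle. The sketch below restates that protocol with C11 atomics; it is an illustrative analogue, not backend code, and the fences stand in for pg_read_barrier()/pg_write_barrier() (in IoWorkerMain the LWLock acquisition provides the barrier instead).

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct
{
	_Atomic int state;
	_Atomic uint64_t generation;
} Handle;

/* reader side, analogous to pgaio_io_was_recycled() */
static bool
was_recycled(Handle *h, uint64_t ref_generation, int *state)
{
	*state = atomic_load_explicit(&h->state, memory_order_relaxed);
	/*
	 * Plays the role of pg_read_barrier(): the state load above cannot be
	 * reordered past the generation load below.
	 */
	atomic_thread_fence(memory_order_acquire);
	return atomic_load_explicit(&h->generation,
								memory_order_relaxed) != ref_generation;
}

/* writer side, analogous to the owner recycling the handle */
static void
recycle(Handle *h)
{
	atomic_fetch_add_explicit(&h->generation, 1, memory_order_relaxed);
	/*
	 * Plays the role of pg_write_barrier(): the generation bump must be
	 * visible before the handle is reset for reuse.
	 */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&h->state, 0, memory_order_relaxed);
}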
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f93131a645e..bd68d7e0ca9 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -4550,11 +4550,9 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
if (RelFileLocatorBackendIsTemp(rlocator))
{
if (rlocator.backend == MyProcNumber)
- {
- for (j = 0; j < nforks; j++)
- DropRelationLocalBuffers(rlocator.locator, forkNum[j],
- firstDelBlock[j]);
- }
+ DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
+ firstDelBlock);
+
return;
}
@@ -7320,7 +7318,7 @@ buffer_readv_report(PgAioResult result, const PgAioTargetData *td,
affected_count > 1 ?
errdetail("Block %u held first zeroed page.",
first + first_off) : 0,
- errhint("See server log for details about the other %u invalid block(s).",
+ errhint("See server log for details about the other %d invalid block(s).",
affected_count + checkfail_count - 1));
return;
}
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 63101d56a07..3da9c41ee1d 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -629,7 +629,7 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
*/
if (check_unreferenced &&
(LocalRefCount[bufid] != 0 || BUF_STATE_GET_REFCOUNT(buf_state) != 0))
- elog(ERROR, "block %u of %s is still referenced (local %u)",
+ elog(ERROR, "block %u of %s is still referenced (local %d)",
bufHdr->tag.blockNum,
relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
MyProcNumber,
@@ -660,10 +660,11 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
* See DropRelationBuffers in bufmgr.c for more notes.
*/
void
-DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum,
- BlockNumber firstDelBlock)
+DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum,
+ int nforks, BlockNumber *firstDelBlock)
{
int i;
+ int j;
for (i = 0; i < NLocBuffer; i++)
{
@@ -672,12 +673,18 @@ DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum,
buf_state = pg_atomic_read_u32(&bufHdr->state);
- if ((buf_state & BM_TAG_VALID) &&
- BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
- BufTagGetForkNum(&bufHdr->tag) == forkNum &&
- bufHdr->tag.blockNum >= firstDelBlock)
+ if (!(buf_state & BM_TAG_VALID) ||
+ !BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator))
+ continue;
+
+ for (j = 0; j < nforks; j++)
{
- InvalidateLocalBuffer(bufHdr, true);
+ if (BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
+ bufHdr->tag.blockNum >= firstDelBlock[j])
+ {
+ InvalidateLocalBuffer(bufHdr, true);
+ break;
+ }
}
}
}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 0e8299dd556..a4ec7959f31 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -400,25 +400,22 @@ pg_fsync(int fd)
* portable, even if it runs ok on the current system.
*
* We assert here that a descriptor for a file was opened with write
- * permissions (either O_RDWR or O_WRONLY) and for a directory without
- * write permissions (O_RDONLY).
+ * permissions (i.e., not O_RDONLY) and for a directory without write
+ * permissions (O_RDONLY). Notice that the assertion check is made even
+ * if fsync() is disabled.
*
- * Ignore any fstat errors and let the follow-up fsync() do its work.
- * Doing this sanity check here counts for the case where fsync() is
- * disabled.
+ * If fstat() fails, ignore it and let the follow-up fsync() complain.
*/
if (fstat(fd, &st) == 0)
{
int desc_flags = fcntl(fd, F_GETFL);
- /*
- * O_RDONLY is historically 0, so just make sure that for directories
- * no write flags are used.
- */
+ desc_flags &= O_ACCMODE;
+
if (S_ISDIR(st.st_mode))
- Assert((desc_flags & (O_RDWR | O_WRONLY)) == 0);
+ Assert(desc_flags == O_RDONLY);
else
- Assert((desc_flags & (O_RDWR | O_WRONLY)) != 0);
+ Assert(desc_flags != O_RDONLY);
}
errno = 0;
#endif
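
To make the assertion change above concrete: masking the fcntl(F_GETFL) result with O_ACCMODE isolates the access mode, which can then be compared against O_RDONLY directly; since O_RDONLY is historically 0, testing individual write bits (as the old code did) could not express the directory case as an equality. A simplified standalone rendition, not the backend code itself:

#include <assert.h>
#include <fcntl.h>
#include <sys/stat.h>

static void
assert_sane_fsync_target(int fd)
{
	struct stat st;

	if (fstat(fd, &st) == 0)
	{
		int			desc_flags = fcntl(fd, F_GETFL);

		/* keep only the access mode: O_RDONLY, O_WRONLY or O_RDWR */
		desc_flags &= O_ACCMODE;

		if (S_ISDIR(st.st_mode))
			assert(desc_flags == O_RDONLY); /* directories: read-only */
		else
			assert(desc_flags != O_RDONLY); /* files: opened writable */
	}
}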
diff --git a/src/backend/storage/ipc/dsm_registry.c b/src/backend/storage/ipc/dsm_registry.c
index 1d4fd31ffed..828c2ff0c7f 100644
--- a/src/backend/storage/ipc/dsm_registry.c
+++ b/src/backend/storage/ipc/dsm_registry.c
@@ -15,6 +15,20 @@
* current backend. This function guarantees that only one backend
* initializes the segment and that all other backends just attach it.
*
+ * A DSA can be created in or retrieved from the registry by calling
+ * GetNamedDSA(). As with GetNamedDSMSegment(), if a DSA with the provided
+ * name does not yet exist, it is created. Otherwise, GetNamedDSA()
+ * ensures the DSA is attached to the current backend. This function
+ * guarantees that only one backend initializes the DSA and that all other
+ * backends just attach it.
+ *
+ * A dshash table can be created in or retrieved from the registry by
+ * calling GetNamedDSHash(). As with GetNamedDSMSegment(), if a hash
+ * table with the provided name does not yet exist, it is created.
+ * Otherwise, GetNamedDSHash() ensures the hash table is attached to the
+ * current backend. This function guarantees that only one backend
+ * initializes the table and that all other backends just attach it.
+ *
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -32,6 +46,12 @@
#include "storage/shmem.h"
#include "utils/memutils.h"
+#define DSMR_NAME_LEN 128
+
+#define DSMR_DSA_TRANCHE_SUFFIX " DSA"
+#define DSMR_DSA_TRANCHE_SUFFIX_LEN (sizeof(DSMR_DSA_TRANCHE_SUFFIX) - 1)
+#define DSMR_DSA_TRANCHE_NAME_LEN (DSMR_NAME_LEN + DSMR_DSA_TRANCHE_SUFFIX_LEN)
+
typedef struct DSMRegistryCtxStruct
{
dsa_handle dsah;
@@ -40,15 +60,48 @@ typedef struct DSMRegistryCtxStruct
static DSMRegistryCtxStruct *DSMRegistryCtx;
-typedef struct DSMRegistryEntry
+typedef struct NamedDSMState
{
- char name[64];
dsm_handle handle;
size_t size;
+} NamedDSMState;
+
+typedef struct NamedDSAState
+{
+ dsa_handle handle;
+ int tranche;
+ char tranche_name[DSMR_DSA_TRANCHE_NAME_LEN];
+} NamedDSAState;
+
+typedef struct NamedDSHState
+{
+ NamedDSAState dsa;
+ dshash_table_handle handle;
+ int tranche;
+ char tranche_name[DSMR_NAME_LEN];
+} NamedDSHState;
+
+typedef enum DSMREntryType
+{
+ DSMR_ENTRY_TYPE_DSM,
+ DSMR_ENTRY_TYPE_DSA,
+ DSMR_ENTRY_TYPE_DSH,
+} DSMREntryType;
+
+typedef struct DSMRegistryEntry
+{
+ char name[DSMR_NAME_LEN];
+ DSMREntryType type;
+ union
+ {
+ NamedDSMState dsm;
+ NamedDSAState dsa;
+ NamedDSHState dsh;
+ } data;
} DSMRegistryEntry;
static const dshash_parameters dsh_params = {
- offsetof(DSMRegistryEntry, handle),
+ offsetof(DSMRegistryEntry, type),
sizeof(DSMRegistryEntry),
dshash_strcmp,
dshash_strhash,
@@ -141,7 +194,7 @@ GetNamedDSMSegment(const char *name, size_t size,
ereport(ERROR,
(errmsg("DSM segment name cannot be empty")));
- if (strlen(name) >= offsetof(DSMRegistryEntry, handle))
+ if (strlen(name) >= offsetof(DSMRegistryEntry, type))
ereport(ERROR,
(errmsg("DSM segment name too long")));
@@ -158,32 +211,39 @@ GetNamedDSMSegment(const char *name, size_t size,
entry = dshash_find_or_insert(dsm_registry_table, name, found);
if (!(*found))
{
+ NamedDSMState *state = &entry->data.dsm;
+ dsm_segment *seg;
+
+ entry->type = DSMR_ENTRY_TYPE_DSM;
+
/* Initialize the segment. */
- dsm_segment *seg = dsm_create(size, 0);
+ seg = dsm_create(size, 0);
dsm_pin_segment(seg);
dsm_pin_mapping(seg);
- entry->handle = dsm_segment_handle(seg);
- entry->size = size;
+ state->handle = dsm_segment_handle(seg);
+ state->size = size;
ret = dsm_segment_address(seg);
if (init_callback)
(*init_callback) (ret);
}
- else if (entry->size != size)
- {
+ else if (entry->type != DSMR_ENTRY_TYPE_DSM)
ereport(ERROR,
- (errmsg("requested DSM segment size does not match size of "
- "existing segment")));
- }
+ (errmsg("requested DSM segment does not match type of existing entry")));
+ else if (entry->data.dsm.size != size)
+ ereport(ERROR,
+ (errmsg("requested DSM segment size does not match size of existing segment")));
else
{
- dsm_segment *seg = dsm_find_mapping(entry->handle);
+ NamedDSMState *state = &entry->data.dsm;
+ dsm_segment *seg;
/* If the existing segment is not already attached, attach it now. */
+ seg = dsm_find_mapping(state->handle);
if (seg == NULL)
{
- seg = dsm_attach(entry->handle);
+ seg = dsm_attach(state->handle);
if (seg == NULL)
elog(ERROR, "could not map dynamic shared memory segment");
@@ -198,3 +258,180 @@ GetNamedDSMSegment(const char *name, size_t size,
return ret;
}
+
+/*
+ * Initialize or attach a named DSA.
+ *
+ * This routine returns a pointer to the DSA. A new LWLock tranche ID will be
+ * generated if needed. Note that the lock tranche will be registered with the
+ * provided name. Also note that this should be called at most once for a
+ * given DSA in each backend.
+ */
+dsa_area *
+GetNamedDSA(const char *name, bool *found)
+{
+ DSMRegistryEntry *entry;
+ MemoryContext oldcontext;
+ dsa_area *ret;
+
+ Assert(found);
+
+ if (!name || *name == '\0')
+ ereport(ERROR,
+ (errmsg("DSA name cannot be empty")));
+
+ if (strlen(name) >= offsetof(DSMRegistryEntry, type))
+ ereport(ERROR,
+ (errmsg("DSA name too long")));
+
+ /* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+ /* Connect to the registry. */
+ init_dsm_registry();
+
+ entry = dshash_find_or_insert(dsm_registry_table, name, found);
+ if (!(*found))
+ {
+ NamedDSAState *state = &entry->data.dsa;
+
+ entry->type = DSMR_ENTRY_TYPE_DSA;
+
+ /* Initialize the LWLock tranche for the DSA. */
+ state->tranche = LWLockNewTrancheId();
+ strcpy(state->tranche_name, name);
+ LWLockRegisterTranche(state->tranche, state->tranche_name);
+
+ /* Initialize the DSA. */
+ ret = dsa_create(state->tranche);
+ dsa_pin(ret);
+ dsa_pin_mapping(ret);
+
+ /* Store handle for other backends to use. */
+ state->handle = dsa_get_handle(ret);
+ }
+ else if (entry->type != DSMR_ENTRY_TYPE_DSA)
+ ereport(ERROR,
+ (errmsg("requested DSA does not match type of existing entry")));
+ else
+ {
+ NamedDSAState *state = &entry->data.dsa;
+
+ if (dsa_is_attached(state->handle))
+ ereport(ERROR,
+ (errmsg("requested DSA already attached to current process")));
+
+ /* Initialize existing LWLock tranche for the DSA. */
+ LWLockRegisterTranche(state->tranche, state->tranche_name);
+
+ /* Attach to existing DSA. */
+ ret = dsa_attach(state->handle);
+ dsa_pin_mapping(ret);
+ }
+
+ dshash_release_lock(dsm_registry_table, entry);
+ MemoryContextSwitchTo(oldcontext);
+
+ return ret;
+}
+
+/*
+ * Initialize or attach a named dshash table.
+ *
+ * This routine returns the address of the table. The tranche_id member of
+ * params is ignored; new tranche IDs will be generated if needed. Note that
+ * the DSA lock tranche will be registered with the provided name with " DSA"
+ * appended. The dshash lock tranche will be registered with the provided
+ * name. Also note that this should be called at most once for a given table
+ * in each backend.
+ */
+dshash_table *
+GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found)
+{
+ DSMRegistryEntry *entry;
+ MemoryContext oldcontext;
+ dshash_table *ret;
+
+ Assert(params);
+ Assert(found);
+
+ if (!name || *name == '\0')
+ ereport(ERROR,
+ (errmsg("DSHash name cannot be empty")));
+
+ if (strlen(name) >= offsetof(DSMRegistryEntry, type))
+ ereport(ERROR,
+ (errmsg("DSHash name too long")));
+
+ /* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+ /* Connect to the registry. */
+ init_dsm_registry();
+
+ entry = dshash_find_or_insert(dsm_registry_table, name, found);
+ if (!(*found))
+ {
+ NamedDSAState *dsa_state = &entry->data.dsh.dsa;
+ NamedDSHState *dsh_state = &entry->data.dsh;
+ dshash_parameters params_copy;
+ dsa_area *dsa;
+
+ entry->type = DSMR_ENTRY_TYPE_DSH;
+
+ /* Initialize the LWLock tranche for the DSA. */
+ dsa_state->tranche = LWLockNewTrancheId();
+ sprintf(dsa_state->tranche_name, "%s%s", name, DSMR_DSA_TRANCHE_SUFFIX);
+ LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name);
+
+ /* Initialize the LWLock tranche for the dshash table. */
+ dsh_state->tranche = LWLockNewTrancheId();
+ strcpy(dsh_state->tranche_name, name);
+ LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name);
+
+ /* Initialize the DSA for the hash table. */
+ dsa = dsa_create(dsa_state->tranche);
+ dsa_pin(dsa);
+ dsa_pin_mapping(dsa);
+
+ /* Initialize the dshash table. */
+ memcpy(&params_copy, params, sizeof(dshash_parameters));
+ params_copy.tranche_id = dsh_state->tranche;
+ ret = dshash_create(dsa, &params_copy, NULL);
+
+ /* Store handles for other backends to use. */
+ dsa_state->handle = dsa_get_handle(dsa);
+ dsh_state->handle = dshash_get_hash_table_handle(ret);
+ }
+ else if (entry->type != DSMR_ENTRY_TYPE_DSH)
+ ereport(ERROR,
+ (errmsg("requested DSHash does not match type of existing entry")));
+ else
+ {
+ NamedDSAState *dsa_state = &entry->data.dsh.dsa;
+ NamedDSHState *dsh_state = &entry->data.dsh;
+ dsa_area *dsa;
+
+ /* XXX: Should we verify params matches what table was created with? */
+
+ if (dsa_is_attached(dsa_state->handle))
+ ereport(ERROR,
+ (errmsg("requested DSHash already attached to current process")));
+
+ /* Initialize existing LWLock tranches for the DSA and dshash table. */
+ LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name);
+ LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name);
+
+ /* Attach to existing DSA for the hash table. */
+ dsa = dsa_attach(dsa_state->handle);
+ dsa_pin_mapping(dsa);
+
+ /* Attach to existing dshash table. */
+ ret = dshash_attach(dsa, params, dsh_state->handle, NULL);
+ }
+
+ dshash_release_lock(dsm_registry_table, entry);
+ MemoryContextSwitchTo(oldcontext);
+
+ return ret;
+}
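
For extension authors, the new interfaces are used much like GetNamedDSMSegment(). The following is a hedged usage sketch; my_extension_area, my_extension_hash, my_hash_params and attach_my_shared_state are hypothetical extension-side names, and the dshash_parameters layout assumed here is the current one with a copy_function member:

#include "postgres.h"

#include "lib/dshash.h"
#include "storage/dsm_registry.h"

static dsa_area *my_area;
static dshash_table *my_hash;

static const dshash_parameters my_hash_params = {
	sizeof(int),				/* key size */
	sizeof(int) * 2,			/* full entry size */
	dshash_memcmp,
	dshash_memhash,
	dshash_memcpy,
	0							/* tranche_id: ignored by GetNamedDSHash() */
};

static void
attach_my_shared_state(void)
{
	bool		found;

	/* The first caller creates the DSA and table; later callers attach. */
	my_area = GetNamedDSA("my_extension_area", &found);
	my_hash = GetNamedDSHash("my_extension_hash", &my_hash_params, &found);
}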
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 00c76d05356..2fa045e6b0f 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -51,7 +51,6 @@
#include "storage/sinvaladt.h"
#include "utils/guc.h"
#include "utils/injection_point.h"
-#include "utils/memutils.h"
/* GUCs */
int shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE;
@@ -151,7 +150,6 @@ CalculateShmemSize(int *num_semaphores)
size = add_size(size, InjectionPointShmemSize());
size = add_size(size, SlotSyncShmemSize());
size = add_size(size, AioShmemSize());
- size = add_size(size, MemoryContextReportingShmemSize());
/* include additional requested shmem from preload libraries */
size = add_size(size, total_addin_request);
@@ -345,7 +343,6 @@ CreateOrAttachShmemStructs(void)
WaitEventCustomShmemInit();
InjectionPointShmemInit();
AioShmemInit();
- MemoryContextReportingShmemInit();
}
/*
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index ce69e26d720..a9bb540b55a 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -691,9 +691,6 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
if (CheckProcSignal(PROCSIG_LOG_MEMORY_CONTEXT))
HandleLogMemoryContextInterrupt();
- if (CheckProcSignal(PROCSIG_GET_MEMORY_CONTEXT))
- HandleGetMemoryContextInterrupt();
-
if (CheckProcSignal(PROCSIG_PARALLEL_APPLY_MESSAGE))
HandleParallelApplyMessageInterrupt();
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index c9ae3b45b76..ca3656fc76f 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -679,12 +679,10 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
*/
for (i = 0; i < shm_ent_page_count; i++)
{
- volatile uint64 touch pg_attribute_unused();
-
page_ptrs[i] = startptr + (i * os_page_size);
if (firstNumaTouch)
- pg_numa_touch_mem_if_required(touch, page_ptrs[i]);
+ pg_numa_touch_mem_if_required(page_ptrs[i]);
CHECK_FOR_INTERRUPTS();
}
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index f50962983c3..3f6bf70bd3c 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -717,7 +717,10 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
* through, to avoid slowing down the normal case.)
*/
if (!first)
+ {
+ CHECK_FOR_INTERRUPTS();
pg_usleep(1000L);
+ }
first = false;
xid = SubTransGetTopmostTransaction(xid);
}
@@ -757,7 +760,10 @@ ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
/* See XactLockTableWait about this case */
if (!first)
+ {
+ CHECK_FOR_INTERRUPTS();
pg_usleep(1000L);
+ }
first = false;
xid = SubTransGetTopmostTransaction(xid);
}
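
The lmgr.c hunks follow a general rule worth stating: a retry loop that sleeps must poll for interrupts before each sleep, or a blocked backend cannot be cancelled promptly. A sketch of the pattern, where work_is_done() is a hypothetical loop-exit condition:

for (;;)
{
	if (work_is_done())			/* hypothetical condition */
		break;

	/* allow query cancel/termination between sleeps */
	CHECK_FOR_INTERRUPTS();
	pg_usleep(1000L);			/* 1ms, as in XactLockTableWait() */
}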
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 86b06b9223f..2776ceb295b 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -51,7 +51,7 @@
/* GUC variables */
int max_locks_per_xact; /* used to set the lock table size */
-bool log_lock_failure = false;
+bool log_lock_failures = false;
#define NLOCKENTS() \
mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 5148ef982e3..46f44bc4511 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -178,8 +178,6 @@ static const char *const BuiltinTrancheNames[] = {
[LWTRANCHE_XACT_SLRU] = "XactSLRU",
[LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
[LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
- [LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE] = "MemoryContextReportingState",
- [LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC] = "MemoryContextReportingPerProcess",
};
StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index f194e6b3dcc..e9ef0fbfe32 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -50,7 +50,6 @@
#include "storage/procsignal.h"
#include "storage/spin.h"
#include "storage/standby.h"
-#include "utils/memutils.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"