aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/common/Makefile1
-rw-r--r--src/backend/access/common/meson.build1
-rw-r--r--src/backend/access/common/tidstore.c463
-rw-r--r--src/include/access/tidstore.h49
-rw-r--r--src/test/modules/Makefile1
-rw-r--r--src/test/modules/meson.build1
-rw-r--r--src/test/modules/test_tidstore/.gitignore4
-rw-r--r--src/test/modules/test_tidstore/Makefile23
-rw-r--r--src/test/modules/test_tidstore/expected/test_tidstore.out97
-rw-r--r--src/test/modules/test_tidstore/meson.build33
-rw-r--r--src/test/modules/test_tidstore/sql/test_tidstore.sql65
-rw-r--r--src/test/modules/test_tidstore/test_tidstore--1.0.sql27
-rw-r--r--src/test/modules/test_tidstore/test_tidstore.c317
-rw-r--r--src/test/modules/test_tidstore/test_tidstore.control4
-rw-r--r--src/tools/pgindent/typedefs.list5
15 files changed, 1091 insertions, 0 deletions
diff --git a/src/backend/access/common/Makefile b/src/backend/access/common/Makefile
index b9aff0ccfdc..e78de312659 100644
--- a/src/backend/access/common/Makefile
+++ b/src/backend/access/common/Makefile
@@ -25,6 +25,7 @@ OBJS = \
scankey.o \
session.o \
syncscan.o \
+ tidstore.o \
toast_compression.o \
toast_internals.o \
tupconvert.o \
diff --git a/src/backend/access/common/meson.build b/src/backend/access/common/meson.build
index 725041a4ce2..14090c728fb 100644
--- a/src/backend/access/common/meson.build
+++ b/src/backend/access/common/meson.build
@@ -13,6 +13,7 @@ backend_sources += files(
'scankey.c',
'session.c',
'syncscan.c',
+ 'tidstore.c',
'toast_compression.c',
'toast_internals.c',
'tupconvert.c',
diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c
new file mode 100644
index 00000000000..745393806d3
--- /dev/null
+++ b/src/backend/access/common/tidstore.c
@@ -0,0 +1,463 @@
+/*-------------------------------------------------------------------------
+ *
+ * tidstore.c
+ * TID (ItemPointerData) storage implementation.
+ *
+ * TidStore is a in-memory data structure to store TIDs (ItemPointerData).
+ * Internally it uses a radix tree as the storage for TIDs. The key is the
+ * BlockNumber and the value is a bitmap of offsets, BlocktableEntry.
+ *
+ * TidStore can be shared among parallel worker processes by passing DSA area
+ * to TidStoreCreate(). Other backends can attach to the shared TidStore by
+ * TidStoreAttach().
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/access/common/tidstore.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/tidstore.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "storage/lwlock.h"
+#include "utils/dsa.h"
+
+
+#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
+#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
+
+/* number of active words for a page: */
+#define WORDS_PER_PAGE(n) ((n) / BITS_PER_BITMAPWORD + 1)
+
+/*
+ * This is named similarly to PagetableEntry in tidbitmap.c
+ * because the two have a similar function.
+ */
+typedef struct BlocktableEntry
+{
+ uint16 nwords;
+ bitmapword words[FLEXIBLE_ARRAY_MEMBER];
+} BlocktableEntry;
+#define MaxBlocktableEntrySize \
+ offsetof(BlocktableEntry, words) + \
+ (sizeof(bitmapword) * WORDS_PER_PAGE(MaxOffsetNumber))
+
+#define RT_PREFIX local_ts
+#define RT_SCOPE static
+#define RT_DECLARE
+#define RT_DEFINE
+#define RT_VALUE_TYPE BlocktableEntry
+#define RT_VARLEN_VALUE_SIZE(page) \
+ (offsetof(BlocktableEntry, words) + \
+ sizeof(bitmapword) * (page)->nwords)
+#include "lib/radixtree.h"
+
+#define RT_PREFIX shared_ts
+#define RT_SHMEM
+#define RT_SCOPE static
+#define RT_DECLARE
+#define RT_DEFINE
+#define RT_VALUE_TYPE BlocktableEntry
+#define RT_VARLEN_VALUE_SIZE(page) \
+ (offsetof(BlocktableEntry, words) + \
+ sizeof(bitmapword) * (page)->nwords)
+#include "lib/radixtree.h"
+
+/* Per-backend state for a TidStore */
+struct TidStore
+{
+ /* MemoryContext where the TidStore is allocated */
+ MemoryContext context;
+
+ /* MemoryContext that the radix tree uses */
+ MemoryContext rt_context;
+
+ /* Storage for TIDs. Use either one depending on TidStoreIsShared() */
+ union
+ {
+ local_ts_radix_tree *local;
+ shared_ts_radix_tree *shared;
+ } tree;
+
+ /* DSA area for TidStore if using shared memory */
+ dsa_area *area;
+};
+#define TidStoreIsShared(ts) ((ts)->area != NULL)
+
+/* Iterator for TidStore */
+struct TidStoreIter
+{
+ TidStore *ts;
+
+ /* iterator of radix tree. Use either one depending on TidStoreIsShared() */
+ union
+ {
+ shared_ts_iter *shared;
+ local_ts_iter *local;
+ } tree_iter;
+
+ /* output for the caller */
+ TidStoreIterResult output;
+};
+
+static void tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
+ BlocktableEntry *page);
+
+/*
+ * Create a TidStore. The TidStore will live in the memory context that is
+ * CurrentMemoryContext at the time of this call. The TID storage, backed
+ * by a radix tree, will live in its child memory context, rt_context. The
+ * TidStore will be limited to (approximately) max_bytes total memory
+ * consumption. If the 'area' is non-NULL, the radix tree is created in the
+ * DSA area.
+ *
+ * The returned object is allocated in backend-local memory.
+ */
+TidStore *
+TidStoreCreate(size_t max_bytes, dsa_area *area, int tranche_id)
+{
+ TidStore *ts;
+ size_t initBlockSize = ALLOCSET_DEFAULT_INITSIZE;
+ size_t minContextSize = ALLOCSET_DEFAULT_MINSIZE;
+ size_t maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE;
+
+ ts = palloc0(sizeof(TidStore));
+ ts->context = CurrentMemoryContext;
+
+ /* choose the maxBlockSize to be no larger than 1/16 of max_bytes */
+ while (16 * maxBlockSize > max_bytes * 1024L)
+ maxBlockSize >>= 1;
+
+ if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE)
+ maxBlockSize = ALLOCSET_DEFAULT_INITSIZE;
+
+ /* Create a memory context for the TID storage */
+ ts->rt_context = AllocSetContextCreate(CurrentMemoryContext,
+ "TID storage",
+ minContextSize,
+ initBlockSize,
+ maxBlockSize);
+
+ if (area != NULL)
+ {
+ ts->tree.shared = shared_ts_create(ts->rt_context, area,
+ tranche_id);
+ ts->area = area;
+ }
+ else
+ ts->tree.local = local_ts_create(ts->rt_context);
+
+ return ts;
+}
+
+/*
+ * Attach to the shared TidStore using the given handle. The returned object
+ * is allocated in backend-local memory using the CurrentMemoryContext.
+ */
+TidStore *
+TidStoreAttach(dsa_area *area, dsa_pointer handle)
+{
+ TidStore *ts;
+
+ Assert(area != NULL);
+ Assert(DsaPointerIsValid(handle));
+
+ /* create per-backend state */
+ ts = palloc0(sizeof(TidStore));
+
+ /* Find the shared the shared radix tree */
+ ts->tree.shared = shared_ts_attach(area, handle);
+ ts->area = area;
+
+ return ts;
+}
+
+/*
+ * Detach from a TidStore. This detaches from radix tree and frees the
+ * backend-local resources. The radix tree will continue to exist until
+ * it is either explicitly destroyed, or the area that backs it is returned
+ * to the operating system.
+ */
+void
+TidStoreDetach(TidStore *ts)
+{
+ Assert(TidStoreIsShared(ts));
+
+ shared_ts_detach(ts->tree.shared);
+ pfree(ts);
+}
+
+/*
+ * Lock support functions.
+ *
+ * We can use the radix tree's lock for shared TidStore as the data we
+ * need to protect is only the shared radix tree.
+ */
+
+void
+TidStoreLockExclusive(TidStore *ts)
+{
+ if (TidStoreIsShared(ts))
+ shared_ts_lock_exclusive(ts->tree.shared);
+}
+
+void
+TidStoreLockShare(TidStore *ts)
+{
+ if (TidStoreIsShared(ts))
+ shared_ts_lock_share(ts->tree.shared);
+}
+
+void
+TidStoreUnlock(TidStore *ts)
+{
+ if (TidStoreIsShared(ts))
+ shared_ts_unlock(ts->tree.shared);
+}
+
+/*
+ * Destroy a TidStore, returning all memory.
+ *
+ * Note that the caller must be certain that no other backend will attempt to
+ * access the TidStore before calling this function. Other backend must
+ * explicitly call TidStoreDetach() to free up backend-local memory associated
+ * with the TidStore. The backend that calls TidStoreDestroy() must not call
+ * TidStoreDetach().
+ */
+void
+TidStoreDestroy(TidStore *ts)
+{
+ /* Destroy underlying radix tree */
+ if (TidStoreIsShared(ts))
+ shared_ts_free(ts->tree.shared);
+ else
+ local_ts_free(ts->tree.local);
+
+ MemoryContextDelete(ts->rt_context);
+
+ pfree(ts);
+}
+
+/*
+ * Create or replace an entry for the given block and array of offsets.
+ *
+ * NB: This function is designed and optimized for vacuum's heap scanning
+ * phase, so has some limitations:
+ *
+ * - The offset numbers "offsets" must be sorted in ascending order.
+ * - If the block number already exists, the entry will be replaced --
+ * there is no way to add or remove offsets from an entry.
+ */
+void
+TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
+ int num_offsets)
+{
+ char data[MaxBlocktableEntrySize];
+ BlocktableEntry *page = (BlocktableEntry *) data;
+ bitmapword word;
+ int wordnum;
+ int next_word_threshold;
+ int idx = 0;
+
+ Assert(num_offsets > 0);
+
+ /* Check if the given offset numbers are ordered */
+ for (int i = 1; i < num_offsets; i++)
+ Assert(offsets[i] > offsets[i - 1]);
+
+ for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD;
+ wordnum <= WORDNUM(offsets[num_offsets - 1]);
+ wordnum++, next_word_threshold += BITS_PER_BITMAPWORD)
+ {
+ word = 0;
+
+ while (idx < num_offsets)
+ {
+ OffsetNumber off = offsets[idx];
+
+ /* safety check to ensure we don't overrun bit array bounds */
+ if (!OffsetNumberIsValid(off))
+ elog(ERROR, "tuple offset out of range: %u", off);
+
+ if (off >= next_word_threshold)
+ break;
+
+ word |= ((bitmapword) 1 << BITNUM(off));
+ idx++;
+ }
+
+ /* write out offset bitmap for this wordnum */
+ page->words[wordnum] = word;
+ }
+
+ page->nwords = wordnum;
+ Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
+
+ if (TidStoreIsShared(ts))
+ shared_ts_set(ts->tree.shared, blkno, page);
+ else
+ local_ts_set(ts->tree.local, blkno, page);
+}
+
+/* Return true if the given TID is present in the TidStore */
+bool
+TidStoreIsMember(TidStore *ts, ItemPointer tid)
+{
+ int wordnum;
+ int bitnum;
+ BlocktableEntry *page;
+ BlockNumber blk = ItemPointerGetBlockNumber(tid);
+ OffsetNumber off = ItemPointerGetOffsetNumber(tid);
+
+ if (TidStoreIsShared(ts))
+ page = shared_ts_find(ts->tree.shared, blk);
+ else
+ page = local_ts_find(ts->tree.local, blk);
+
+ /* no entry for the blk */
+ if (page == NULL)
+ return false;
+
+ wordnum = WORDNUM(off);
+ bitnum = BITNUM(off);
+
+ /* no bitmap for the off */
+ if (wordnum >= page->nwords)
+ return false;
+
+ return (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
+}
+
+/*
+ * Prepare to iterate through a TidStore.
+ *
+ * The TidStoreIter struct is created in the caller's memory context, and it
+ * will be freed in TidStoreEndIterate.
+ *
+ * The caller is responsible for locking TidStore until the iteration is
+ * finished.
+ */
+TidStoreIter *
+TidStoreBeginIterate(TidStore *ts)
+{
+ TidStoreIter *iter;
+
+ iter = palloc0(sizeof(TidStoreIter));
+ iter->ts = ts;
+
+ /*
+ * We start with an array large enough to contain at least the offsets
+ * from one completely full bitmap element.
+ */
+ iter->output.max_offset = 2 * BITS_PER_BITMAPWORD;
+ iter->output.offsets = palloc(sizeof(OffsetNumber) * iter->output.max_offset);
+
+ if (TidStoreIsShared(ts))
+ iter->tree_iter.shared = shared_ts_begin_iterate(ts->tree.shared);
+ else
+ iter->tree_iter.local = local_ts_begin_iterate(ts->tree.local);
+
+ return iter;
+}
+
+
+/*
+ * Scan the TidStore and return the TIDs of the next block. The offsets in
+ * each iteration result are ordered, as are the block numbers over all
+ * iterations.
+ */
+TidStoreIterResult *
+TidStoreIterateNext(TidStoreIter *iter)
+{
+ uint64 key;
+ BlocktableEntry *page;
+
+ if (TidStoreIsShared(iter->ts))
+ page = shared_ts_iterate_next(iter->tree_iter.shared, &key);
+ else
+ page = local_ts_iterate_next(iter->tree_iter.local, &key);
+
+ if (page == NULL)
+ return NULL;
+
+ /* Collect TIDs from the key-value pair */
+ tidstore_iter_extract_tids(iter, (BlockNumber) key, page);
+
+ return &(iter->output);
+}
+
+/*
+ * Finish the iteration on TidStore.
+ *
+ * The caller is responsible for releasing any locks.
+ */
+void
+TidStoreEndIterate(TidStoreIter *iter)
+{
+ if (TidStoreIsShared(iter->ts))
+ shared_ts_end_iterate(iter->tree_iter.shared);
+ else
+ local_ts_end_iterate(iter->tree_iter.local);
+
+ pfree(iter->output.offsets);
+ pfree(iter);
+}
+
+/*
+ * Return the memory usage of TidStore.
+ */
+size_t
+TidStoreMemoryUsage(TidStore *ts)
+{
+ if (TidStoreIsShared(ts))
+ return shared_ts_memory_usage(ts->tree.shared);
+ else
+ return local_ts_memory_usage(ts->tree.local);
+}
+
+dsa_pointer
+TidStoreGetHandle(TidStore *ts)
+{
+ Assert(TidStoreIsShared(ts));
+
+ return (dsa_pointer) shared_ts_get_handle(ts->tree.shared);
+}
+
+/* Extract TIDs from the given key-value pair */
+static void
+tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
+ BlocktableEntry *page)
+{
+ TidStoreIterResult *result = (&iter->output);
+ int wordnum;
+
+ result->num_offsets = 0;
+ result->blkno = blkno;
+
+ for (wordnum = 0; wordnum < page->nwords; wordnum++)
+ {
+ bitmapword w = page->words[wordnum];
+ int off = wordnum * BITS_PER_BITMAPWORD;
+
+ /* Make sure there is enough space to add offsets */
+ if ((result->num_offsets + BITS_PER_BITMAPWORD) > result->max_offset)
+ {
+ result->max_offset *= 2;
+ result->offsets = repalloc(result->offsets,
+ sizeof(OffsetNumber) * result->max_offset);
+ }
+
+ while (w != 0)
+ {
+ if (w & 1)
+ result->offsets[result->num_offsets++] = (OffsetNumber) off;
+ off++;
+ w >>= 1;
+ }
+ }
+}
diff --git a/src/include/access/tidstore.h b/src/include/access/tidstore.h
new file mode 100644
index 00000000000..8cf4e94f123
--- /dev/null
+++ b/src/include/access/tidstore.h
@@ -0,0 +1,49 @@
+/*-------------------------------------------------------------------------
+ *
+ * tidstore.h
+ * TidStore interface.
+ *
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/tidstore.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TIDSTORE_H
+#define TIDSTORE_H
+
+#include "storage/itemptr.h"
+#include "utils/dsa.h"
+
+typedef struct TidStore TidStore;
+typedef struct TidStoreIter TidStoreIter;
+
+/* Result struct for TidStoreIterateNext */
+typedef struct TidStoreIterResult
+{
+ BlockNumber blkno;
+ int max_offset;
+ int num_offsets;
+ OffsetNumber *offsets;
+} TidStoreIterResult;
+
+extern TidStore *TidStoreCreate(size_t max_bytes, dsa_area *dsa,
+ int tranche_id);
+extern TidStore *TidStoreAttach(dsa_area *dsa, dsa_pointer rt_dp);
+extern void TidStoreDetach(TidStore *ts);
+extern void TidStoreLockExclusive(TidStore *ts);
+extern void TidStoreLockShare(TidStore *ts);
+extern void TidStoreUnlock(TidStore *ts);
+extern void TidStoreDestroy(TidStore *ts);
+extern void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
+ int num_offsets);
+extern bool TidStoreIsMember(TidStore *ts, ItemPointer tid);
+extern TidStoreIter *TidStoreBeginIterate(TidStore *ts);
+extern TidStoreIterResult *TidStoreIterateNext(TidStoreIter *iter);
+extern void TidStoreEndIterate(TidStoreIter *iter);
+extern size_t TidStoreMemoryUsage(TidStore *ts);
+extern dsa_pointer TidStoreGetHandle(TidStore *ts);
+
+#endif /* TIDSTORE_H */
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 875a76d6f1d..1cbd532156d 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -35,6 +35,7 @@ SUBDIRS = \
test_rls_hooks \
test_shm_mq \
test_slru \
+ test_tidstore \
unsafe_tests \
worker_spi \
xid_wraparound
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index f1d18a1b297..7c11fb97f21 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -34,6 +34,7 @@ subdir('test_resowner')
subdir('test_rls_hooks')
subdir('test_shm_mq')
subdir('test_slru')
+subdir('test_tidstore')
subdir('unsafe_tests')
subdir('worker_spi')
subdir('xid_wraparound')
diff --git a/src/test/modules/test_tidstore/.gitignore b/src/test/modules/test_tidstore/.gitignore
new file mode 100644
index 00000000000..5dcb3ff9723
--- /dev/null
+++ b/src/test/modules/test_tidstore/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/src/test/modules/test_tidstore/Makefile b/src/test/modules/test_tidstore/Makefile
new file mode 100644
index 00000000000..dab107d70c3
--- /dev/null
+++ b/src/test/modules/test_tidstore/Makefile
@@ -0,0 +1,23 @@
+# src/test/modules/test_tidstore/Makefile
+
+MODULE_big = test_tidstore
+OBJS = \
+ $(WIN32RES) \
+ test_tidstore.o
+PGFILEDESC = "test_tidstore - test code for src/backend/access/common/tidstore.c"
+
+EXTENSION = test_tidstore
+DATA = test_tidstore--1.0.sql
+
+REGRESS = test_tidstore
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/test_tidstore
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/test_tidstore/expected/test_tidstore.out b/src/test/modules/test_tidstore/expected/test_tidstore.out
new file mode 100644
index 00000000000..0ae2f970dac
--- /dev/null
+++ b/src/test/modules/test_tidstore/expected/test_tidstore.out
@@ -0,0 +1,97 @@
+-- Note: The test code use an array of TIDs for verification similar
+-- to vacuum's dead item array pre-PG17. To avoid adding duplicates,
+-- each call to do_set_block_offsets() should use different block
+-- numbers.
+CREATE EXTENSION test_tidstore;
+-- To hide the output of do_set_block_offsets()
+CREATE TEMP TABLE hideblocks(blockno bigint);
+-- Constant values used in the tests.
+\set maxblkno 4294967295
+-- The maximum number of heap tuples (MaxHeapTuplesPerPage) in 8kB block is 291.
+-- We use a higher number to test tidstore.
+\set maxoffset 512
+SELECT test_create(false);
+ test_create
+-------------
+
+(1 row)
+
+-- Test on empty tidstore.
+SELECT test_is_full();
+ test_is_full
+--------------
+ f
+(1 row)
+
+SELECT check_set_block_offsets();
+ check_set_block_offsets
+-------------------------
+
+(1 row)
+
+-- Add TIDs.
+INSERT INTO hideblocks (blockno)
+SELECT do_set_block_offsets(blk, array_agg(off)::int2[])
+ FROM
+ (VALUES (0), (1), (:maxblkno / 2), (:maxblkno - 1), (:maxblkno)) AS blocks(blk),
+ (VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
+ GROUP BY blk;
+-- Add enough TIDs to cause the store to appear "full", compared
+-- to the allocated memory it started out with. This is easier
+-- with memory contexts in local memory.
+INSERT INTO hideblocks (blockno)
+SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63,64,200]::int2[])
+ FROM generate_series(1000, 2000, 1) blk;
+-- Zero offset not allowed
+SELECT do_set_block_offsets(1, ARRAY[0]::int2[]);
+ERROR: tuple offset out of range: 0
+-- Check TIDs we've added to the store.
+SELECT check_set_block_offsets();
+ check_set_block_offsets
+-------------------------
+
+(1 row)
+
+SELECT test_is_full();
+ test_is_full
+--------------
+ t
+(1 row)
+
+-- Re-create the TID store for randommized tests.
+SELECT test_destroy();
+ test_destroy
+--------------
+
+(1 row)
+
+-- Use shared memory this time. We can't do that in test_radixtree.sql,
+-- because unused static functions would raise warnings there.
+SELECT test_create(true);
+ test_create
+-------------
+
+(1 row)
+
+-- Random TIDs test. The offset numbers are randomized and must be
+-- unique and ordered.
+INSERT INTO hideblocks (blockno)
+SELECT do_set_block_offsets(blkno, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
+ FROM generate_series(1, 100) num_offsets,
+ generate_series(1000, 1100, 1) blkno
+GROUP BY blkno;
+-- Check TIDs we've added to the store.
+SELECT check_set_block_offsets();
+ check_set_block_offsets
+-------------------------
+
+(1 row)
+
+-- cleanup
+SELECT test_destroy();
+ test_destroy
+--------------
+
+(1 row)
+
+DROP TABLE hideblocks;
diff --git a/src/test/modules/test_tidstore/meson.build b/src/test/modules/test_tidstore/meson.build
new file mode 100644
index 00000000000..0ed3ea2ef33
--- /dev/null
+++ b/src/test/modules/test_tidstore/meson.build
@@ -0,0 +1,33 @@
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+test_tidstore_sources = files(
+ 'test_tidstore.c',
+)
+
+if host_system == 'windows'
+ test_tidstore_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'test_tidstore',
+ '--FILEDESC', 'test_tidstore - test code for src/backend/access/common/tidstore.c',])
+endif
+
+test_tidstore = shared_module('test_tidstore',
+ test_tidstore_sources,
+ kwargs: pg_test_mod_args,
+)
+test_install_libs += test_tidstore
+
+test_install_data += files(
+ 'test_tidstore.control',
+ 'test_tidstore--1.0.sql',
+)
+
+tests += {
+ 'name': 'test_tidstore',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'test_tidstore',
+ ],
+ },
+}
diff --git a/src/test/modules/test_tidstore/sql/test_tidstore.sql b/src/test/modules/test_tidstore/sql/test_tidstore.sql
new file mode 100644
index 00000000000..e5edfbb2649
--- /dev/null
+++ b/src/test/modules/test_tidstore/sql/test_tidstore.sql
@@ -0,0 +1,65 @@
+-- Note: The test code use an array of TIDs for verification similar
+-- to vacuum's dead item array pre-PG17. To avoid adding duplicates,
+-- each call to do_set_block_offsets() should use different block
+-- numbers.
+
+CREATE EXTENSION test_tidstore;
+
+-- To hide the output of do_set_block_offsets()
+CREATE TEMP TABLE hideblocks(blockno bigint);
+
+-- Constant values used in the tests.
+\set maxblkno 4294967295
+-- The maximum number of heap tuples (MaxHeapTuplesPerPage) in 8kB block is 291.
+-- We use a higher number to test tidstore.
+\set maxoffset 512
+
+SELECT test_create(false);
+
+-- Test on empty tidstore.
+SELECT test_is_full();
+SELECT check_set_block_offsets();
+
+-- Add TIDs.
+INSERT INTO hideblocks (blockno)
+SELECT do_set_block_offsets(blk, array_agg(off)::int2[])
+ FROM
+ (VALUES (0), (1), (:maxblkno / 2), (:maxblkno - 1), (:maxblkno)) AS blocks(blk),
+ (VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
+ GROUP BY blk;
+
+-- Add enough TIDs to cause the store to appear "full", compared
+-- to the allocated memory it started out with. This is easier
+-- with memory contexts in local memory.
+INSERT INTO hideblocks (blockno)
+SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63,64,200]::int2[])
+ FROM generate_series(1000, 2000, 1) blk;
+
+-- Zero offset not allowed
+SELECT do_set_block_offsets(1, ARRAY[0]::int2[]);
+
+-- Check TIDs we've added to the store.
+SELECT check_set_block_offsets();
+
+SELECT test_is_full();
+
+-- Re-create the TID store for randommized tests.
+SELECT test_destroy();
+-- Use shared memory this time. We can't do that in test_radixtree.sql,
+-- because unused static functions would raise warnings there.
+SELECT test_create(true);
+
+-- Random TIDs test. The offset numbers are randomized and must be
+-- unique and ordered.
+INSERT INTO hideblocks (blockno)
+SELECT do_set_block_offsets(blkno, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
+ FROM generate_series(1, 100) num_offsets,
+ generate_series(1000, 1100, 1) blkno
+GROUP BY blkno;
+
+-- Check TIDs we've added to the store.
+SELECT check_set_block_offsets();
+
+-- cleanup
+SELECT test_destroy();
+DROP TABLE hideblocks;
diff --git a/src/test/modules/test_tidstore/test_tidstore--1.0.sql b/src/test/modules/test_tidstore/test_tidstore--1.0.sql
new file mode 100644
index 00000000000..7e6c60c7bb3
--- /dev/null
+++ b/src/test/modules/test_tidstore/test_tidstore--1.0.sql
@@ -0,0 +1,27 @@
+/* src/test/modules/test_tidstore/test_tidstore--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_tidstore" to load this file. \quit
+
+CREATE FUNCTION test_create(
+shared bool)
+RETURNS void STRICT PARALLEL UNSAFE
+AS 'MODULE_PATHNAME' LANGUAGE C;
+
+CREATE FUNCTION do_set_block_offsets(
+blkno bigint,
+offsets int2[])
+RETURNS bigint STRICT PARALLEL UNSAFE
+AS 'MODULE_PATHNAME' LANGUAGE C;
+
+CREATE FUNCTION check_set_block_offsets()
+RETURNS void STRICT PARALLEL UNSAFE
+AS 'MODULE_PATHNAME' LANGUAGE C;
+
+CREATE FUNCTION test_is_full()
+RETURNS bool STRICT PARALLEL UNSAFE
+AS 'MODULE_PATHNAME' LANGUAGE C;
+
+CREATE FUNCTION test_destroy()
+RETURNS void STRICT PARALLEL UNSAFE
+AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c
new file mode 100644
index 00000000000..c74ad2cf8b8
--- /dev/null
+++ b/src/test/modules/test_tidstore/test_tidstore.c
@@ -0,0 +1,317 @@
+/*--------------------------------------------------------------------------
+ *
+ * test_tidstore.c
+ * Test TidStore data structure.
+ *
+ * Note: all locking in this test module is useless since there is only
+ * a single process to use the TidStore. It is meant to be an example of
+ * usage.
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/test/modules/test_tidstore/test_tidstore.c
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/tidstore.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/block.h"
+#include "storage/itemptr.h"
+#include "storage/lwlock.h"
+#include "utils/array.h"
+#include "utils/memutils.h"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(test_create);
+PG_FUNCTION_INFO_V1(do_set_block_offsets);
+PG_FUNCTION_INFO_V1(check_set_block_offsets);
+PG_FUNCTION_INFO_V1(test_is_full);
+PG_FUNCTION_INFO_V1(test_destroy);
+
+static TidStore *tidstore = NULL;
+static dsa_area *dsa = NULL;
+static size_t tidstore_empty_size;
+
+/* array for verification of some tests */
+typedef struct ItemArray
+{
+ ItemPointerData *insert_tids;
+ ItemPointerData *lookup_tids;
+ ItemPointerData *iter_tids;
+ int max_tids;
+ int num_tids;
+} ItemArray;
+
+static ItemArray items;
+
+/* comparator routine for ItemPointer */
+static int
+itemptr_cmp(const void *left, const void *right)
+{
+ BlockNumber lblk,
+ rblk;
+ OffsetNumber loff,
+ roff;
+
+ lblk = ItemPointerGetBlockNumber((ItemPointer) left);
+ rblk = ItemPointerGetBlockNumber((ItemPointer) right);
+
+ if (lblk < rblk)
+ return -1;
+ if (lblk > rblk)
+ return 1;
+
+ loff = ItemPointerGetOffsetNumber((ItemPointer) left);
+ roff = ItemPointerGetOffsetNumber((ItemPointer) right);
+
+ if (loff < roff)
+ return -1;
+ if (loff > roff)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Create a TidStore. If shared is false, the tidstore is created
+ * on TopMemoryContext, otherwise on DSA. Although the tidstore
+ * is created on DSA, only the same process can subsequently use
+ * the tidstore. The tidstore handle is not shared anywhere.
+*/
+Datum
+test_create(PG_FUNCTION_ARGS)
+{
+ bool shared = PG_GETARG_BOOL(0);
+ MemoryContext old_ctx;
+
+ /* doesn't really matter, since it's just a hint */
+ size_t tidstore_max_size = 2 * 1024 * 1024;
+ size_t array_init_size = 1024;
+
+ Assert(tidstore == NULL);
+ Assert(dsa == NULL);
+
+ /*
+ * Create the TidStore on TopMemoryContext so that the same process use it
+ * for subsequent tests.
+ */
+ old_ctx = MemoryContextSwitchTo(TopMemoryContext);
+
+ if (shared)
+ {
+ int tranche_id;
+
+ tranche_id = LWLockNewTrancheId();
+ LWLockRegisterTranche(tranche_id, "test_tidstore");
+
+ dsa = dsa_create(tranche_id);
+
+ /*
+ * Remain attached until end of backend or explicitly detached so that
+ * the same process use the tidstore for subsequent tests.
+ */
+ dsa_pin_mapping(dsa);
+
+ tidstore = TidStoreCreate(tidstore_max_size, dsa, tranche_id);
+ }
+ else
+ tidstore = TidStoreCreate(tidstore_max_size, NULL, 0);
+
+ tidstore_empty_size = TidStoreMemoryUsage(tidstore);
+
+ items.num_tids = 0;
+ items.max_tids = array_init_size / sizeof(ItemPointerData);
+ items.insert_tids = (ItemPointerData *) palloc0(array_init_size);
+ items.lookup_tids = (ItemPointerData *) palloc0(array_init_size);
+ items.iter_tids = (ItemPointerData *) palloc0(array_init_size);
+
+ MemoryContextSwitchTo(old_ctx);
+
+ PG_RETURN_VOID();
+}
+
+static void
+sanity_check_array(ArrayType *ta)
+{
+ if (ARR_HASNULL(ta) && array_contains_nulls(ta))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("array must not contain nulls")));
+
+ if (ARR_NDIM(ta) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("argument must be empty or one-dimensional array")));
+}
+
+/* Set the given block and offsets pairs */
+Datum
+do_set_block_offsets(PG_FUNCTION_ARGS)
+{
+ BlockNumber blkno = PG_GETARG_INT64(0);
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(1);
+ OffsetNumber *offs;
+ int noffs;
+
+ sanity_check_array(ta);
+
+ noffs = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
+ offs = ((OffsetNumber *) ARR_DATA_PTR(ta));
+
+ /* Set TIDs in the store */
+ TidStoreLockExclusive(tidstore);
+ TidStoreSetBlockOffsets(tidstore, blkno, offs, noffs);
+ TidStoreUnlock(tidstore);
+
+ /* Set TIDs in verification array */
+ for (int i = 0; i < noffs; i++)
+ {
+ ItemPointer tid;
+ int idx = items.num_tids + i;
+
+ /* Enlarge the TID arrays if necessary */
+ if (idx >= items.max_tids)
+ {
+ items.max_tids *= 2;
+ items.insert_tids = repalloc(items.insert_tids, sizeof(ItemPointerData) * items.max_tids);
+ items.lookup_tids = repalloc(items.lookup_tids, sizeof(ItemPointerData) * items.max_tids);
+ items.iter_tids = repalloc(items.iter_tids, sizeof(ItemPointerData) * items.max_tids);
+ }
+
+ tid = &(items.insert_tids[idx]);
+ ItemPointerSet(tid, blkno, offs[i]);
+ }
+
+ /* Update statistics */
+ items.num_tids += noffs;
+
+ PG_RETURN_INT64(blkno);
+}
+
+/*
+ * Verify TIDs in store against the array.
+ */
+Datum
+check_set_block_offsets(PG_FUNCTION_ARGS)
+{
+ TidStoreIter *iter;
+ TidStoreIterResult *iter_result;
+ int num_iter_tids = 0;
+ int num_lookup_tids = 0;
+ BlockNumber prevblkno = 0;;
+
+ /* lookup each member in the verification array */
+ for (int i = 0; i < items.num_tids; i++)
+ if (!TidStoreIsMember(tidstore, &items.insert_tids[i]))
+ elog(ERROR, "missing TID with block %u, offset %u",
+ ItemPointerGetBlockNumber(&items.insert_tids[i]),
+ ItemPointerGetOffsetNumber(&items.insert_tids[i]));
+
+ /*
+ * Lookup all possible TIDs for each distinct block in the verification
+ * array and save successful lookups in the lookup array.
+ */
+
+ for (int i = 0; i < items.num_tids; i++)
+ {
+ BlockNumber blkno = ItemPointerGetBlockNumber(&items.insert_tids[i]);
+
+ if (i > 0 && blkno == prevblkno)
+ continue;
+
+ for (OffsetNumber offset = FirstOffsetNumber; offset < MaxOffsetNumber; offset++)
+ {
+ ItemPointerData tid;
+
+ ItemPointerSet(&tid, blkno, offset);
+
+ TidStoreLockShare(tidstore);
+ if (TidStoreIsMember(tidstore, &tid))
+ ItemPointerSet(&items.lookup_tids[num_lookup_tids++], blkno, offset);
+ TidStoreUnlock(tidstore);
+ }
+
+ prevblkno = blkno;
+ }
+
+ /* Collect TIDs stored in the tidstore, in order */
+
+ TidStoreLockShare(tidstore);
+ iter = TidStoreBeginIterate(tidstore);
+ while ((iter_result = TidStoreIterateNext(iter)) != NULL)
+ {
+ for (int i = 0; i < iter_result->num_offsets; i++)
+ ItemPointerSet(&(items.iter_tids[num_iter_tids++]), iter_result->blkno,
+ iter_result->offsets[i]);
+ }
+ TidStoreEndIterate(iter);
+ TidStoreUnlock(tidstore);
+
+ /*
+ * Sort verification and lookup arrays and test that all arrays are the
+ * same.
+ */
+
+ if (num_lookup_tids != items.num_tids)
+ elog(ERROR, "should have %d TIDs, have %d", items.num_tids, num_lookup_tids);
+ if (num_iter_tids != items.num_tids)
+ elog(ERROR, "should have %d TIDs, have %d", items.num_tids, num_iter_tids);
+
+ qsort(items.insert_tids, items.num_tids, sizeof(ItemPointerData), itemptr_cmp);
+ qsort(items.lookup_tids, items.num_tids, sizeof(ItemPointerData), itemptr_cmp);
+ for (int i = 0; i < items.num_tids; i++)
+ {
+ if (itemptr_cmp((const void *) &items.insert_tids[i], (const void *) &items.iter_tids[i]) != 0)
+ elog(ERROR, "TID iter array doesn't match verification array, got (%u,%u) expected (%u,%u)",
+ ItemPointerGetBlockNumber(&items.iter_tids[i]),
+ ItemPointerGetOffsetNumber(&items.iter_tids[i]),
+ ItemPointerGetBlockNumber(&items.insert_tids[i]),
+ ItemPointerGetOffsetNumber(&items.insert_tids[i]));
+ if (itemptr_cmp((const void *) &items.insert_tids[i], (const void *) &items.lookup_tids[i]) != 0)
+ elog(ERROR, "TID lookup array doesn't match verification array, got (%u,%u) expected (%u,%u)",
+ ItemPointerGetBlockNumber(&items.lookup_tids[i]),
+ ItemPointerGetOffsetNumber(&items.lookup_tids[i]),
+ ItemPointerGetBlockNumber(&items.insert_tids[i]),
+ ItemPointerGetOffsetNumber(&items.insert_tids[i]));
+ }
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * In real world use, we care if the memory usage is greater than
+ * some configured limit. Here we just want to verify that
+ * TidStoreMemoryUsage is not broken.
+ */
+Datum
+test_is_full(PG_FUNCTION_ARGS)
+{
+ bool is_full;
+
+ is_full = (TidStoreMemoryUsage(tidstore) > tidstore_empty_size);
+
+ PG_RETURN_BOOL(is_full);
+}
+
+/* Free the tidstore */
+Datum
+test_destroy(PG_FUNCTION_ARGS)
+{
+ TidStoreDestroy(tidstore);
+ tidstore = NULL;
+ items.num_tids = 0;
+ pfree(items.insert_tids);
+ pfree(items.lookup_tids);
+ pfree(items.iter_tids);
+
+ if (dsa)
+ dsa_detach(dsa);
+ dsa = NULL;
+
+ PG_RETURN_VOID();
+}
diff --git a/src/test/modules/test_tidstore/test_tidstore.control b/src/test/modules/test_tidstore/test_tidstore.control
new file mode 100644
index 00000000000..9b6bd4638f9
--- /dev/null
+++ b/src/test/modules/test_tidstore/test_tidstore.control
@@ -0,0 +1,4 @@
+comment = 'Test code for tidstore'
+default_version = '1.0'
+module_pathname = '$libdir/test_tidstore'
+relocatable = true
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e294f8bc4e6..c79e7b2eb6e 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4065,3 +4065,8 @@ rfile
ws_options
ws_file_info
PathKeyInfo
+TidStore
+TidStoreIter
+TidStoreIterResult
+BlocktableEntry
+ItemArray