aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/gin/ginbtree.c77
-rw-r--r--src/test/modules/Makefile4
-rw-r--r--src/test/modules/gin/Makefile16
-rw-r--r--src/test/modules/gin/expected/gin_incomplete_splits.out180
-rw-r--r--src/test/modules/gin/meson.build16
-rw-r--r--src/test/modules/gin/sql/gin_incomplete_splits.sql144
-rw-r--r--src/test/modules/meson.build1
7 files changed, 418 insertions, 20 deletions
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 39e2c07ad3d..86f938686c3 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -19,6 +19,7 @@
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/predicate.h"
+#include "utils/injection_point.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@@ -28,6 +29,8 @@ static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
Buffer childbuf, GinStatsData *buildStats);
static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
bool freestack, GinStatsData *buildStats);
+static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack,
+ GinStatsData *buildStats, int access);
/*
* Lock buffer by needed method for search.
@@ -108,7 +111,7 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
* encounter on the way.
*/
if (!searchMode && GinPageIsIncompleteSplit(page))
- ginFinishSplit(btree, stack, false, NULL);
+ ginFinishOldSplit(btree, stack, NULL, access);
/*
* ok, page is correctly locked, we should check to move right ..,
@@ -128,7 +131,7 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
page = BufferGetPage(stack->buffer);
if (!searchMode && GinPageIsIncompleteSplit(page))
- ginFinishSplit(btree, stack, false, NULL);
+ ginFinishOldSplit(btree, stack, NULL, access);
}
if (GinPageIsLeaf(page)) /* we found, return locked page */
@@ -164,8 +167,11 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
* Step right from current page.
*
* The next page is locked first, before releasing the current page. This is
- * crucial to protect from concurrent page deletion (see comment in
- * ginDeletePage).
+ * crucial to prevent concurrent VACUUM from deleting a page that we are about
+ * to step to. (The lock-coupling isn't strictly necessary when we are
+ * traversing the tree to find an insert location, because page deletion grabs
+ * a cleanup lock on the root to prevent any concurrent inserts. See Page
+ * deletion section in the README. But there's no harm in doing it always.)
*/
Buffer
ginStepRight(Buffer buffer, Relation index, int lockmode)
@@ -262,7 +268,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
ptr->parent = root;
ptr->off = InvalidOffsetNumber;
- ginFinishSplit(btree, ptr, false, NULL);
+ ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
}
leftmostBlkno = btree->getLeftMostChild(btree, page);
@@ -291,7 +297,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
ptr->parent = root;
ptr->off = InvalidOffsetNumber;
- ginFinishSplit(btree, ptr, false, NULL);
+ ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
}
}
@@ -670,15 +676,6 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
bool done;
bool first = true;
- /*
- * freestack == false when we encounter an incompletely split page during
- * a scan, while freestack == true is used in the normal scenario that a
- * split is finished right after the initial insert.
- */
- if (!freestack)
- elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
- stack->blkno, RelationGetRelationName(btree->index));
-
/* this loop crawls up the stack until the insertion is complete */
do
{
@@ -686,6 +683,13 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
void *insertdata;
BlockNumber updateblkno;
+#ifdef USE_INJECTION_POINTS
+ if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
+ INJECTION_POINT("gin-leave-leaf-split-incomplete");
+ else
+ INJECTION_POINT("gin-leave-internal-split-incomplete");
+#endif
+
/* search parent to lock */
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
@@ -699,7 +703,7 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
* would fail.
*/
if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
- ginFinishSplit(btree, parent, false, buildStats);
+ ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
/* move right if it's needed */
page = BufferGetPage(parent->buffer);
@@ -723,7 +727,7 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
page = BufferGetPage(parent->buffer);
if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
- ginFinishSplit(btree, parent, false, buildStats);
+ ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
}
/* insert the downlink */
@@ -760,6 +764,43 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
}
/*
+ * An entry point to ginFinishSplit() that is used when we stumble upon an
+ * existing incompletely split page in the tree, as opposed to completing a
+ * split that we just made outselves. The difference is that stack->buffer may
+ * be merely share-locked on entry, and will be upgraded to exclusive mode.
+ *
+ * Note: Upgrading the lock momentarily releases it. Doing that in a scan
+ * would not be OK, because a concurrent VACUUM might delete the page while
+ * we're not holding the lock. It's OK in an insert, though, because VACUUM
+ * has a different mechanism that prevents it from running concurrently with
+ * inserts. (Namely, it holds a cleanup lock on the root.)
+ */
+static void
+ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats, int access)
+{
+ INJECTION_POINT("gin-finish-incomplete-split");
+ elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
+ stack->blkno, RelationGetRelationName(btree->index));
+
+ if (access == GIN_SHARE)
+ {
+ LockBuffer(stack->buffer, GIN_UNLOCK);
+ LockBuffer(stack->buffer, GIN_EXCLUSIVE);
+
+ if (!GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
+ {
+ /*
+ * Someone else already completed the split while we were not
+ * holding the lock.
+ */
+ return;
+ }
+ }
+
+ ginFinishSplit(btree, stack, false, buildStats);
+}
+
+/*
* Insert a value to tree described by stack.
*
* The value to be inserted is given in 'insertdata'. Its format depends
@@ -779,7 +820,7 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
/* If the leaf page was incompletely split, finish the split first */
if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
- ginFinishSplit(btree, stack, false, buildStats);
+ ginFinishOldSplit(btree, stack, buildStats, GIN_EXCLUSIVE);
done = ginPlaceToPage(btree, stack,
insertdata, InvalidBlockNumber,
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index e32c8925f67..89aa41b5e31 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -40,9 +40,9 @@ SUBDIRS = \
ifeq ($(enable_injection_points),yes)
-SUBDIRS += injection_points
+SUBDIRS += injection_points gin
else
-ALWAYS_SUBDIRS += injection_points
+ALWAYS_SUBDIRS += injection_points gin
endif
ifeq ($(with_ssl),openssl)
diff --git a/src/test/modules/gin/Makefile b/src/test/modules/gin/Makefile
new file mode 100644
index 00000000000..a7731b7fa18
--- /dev/null
+++ b/src/test/modules/gin/Makefile
@@ -0,0 +1,16 @@
+# src/test/modules/gin/Makefile
+
+EXTRA_INSTALL = src/test/modules/injection_points
+
+REGRESS = gin_incomplete_splits
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/gin
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/gin/expected/gin_incomplete_splits.out b/src/test/modules/gin/expected/gin_incomplete_splits.out
new file mode 100644
index 00000000000..9a60f8c128a
--- /dev/null
+++ b/src/test/modules/gin/expected/gin_incomplete_splits.out
@@ -0,0 +1,180 @@
+--
+-- The test uses a GIN index over int[]. The table contains arrays
+-- with integers from 1 to :next_i. Each integer occurs exactly once,
+-- no gaps or duplicates, although the index does contain some
+-- duplicate elements because some of the inserting transactions are
+-- rolled back during the test. The exact contents of the table depend
+-- on the physical layout of the index, which in turn depends at least
+-- on the block size, so instead of check for the exact contents, we
+-- check those invariants. :next_i psql variable is maintained at all
+-- times to hold the last inserted integer + 1.
+--
+-- This uses injection points to cause errors that leave some page
+-- splits in "incomplete" state
+create extension injection_points;
+-- Use the index for all the queries
+set enable_seqscan=off;
+-- Print a NOTICE whenever an incomplete split gets fixed
+SELECT injection_points_attach('gin-finish-incomplete-split', 'notice');
+ injection_points_attach
+-------------------------
+
+(1 row)
+
+--
+-- First create the test table and some helper functions
+--
+create table gin_incomplete_splits(i int4[]) with (autovacuum_enabled = off);
+create index on gin_incomplete_splits using gin (i) with (fastupdate = off);
+-- Creates an array with all integers from $1 (inclusive) $2 (exclusive)
+create function range_array(int, int) returns int[] language sql immutable as $$
+ select array_agg(g) from generate_series($1, $2 - 1) g
+$$;
+-- Inserts an array with 'n' rows to the test table. Pass :next_i as
+-- the first argument, returns the new value for :next_i.
+create function insert_n(first_i int, n int) returns int language plpgsql as $$
+begin
+ insert into gin_incomplete_splits values (range_array(first_i, first_i+n));
+ return first_i + n;
+end;
+$$;
+-- Inserts to the table until an insert fails. Like insert_n(), returns the
+-- new value for :next_i.
+create function insert_until_fail(next_i int, step int default 1) returns int language plpgsql as $$
+declare
+ i integer;
+begin
+ -- Insert arrays with 'step' elements each, until an error occurs.
+ loop
+ begin
+ select insert_n(next_i, step) into next_i;
+ exception when others then
+ raise notice 'failed with: %', sqlerrm;
+ exit;
+ end;
+
+ -- The caller is expected to set an injection point that eventuall
+ -- causes an error. But bail out if still no error after 10000
+ -- attempts, so that we don't get stuck in an infinite loop.
+ i := i + 1;
+ if i = 10000 then
+ raise 'no error on inserts after ';
+ end if;
+ end loop;
+
+ return next_i;
+end;
+$$;
+-- Check the invariants.
+create function verify(next_i int) returns bool language plpgsql as $$
+declare
+ a integer[];
+ elem integer;
+ c integer;
+begin
+ -- Perform a scan over the trailing part of the index, where the
+ -- possible incomplete splits are. (We don't check the whole table,
+ -- because that'd be pretty slow.)
+ c := 0;
+ -- Find all arrays that overlap with the last 200 inserted integers. Or
+ -- the next 100, which shouldn't exist.
+ for a in select i from gin_incomplete_splits where i && range_array(next_i - 200, next_i + 100)
+ loop
+ -- count all elements in those arrays in the window.
+ foreach elem in ARRAY a loop
+ if elem >= next_i then
+ raise 'unexpected value % in array', elem;
+ end if;
+ if elem >= next_i - 200 then
+ c := c + 1;
+ end if;
+ end loop;
+ end loop;
+ if c <> 200 then
+ raise 'unexpected count % ', c;
+ end if;
+
+ return true;
+end;
+$$;
+-- Insert one array to get started.
+select insert_n(1, 1000) as next_i
+\gset
+select verify(:next_i);
+ verify
+--------
+ t
+(1 row)
+
+--
+-- Test incomplete leaf split
+--
+SELECT injection_points_attach('gin-leave-leaf-split-incomplete', 'error');
+ injection_points_attach
+-------------------------
+
+(1 row)
+
+select insert_until_fail(:next_i) as next_i
+\gset
+NOTICE: failed with: error triggered for injection point gin-leave-leaf-split-incomplete
+SELECT injection_points_detach('gin-leave-leaf-split-incomplete');
+ injection_points_detach
+-------------------------
+
+(1 row)
+
+-- Verify that a scan works even though there's an incomplete split
+select verify(:next_i);
+ verify
+--------
+ t
+(1 row)
+
+-- Insert some more rows, finishing the split
+select insert_n(:next_i, 10) as next_i
+\gset
+NOTICE: notice triggered for injection point gin-finish-incomplete-split
+-- Verify that a scan still works
+select verify(:next_i);
+ verify
+--------
+ t
+(1 row)
+
+--
+-- Test incomplete internal page split
+--
+SELECT injection_points_attach('gin-leave-internal-split-incomplete', 'error');
+ injection_points_attach
+-------------------------
+
+(1 row)
+
+select insert_until_fail(:next_i, 100) as next_i
+\gset
+NOTICE: failed with: error triggered for injection point gin-leave-internal-split-incomplete
+SELECT injection_points_detach('gin-leave-internal-split-incomplete');
+ injection_points_detach
+-------------------------
+
+(1 row)
+
+ -- Verify that a scan works even though there's an incomplete split
+select verify(:next_i);
+ verify
+--------
+ t
+(1 row)
+
+-- Insert some more rows, finishing the split
+select insert_n(:next_i, 10) as next_i
+\gset
+NOTICE: notice triggered for injection point gin-finish-incomplete-split
+-- Verify that a scan still works
+select verify(:next_i);
+ verify
+--------
+ t
+(1 row)
+
diff --git a/src/test/modules/gin/meson.build b/src/test/modules/gin/meson.build
new file mode 100644
index 00000000000..9734b51de2d
--- /dev/null
+++ b/src/test/modules/gin/meson.build
@@ -0,0 +1,16 @@
+# Copyright (c) 2022-2024, PostgreSQL Global Development Group
+
+if not get_option('injection_points')
+ subdir_done()
+endif
+
+tests += {
+ 'name': 'gin',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'gin_incomplete_splits',
+ ],
+ },
+}
diff --git a/src/test/modules/gin/sql/gin_incomplete_splits.sql b/src/test/modules/gin/sql/gin_incomplete_splits.sql
new file mode 100644
index 00000000000..4e180b25195
--- /dev/null
+++ b/src/test/modules/gin/sql/gin_incomplete_splits.sql
@@ -0,0 +1,144 @@
+--
+-- The test uses a GIN index over int[]. The table contains arrays
+-- with integers from 1 to :next_i. Each integer occurs exactly once,
+-- no gaps or duplicates, although the index does contain some
+-- duplicate elements because some of the inserting transactions are
+-- rolled back during the test. The exact contents of the table depend
+-- on the physical layout of the index, which in turn depends at least
+-- on the block size, so instead of check for the exact contents, we
+-- check those invariants. :next_i psql variable is maintained at all
+-- times to hold the last inserted integer + 1.
+--
+
+-- This uses injection points to cause errors that leave some page
+-- splits in "incomplete" state
+create extension injection_points;
+
+-- Use the index for all the queries
+set enable_seqscan=off;
+
+-- Print a NOTICE whenever an incomplete split gets fixed
+SELECT injection_points_attach('gin-finish-incomplete-split', 'notice');
+
+--
+-- First create the test table and some helper functions
+--
+create table gin_incomplete_splits(i int4[]) with (autovacuum_enabled = off);
+
+create index on gin_incomplete_splits using gin (i) with (fastupdate = off);
+
+-- Creates an array with all integers from $1 (inclusive) $2 (exclusive)
+create function range_array(int, int) returns int[] language sql immutable as $$
+ select array_agg(g) from generate_series($1, $2 - 1) g
+$$;
+
+-- Inserts an array with 'n' rows to the test table. Pass :next_i as
+-- the first argument, returns the new value for :next_i.
+create function insert_n(first_i int, n int) returns int language plpgsql as $$
+begin
+ insert into gin_incomplete_splits values (range_array(first_i, first_i+n));
+ return first_i + n;
+end;
+$$;
+
+-- Inserts to the table until an insert fails. Like insert_n(), returns the
+-- new value for :next_i.
+create function insert_until_fail(next_i int, step int default 1) returns int language plpgsql as $$
+declare
+ i integer;
+begin
+ -- Insert arrays with 'step' elements each, until an error occurs.
+ loop
+ begin
+ select insert_n(next_i, step) into next_i;
+ exception when others then
+ raise notice 'failed with: %', sqlerrm;
+ exit;
+ end;
+
+ -- The caller is expected to set an injection point that eventuall
+ -- causes an error. But bail out if still no error after 10000
+ -- attempts, so that we don't get stuck in an infinite loop.
+ i := i + 1;
+ if i = 10000 then
+ raise 'no error on inserts after ';
+ end if;
+ end loop;
+
+ return next_i;
+end;
+$$;
+
+-- Check the invariants.
+create function verify(next_i int) returns bool language plpgsql as $$
+declare
+ a integer[];
+ elem integer;
+ c integer;
+begin
+ -- Perform a scan over the trailing part of the index, where the
+ -- possible incomplete splits are. (We don't check the whole table,
+ -- because that'd be pretty slow.)
+ c := 0;
+ -- Find all arrays that overlap with the last 200 inserted integers. Or
+ -- the next 100, which shouldn't exist.
+ for a in select i from gin_incomplete_splits where i && range_array(next_i - 200, next_i + 100)
+ loop
+ -- count all elements in those arrays in the window.
+ foreach elem in ARRAY a loop
+ if elem >= next_i then
+ raise 'unexpected value % in array', elem;
+ end if;
+ if elem >= next_i - 200 then
+ c := c + 1;
+ end if;
+ end loop;
+ end loop;
+ if c <> 200 then
+ raise 'unexpected count % ', c;
+ end if;
+
+ return true;
+end;
+$$;
+
+-- Insert one array to get started.
+select insert_n(1, 1000) as next_i
+\gset
+select verify(:next_i);
+
+
+--
+-- Test incomplete leaf split
+--
+SELECT injection_points_attach('gin-leave-leaf-split-incomplete', 'error');
+select insert_until_fail(:next_i) as next_i
+\gset
+SELECT injection_points_detach('gin-leave-leaf-split-incomplete');
+
+-- Verify that a scan works even though there's an incomplete split
+select verify(:next_i);
+
+-- Insert some more rows, finishing the split
+select insert_n(:next_i, 10) as next_i
+\gset
+-- Verify that a scan still works
+select verify(:next_i);
+
+
+--
+-- Test incomplete internal page split
+--
+SELECT injection_points_attach('gin-leave-internal-split-incomplete', 'error');
+select insert_until_fail(:next_i, 100) as next_i
+\gset
+SELECT injection_points_detach('gin-leave-internal-split-incomplete');
+
+ -- Verify that a scan works even though there's an incomplete split
+select verify(:next_i);
+
+-- Insert some more rows, finishing the split
+select insert_n(:next_i, 10) as next_i
+\gset
+-- Verify that a scan still works
+select verify(:next_i);
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index 397e0906e6f..8fbe742d385 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -5,6 +5,7 @@ subdir('commit_ts')
subdir('delay_execution')
subdir('dummy_index_am')
subdir('dummy_seclabel')
+subdir('gin')
subdir('injection_points')
subdir('ldap_password_func')
subdir('libpq_pipeline')