diff options
author | Peter Geoghegan <pg@bowt.ie> | 2019-03-20 10:04:01 -0700 |
---|---|---|
committer | Peter Geoghegan <pg@bowt.ie> | 2019-03-20 10:04:01 -0700 |
commit | dd299df8189bd00fbe54b72c64f43b6af2ffeccd (patch) | |
tree | 931ef720687d61cf5e75464fa0b1c1d75fb3f9d3 /src/test | |
parent | e5adcb789d80ba565ccacb1ed4341a7c29085238 (diff) | |
download | postgresql-dd299df8189bd00fbe54b72c64f43b6af2ffeccd.tar.gz postgresql-dd299df8189bd00fbe54b72c64f43b6af2ffeccd.zip |
Make heap TID a tiebreaker nbtree index column.
Make nbtree treat all index tuples as having a heap TID attribute.
Index searches can distinguish duplicates by heap TID, since heap TID is
always guaranteed to be unique. This general approach has numerous
benefits for performance, and is prerequisite to teaching VACUUM to
perform "retail index tuple deletion".
Naively adding a new attribute to every pivot tuple has unacceptable
overhead (it bloats internal pages), so suffix truncation of pivot
tuples is added. This will usually truncate away the "extra" heap TID
attribute from pivot tuples during a leaf page split, and may also
truncate away additional user attributes. This can increase fan-out,
especially in a multi-column index. Truncation can only occur at the
attribute granularity, which isn't particularly effective, but works
well enough for now. A future patch may add support for truncating
"within" text attributes by generating truncated key values using new
opclass infrastructure.
Only new indexes (BTREE_VERSION 4 indexes) will have insertions that
treat heap TID as a tiebreaker attribute, or will have pivot tuples
undergo suffix truncation during a leaf page split (on-disk
compatibility with versions 2 and 3 is preserved). Upgrades to version
4 cannot be performed on-the-fly, unlike upgrades from version 2 to
version 3. contrib/amcheck continues to work with version 2 and 3
indexes, while also enforcing stricter invariants when verifying version
4 indexes. These stricter invariants are the same invariants described
by "3.1.12 Sequencing" from the Lehman and Yao paper.
A later patch will enhance the logic used by nbtree to pick a split
point. This patch is likely to negatively impact performance without
smarter choices around the precise point to split leaf pages at. Making
these two mostly-distinct sets of enhancements into distinct commits
seems like it might clarify their design, even though neither commit is
particularly useful on its own.
The maximum allowed size of new tuples is reduced by an amount equal to
the space required to store an extra MAXALIGN()'d TID in a new high key
during leaf page splits. The user-facing definition of the "1/3 of a
page" restriction is already imprecise, and so does not need to be
revised. However, there should be a compatibility note in the v12
release notes.
Author: Peter Geoghegan
Reviewed-By: Heikki Linnakangas, Alexander Korotkov
Discussion: https://postgr.es/m/CAH2-WzkVb0Kom=R+88fDFb=JSxZMFvbHVC6Mn9LJ2n=X=kS-Uw@mail.gmail.com
Diffstat (limited to 'src/test')
-rw-r--r-- | src/test/regress/expected/btree_index.out | 34 | ||||
-rw-r--r-- | src/test/regress/expected/create_index.out | 13 | ||||
-rw-r--r-- | src/test/regress/expected/dependency.out | 4 | ||||
-rw-r--r-- | src/test/regress/expected/event_trigger.out | 4 | ||||
-rw-r--r-- | src/test/regress/expected/foreign_data.out | 9 | ||||
-rw-r--r-- | src/test/regress/expected/rowsecurity.out | 4 | ||||
-rw-r--r-- | src/test/regress/sql/btree_index.sql | 37 | ||||
-rw-r--r-- | src/test/regress/sql/create_index.sql | 14 | ||||
-rw-r--r-- | src/test/regress/sql/foreign_data.sql | 2 |
9 files changed, 63 insertions, 58 deletions
diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out index b21298a2a6b..ff443a476c5 100644 --- a/src/test/regress/expected/btree_index.out +++ b/src/test/regress/expected/btree_index.out @@ -199,28 +199,22 @@ reset enable_seqscan; reset enable_indexscan; reset enable_bitmapscan; -- --- Test B-tree page deletion. In particular, deleting a non-leaf page. +-- Test B-tree fast path (cache rightmost leaf page) optimization. -- --- First create a tree that's at least four levels deep. The text inserted --- is long and poorly compressible. That way only a few index tuples fit on --- each page, allowing us to get a tall tree with fewer pages. -create table btree_tall_tbl(id int4, t text); -create index btree_tall_idx on btree_tall_tbl (id, t) with (fillfactor = 10); -insert into btree_tall_tbl - select g, g::text || '_' || - (select string_agg(md5(i::text), '_') from generate_series(1, 50) i) -from generate_series(1, 100) g; --- Delete most entries, and vacuum. This causes page deletions. -delete from btree_tall_tbl where id < 950; -vacuum btree_tall_tbl; --- --- Test B-tree insertion with a metapage update (XLOG_BTREE_INSERT_META --- WAL record type). This happens when a "fast root" page is split. +-- First create a tree that's at least three levels deep (i.e. has one level +-- between the root and leaf levels). The text inserted is long. It won't be +-- compressed because we use plain storage in the table. Only a few index +-- tuples fit on each internal page, allowing us to get a tall tree with few +-- pages. (A tall tree is required to trigger caching.) -- --- The vacuum above should've turned the leaf page into a fast root. We just --- need to insert some rows to cause the fast root page to split. -insert into btree_tall_tbl (id, t) - select g, repeat('x', 100) from generate_series(1, 500) g; +-- The text column must be the leading column in the index, since suffix +-- truncation would otherwise truncate tuples on internal pages, leaving us +-- with a short tree. +create table btree_tall_tbl(id int4, t text); +alter table btree_tall_tbl alter COLUMN t set storage plain; +create index btree_tall_idx on btree_tall_tbl (t, id) with (fillfactor = 10); +insert into btree_tall_tbl select g, repeat('x', 250) +from generate_series(1, 130) g; -- -- Test vacuum_cleanup_index_scale_factor -- diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 5d4eb59a0ce..54d3eee1979 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -3225,11 +3225,22 @@ explain (costs off) CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint); INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i; ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d); +-- Delete many entries, and vacuum. This causes page deletions. DELETE FROM delete_test_table WHERE a > 40000; VACUUM delete_test_table; -DELETE FROM delete_test_table WHERE a > 10; +-- Delete most entries, and vacuum, deleting internal pages and creating "fast +-- root" +DELETE FROM delete_test_table WHERE a < 79990; VACUUM delete_test_table; -- +-- Test B-tree insertion with a metapage update (XLOG_BTREE_INSERT_META +-- WAL record type). This happens when a "fast root" page is split. This +-- also creates coverage for nbtree FSM page recycling. +-- +-- The vacuum above should've turned the leaf page into a fast root. We just +-- need to insert some rows to cause the fast root page to split. +INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,1000) i; +-- -- REINDEX (VERBOSE) -- CREATE TABLE reindex_verbose(id integer primary key); diff --git a/src/test/regress/expected/dependency.out b/src/test/regress/expected/dependency.out index 8e50f8ffbb0..8d31110b874 100644 --- a/src/test/regress/expected/dependency.out +++ b/src/test/regress/expected/dependency.out @@ -128,9 +128,9 @@ FROM pg_type JOIN pg_class c ON typrelid = c.oid WHERE typname = 'deptest_t'; -- doesn't work: grant still exists DROP USER regress_dep_user1; ERROR: role "regress_dep_user1" cannot be dropped because some objects depend on it -DETAIL: owner of default privileges on new relations belonging to role regress_dep_user1 in schema deptest +DETAIL: privileges for table deptest1 privileges for database regression -privileges for table deptest1 +owner of default privileges on new relations belonging to role regress_dep_user1 in schema deptest DROP OWNED BY regress_dep_user1; DROP USER regress_dep_user1; \set VERBOSITY terse diff --git a/src/test/regress/expected/event_trigger.out b/src/test/regress/expected/event_trigger.out index d0c9f9a67f8..f7891faa23c 100644 --- a/src/test/regress/expected/event_trigger.out +++ b/src/test/regress/expected/event_trigger.out @@ -187,9 +187,9 @@ ERROR: event trigger "regress_event_trigger" does not exist -- should fail, regress_evt_user owns some objects drop role regress_evt_user; ERROR: role "regress_evt_user" cannot be dropped because some objects depend on it -DETAIL: owner of event trigger regress_event_trigger3 +DETAIL: owner of user mapping for regress_evt_user on server useless_server owner of default privileges on new relations belonging to role regress_evt_user -owner of user mapping for regress_evt_user on server useless_server +owner of event trigger regress_event_trigger3 -- cleanup before next test -- these are all OK; the second one should emit a NOTICE drop event trigger if exists regress_event_trigger2; diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out index 4d82d3a7e84..0b7582accbd 100644 --- a/src/test/regress/expected/foreign_data.out +++ b/src/test/regress/expected/foreign_data.out @@ -441,8 +441,8 @@ ALTER SERVER s1 OWNER TO regress_test_indirect; RESET ROLE; DROP ROLE regress_test_indirect; -- ERROR ERROR: role "regress_test_indirect" cannot be dropped because some objects depend on it -DETAIL: owner of server s1 -privileges for foreign-data wrapper foo +DETAIL: privileges for foreign-data wrapper foo +owner of server s1 \des+ List of foreign servers Name | Owner | Foreign-data wrapper | Access privileges | Type | Version | FDW options | Description @@ -1995,16 +1995,13 @@ ERROR: cannot attach a permanent relation as partition of temporary relation "t DROP FOREIGN TABLE foreign_part; DROP TABLE temp_parted; -- Cleanup +\set VERBOSITY terse DROP SCHEMA foreign_schema CASCADE; DROP ROLE regress_test_role; -- ERROR ERROR: role "regress_test_role" cannot be dropped because some objects depend on it -DETAIL: privileges for server s4 -privileges for foreign-data wrapper foo -owner of user mapping for regress_test_role on server s6 DROP SERVER t1 CASCADE; NOTICE: drop cascades to user mapping for public on server t1 DROP USER MAPPING FOR regress_test_role SERVER s6; -\set VERBOSITY terse DROP FOREIGN DATA WRAPPER foo CASCADE; NOTICE: drop cascades to 5 other objects DROP SERVER s8 CASCADE; diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 2e170497c9d..bad5199d9ee 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -3503,8 +3503,8 @@ SELECT refclassid::regclass, deptype SAVEPOINT q; DROP ROLE regress_rls_eve; --fails due to dependency on POLICY p ERROR: role "regress_rls_eve" cannot be dropped because some objects depend on it -DETAIL: target of policy p on table tbl1 -privileges for table tbl1 +DETAIL: privileges for table tbl1 +target of policy p on table tbl1 ROLLBACK TO q; ALTER POLICY p ON tbl1 TO regress_rls_frank USING (true); SAVEPOINT q; diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql index 2b087be796c..19fbfa8b720 100644 --- a/src/test/regress/sql/btree_index.sql +++ b/src/test/regress/sql/btree_index.sql @@ -84,32 +84,23 @@ reset enable_indexscan; reset enable_bitmapscan; -- --- Test B-tree page deletion. In particular, deleting a non-leaf page. +-- Test B-tree fast path (cache rightmost leaf page) optimization. -- --- First create a tree that's at least four levels deep. The text inserted --- is long and poorly compressible. That way only a few index tuples fit on --- each page, allowing us to get a tall tree with fewer pages. -create table btree_tall_tbl(id int4, t text); -create index btree_tall_idx on btree_tall_tbl (id, t) with (fillfactor = 10); -insert into btree_tall_tbl - select g, g::text || '_' || - (select string_agg(md5(i::text), '_') from generate_series(1, 50) i) -from generate_series(1, 100) g; - --- Delete most entries, and vacuum. This causes page deletions. -delete from btree_tall_tbl where id < 950; -vacuum btree_tall_tbl; - +-- First create a tree that's at least three levels deep (i.e. has one level +-- between the root and leaf levels). The text inserted is long. It won't be +-- compressed because we use plain storage in the table. Only a few index +-- tuples fit on each internal page, allowing us to get a tall tree with few +-- pages. (A tall tree is required to trigger caching.) -- --- Test B-tree insertion with a metapage update (XLOG_BTREE_INSERT_META --- WAL record type). This happens when a "fast root" page is split. --- - --- The vacuum above should've turned the leaf page into a fast root. We just --- need to insert some rows to cause the fast root page to split. -insert into btree_tall_tbl (id, t) - select g, repeat('x', 100) from generate_series(1, 500) g; +-- The text column must be the leading column in the index, since suffix +-- truncation would otherwise truncate tuples on internal pages, leaving us +-- with a short tree. +create table btree_tall_tbl(id int4, t text); +alter table btree_tall_tbl alter COLUMN t set storage plain; +create index btree_tall_idx on btree_tall_tbl (t, id) with (fillfactor = 10); +insert into btree_tall_tbl select g, repeat('x', 250) +from generate_series(1, 130) g; -- -- Test vacuum_cleanup_index_scale_factor diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 67ecad8dd5e..4487421ef30 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -1146,12 +1146,24 @@ explain (costs off) CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint); INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i; ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d); +-- Delete many entries, and vacuum. This causes page deletions. DELETE FROM delete_test_table WHERE a > 40000; VACUUM delete_test_table; -DELETE FROM delete_test_table WHERE a > 10; +-- Delete most entries, and vacuum, deleting internal pages and creating "fast +-- root" +DELETE FROM delete_test_table WHERE a < 79990; VACUUM delete_test_table; -- +-- Test B-tree insertion with a metapage update (XLOG_BTREE_INSERT_META +-- WAL record type). This happens when a "fast root" page is split. This +-- also creates coverage for nbtree FSM page recycling. +-- +-- The vacuum above should've turned the leaf page into a fast root. We just +-- need to insert some rows to cause the fast root page to split. +INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,1000) i; + +-- -- REINDEX (VERBOSE) -- CREATE TABLE reindex_verbose(id integer primary key); diff --git a/src/test/regress/sql/foreign_data.sql b/src/test/regress/sql/foreign_data.sql index d6fb3fae4e1..1cc1f6e0129 100644 --- a/src/test/regress/sql/foreign_data.sql +++ b/src/test/regress/sql/foreign_data.sql @@ -805,11 +805,11 @@ DROP FOREIGN TABLE foreign_part; DROP TABLE temp_parted; -- Cleanup +\set VERBOSITY terse DROP SCHEMA foreign_schema CASCADE; DROP ROLE regress_test_role; -- ERROR DROP SERVER t1 CASCADE; DROP USER MAPPING FOR regress_test_role SERVER s6; -\set VERBOSITY terse DROP FOREIGN DATA WRAPPER foo CASCADE; DROP SERVER s8 CASCADE; \set VERBOSITY default |