-rw-r--r--   src/test/recovery/meson.build                   |   1 -
-rw-r--r--   src/test/recovery/t/043_vacuum_horizon_floor.pl | 268 ---
2 files changed, 0 insertions(+), 269 deletions(-)
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 1d55d6bf560..b1eb77b1ec1 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -51,7 +51,6 @@ tests += {
       't/040_standby_failover_slots_sync.pl',
       't/041_checkpoint_at_promote.pl',
       't/042_low_level_backup.pl',
-      't/043_vacuum_horizon_floor.pl',
     ],
   },
 }
diff --git a/src/test/recovery/t/043_vacuum_horizon_floor.pl b/src/test/recovery/t/043_vacuum_horizon_floor.pl
deleted file mode 100644
index 7ccf5ca850c..00000000000
--- a/src/test/recovery/t/043_vacuum_horizon_floor.pl
+++ /dev/null
@@ -1,268 +0,0 @@
-use strict;
-use warnings;
-use PostgreSQL::Test::Cluster;
-use Test::More;
-
-# Test that vacuum prunes away all dead tuples killed before OldestXmin
-#
-# This test creates a table on a primary, updates the table to generate dead
-# tuples for vacuum, and then, during the vacuum, uses the replica to force
-# GlobalVisState->maybe_needed on the primary to move backwards and precede
-# the value of OldestXmin set at the beginning of vacuuming the table.
-
-# Set up nodes
-my $node_primary = PostgreSQL::Test::Cluster->new('primary');
-$node_primary->init(allows_streaming => 'physical');
-
-$node_primary->append_conf(
-    'postgresql.conf', qq[
-hot_standby_feedback = on
-log_recovery_conflict_waits = true
-autovacuum = off
-log_min_messages = INFO
-maintenance_work_mem = 1024
-]);
-$node_primary->start;
-
-my $node_replica = PostgreSQL::Test::Cluster->new('standby');
-
-$node_primary->backup('my_backup');
-$node_replica->init_from_backup($node_primary, 'my_backup',
-    has_streaming => 1);
-
-$node_replica->start;
-
-my $test_db = "test_db";
-$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db");
-
-# Save the original connection info for later use
-my $orig_conninfo = $node_primary->connstr();
-
-my $table1 = "vac_horizon_floor_table";
-
-# Long-running Primary Session A
-my $psql_primaryA =
-    $node_primary->background_psql($test_db, on_error_stop => 1);
-
-# Long-running Primary Session B
-my $psql_primaryB =
-    $node_primary->background_psql($test_db, on_error_stop => 1);
-
-# The TIDStore vacuum uses to store dead items is optimized for its target
-# system. On a 32-bit system, our example requires twice as many pages with
-# the same number of dead items per page to fill the TIDStore and trigger a
-# second round of index vacuuming.
-my $is_64bit = $node_primary->safe_psql($test_db,
-    qq[SELECT typbyval FROM pg_type WHERE typname = 'int8';]);
-
-my $nrows = $is_64bit eq 't' ? 400000 : 800000;
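An aside for readers reproducing this: the row count above only matters because the configuration at the top pins maintenance_work_mem at its 1024 kB floor, which caps vacuum's dead-item store and is what forces multiple index-vacuum passes. A minimal sketch of confirming that floor (reusing the test's own $node_primary/$test_db variables; the pg_settings query is stock PostgreSQL and is not part of the deleted file):

    # maintenance_work_mem cannot be set below 1024 kB; at that cap the
    # dead-item store overflows long before $nrows dead tuples are collected,
    # forcing more than one round of index vacuuming.
    my $mwm = $node_primary->safe_psql($test_db,
        "SELECT setting || ' ' || unit FROM pg_settings WHERE name = 'maintenance_work_mem';");
    note "maintenance_work_mem = $mwm";    # expect "1024 kB"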
-
-# Because vacuum's first pass, pruning, is where we use the GlobalVisState to
-# check tuple visibility, GlobalVisState->maybe_needed must move backwards
-# during pruning before checking the visibility for a tuple which would have
-# been considered HEAPTUPLE_DEAD prior to maybe_needed moving backwards but
-# HEAPTUPLE_RECENTLY_DEAD compared to the new, older value of maybe_needed.
-#
-# We must not only force the horizon on the primary to move backwards but
-# also force the vacuuming backend's GlobalVisState to be updated.
-# GlobalVisState is forced to update during index vacuuming.
-#
-# _bt_pendingfsm_finalize() calls GetOldestNonRemovableTransactionId() at the
-# end of a round of index vacuuming, updating the backend's GlobalVisState
-# and, in our case, moving maybe_needed backwards.
-#
-# Then vacuum's first (pruning) pass will continue and pruning will find our
-# later inserted and updated tuple HEAPTUPLE_RECENTLY_DEAD when compared to
-# maybe_needed but HEAPTUPLE_DEAD when compared to OldestXmin.
-#
-# Thus, we must force at least two rounds of index vacuuming to ensure that
-# some tuple visibility checks will happen after a round of index vacuuming.
-# To accomplish this, we set maintenance_work_mem to its minimum value and
-# insert and update enough rows that we force at least one round of index
-# vacuuming before getting to a dead tuple which was killed after the standby
-# is disconnected.
-$node_primary->safe_psql($test_db, qq[
-    CREATE TABLE ${table1}(col1 int)
-        WITH (autovacuum_enabled=false, fillfactor=10);
-    INSERT INTO $table1 VALUES(7);
-    INSERT INTO $table1 SELECT generate_series(1, $nrows) % 3;
-    CREATE INDEX on ${table1}(col1);
-    UPDATE $table1 SET col1 = 3 WHERE col1 = 0;
-    INSERT INTO $table1 VALUES(7);
-]);
-
-# We will later move the primary forward while the standby is disconnected.
-# For now, however, there is no reason not to wait for the standby to catch
-# up.
-my $primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);
-
-# Test that the WAL receiver is up and running.
-$node_replica->poll_query_until($test_db, qq[
-    select exists (select * from pg_stat_wal_receiver);], 't');
-
-# Set primary_conninfo to something invalid on the replica and reload the
-# config. Once the config is reloaded, the startup process will force the WAL
-# receiver to restart and it will be unable to reconnect because of the
-# invalid connection information.
-$node_replica->safe_psql($test_db, qq[
-    ALTER SYSTEM SET primary_conninfo = '';
-    SELECT pg_reload_conf();
-]);
-
-# Wait until the WAL receiver has shut down and been unable to start up again.
-$node_replica->poll_query_until($test_db, qq[
-    select exists (select * from pg_stat_wal_receiver);], 'f');
-
-# Now insert and update a tuple which will be visible to the vacuum on the
-# primary but which will have xmax newer than the oldest xmin on the standby
-# that was recently disconnected.
-my $res = $psql_primaryA->query_safe(
-    qq[
-        INSERT INTO $table1 VALUES (99);
-        UPDATE $table1 SET col1 = 100 WHERE col1 = 99;
-        SELECT 'after_update';
-    ]);
-
-# Make sure the UPDATE finished
-like($res, qr/^after_update$/m, "UPDATE occurred on primary session A");
-
-# Open a cursor on the primary whose pin will keep VACUUM from getting a
-# cleanup lock on the first page of the relation. We want VACUUM to be able
-# to start, calculate initial values for OldestXmin and GlobalVisState and
-# then be unable to proceed with pruning our dead tuples. This will allow us
-# to reconnect the standby and push the horizon back before we start actual
-# pruning and vacuuming.
-my $primary_cursor1 = "vac_horizon_floor_cursor1";
-
-# The first value inserted into the table was a 7, so FETCH FORWARD should
-# return a 7. That's how we know the cursor has a pin.
-$res = $psql_primaryB->query_safe(
-    qq[
-        BEGIN;
-        DECLARE $primary_cursor1 CURSOR FOR SELECT * FROM $table1 WHERE col1 = 7;
-        FETCH $primary_cursor1;
-    ]);
-
-is($res, 7, qq[Cursor query returned $res. Expected value 7.]);
-
-# Get the PID of the session which will run the VACUUM FREEZE so that we can
-# use it to filter pg_stat_activity later.
-my $vacuum_pid = $psql_primaryA->query_safe("SELECT pg_backend_pid();");
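The cursor pin above is the linchpin of the race: it stalls VACUUM after it has computed OldestXmin but before any pruning happens. The same pattern in isolation, as a minimal sketch outside this test (hypothetical $node and table t):

    # Session holding a pin: a suspended cursor keeps the page it last read
    # pinned until its transaction ends.
    my $holder = $node->background_psql('postgres');
    $holder->query_safe(qq[
        BEGIN;
        DECLARE c CURSOR FOR SELECT * FROM t;
        FETCH c;
    ]);
    # A concurrent VACUUM (FREEZE) t now blocks waiting for a cleanup lock on
    # that page, reporting wait_event = 'BufferPin' in pg_stat_activity until
    # the cursor advances or the holding transaction commits.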
-
-# Now start a VACUUM FREEZE on the primary. It will call vacuum_get_cutoffs()
-# and establish values of OldestXmin and GlobalVisState which are newer than
-# all of our dead tuples. Then it will be unable to get a cleanup lock to
-# start pruning, so it will hang.
-#
-# We use VACUUM FREEZE because it will wait for a cleanup lock instead of
-# skipping the page pinned by the cursor. Note that this works because the
-# target tuple's xmax precedes OldestXmin, which ensures that
-# lazy_scan_noprune() will return false and we will wait for the cleanup
-# lock.
-$psql_primaryA->{stdin} .= qq[
-    VACUUM (VERBOSE, FREEZE) $table1;
-    \\echo VACUUM
-    ];
-
-# Make sure the VACUUM command makes it to the server.
-$psql_primaryA->{run}->pump_nb();
-
-# Make sure that the VACUUM has already called vacuum_get_cutoffs() and is
-# just waiting on the lock to start vacuuming. We don't want the standby to
-# re-establish a connection to the primary and push the horizon back until
-# we've saved initial values in GlobalVisState and calculated OldestXmin.
-$node_primary->poll_query_until($test_db,
-    qq[
-    SELECT count(*) >= 1 FROM pg_stat_activity
-        WHERE pid = $vacuum_pid
-        AND wait_event = 'BufferPin';
-    ],
-    't');
-
-# Ensure the WAL receiver is still not active on the replica.
-$node_replica->poll_query_until($test_db, qq[
-    SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 'f');
-
-# Allow the WAL receiver connection to re-establish.
-$node_replica->safe_psql($test_db, qq[
-    ALTER SYSTEM SET primary_conninfo = '$orig_conninfo';
-    SELECT pg_reload_conf();
-]);
-
-# Ensure the new WAL receiver has connected.
-$node_replica->poll_query_until($test_db, qq[
-    SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 't');
-
-# Once the WAL sender is shown on the primary, the replica should have
-# connected with the primary and pushed the horizon backward. Primary Session
-# A won't see that until the VACUUM FREEZE proceeds and does its first round
-# of index vacuuming.
-$node_primary->poll_query_until($test_db, qq[
-    SELECT EXISTS (SELECT * FROM pg_stat_replication);], 't');
-
-# Move the cursor forward to the next 7. We inserted the 7 much later, so
-# advancing the cursor should allow vacuum to proceed vacuuming most pages of
-# the relation. Because we set maintenance_work_mem sufficiently low, we
-# expect that a round of index vacuuming has happened and that the vacuum is
-# now waiting for the cursor to release its pin on the last page of the
-# relation.
-$res = $psql_primaryB->query_safe("FETCH $primary_cursor1");
-is($res, 7,
-    qq[Cursor query returned $res from second fetch. Expected value 7.]);
-
-# Prevent the test from incorrectly passing by confirming that we did indeed
-# do a pass of index vacuuming.
-$node_primary->poll_query_until($test_db, qq[
-    SELECT index_vacuum_count > 0
-    FROM pg_stat_progress_vacuum
-    WHERE datname = '$test_db' AND relid::regclass = '$table1'::regclass;
-], 't');
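While the vacuum is stalled here, its progress is visible from another session. A small sketch using the test's own variables ($node_primary, $test_db, $table1); pg_stat_progress_vacuum and its phase/index_vacuum_count columns are standard, though this snippet is illustrative rather than part of the deleted file:

    # safe_psql returns psql's unaligned output, so a two-column row comes
    # back as 'phase|count'.
    my ($phase, $passes) = split /\|/,
        $node_primary->safe_psql($test_db, qq[
            SELECT phase, index_vacuum_count FROM pg_stat_progress_vacuum
            WHERE relid = '$table1'::regclass;
        ]);
    note "vacuum phase: $phase; index passes so far: $passes";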
-
-# Commit the transaction with the open cursor so that the VACUUM can finish.
-$psql_primaryB->query_until(
-    qr/^commit$/m,
-    qq[
-        COMMIT;
-        \\echo commit
-    ]);
-
-# VACUUM proceeds with pruning and does a visibility check on each tuple. In
-# older versions of Postgres, pruning found our final dead tuple
-# non-removable (HEAPTUPLE_RECENTLY_DEAD) since its xmax is after the new
-# value of maybe_needed. Then heap_prepare_freeze_tuple() would decide the
-# tuple's xmax should be frozen because it precedes OldestXmin. Vacuum would
-# then error out in heap_pre_freeze_checks() with "cannot freeze committed
-# xmax". This was fixed by changing pruning to treat all
-# HEAPTUPLE_RECENTLY_DEAD tuples whose xmax precedes OldestXmin as
-# HEAPTUPLE_DEAD and to remove them.
-
-# With the fix, VACUUM should finish successfully, incrementing the table's
-# vacuum_count.
-$node_primary->poll_query_until($test_db,
-    qq[
-    SELECT vacuum_count > 0
-    FROM pg_stat_all_tables WHERE relname = '${table1}';
-    ],
-    't');
-
-$primary_lsn = $node_primary->lsn('flush');
-
-# Make sure something causes us to flush.
-$node_primary->safe_psql($test_db, "INSERT INTO $table1 VALUES (1);");
-
-# Nothing on the replica should cause a recovery conflict, so this should
-# finish successfully.
-$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);
-
-## Shut down psqls
-$psql_primaryA->quit;
-$psql_primaryB->quit;
-
-$node_replica->stop();
-$node_primary->stop();
-
-done_testing();
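After a successful run, one can additionally confirm that the contested tuples were removed rather than merely frozen. A final sketch with the test's variables; n_dead_tup in pg_stat_all_tables is standard, but as an asynchronously updated statistics counter it is only approximate, so this is not part of the test proper:

    # n_dead_tup should drop to (or near) zero once VACUUM has removed the
    # dead tuples; stats arrive asynchronously, so treat this as
    # informational rather than a hard assertion.
    my $dead = $node_primary->safe_psql($test_db,
        "SELECT n_dead_tup FROM pg_stat_all_tables WHERE relname = '$table1';");
    note "dead tuples remaining in $table1: $dead";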