#
# Tests related to WAL archiving and recovery.
#
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 13;
use Config;

# Build and start the primary node, with WAL archiving and streaming
# enabled and autovacuum switched off.
my $primary = get_new_node('master');
$primary->init(has_archiving => 1, allows_streaming => 1);
$primary->append_conf('postgresql.conf', 'autovacuum = off');
$primary->start;
my $primary_data = $primary->data_dir;

# Install, for now, an archive_command that is guaranteed to fail while
# archiving stays enabled.  A command that does not exist cannot be used,
# as the archiver process would then just exit without reporting the
# failure to pg_stat_archiver.  A plain "false" is not portable to Windows
# either.  The portable choice is a copy command that is known to work,
# pointed at a source path that does not exist.
my $copy_program = $TestLib::windows_os ? 'copy' : 'cp';
my $incorrect_command =
  qq{$copy_program "%p_does_not_exist" "%f_does_not_exist"};
$primary->safe_psql('postgres',
    qq{ ALTER SYSTEM SET archive_command TO '$incorrect_command'; SELECT pg_reload_conf(); }
);

# Remember which WAL segment is in use right now, then move on to a new
# segment.  The status files of the remembered segment are what the
# following checks use to follow the archiver's activity.
my $segment_name_1 = $primary->safe_psql('postgres',
    q{SELECT pg_walfile_name(pg_current_wal_lsn())});
my $segment_path_1       = 'pg_wal/archive_status/' . $segment_name_1;
my $segment_path_1_ready = $segment_path_1 . '.ready';
my $segment_path_1_done  = $segment_path_1 . '.done';
$primary->safe_psql('postgres',
    q{ CREATE TABLE mine AS SELECT generate_series(1,10) AS x; SELECT pg_switch_wal(); CHECKPOINT; }
);

# Wait for an archive failure.
my $archive_failure_seen = $primary->poll_query_until('postgres',
    q{SELECT failed_count > 0 FROM pg_stat_archiver}, 't');
die "Timed out while waiting for archiving to fail"
  unless $archive_failure_seen;

# With archiving failing, the first segment must still be flagged as
# pending (.ready) and must not be flagged as archived (.done).
my $primary_ready_file_1 = "$primary_data/$segment_path_1_ready";
my $primary_done_file_1  = "$primary_data/$segment_path_1_done";
ok(-f $primary_ready_file_1,
    ".ready file exists for WAL segment $segment_name_1 waiting to be archived"
);
ok(!-f $primary_done_file_1,
    ".done file does not exist for WAL segment $segment_name_1 waiting to be archived"
);

# The archiver statistics must show no success, and the first segment as
# the last one that failed.
my $failed_archiver_stats = $primary->safe_psql('postgres',
    q{ SELECT archived_count, last_failed_wal FROM pg_stat_archiver });
is($failed_archiver_stats, "0|$segment_name_1",
    "pg_stat_archiver failed to archive $segment_name_1");

# Crash the cluster; the next test checks that WAL segments which were
# not archived are not removed.
$primary->stop('immediate');

# The standby tests further down partially exercise recovery from a
# backup taken as a snapshot, with no pg_start/stop_backup.  A standby
# restored from it should first go through crash recovery and then switch
# to regular archive recovery.  The base backup is taken at this point, so
# that it carries the failing archive_command; the standby tests below
# rely on that.
$primary->backup_fs_cold('backup');

$primary->start;
ok(-f $primary_ready_file_1,
    ".ready file for WAL segment $segment_name_1 still exists after crash recovery on primary"
);

# Allow WAL archiving again and wait for a success.
$primary->safe_psql('postgres',
    q{ ALTER SYSTEM RESET archive_command; SELECT pg_reload_conf(); });
$primary->poll_query_until('postgres',
    q{SELECT archived_count FROM pg_stat_archiver}, '1')
  or die "Timed out while waiting for archiving to finish";

# Once archiving succeeded, the .ready status file of the first segment
# must have been replaced by a .done one, and pg_stat_archiver must
# report that segment as the last one archived.
ok(!-f "$primary_data/$segment_path_1_ready",
    ".ready file for archived WAL segment $segment_name_1 removed");

ok(-f "$primary_data/$segment_path_1_done",
    ".done file for archived WAL segment $segment_name_1 exists");

is( $primary->safe_psql(
        'postgres', q{ SELECT last_archived_wal FROM pg_stat_archiver }),
    $segment_name_1,
    "archive success reported in pg_stat_archiver for WAL segment $segment_name_1"
);

# Create some WAL activity and a new checkpoint so as the next standby can
# create a restartpoint.  As this standby starts in crash recovery because
# of the cold backup taken previously, it needs a clean restartpoint to deal
# with existing status files.
#
# The checkpoint is issued before the segment switch so that its record
# is part of the segment that gets archived; a checkpoint issued after the
# switch would land in a segment that is never archived in this test, out
# of reach of a standby that only restores from the archives.
my $segment_name_2 = $primary->safe_psql('postgres',
    q{SELECT pg_walfile_name(pg_current_wal_lsn())});
my $segment_path_2       = "pg_wal/archive_status/$segment_name_2";
my $segment_path_2_ready = "$segment_path_2.ready";
my $segment_path_2_done  = "$segment_path_2.done";
$primary->safe_psql(
    'postgres', q{
    INSERT INTO mine SELECT generate_series(10,20) AS x;
    CHECKPOINT;
});

# Switch to a new segment and keep the LSN returned by pg_switch_wal(),
# which lies within the segment just completed.  It is used below as the
# replay target proving that a standby has restored that segment.
my $primary_lsn =
  $primary->safe_psql('postgres', q{SELECT pg_switch_wal()});

$primary->poll_query_until('postgres',
    q{ SELECT last_archived_wal FROM pg_stat_archiver },
    $segment_name_2)
  or die "Timed out while waiting for archiving to finish";

# Test standby with archive_mode = on.
my $standby1 = get_new_node('standby');
$standby1->init_from_backup($primary, 'backup', has_restoring => 1);
$standby1->append_conf('postgresql.conf', "archive_mode = on");
my $standby1_data = $standby1->data_dir;
$standby1->start;

# Wait for the replay of the segment switch done previously, ensuring
# that all the segments needed have been restored from the archives.
# Without this wait, the checkpoint and the status file check that follow
# could run before the second segment has even been restored, making the
# check pass without testing anything.
$standby1->poll_query_until('postgres',
    qq{SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0},
    't')
  or die "Timed out while waiting for xlog replay on standby1";

$standby1->safe_psql('postgres', q{CHECKPOINT});

# Recovery with archive_mode=on should not create .ready files.
# Note that this segment did not exist in the backup.
my $standby1_ready_file_2 = "$standby1_data/$segment_path_2_ready";
ok(!-f $standby1_ready_file_2,
    ".ready file for WAL segment $segment_name_2 not created on standby when archive_mode=on on standby"
);

# Now exercise recovery with archive_mode = always, under which .ready
# files should always be kept as long as archiving is enabled.  The
# archive command is meant to fail here so that the .ready files persist:
# this node inherits the archive command stored in the earlier cold
# backup, and that command causes archiving failures.
my $standby2 = get_new_node('standby2');
$standby2->init_from_backup($primary, 'backup', has_restoring => 1);
$standby2->append_conf('postgresql.conf', 'archive_mode = always');
my $standby2_data = $standby2->data_dir;
$standby2->start;

$standby2->safe_psql('postgres', q{CHECKPOINT});

my $standby2_ready_file_1 = "$standby2_data/$segment_path_1_ready";
ok(-f $standby2_ready_file_1,
    ".ready file for WAL segment $segment_name_1 existing in backup is kept with archive_mode=always on standby"
);

# Start the next checks from clean archiver statistics.
$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')});

# Crash and restart the cluster, to check that the recovery step does not
# remove non-archived WAL segments on a standby where archiving is
# enabled.
$standby2->stop('immediate');
$standby2->start;

ok(-f $standby2_ready_file_1,
    "WAL segment still ready to archive after crash recovery on standby with archive_mode=always"
);

# Allow WAL archiving again, and wait for the segments to be archived.
$standby2->safe_psql('postgres',
    q{ ALTER SYSTEM RESET archive_command; SELECT pg_reload_conf(); });
my $standby2_caught_up = $standby2->poll_query_until('postgres',
    q{SELECT last_archived_wal FROM pg_stat_archiver},
    $segment_name_2);
die "Timed out while waiting for archiving to finish"
  unless $standby2_caught_up;

# The archiver statistics of the standby are expected to count two
# archived segments.
my $standby2_archived_count = $standby2->safe_psql('postgres',
    q{SELECT archived_count FROM pg_stat_archiver});
is($standby2_archived_count, '2',
    "correct number of WAL segments archived from standby");

# Neither of the two segments may still have a .ready status file.
my $standby2_ready_1 = "$standby2_data/$segment_path_1_ready";
my $standby2_ready_2 = "$standby2_data/$segment_path_2_ready";
ok(!(-f $standby2_ready_1 || -f $standby2_ready_2),
    ".ready files removed after archive success with archive_mode=always on standby"
);

# Both segments must now have a .done status file.
my $standby2_done_count = grep { -f $_ } (
    "$standby2_data/$segment_path_1_done",
    "$standby2_data/$segment_path_2_done");
ok($standby2_done_count == 2,
    ".done files created after archive success with archive_mode=always on standby"
);