diff options
Diffstat (limited to 'src/test/recovery/t/017_shm.pl')
-rw-r--r-- | src/test/recovery/t/017_shm.pl | 200 |
1 files changed, 200 insertions, 0 deletions
#
# Tests of pg_shmem.h functions
#
# Exercises postmaster startup against shared memory segments left behind by
# crashed or still-running servers: key recycling after SIGKILL, key swapping
# after clean shutdown, and refusal to start (both normal and --single) while
# a backend of a dead postmaster is still attached to the old segment.
#
use strict;
use warnings;
use IPC::Run 'run';
use PostgresNode;
use Test::More;
use TestLib;
use Time::HiRes qw(usleep);

plan tests => 5;

my $tempdir = TestLib::tempdir;
my $port;

# Log "ipcs" diffs on a best-effort basis, swallowing any error.
my $ipcs_before = "$tempdir/ipcs_before";
eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; };

# Log the difference between the current "ipcs -am" output and the snapshot
# taken at script start.  Purely diagnostic: any failure (e.g. no "ipcs" or
# "diff" program) is swallowed by the eval.  Returns nothing.
sub log_ipcs
{
    eval { run_log [ 'ipcs', '-am' ], '|', [ 'diff', $ipcs_before, '-' ] };
    return;
}

# These tests need a $port such that nothing creates or removes a segment in
# $port's IpcMemoryKey range while this test script runs.  While there's no
# way to ensure that in general, we do ensure that if PostgreSQL tests are the
# only actors.  With TCP, the first get_new_node picks a port number.  With
# Unix sockets, use a postmaster, $port_holder, to represent a key space
# reservation.  $port_holder holds a reservation on the key space of port
# 1+$port_holder->port if it created the first IpcMemoryKey of its own port's
# key space.  If multiple copies of this test script run concurrently, they
# will pick different ports.  $port_holder postmasters use odd-numbered ports,
# and tests use even-numbered ports.  In the absence of collisions from other
# shmget() activity, gnat starts with key 0x7d001 (512001), and flea starts
# with key 0x7d002 (512002).
#
# NOTE(review): if no candidate port yields the first key of its key space,
# the loop falls through with $lock_port == 711 and the last holder already
# stopped, so the tests proceed on port 712 without any reservation.
my $port_holder;
if (!$PostgresNode::use_tcp)
{
    my $lock_port;
    for ($lock_port = 511; $lock_port < 711; $lock_port += 2)
    {
        $port_holder = PostgresNode->get_new_node(
            "port${lock_port}_holder",
            port     => $lock_port,
            own_host => 1);
        $port_holder->init;
        $port_holder->append_conf('postgresql.conf', 'max_connections = 5');
        $port_holder->start;
        # Match the AddToDataDirLockFile() call in sysv_shmem.c.  Assume all
        # systems not using sysv_shmem.c do use TCP.
        my $shmem_key_line_prefix = sprintf("%9lu ", 1 + $lock_port * 1000);
        last
          if slurp_file($port_holder->data_dir . '/postmaster.pid') =~
          /^$shmem_key_line_prefix/m;
        $port_holder->stop;
    }
    $port = $lock_port + 1;
}

# Node setup.
#
# Create, initialize and start a node called $name, then log the ipcs diff.
# The first call fixes $port (if the reservation loop above did not already);
# every later node reuses it.  Returns the started PostgresNode.
sub init_start
{
    my $name = shift;
    my $ret = PostgresNode->get_new_node($name, port => $port, own_host => 1);
    defined($port) or $port = $ret->port;    # same port for all nodes
    $ret->init;
    # Limit semaphore consumption, since we run several nodes concurrently.
    $ret->append_conf('postgresql.conf', 'max_connections = 5');
    $ret->start;
    log_ipcs();
    return $ret;
}
my $gnat = init_start 'gnat';
my $flea = init_start 'flea';

# Upon postmaster death, postmaster children exit automatically.
$gnat->kill9;
log_ipcs();
$flea->restart;       # flea ignores the shm key gnat abandoned.
log_ipcs();
poll_start($gnat);    # gnat recycles its former shm key.
log_ipcs();

# After clean shutdown, the nodes swap shm keys.
$gnat->stop;
$flea->restart;
log_ipcs();
$gnat->start;
log_ipcs();

# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
# Use a regress.c function to emulate the responsiveness of a backend working
# through a CPU-intensive task.
$gnat->safe_psql('postgres', <<EOSQL);
CREATE FUNCTION wait_pid(int)
   RETURNS void
   AS '$ENV{REGRESS_SHLIB}'
   LANGUAGE C STRICT;
EOSQL
my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
my ($stdout, $stderr);
my $slow_client = IPC::Run::start(
    [
        'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
        '-c', $slow_query
    ],
    '<',
    \undef,
    '>',
    \$stdout,
    '2>',
    \$stderr,
    IPC::Run::timeout(900));    # five times the poll_query_until timeout
ok( $gnat->poll_query_until(
        'postgres',
        "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'),
    'slow query started');
my $slow_pid = $gnat->safe_psql('postgres',
    "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");
$gnat->kill9;
unlink($gnat->data_dir . '/postmaster.pid');
$gnat->rotate_logfile;    # on Windows, can't open old log for writing
log_ipcs();
# Reject ordinary startup.  Retry for the same reasons poll_start() does.
my $pre_existing_msg = qr/pre-existing shared memory block/;
{
    my $max_attempts = 180 * 10;    # Retry every 0.1s for at least 180s.
    my $attempts     = 0;
    while ($attempts < $max_attempts)
    {
        last
          if $gnat->start(fail_ok => 1)
          || slurp_file($gnat->logfile) =~ $pre_existing_msg;
        usleep(100_000);
        $attempts++;
    }
}
like(slurp_file($gnat->logfile),
    $pre_existing_msg, 'detected live backend via shared memory');
# Reject single-user startup.  Close the child's stdin instead of feeding it
# a query: the server is expected to refuse to start, so it can exit before
# reading anything, and writing to the stdin of an already-exited process
# could kill or fail this script with SIGPIPE/EPIPE.
my $single_stderr;
ok( !run_log(
        [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ],
        '<', \undef, '2>', \$single_stderr),
    'live query blocks --single');
print STDERR $single_stderr;
like($single_stderr, $pre_existing_msg,
    'single-user mode detected live backend via shared memory');
log_ipcs();
# Fail to reject startup if shm key N has become available and we crash while
# using key N+1.  This is unwanted, but expected.  Windows is immune, because
# its GetSharedMemName() uses DataDir strings, not numeric keys.
$flea->stop;    # release first key
is( $gnat->start(fail_ok => 1),
    $TestLib::windows_os ? 0 : 1,
    'key turnover fools only sysv_shmem.c');
$gnat->stop;     # release first key (no-op on $TestLib::windows_os)
$flea->start;    # grab first key
# cleanup
TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
$slow_client->finish;    # client has detected backend termination
log_ipcs();
poll_start($gnat);       # recycle second key

$gnat->stop;
$flea->stop;
$port_holder->stop if $port_holder;
log_ipcs();


# We may need retries to start a new postmaster.  Causes:
# - kernel is slow to deliver SIGKILL
# - postmaster parent is slow to waitpid()
# - postmaster child is slow to exit in response to SIGQUIT
# - postmaster child is slow to exit after postmaster death
#
# Try to start $node every 0.1 second, for up to 180 * 10 attempts.
# Returns 1 as soon as a start succeeds; returns 0 only if the final,
# non-fail_ok attempt returns false.
sub poll_start
{
    my ($node) = @_;

    my $max_attempts = 180 * 10;
    my $attempts     = 0;

    while ($attempts < $max_attempts)
    {
        $node->start(fail_ok => 1) && return 1;

        # Wait 0.1 second before retrying.
        usleep(100_000);

        $attempts++;
    }

    # No success within 180 seconds.  Try one last time without fail_ok, which
    # will BAIL_OUT unless it succeeds.
    $node->start && return 1;
    return 0;
}