Diffstat (limited to 'src/test/recovery/t/017_shm.pl')
-rw-r--r--   src/test/recovery/t/017_shm.pl   200
1 file changed, 200 insertions(+), 0 deletions(-)
diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl
new file mode 100644
index 00000000000..3cbe938ddd1
--- /dev/null
+++ b/src/test/recovery/t/017_shm.pl
@@ -0,0 +1,200 @@
+#
+# Tests of pg_shmem.h functions
+#
+use strict;
+use warnings;
+use IPC::Run 'run';
+use PostgresNode;
+use Test::More;
+use TestLib;
+use Time::HiRes qw(usleep);
+
+plan tests => 5;
+
+my $tempdir = TestLib::tempdir;
+my $port;
+
+# Log "ipcs" diffs on a best-effort basis, swallowing any error.
+my $ipcs_before = "$tempdir/ipcs_before";
+eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; };
+
+sub log_ipcs
+{
+ eval { run_log [ 'ipcs', '-am' ], '|', [ 'diff', $ipcs_before, '-' ] };
+ return;
+}
+
+# These tests need a $port such that nothing creates or removes a segment in
+# $port's IpcMemoryKey range while this test script runs. While there's no
+# way to ensure that in general, we do ensure that if PostgreSQL tests are the
+# only actors. With TCP, the first get_new_node picks a port number. With
+# Unix sockets, use a postmaster, $port_holder, to represent a key space
+# reservation. $port_holder holds a reservation on the key space of port
+# 1+$port_holder->port if it created the first IpcMemoryKey of its own port's
+# key space. If multiple copies of this test script run concurrently, they
+# will pick different ports. $port_holder postmasters use odd-numbered ports,
+# and tests use even-numbered ports. In the absence of collisions from other
+# shmget() activity, gnat starts with key 0x7d001 (512001), and flea starts
+# with key 0x7d002 (512002).
+my $port_holder;
+if (!$PostgresNode::use_tcp)
+{
+ my $lock_port;
+ for ($lock_port = 511; $lock_port < 711; $lock_port += 2)
+ {
+ $port_holder = PostgresNode->get_new_node(
+ "port${lock_port}_holder",
+ port => $lock_port,
+ own_host => 1);
+ $port_holder->init;
+ $port_holder->append_conf('postgresql.conf', 'max_connections = 5');
+ $port_holder->start;
+ # Match the AddToDataDirLockFile() call in sysv_shmem.c. Assume all
+ # systems not using sysv_shmem.c do use TCP.
+ my $shmem_key_line_prefix = sprintf("%9lu ", 1 + $lock_port * 1000);
+ last
+ if slurp_file($port_holder->data_dir . '/postmaster.pid') =~
+ /^$shmem_key_line_prefix/m;
+ $port_holder->stop;
+ }
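+ # For illustration: with $lock_port = 511, the loop above looks for a
+ # postmaster.pid line beginning "   511001 ", i.e. 1 + 511 * 1000 printed
+ # with "%9lu ".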
+ $port = $lock_port + 1;
+}
+
+# Node setup.
+sub init_start
+{
+ my $name = shift;
+ my $ret = PostgresNode->get_new_node($name, port => $port, own_host => 1);
+ defined($port) or $port = $ret->port; # same port for all nodes
+ $ret->init;
+ # Limit semaphore consumption, since we run several nodes concurrently.
+ $ret->append_conf('postgresql.conf', 'max_connections = 5');
+ $ret->start;
+ log_ipcs();
+ return $ret;
+}
+my $gnat = init_start 'gnat';
+my $flea = init_start 'flea';
+
+# Upon postmaster death, postmaster children exit automatically.
+$gnat->kill9;
+log_ipcs();
+$flea->restart; # flea ignores the shm key gnat abandoned.
+log_ipcs();
+poll_start($gnat); # gnat recycles its former shm key.
+log_ipcs();
+
+# After clean shutdown, the nodes swap shm keys.
+$gnat->stop;
+$flea->restart;
+log_ipcs();
+$gnat->start;
+log_ipcs();
+
+# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
+# Use a regress.c function to emulate the responsiveness of a backend working
+# through a CPU-intensive task.
+$gnat->safe_psql('postgres', <<EOSQL);
+CREATE FUNCTION wait_pid(int)
+ RETURNS void
+ AS '$ENV{REGRESS_SHLIB}'
+ LANGUAGE C STRICT;
+EOSQL
+my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
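+# wait_pid() loops until the given PID exits, so calling it on the backend's
+# own PID keeps this query running, yet responsive, until the backend dies.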
+my ($stdout, $stderr);
+my $slow_client = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
+ '-c', $slow_query
+ ],
+ '<',
+ \undef,
+ '>',
+ \$stdout,
+ '2>',
+ \$stderr,
+ IPC::Run::timeout(900)); # five times the poll_query_until timeout
+ok( $gnat->poll_query_until(
+ 'postgres',
+ "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'),
+ 'slow query started');
+my $slow_pid = $gnat->safe_psql('postgres',
+ "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");
+$gnat->kill9;
+unlink($gnat->data_dir . '/postmaster.pid');
+$gnat->rotate_logfile; # on Windows, can't open old log for writing
+log_ipcs();
+# Reject ordinary startup. Retry for the same reasons poll_start() does.
+my $pre_existing_msg = qr/pre-existing shared memory block/;
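+# Even without postmaster.pid, the new postmaster probes its usual shm keys,
+# finds the old segment with the orphaned backend still attached, and refuses
+# to start.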
+{
+ my $max_attempts = 180 * 10; # Retry every 0.1s for at least 180s.
+ my $attempts = 0;
+ while ($attempts < $max_attempts)
+ {
+ last
+ if $gnat->start(fail_ok => 1)
+ || slurp_file($gnat->logfile) =~ $pre_existing_msg;
+ usleep(100_000);
+ $attempts++;
+ }
+}
+like(slurp_file($gnat->logfile),
+ $pre_existing_msg, 'detected live backend via shared memory');
+# Reject single-user startup.
+my $single_stderr;
+ok( !run_log(
+ [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ],
+ '<', \('SELECT 1 + 1'), '2>', \$single_stderr),
+ 'live query blocks --single');
+print STDERR $single_stderr;
+like($single_stderr, $pre_existing_msg,
+ 'single-user mode detected live backend via shared memory');
+log_ipcs();
+# Fail to reject startup if shm key N has become available and we crash while
+# using key N+1. This is unwanted, but expected. Windows is immune, because
+# its GetSharedMemName() uses DataDir strings, not numeric keys.
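+# Here, gnat crashed while holding the second key; once flea releases the
+# first key, gnat's restart grabs that now-free key and never probes the
+# second one, to which the orphaned backend is still attached.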
+$flea->stop; # release first key
+is( $gnat->start(fail_ok => 1),
+ $TestLib::windows_os ? 0 : 1,
+ 'key turnover fools only sysv_shmem.c');
+$gnat->stop; # release first key (no-op on $TestLib::windows_os)
+$flea->start; # grab first key
+# cleanup
+TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
+$slow_client->finish; # client has detected backend termination
+log_ipcs();
+poll_start($gnat); # recycle second key
+
+$gnat->stop;
+$flea->stop;
+$port_holder->stop if $port_holder;
+log_ipcs();
+
+
+# We may need retries to start a new postmaster. Causes:
+# - kernel is slow to deliver SIGKILL
+# - postmaster parent is slow to waitpid()
+# - postmaster child is slow to exit in response to SIGQUIT
+# - postmaster child is slow to exit after postmaster death
+sub poll_start
+{
+ my ($node) = @_;
+
+ my $max_attempts = 180 * 10;
+ my $attempts = 0;
+
+ while ($attempts < $max_attempts)
+ {
+ $node->start(fail_ok => 1) && return 1;
+
+ # Wait 0.1 second before retrying.
+ usleep(100_000);
+
+ $attempts++;
+ }
+
+ # No success within 180 seconds. Try one last time without fail_ok, which
+ # will BAIL_OUT unless it succeeds.
+ $node->start && return 1;
+ return 0;
+}