#
# Tests of pg_shmem.h functions
#
use strict;
use warnings;
use IPC::Run 'run';
use PostgresNode;
use Test::More;
use TestLib;
use Time::HiRes qw(usleep);
# The skip message says it all: msys has no SIGKILL implementation, so the
# whole script is skipped there.  Everywhere else, announce the five checks.
if ($^O ne 'msys')
{
    plan tests => 5;
}
else
{
    plan skip_all => 'missing SIGKILL implementation';
}

# $port is left undefined here; it is picked later and shared by every node.
my $port;
my $tempdir = TestLib::tempdir;

# Log "ipcs" diffs on a best-effort basis, swallowing any error.  This first
# run stores the baseline listing that later diffs are taken against.
my $ipcs_before = "$tempdir/ipcs_before";
eval { run_log(['ipcs', '-am'], '>', $ipcs_before) };
# Pipe the current "ipcs -am" listing through "diff" against the baseline
# stored in $ipcs_before.  Purely diagnostic: any failure to run either
# command is swallowed by the eval, and nothing is returned.
sub log_ipcs
{
    my @ipcs_cmd = ('ipcs', '-am');
    my @diff_cmd = ('diff', $ipcs_before, '-');
    eval { run_log(\@ipcs_cmd, '|', \@diff_cmd) };
    return;
}
# These tests need a $port such that nothing creates or removes a segment in
# $port's IpcMemoryKey range while this test script runs. While there's no
# way to ensure that in general, we do ensure that if PostgreSQL tests are the
# only actors. With TCP, the first get_new_node picks a port number. With
# Unix sockets, use a postmaster, $port_holder, to represent a key space
# reservation. $port_holder holds a reservation on the key space of port
# 1+$port_holder->port if it created the first IpcMemoryKey of its own port's
# key space. If multiple copies of this test script run concurrently, they
# will pick different ports. $port_holder postmasters use odd-numbered ports,
# and tests use even-numbered ports. In the absence of collisions from other
# shmget() activity, gnat starts with key 0x7d001 (512001), and flea starts
# with key 0x7d002 (512002).
my $port_holder;
unless ($PostgresNode::use_tcp)
{
    # Walk the odd-numbered ports 511, 513, ... 709 until a holder postmaster
    # reports that it created the first shmem key of its own port's key space.
    my $lock_port = 511;
    while ($lock_port < 711)
    {
        $port_holder = PostgresNode->get_new_node(
            "port${lock_port}_holder",
            port     => $lock_port,
            own_host => 1);
        $port_holder->init;
        $port_holder->append_conf('postgresql.conf', 'max_connections = 5');
        $port_holder->start;

        # Match the AddToDataDirLockFile() call in sysv_shmem.c.  Assume all
        # systems not using sysv_shmem.c do use TCP.
        my $shmem_key_line_prefix = sprintf("%9lu ", 1 + $lock_port * 1000);
        my $pid_file_text =
          slurp_file($port_holder->data_dir . '/postmaster.pid');

        # Keep this holder running once its lock file shows the expected key.
        last if $pid_file_text =~ /^$shmem_key_line_prefix/m;

        # Otherwise give the port back and try the next odd-numbered one.
        $port_holder->stop;
        $lock_port += 2;
    }

    # Tests run on the even-numbered port right after the holder's port.
    $port = $lock_port + 1;
}
# Node setup.
# Create, initialize and start a node called $name, then log the ipcs diff.
# The first node created fixes $port when it is still undefined, so every
# later node is requested on that same port.  Returns the node object.
sub init_start
{
    my ($name) = @_;

    my $node =
      PostgresNode->get_new_node($name, port => $port, own_host => 1);
    $port = $node->port unless defined $port;    # same port for all nodes

    $node->init;

    # Limit semaphore consumption, since we run several nodes concurrently.
    $node->append_conf('postgresql.conf', 'max_connections = 5');
    $node->start;
    log_ipcs();

    return $node;
}
# Bring up the two nodes under test.  init_start requests the same $port for
# both, and logs the ipcs diff after each start.
my $gnat = init_start 'gnat';
my $flea = init_start 'flea';
# Upon postmaster death, postmaster children exit automatically.
$gnat->kill9;
log_ipcs();
$flea->restart; # flea ignores the shm key gnat abandoned.
log_ipcs();
# poll_start retries, since gnat may not be startable immediately after the
# SIGKILL above.
poll_start($gnat); # gnat recycles its former shm key.
log_ipcs();
# After clean shutdown, the nodes swap shm keys.
# gnat is stopped first, flea is restarted while gnat is down, and only then
# is gnat started again; the ipcs diff is logged after each start.
$gnat->stop;
$flea->restart;
log_ipcs();
$gnat->start;
log_ipcs();
# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
# Use a regress.c function to emulate the responsiveness of a backend working
# through a CPU-intensive task.
my $create_wait_pid = <<EOSQL;
CREATE FUNCTION wait_pid(int)
RETURNS void
AS '$ENV{REGRESS_SHLIB}'
LANGUAGE C STRICT;
EOSQL
$gnat->safe_psql('postgres', $create_wait_pid);

# Launch a background psql whose backend waits on its own pid, so the backend
# is still running after its postmaster is killed below.
my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
my ($stdout, $stderr);
my @slow_psql_cmd = (
    'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
    '-c', $slow_query);
my $slow_client = IPC::Run::start(
    \@slow_psql_cmd,
    '<', \undef,
    '>', \$stdout,
    '2>', \$stderr,
    IPC::Run::timeout(900));    # five times the poll_query_until timeout

# Wait until the slow query shows up in pg_stat_activity, then note its pid
# so the backend can be signalled during cleanup.
my $slow_query_probe =
  "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'";
my $slow_query_seen =
  $gnat->poll_query_until('postgres', $slow_query_probe, '1');
ok($slow_query_seen, 'slow query started');
my $slow_pid = $gnat->safe_psql('postgres',
    "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");

# Kill the postmaster and remove its lock file, leaving only the live backend.
$gnat->kill9;
unlink($gnat->data_dir . '/postmaster.pid');
$gnat->rotate_logfile;    # on Windows, can't open old log for writing
log_ipcs();
# Reject ordinary startup.  Retry for the same reasons poll_start() does.
my $pre_existing_msg = qr/pre-existing shared memory block/;

# Retry every 0.1s for at least 180s.  The wait ends early either when the
# node unexpectedly starts or when the expected message reaches its log.
for my $attempt (1 .. 180 * 10)
{
    last if $gnat->start(fail_ok => 1);
    last if slurp_file($gnat->logfile) =~ $pre_existing_msg;
    usleep(100_000);
}
my $startup_log = slurp_file($gnat->logfile);
like($startup_log, $pre_existing_msg,
    'detected live backend via shared memory');

# Reject single-user startup.
my $single_stderr;
my @single_user_cmd =
  ('postgres', '--single', '-D', $gnat->data_dir, 'template1');
my $single_user_ran = run_log(
    \@single_user_cmd,
    '<', \('SELECT 1 + 1'),
    '2>', \$single_stderr);
ok(!$single_user_ran, 'live query blocks --single');

# Echo the captured stderr so it is visible alongside the test output.
print STDERR $single_stderr;
like($single_stderr, $pre_existing_msg,
    'single-user mode detected live backend via shared memory');
log_ipcs();
# Fail to reject startup if shm key N has become available and we crash while
# using key N+1.  This is unwanted, but expected.  Windows is immune, because
# its GetSharedMemName() uses DataDir strings, not numeric keys.
$flea->stop; # release first key
# The start is expected to succeed (1) everywhere except Windows (0).
is( $gnat->start(fail_ok => 1),
$TestLib::windows_os ? 0 : 1,
'key turnover fools only sysv_shmem.c');
$gnat->stop; # release first key (no-op on $TestLib::windows_os)
$flea->start; # grab first key
# Cleanup: ask pg_ctl to send QUIT to the slow backend recorded in $slow_pid,
# then reap the background psql client that was running it.
TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
$slow_client->finish; # client has detected backend termination
log_ipcs();
# poll_start retries because the signalled backend may take a moment to exit.
poll_start($gnat); # recycle second key
$gnat->stop;
$flea->stop;
# $port_holder is only set when Unix sockets are in use.
$port_holder->stop if $port_holder;
log_ipcs();
# We may need retries to start a new postmaster. Causes:
# - kernel is slow to deliver SIGKILL
# - postmaster parent is slow to waitpid()
# - postmaster child is slow to exit in response to SIGQUIT
# - postmaster child is slow to exit after postmaster death
# Start $node, retrying on failure.  Up to 1800 attempts are made with
# fail_ok set, pausing 0.1 second after each failure (about 180 seconds in
# total).  If none succeeds, one final attempt is made without fail_ok.
# Returns 1 once any attempt succeeds, or 0 if the final attempt returns
# false.
sub poll_start
{
    my ($node) = @_;

    my $max_attempts = 180 * 10;
    for my $attempt (1 .. $max_attempts)
    {
        return 1 if $node->start(fail_ok => 1);

        # Wait 0.1 second before retrying.
        usleep(100_000);
    }

    # No success within 180 seconds.  Try one last time without fail_ok, which
    # will BAIL_OUT unless it succeeds.
    return $node->start ? 1 : 0;
}
|