src/bin/pg_rewind/t/008_min_recovery_point.pl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

# Copyright (c) 2021-2025, PostgreSQL Global Development Group

#
# Test situation where a target data directory contains
# WAL records beyond both the last checkpoint and the divergence
# point:
#
# Target WAL (TLI 2):
#
# backup ... Checkpoint A ... INSERT 'rewind this'
#            (TLI 1 -> 2)
#
#            ^ last common                        ^ minRecoveryPoint
#              checkpoint
#
# Source WAL (TLI 3):
#
# backup ... Checkpoint A ... Checkpoint B ... INSERT 'keep this'
#            (TLI 1 -> 2)     (TLI 2 -> 3)
#
#
# The last common checkpoint is Checkpoint A. But there is WAL on TLI 2
# after the last common checkpoint that needs to be rewound. We used to
# have a bug where minRecoveryPoint was ignored, and pg_rewind concluded
# that the target doesn't need rewinding in this scenario, because the
# last checkpoint on the target TLI was an ancestor of the source TLI.
#
#
# This test does not make use of RewindTest as it requires three
# nodes.

use strict;
use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

use File::Copy;

my $tmp_folder = PostgreSQL::Test::Utils::tempdir;

my $node_1 = PostgreSQL::Test::Cluster->new('node_1');
$node_1->init(allows_streaming => 1);
$node_1->append_conf(
	'postgresql.conf', qq(
wal_keep_size='100 MB'
));

$node_1->start;

# Create a couple of test tables
$node_1->safe_psql('postgres', 'CREATE TABLE public.foo (t TEXT)');
$node_1->safe_psql('postgres', 'CREATE TABLE public.bar (t TEXT)');
$node_1->safe_psql('postgres', "INSERT INTO public.bar VALUES ('in both')");

#
# Create node_2 and node_3 as standbys following node_1
#
my $backup_name = 'my_backup';
$node_1->backup($backup_name);

my $node_2 = PostgreSQL::Test::Cluster->new('node_2');
$node_2->init_from_backup($node_1, $backup_name, has_streaming => 1);
$node_2->start;

my $node_3 = PostgreSQL::Test::Cluster->new('node_3');
$node_3->init_from_backup($node_1, $backup_name, has_streaming => 1);
$node_3->start;

# Wait until node 3 has connected and caught up
$node_1->wait_for_catchup('node_3');

#
# Swap the roles of node_1 and node_3, so that node_1 follows node_3.
#
$node_1->stop('fast');
$node_3->promote;

# reconfigure node_1 as a standby following node_3
my $node_3_connstr = $node_3->connstr;
$node_1->append_conf(
	'postgresql.conf', qq(
primary_conninfo='$node_3_connstr'
));
$node_1->set_standby_mode();
$node_1->start();

# also reconfigure node_2 to follow node_3
$node_2->append_conf(
	'postgresql.conf', qq(
primary_conninfo='$node_3_connstr'
));
$node_2->restart();

#
# Promote node_1, to create a split-brain scenario.
#

# make sure node_1 is full caught up with node_3 first
$node_3->wait_for_catchup('node_1');

$node_1->promote;

#
# We now have a split-brain with two primaries. Insert a row on both to
# demonstratively create a split brain. After the rewind, we should only
# see the insert on 1, as the insert on node 3 is rewound away.
#
$node_1->safe_psql('postgres',
	"INSERT INTO public.foo (t) VALUES ('keep this')");
# 'bar' is unmodified in node 1, so it won't be overwritten by replaying the
# WAL from node 1.
$node_3->safe_psql('postgres',
	"INSERT INTO public.bar (t) VALUES ('rewind this')");

# Insert more rows in node 1, to bump up the XID counter. Otherwise, if
# rewind doesn't correctly rewind the changes made on the other node,
# we might fail to notice if the inserts are invisible because the XIDs
# are not marked as committed.
$node_1->safe_psql('postgres',
	"INSERT INTO public.foo (t) VALUES ('and this')");
$node_1->safe_psql('postgres',
	"INSERT INTO public.foo (t) VALUES ('and this too')");

# Wait for node 2 to catch up
$node_2->poll_query_until('postgres',
	q|SELECT COUNT(*) > 1 FROM public.bar|, 't');

# At this point node_2 will shut down without a shutdown checkpoint,
# but with WAL entries beyond the preceding shutdown checkpoint.
$node_2->stop('fast');
$node_3->stop('fast');

my $node_2_pgdata = $node_2->data_dir;
my $node_1_connstr = $node_1->connstr;

# Keep a temporary postgresql.conf or it would be overwritten during the rewind.
copy(
	"$node_2_pgdata/postgresql.conf",
	"$tmp_folder/node_2-postgresql.conf.tmp");

command_ok(
	[
		'pg_rewind',
		'--source-server' => $node_1_connstr,
		'--target-pgdata' => $node_2_pgdata,
		'--debug',
	],
	'run pg_rewind');

# Now move back postgresql.conf with old settings
move(
	"$tmp_folder/node_2-postgresql.conf.tmp",
	"$node_2_pgdata/postgresql.conf");

$node_2->start;

# Check contents of the test tables after rewind. The rows inserted in node 3
# before rewind should've been overwritten with the data from node 1.
my $result;
$result = $node_2->safe_psql('postgres', 'SELECT * FROM public.foo');
is( $result, qq(keep this
and this
and this too), 'table foo after rewind');

$result = $node_2->safe_psql('postgres', 'SELECT * FROM public.bar');
is($result, qq(in both), 'table bar after rewind');

done_testing();