aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/src/sgml/config.sgml17
-rw-r--r--src/backend/postmaster/postmaster.c5
-rw-r--r--src/backend/storage/file/fd.c12
-rw-r--r--src/backend/utils/misc/guc.c9
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample2
-rw-r--r--src/include/postmaster/postmaster.h1
-rw-r--r--src/test/recovery/t/022_crash_temp_files.pl192
7 files changed, 233 insertions, 5 deletions
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 8603cf3f941..863ac31c6b4 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -9671,6 +9671,23 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
</listitem>
</varlistentry>
+ <varlistentry id="guc-remove-temp-files-after-crash" xreflabel="remove_temp_files_after_crash">
+ <term><varname>remove_temp_files_after_crash</varname> (<type>boolean</type>)
+ <indexterm>
+ <primary><varname>remove_temp_files_after_crash</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ When set to <literal>on</literal>, which is the default,
+ <productname>PostgreSQL</productname> will automatically remove
+ temporary files after a backend crash. If disabled, the files will be
+ retained and may be used for debugging, for example. Repeated crashes
+ may however result in accumulation of useless files.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-data-sync-retry" xreflabel="data_sync_retry">
<term><varname>data_sync_retry</varname> (<type>boolean</type>)
<indexterm>
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index e8af05c04ed..fc9c769bc66 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -242,6 +242,7 @@ bool Db_user_namespace = false;
bool enable_bonjour = false;
char *bonjour_name;
bool restart_after_crash = true;
+bool remove_temp_files_after_crash = true;
/* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0,
@@ -3976,6 +3977,10 @@ PostmasterStateMachine(void)
ereport(LOG,
(errmsg("all server processes terminated; reinitializing")));
+ /* remove leftover temporary files after a crash */
+ if (remove_temp_files_after_crash)
+ RemovePgTempFiles();
+
/* allow background workers to immediately restart */
ResetBackgroundWorkerCrashTimes();
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index b58502837aa..110ba31517a 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -3024,11 +3024,13 @@ CleanupTempFiles(bool isCommit, bool isProcExit)
* remove any leftover files created by OpenTemporaryFile and any leftover
* temporary relation files created by mdcreate.
*
- * NOTE: we could, but don't, call this during a post-backend-crash restart
- * cycle. The argument for not doing it is that someone might want to examine
- * the temp files for debugging purposes. This does however mean that
- * OpenTemporaryFile had better allow for collision with an existing temp
- * file name.
+ * During post-backend-crash restart cycle, this routine is called when
+ * remove_temp_files_after_crash GUC is enabled. Multiple crashes while
+ * queries are using temp files could result in useless storage usage that can
+ * only be reclaimed by a service restart. The argument against enabling it is
+ * that someone might want to examine the temporary files for debugging
+ * purposes. This does however mean that OpenTemporaryFile had better allow for
+ * collision with an existing temp file name.
*
* NOTE: this function and its subroutines generally report syscall failures
* with ereport(LOG) and keep going. Removing temp files is not so critical
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b263e3493ba..5a3ca5b765f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1371,6 +1371,15 @@ static struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
+ {
+ {"remove_temp_files_after_crash", PGC_SIGHUP, ERROR_HANDLING_OPTIONS,
+ gettext_noop("Remove temporary files after backend crash."),
+ NULL
+ },
+ &remove_temp_files_after_crash,
+ true,
+ NULL, NULL, NULL
+ },
{
{"log_duration", PGC_SUSET, LOGGING_WHAT,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 6647f8fd6e9..3ff507d5f60 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -758,6 +758,8 @@
#exit_on_error = off # terminate session on any error?
#restart_after_crash = on # reinitialize after backend crash?
+#remove_temp_files_after_crash = on # remove temporary files after
+ # backend crash?
#data_sync_retry = off # retry or panic on failure to fsync
# data?
# (change requires restart)
diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h
index cfa59c4dc0d..0efdd7c232e 100644
--- a/src/include/postmaster/postmaster.h
+++ b/src/include/postmaster/postmaster.h
@@ -29,6 +29,7 @@ extern bool log_hostname;
extern bool enable_bonjour;
extern char *bonjour_name;
extern bool restart_after_crash;
+extern bool remove_temp_files_after_crash;
#ifdef WIN32
extern HANDLE PostmasterHandle;
diff --git a/src/test/recovery/t/022_crash_temp_files.pl b/src/test/recovery/t/022_crash_temp_files.pl
new file mode 100644
index 00000000000..c37b2277709
--- /dev/null
+++ b/src/test/recovery/t/022_crash_temp_files.pl
@@ -0,0 +1,192 @@
+# Test remove of temporary files after a crash.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+use Config;
+use Time::HiRes qw(usleep);
+
+plan tests => 9;
+
+
+# To avoid hanging while expecting some specific input from a psql
+# instance being driven by us, add a timeout high enough that it
+# should never trigger even on very slow machines, unless something
+# is really wrong.
+my $psql_timeout = IPC::Run::timer(60);
+
+my $node = get_new_node('node_crash');
+$node->init();
+$node->start();
+
+# By default, PostgresNode doesn't restart after crash
+# Reduce work_mem to generate temporary file with a few number of rows
+$node->safe_psql(
+ 'postgres',
+ q[ALTER SYSTEM SET remove_temp_files_after_crash = on;
+ ALTER SYSTEM SET log_connections = 1;
+ ALTER SYSTEM SET work_mem = '64kB';
+ ALTER SYSTEM SET restart_after_crash = on;
+ SELECT pg_reload_conf();]);
+
+# create table, insert rows
+$node->safe_psql(
+ 'postgres',
+ q[CREATE TABLE tab_crash (a text);
+ INSERT INTO tab_crash (a) SELECT gen_random_uuid() FROM generate_series(1, 500);]);
+
+# Run psql, keeping session alive, so we have an alive backend to kill.
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$killme_stdin,
+ '>',
+ \$killme_stdout,
+ '2>',
+ \$killme_stderr,
+ $psql_timeout);
+
+# Get backend pid
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+ok(pump_until($killme, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ 'acquired pid for SIGKILL');
+my $pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Run the query that generates a temporary file and that will be killed before
+# it finishes. Since the query that generates the temporary file does not
+# return before the connection is killed, use a SELECT before to trigger
+# pump_until.
+$killme_stdin .= q[
+BEGIN;
+SELECT $$in-progress-before-sigkill$$;
+WITH foo AS (SELECT a FROM tab_crash ORDER BY a) SELECT a, pg_sleep(1) FROM foo;
+];
+ok(pump_until($killme, \$killme_stdout, qr/in-progress-before-sigkill/m),
+ 'select in-progress-before-sigkill');
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Wait some time so the temporary file is generated by SELECT
+usleep(10_000);
+
+# Kill with SIGKILL
+my $ret = TestLib::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, 'killed process with KILL');
+
+# Close psql session
+$killme->finish;
+
+# Wait till server restarts
+$node->poll_query_until('postgres', 'SELECT 1', '1');
+
+# Check for temporary files
+is($node->safe_psql(
+ 'postgres',
+ 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'),
+ qq(0), 'no temporary files');
+
+#
+# Test old behavior (don't remove temporary files after crash)
+#
+$node->safe_psql(
+ 'postgres',
+ q[ALTER SYSTEM SET remove_temp_files_after_crash = off;
+ SELECT pg_reload_conf();]);
+
+# Restart psql session
+($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+$killme->run();
+
+# Get backend pid
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+ok(pump_until($killme, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ 'acquired pid for SIGKILL');
+$pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Run the query that generates a temporary file and that will be killed before
+# it finishes. Since the query that generates the temporary file does not
+# return before the connection is killed, use a SELECT before to trigger
+# pump_until.
+$killme_stdin .= q[
+BEGIN;
+SELECT $$in-progress-before-sigkill$$;
+WITH foo AS (SELECT a FROM tab_crash ORDER BY a) SELECT a, pg_sleep(1) FROM foo;
+];
+ok(pump_until($killme, \$killme_stdout, qr/in-progress-before-sigkill/m),
+ 'select in-progress-before-sigkill');
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Wait some time so the temporary file is generated by SELECT
+usleep(10_000);
+
+# Kill with SIGKILL
+$ret = TestLib::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, 'killed process with KILL');
+
+# Close psql session
+$killme->finish;
+
+# Wait till server restarts
+$node->poll_query_until('postgres', 'SELECT 1', '1');
+
+# Check for temporary files -- should be there
+is($node->safe_psql(
+ 'postgres',
+ 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'),
+ qq(1), 'one temporary file');
+
+# Restart should remove the temporary files
+$node->restart();
+
+# Check the temporary files -- should be gone
+is($node->safe_psql(
+ 'postgres',
+ 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'),
+ qq(0), 'temporary file was removed');
+
+$node->stop();
+
+# Pump until string is matched, or timeout occurs
+sub pump_until
+{
+ my ($proc, $stream, $untl) = @_;
+ $proc->pump_nb();
+ while (1)
+ {
+ last if $$stream =~ /$untl/;
+ if ($psql_timeout->is_expired)
+ {
+ diag("aborting wait: program timed out");
+ diag("stream contents: >>", $$stream, "<<");
+ diag("pattern searched for: ", $untl);
+
+ return 0;
+ }
+ if (not $proc->pumpable())
+ {
+ diag("aborting wait: program died");
+ diag("stream contents: >>", $$stream, "<<");
+ diag("pattern searched for: ", $untl);
+
+ return 0;
+ }
+ $proc->pump();
+ }
+ return 1;
+}