aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2018-12-16 14:32:14 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2018-12-16 14:32:14 -0500
commit34010ac2fa187ce032a7b243c829c7ef5f047e20 (patch)
tree0ff784227f802f543fd2b4fd93a3ef148e9a1a97 /src/backend
parent8b90174e3f0638ff542bef7f367429750d18b08c (diff)
downloadpostgresql-34010ac2fa187ce032a7b243c829c7ef5f047e20.tar.gz
postgresql-34010ac2fa187ce032a7b243c829c7ef5f047e20.zip
Improve detection of child-process SIGPIPE failures.
Commit ffa4cbd62 added logic to detect SIGPIPE failure of a COPY child process, but it only worked correctly if the SIGPIPE occurred in the immediate child process. Depending on the shell in use and the complexity of the shell command string, we might instead get back an exit code of 128 + SIGPIPE, representing a shell error exit reporting SIGPIPE in the child process. We could just hack up ClosePipeToProgram() to add the extra case, but it seems like this is a fairly general issue deserving a more general and better-documented solution. I chose to add a couple of functions in src/common/wait_error.c, which is a natural place to know about wait-result encodings, that will test for either a specific child-process signal type or any child-process signal failure. Then, adjust other places that were doing ad-hoc tests of this type to use the common functions. In RestoreArchivedFile, this fixes a race condition affecting whether the process will report an error or just silently proc_exit(1): before, that depended on whether the intermediate shell got SIGTERM'd itself or reported a child process failing on SIGTERM. Like the previous patch, back-patch to v10; we could go further but there seems no real need to. Per report from Erik Rijkers. Discussion: https://postgr.es/m/f3683f87ab1701bea5d86a7742b22432@xs4all.nl
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/transam/xlogarchive.c20
-rw-r--r--src/backend/commands/copy.c3
-rw-r--r--src/backend/postmaster/pgarch.c10
3 files changed, 10 insertions, 23 deletions
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index 90b150c7a8b..7825cfe532e 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -59,7 +59,6 @@ RestoreArchivedFile(char *path, const char *xlogfname,
char *endp;
const char *sp;
int rc;
- bool signaled;
struct stat stat_buf;
XLogSegNo restartSegNo;
XLogRecPtr restartRedoPtr;
@@ -288,17 +287,12 @@ RestoreArchivedFile(char *path, const char *xlogfname,
* will perform an immediate shutdown when it sees us exiting
* unexpectedly.
*
- * Per the Single Unix Spec, shells report exit status > 128 when a called
- * command died on a signal. Also, 126 and 127 are used to report
- * problems such as an unfindable command; treat those as fatal errors
- * too.
+ * We treat hard shell errors such as "command not found" as fatal, too.
*/
- if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
+ if (wait_result_is_signal(rc, SIGTERM))
proc_exit(1);
- signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
-
- ereport(signaled ? FATAL : DEBUG2,
+ ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
(errmsg("could not restore file \"%s\" from archive: %s",
xlogfname, wait_result_to_str(rc))));
@@ -334,7 +328,6 @@ ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
char *endp;
const char *sp;
int rc;
- bool signaled;
XLogSegNo restartSegNo;
XLogRecPtr restartRedoPtr;
TimeLineID restartTli;
@@ -401,12 +394,9 @@ ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
{
/*
* If the failure was due to any sort of signal, it's best to punt and
- * abort recovery. See also detailed comments on signals in
- * RestoreArchivedFile().
+ * abort recovery. See comments in RestoreArchivedFile().
*/
- signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
-
- ereport((signaled && failOnSignal) ? FATAL : WARNING,
+ ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
/*------
translator: First %s represents a recovery.conf parameter name like
"recovery_end_command", the 2nd is the value of that parameter, the
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 1a7d4f99ee8..226d2ef3dac 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -17,7 +17,6 @@
#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
-#include <sys/wait.h>
#include <netinet/in.h>
#include <arpa/inet.h>
@@ -1713,7 +1712,7 @@ ClosePipeToProgram(CopyState cstate)
* problem.
*/
if (cstate->is_copy_from && !cstate->reached_eof &&
- WIFSIGNALED(pclose_rc) && WTERMSIG(pclose_rc) == SIGPIPE)
+ wait_result_is_signal(pclose_rc, SIGPIPE))
return;
ereport(ERROR,
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index ddf9d698e04..60e153ee2c9 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -573,13 +573,11 @@ pgarch_archiveXlog(char *xlog)
* If either the shell itself, or a called command, died on a signal,
* abort the archiver. We do this because system() ignores SIGINT and
* SIGQUIT while waiting; so a signal is very likely something that
- * should have interrupted us too. If we overreact it's no big deal,
- * the postmaster will just start the archiver again.
- *
- * Per the Single Unix Spec, shells report exit status > 128 when a
- * called command died on a signal.
+ * should have interrupted us too. Also die if the shell got a hard
+ * "command not found" type of error. If we overreact it's no big
+ * deal, the postmaster will just start the archiver again.
*/
- int lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? FATAL : LOG;
+ int lev = wait_result_is_any_signal(rc, true) ? FATAL : LOG;
if (WIFEXITED(rc))
{