aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/bootstrap/bootstrap.c8
-rw-r--r--src/backend/postmaster/Makefile1
-rw-r--r--src/backend/postmaster/launch_backend.c3
-rw-r--r--src/backend/postmaster/meson.build1
-rw-r--r--src/backend/postmaster/pmchild.c285
-rw-r--r--src/backend/postmaster/postmaster.c912
-rw-r--r--src/backend/postmaster/syslogger.c6
-rw-r--r--src/backend/storage/ipc/pmsignal.c89
-rw-r--r--src/backend/storage/lmgr/proc.c12
-rw-r--r--src/backend/tcop/postgres.c6
-rw-r--r--src/include/postmaster/bgworker_internals.h2
-rw-r--r--src/include/postmaster/postmaster.h45
-rw-r--r--src/include/postmaster/syslogger.h2
-rw-r--r--src/include/storage/pmsignal.h4
-rw-r--r--src/tools/pgindent/typedefs.list3
15 files changed, 785 insertions, 594 deletions
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index ed59dfce893..d31a67599c9 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -31,6 +31,7 @@
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "pg_getopt.h"
+#include "postmaster/postmaster.h"
#include "storage/bufpage.h"
#include "storage/ipc.h"
#include "storage/proc.h"
@@ -309,6 +310,13 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
InitializeMaxBackends();
+ /*
+ * Even though bootstrapping runs in single-process mode, initialize
+ * postmaster child slots array so that --check can detect running out of
+ * shared memory or other resources if max_connections is set too high.
+ */
+ InitPostmasterChildSlots();
+
InitializeFastPathLocks();
CreateSharedMemoryAndSemaphores();
diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile
index db08543d195..0f4435d2d97 100644
--- a/src/backend/postmaster/Makefile
+++ b/src/backend/postmaster/Makefile
@@ -22,6 +22,7 @@ OBJS = \
interrupt.o \
launch_backend.o \
pgarch.o \
+ pmchild.o \
postmaster.o \
startup.o \
syslogger.o \
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 423e6120438..6ce75f6f77d 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -118,6 +118,7 @@ typedef struct
bool query_id_enabled;
int max_safe_fds;
int MaxBackends;
+ int num_pmchild_slots;
#ifdef WIN32
HANDLE PostmasterHandle;
HANDLE initial_signal_pipe;
@@ -735,6 +736,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
+ param->num_pmchild_slots = num_pmchild_slots;
#ifdef WIN32
param->PostmasterHandle = PostmasterHandle;
@@ -994,6 +996,7 @@ restore_backend_variables(BackendParameters *param)
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
+ num_pmchild_slots = param->num_pmchild_slots;
#ifdef WIN32
PostmasterHandle = param->PostmasterHandle;
diff --git a/src/backend/postmaster/meson.build b/src/backend/postmaster/meson.build
index 0ea4bbe084e..0e80f209863 100644
--- a/src/backend/postmaster/meson.build
+++ b/src/backend/postmaster/meson.build
@@ -10,6 +10,7 @@ backend_sources += files(
'interrupt.c',
'launch_backend.c',
'pgarch.c',
+ 'pmchild.c',
'postmaster.c',
'startup.c',
'syslogger.c',
diff --git a/src/backend/postmaster/pmchild.c b/src/backend/postmaster/pmchild.c
new file mode 100644
index 00000000000..381cf005a9b
--- /dev/null
+++ b/src/backend/postmaster/pmchild.c
@@ -0,0 +1,285 @@
+/*-------------------------------------------------------------------------
+ *
+ * pmchild.c
+ * Functions for keeping track of postmaster child processes.
+ *
+ * Postmaster keeps track of all child processes so that when a process exits,
+ * it knows what kind of a process it was and can clean up accordingly. Every
+ * child process is allocated a PMChild struct from a fixed pool of structs.
+ * The size of the pool is determined by various settings that configure how
+ * many worker processes and backend connections are allowed, i.e.
+ * autovacuum_max_workers, max_worker_processes, max_wal_senders, and
+ * max_connections.
+ *
+ * Dead-end backends are handled slightly differently. There is no limit
+ * on the number of dead-end backends, and they do not need unique IDs, so
+ * their PMChild structs are allocated dynamically, not from a pool.
+ *
+ * The structures and functions in this file are private to the postmaster
+ * process. But note that there is an array in shared memory, managed by
+ * pmsignal.c, that mirrors this.
+ *
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/postmaster/pmchild.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "postmaster/autovacuum.h"
+#include "postmaster/postmaster.h"
+#include "replication/walsender.h"
+#include "storage/pmsignal.h"
+#include "storage/proc.h"
+
+/*
+ * Freelists for different kinds of child processes. We maintain separate
+ * pools for each, so that for example launching a lot of regular backends
+ * cannot prevent autovacuum or an aux process from launching.
+ *
+ * Each pool owns a contiguous range of child_slot numbers,
+ * [first_slotno, first_slotno + size). Slot numbers start at 1; child_slot
+ * 0 is what dead-end children get, as they have no slot. A pool whose size
+ * is zero cannot be allocated from.
+ */
+typedef struct PMChildPool
+{
+ int size; /* number of PMChild slots reserved for this
+ * kind of process */
+ int first_slotno; /* first child_slot number belonging to this
+ * pool (numbering starts at 1) */
+ dlist_head freelist; /* currently unused PMChild entries */
+} PMChildPool;
+
+static PMChildPool pmchild_pools[BACKEND_NUM_TYPES]; /* indexed by BackendType */
+NON_EXEC_STATIC int num_pmchild_slots = 0; /* sum of all pool sizes; stays 0
+ * until the pools have been sized */
+
+/*
+ * List of active child processes. This includes dead-end children.
+ *
+ * Entries are linked through PMChild.elem. The same list node is used to
+ * link an unused entry into its pool's freelist, so a pooled entry is on at
+ * most one of the two lists at any time.
+ */
+dlist_head ActiveChildList;
+
+/*
+ * MaxLivePostmasterChildren
+ *
+ * This reports the number of postmaster child processes that can be active.
+ * It includes all children except for dead-end children. This allows the
+ * array in shared memory (PMChildFlags) to have a fixed maximum size.
+ *
+ * The value returned is num_pmchild_slots, the total of all pool sizes.
+ * Raises an ERROR if that total is still zero, i.e. the slot count has not
+ * been established in this process yet.
+ */
+int
+MaxLivePostmasterChildren(void)
+{
+ if (num_pmchild_slots == 0)
+ elog(ERROR, "PM child array not initialized yet");
+ return num_pmchild_slots;
+}
+
+/*
+ * Initialize at postmaster startup
+ *
+ * Sizes the per-type pools, allocates a single array holding every PMChild
+ * entry, numbers the entries 1..num_pmchild_slots in pool order, and places
+ * each entry on the freelist of the pool it belongs to. ActiveChildList is
+ * initialized as well.
+ *
+ * Note: This is not called on crash restart. We rely on PMChild entries to
+ * remain valid through the restart process. This is important because the
+ * syslogger survives through the crash restart process, so we must not
+ * invalidate its PMChild slot.
+ */
+void
+InitPostmasterChildSlots(void)
+{
+ int slotno;
+ PMChild *slots;
+
+ /*
+ * We allow more connections here than we can have backends because some
+ * might still be authenticating; they might fail auth, or some existing
+ * backend might exit before the auth cycle is completed. The exact
+ * MaxConnections limit is enforced when a new backend tries to join the
+ * PGPROC array.
+ *
+ * WAL senders start out as regular backends, so they share the same pool.
+ */
+ pmchild_pools[B_BACKEND].size = 2 * (MaxConnections + max_wal_senders);
+
+ pmchild_pools[B_AUTOVAC_WORKER].size = autovacuum_max_workers;
+ pmchild_pools[B_BG_WORKER].size = max_worker_processes;
+
+ /*
+ * There can be only one of each of these running at a time. They each
+ * get their own pool of just one entry.
+ */
+ pmchild_pools[B_AUTOVAC_LAUNCHER].size = 1;
+ pmchild_pools[B_SLOTSYNC_WORKER].size = 1;
+ pmchild_pools[B_ARCHIVER].size = 1;
+ pmchild_pools[B_BG_WRITER].size = 1;
+ pmchild_pools[B_CHECKPOINTER].size = 1;
+ pmchild_pools[B_STARTUP].size = 1;
+ pmchild_pools[B_WAL_RECEIVER].size = 1;
+ pmchild_pools[B_WAL_SUMMARIZER].size = 1;
+ pmchild_pools[B_WAL_WRITER].size = 1;
+ pmchild_pools[B_LOGGER].size = 1;
+
+ /*
+ * The rest of the pmchild_pools are left at zero size.
+ * AssignPostmasterChildSlot() refuses to allocate from a zero-size pool.
+ */
+
+ /* Count the total number of slots */
+ num_pmchild_slots = 0;
+ for (int i = 0; i < BACKEND_NUM_TYPES; i++)
+ num_pmchild_slots += pmchild_pools[i].size;
+
+ /* Allocate all entries as one array, then initialize them pool by pool */
+ slots = palloc(num_pmchild_slots * sizeof(PMChild));
+ slotno = 0; /* 0-based array index; child_slot is slotno + 1 */
+ for (int btype = 0; btype < BACKEND_NUM_TYPES; btype++)
+ {
+ pmchild_pools[btype].first_slotno = slotno + 1; /* slot numbers start at 1 */
+ dlist_init(&pmchild_pools[btype].freelist);
+
+ for (int j = 0; j < pmchild_pools[btype].size; j++)
+ {
+ slots[slotno].pid = 0;
+ slots[slotno].child_slot = slotno + 1;
+ slots[slotno].bkend_type = B_INVALID; /* real type is set on assignment */
+ slots[slotno].rw = NULL;
+ slots[slotno].bgworker_notify = false;
+ dlist_push_tail(&pmchild_pools[btype].freelist, &slots[slotno].elem);
+ slotno++;
+ }
+ }
+ Assert(slotno == num_pmchild_slots); /* every counted slot was initialized */
+
+ /* Initialize other structures */
+ dlist_init(&ActiveChildList);
+}
+
+/*
+ * Allocate a PMChild entry for a postmaster child process of given type.
+ *
+ * The entry is taken from the right pool for the type.
+ *
+ * pmchild->child_slot in the returned struct is unique among all active child
+ * processes.
+ *
+ * Returns NULL if the pool for this type currently has no free entry.
+ * Raises an ERROR if no slots are reserved for this type at all (pool size
+ * is zero), or if the entry popped from the freelist carries a child_slot
+ * outside the pool's range.
+ *
+ * On success the entry has been moved from the freelist to ActiveChildList,
+ * its pid is 0 (presumably the caller fills it in once the process has been
+ * launched -- not visible in this file), and the slot has been marked as
+ * assigned in the shared memory array.
+ */
+PMChild *
+AssignPostmasterChildSlot(BackendType btype)
+{
+ dlist_head *freelist;
+ PMChild *pmchild;
+
+ if (pmchild_pools[btype].size == 0)
+ elog(ERROR, "cannot allocate a PMChild slot for backend type %d", btype);
+
+ freelist = &pmchild_pools[btype].freelist;
+ if (dlist_is_empty(freelist))
+ return NULL; /* pool exhausted */
+
+ pmchild = dlist_container(PMChild, elem, dlist_pop_head_node(freelist));
+ pmchild->pid = 0;
+ pmchild->bkend_type = btype;
+ pmchild->rw = NULL;
+ pmchild->bgworker_notify = true;
+
+ /*
+ * pmchild->child_slot for each entry was initialized when the array of
+ * slots was allocated. Sanity check it.
+ */
+ if (!(pmchild->child_slot >= pmchild_pools[btype].first_slotno &&
+ pmchild->child_slot < pmchild_pools[btype].first_slotno + pmchild_pools[btype].size))
+ {
+ elog(ERROR, "pmchild freelist for backend type %d is corrupt",
+ pmchild->bkend_type);
+ }
+
+ dlist_push_head(&ActiveChildList, &pmchild->elem);
+
+ /* Update the status in the shared memory array */
+ MarkPostmasterChildSlotAssigned(pmchild->child_slot);
+
+ elog(DEBUG2, "assigned pm child slot %d for %s",
+ pmchild->child_slot, PostmasterChildName(btype));
+
+ return pmchild;
+}
+
+/*
+ * Allocate a PMChild struct for a dead-end backend. Dead-end children are
+ * not assigned a child_slot number. The struct is palloc'd; returns NULL if
+ * out of memory.
+ *
+ * On success the entry has child_slot 0 and bkend_type B_DEAD_END_BACKEND,
+ * and has been linked into ActiveChildList. It does not come from any pool;
+ * ReleasePostmasterChildSlot() recognizes it by its bkend_type and pfree's
+ * it.
+ */
+PMChild *
+AllocDeadEndChild(void)
+{
+ PMChild *pmchild;
+
+ elog(DEBUG2, "allocating dead-end child");
+
+ pmchild = (PMChild *) palloc_extended(sizeof(PMChild), MCXT_ALLOC_NO_OOM);
+ if (pmchild) /* NULL here means the allocation failed */
+ {
+ pmchild->pid = 0;
+ pmchild->child_slot = 0; /* dead-end children have no slot */
+ pmchild->bkend_type = B_DEAD_END_BACKEND;
+ pmchild->rw = NULL;
+ pmchild->bgworker_notify = false;
+
+ dlist_push_head(&ActiveChildList, &pmchild->elem);
+ }
+
+ return pmchild;
+}
+
+/*
+ * Release a PMChild slot, after the child process has exited.
+ *
+ * Returns true if the child detached cleanly from shared memory, false
+ * otherwise (see MarkPostmasterChildSlotUnassigned).
+ *
+ * The entry is first unlinked from the list it is on. A dead-end entry is
+ * then pfree'd and true is returned. Any other entry is pushed back onto
+ * the freelist of its pool (B_WAL_SENDER entries go back to the B_BACKEND
+ * pool) and its slot is marked unassigned in shared memory. Raises an ERROR
+ * if the entry's child_slot is outside the range of the pool it would be
+ * returned to.
+ *
+ * The caller must not use 'pmchild' afterwards: a dead-end entry has been
+ * freed, and a pooled entry may be handed out again.
+ */
+bool
+ReleasePostmasterChildSlot(PMChild *pmchild)
+{
+ dlist_delete(&pmchild->elem); /* unlink from the active list */
+ if (pmchild->bkend_type == B_DEAD_END_BACKEND)
+ {
+ elog(DEBUG2, "releasing dead-end backend");
+ pfree(pmchild);
+ return true; /* no child_slot, so nothing to check in shmem */
+ }
+ else
+ {
+ PMChildPool *pool;
+
+ elog(DEBUG2, "releasing pm child slot %d", pmchild->child_slot);
+
+ /* WAL senders start out as regular backends, and share the pool */
+ if (pmchild->bkend_type == B_WAL_SENDER)
+ pool = &pmchild_pools[B_BACKEND];
+ else
+ pool = &pmchild_pools[pmchild->bkend_type];
+
+ /* sanity check that we return the entry to the right pool */
+ if (!(pmchild->child_slot >= pool->first_slotno &&
+ pmchild->child_slot < pool->first_slotno + pool->size))
+ {
+ elog(ERROR, "pmchild freelist for backend type %d is corrupt",
+ pmchild->bkend_type);
+ }
+
+ dlist_push_head(&pool->freelist, &pmchild->elem);
+ return MarkPostmasterChildSlotUnassigned(pmchild->child_slot);
+ }
+}
+
+/*
+ * Find the PMChild entry of a running child process by PID.
+ *
+ * Walks ActiveChildList from the head and returns the first entry whose pid
+ * field equals 'pid', or NULL if there is none. Dead-end children are on
+ * that list too, so they can be found this way as well.
+ */
+PMChild *
+FindPostmasterChildByPid(int pid)
+{
+ dlist_iter iter;
+
+ dlist_foreach(iter, &ActiveChildList)
+ {
+ PMChild *bp = dlist_container(PMChild, elem, iter.cur);
+
+ if (bp->pid == pid)
+ return bp;
+ }
+ return NULL; /* no active child has this pid */
+}
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index b5300949843..4129c71efad 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -139,9 +139,7 @@ typedef struct
StaticAssertDecl(BACKEND_NUM_TYPES < 32, "too many backend types for uint32");
static const BackendTypeMask BTYPE_MASK_ALL = {(1 << BACKEND_NUM_TYPES) - 1};
-#if 0 /* unused */
static const BackendTypeMask BTYPE_MASK_NONE = {0};
-#endif
static inline BackendTypeMask
btmask(BackendType t)
@@ -151,14 +149,12 @@ btmask(BackendType t)
return mask;
}
-#if 0 /* unused */
static inline BackendTypeMask
btmask_add(BackendTypeMask mask, BackendType t)
{
mask.mask |= 1 << t;
return mask;
}
-#endif
static inline BackendTypeMask
btmask_del(BackendTypeMask mask, BackendType t)
@@ -192,48 +188,9 @@ btmask_contains(BackendTypeMask mask, BackendType t)
return (mask.mask & (1 << t)) != 0;
}
-/*
- * List of active backends (or child processes anyway; we don't actually
- * know whether a given child has become a backend or is still in the
- * authorization phase). This is used mainly to keep track of how many
- * children we have and send them appropriate signals when necessary.
- *
- * As shown in the above set of backend types, this list includes not only
- * "normal" client sessions, but also autovacuum workers, walsenders, and
- * background workers. (Note that at the time of launch, walsenders are
- * labeled B_BACKEND; we relabel them to B_WAL_SENDER
- * upon noticing they've changed their PMChildFlags entry. Hence that check
- * must be done before any operation that needs to distinguish walsenders
- * from normal backends.)
- *
- * Also, "dead_end" children are in it: these are children launched just for
- * the purpose of sending a friendly rejection message to a would-be client.
- * We must track them because they are attached to shared memory, but we know
- * they will never become live backends. dead_end children are not assigned a
- * PMChildSlot. dead_end children have bkend_type B_DEAD_END_BACKEND.
- *
- * "Special" children such as the startup, bgwriter, autovacuum launcher, and
- * slot sync worker tasks are not in this list. They are tracked via StartupPID
- * and other pid_t variables below. (Thus, there can't be more than one of any
- * given "special" child process type. We use BackendList entries for any
- * child process there can be more than one of.)
- */
-typedef struct bkend
-{
- pid_t pid; /* process id of backend */
- int child_slot; /* PMChildSlot for this backend, if any */
- BackendType bkend_type; /* child process flavor, see above */
- RegisteredBgWorker *rw; /* bgworker info, if this is a bgworker */
- bool bgworker_notify; /* gets bgworker start/stop notifications */
- dlist_node elem; /* list link in BackendList */
-} Backend;
-
-static dlist_head BackendList = DLIST_STATIC_INIT(BackendList);
BackgroundWorker *MyBgworkerEntry = NULL;
-
-
/* The socket number we are listening for connections on */
int PostPortNumber = DEF_PGPORT;
@@ -285,17 +242,17 @@ bool remove_temp_files_after_crash = true;
bool send_abort_for_crash = false;
bool send_abort_for_kill = false;
-/* PIDs of special child processes; 0 when not running */
-static pid_t StartupPID = 0,
- BgWriterPID = 0,
- CheckpointerPID = 0,
- WalWriterPID = 0,
- WalReceiverPID = 0,
- WalSummarizerPID = 0,
- AutoVacPID = 0,
- PgArchPID = 0,
- SysLoggerPID = 0,
- SlotSyncWorkerPID = 0;
+/* special child processes; NULL when not running */
+static PMChild *StartupPMChild = NULL,
+ *BgWriterPMChild = NULL,
+ *CheckpointerPMChild = NULL,
+ *WalWriterPMChild = NULL,
+ *WalReceiverPMChild = NULL,
+ *WalSummarizerPMChild = NULL,
+ *AutoVacLauncherPMChild = NULL,
+ *PgArchPMChild = NULL,
+ *SysLoggerPMChild = NULL,
+ *SlotSyncWorkerPMChild = NULL;
/* Startup process's status */
typedef enum
@@ -341,13 +298,13 @@ static bool FatalError = false; /* T if recovering from backend crash */
*
* Normal child backends can only be launched when we are in PM_RUN or
* PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
- * In other states we handle connection requests by launching "dead_end"
+ * In other states we handle connection requests by launching "dead-end"
* child processes, which will simply send the client an error message and
- * quit. (We track these in the BackendList so that we can know when they
+ * quit. (We track these in the ActiveChildList so that we can know when they
* are all gone; this is important because they're still connected to shared
* memory, and would interfere with an attempt to destroy the shmem segment,
* possibly leading to SHMALL failure when we try to make a new one.)
- * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
+ * In PM_WAIT_DEAD_END state we are waiting for all the dead-end children
* to drain out of the system, and therefore stop accepting connection
* requests at all until the last existing child has quit (which hopefully
* will not be very long).
@@ -372,7 +329,7 @@ typedef enum
* ckpt */
PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
* finish */
- PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
+ PM_WAIT_DEAD_END, /* waiting for dead-end children to exit */
PM_NO_CHILDREN, /* all important children have exited */
} PMState;
@@ -449,7 +406,7 @@ static void process_pm_child_exit(void);
static void process_pm_reload_request(void);
static void process_pm_shutdown_request(void);
static void dummy_handler(SIGNAL_ARGS);
-static void CleanupBackend(Backend *bp, int exitstatus);
+static void CleanupBackend(PMChild *bp, int exitstatus);
static void HandleChildCrash(int pid, int exitstatus, const char *procname);
static void LogChildExit(int lev, const char *procname,
int pid, int exitstatus);
@@ -460,17 +417,18 @@ static int ServerLoop(void);
static int BackendStartup(ClientSocket *client_sock);
static void report_fork_failure_to_client(ClientSocket *client_sock, int errnum);
static CAC_state canAcceptConnections(BackendType backend_type);
-static void signal_child(pid_t pid, int signal);
-static void sigquit_child(pid_t pid);
+static void signal_child(PMChild *pmchild, int signal);
+static void sigquit_child(PMChild *pmchild);
static bool SignalChildren(int signal, BackendTypeMask targetMask);
static void TerminateChildren(int signal);
static int CountChildren(BackendTypeMask targetMask);
-static Backend *assign_backendlist_entry(void);
static void LaunchMissingBackgroundProcesses(void);
static void maybe_start_bgworkers(void);
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
-static pid_t StartChildProcess(BackendType type);
+static PMChild *StartChildProcess(BackendType type);
+static void StartSysLogger(void);
static void StartAutovacuumWorker(void);
+static bool StartBackgroundWorker(RegisteredBgWorker *rw);
static void InitPostmasterDeathWatchHandle(void);
#ifdef WIN32
@@ -948,9 +906,11 @@ PostmasterMain(int argc, char *argv[])
/*
* Now that loadable modules have had their chance to alter any GUCs,
- * calculate MaxBackends.
+ * calculate MaxBackends and initialize the machinery to track child
+ * processes.
*/
InitializeMaxBackends();
+ InitPostmasterChildSlots();
/*
* Calculate the size of the PGPROC fast-path lock arrays.
@@ -1079,7 +1039,8 @@ PostmasterMain(int argc, char *argv[])
/*
* If enabled, start up syslogger collection subprocess
*/
- SysLoggerPID = SysLogger_Start();
+ if (Logging_collector)
+ StartSysLogger();
/*
* Reset whereToSendOutput from DestDebug (its starting state) to
@@ -1381,16 +1342,16 @@ PostmasterMain(int argc, char *argv[])
AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
/* Start bgwriter and checkpointer so they can help with recovery */
- if (CheckpointerPID == 0)
- CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
- if (BgWriterPID == 0)
- BgWriterPID = StartChildProcess(B_BG_WRITER);
+ if (CheckpointerPMChild == NULL)
+ CheckpointerPMChild = StartChildProcess(B_CHECKPOINTER);
+ if (BgWriterPMChild == NULL)
+ BgWriterPMChild = StartChildProcess(B_BG_WRITER);
/*
* We're ready to rock and roll...
*/
- StartupPID = StartChildProcess(B_STARTUP);
- Assert(StartupPID != 0);
+ StartupPMChild = StartChildProcess(B_STARTUP);
+ Assert(StartupPMChild != NULL);
StartupStatus = STARTUP_RUNNING;
pmState = PM_STARTUP;
@@ -1720,8 +1681,8 @@ ServerLoop(void)
if (avlauncher_needs_signal)
{
avlauncher_needs_signal = false;
- if (AutoVacPID != 0)
- kill(AutoVacPID, SIGUSR2);
+ if (AutoVacLauncherPMChild != NULL)
+ kill(AutoVacLauncherPMChild->pid, SIGUSR2);
}
#ifdef HAVE_PTHREAD_IS_THREADED_NP
@@ -1803,23 +1764,23 @@ ServerLoop(void)
/*
* canAcceptConnections --- check to see if database state allows connections
- * of the specified type. backend_type can be B_BACKEND, B_AUTOVAC_WORKER, or
- * B_BG_WORKER. (Note that we don't yet know whether a normal B_BACKEND
- * connection might turn into a walsender.)
+ * of the specified type. backend_type can be B_BACKEND or B_AUTOVAC_WORKER.
+ * (Note that we don't yet know whether a normal B_BACKEND connection might
+ * turn into a walsender.)
*/
static CAC_state
canAcceptConnections(BackendType backend_type)
{
CAC_state result = CAC_OK;
+ Assert(backend_type == B_BACKEND || backend_type == B_AUTOVAC_WORKER);
+
/*
* Can't start backends when in startup/shutdown/inconsistent recovery
* state. We treat autovac workers the same as user backends for this
- * purpose. However, bgworkers are excluded from this test; we expect
- * bgworker_should_start_now() decided whether the DB state allows them.
+ * purpose.
*/
- if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
- backend_type != B_BG_WORKER)
+ if (pmState != PM_RUN && pmState != PM_HOT_STANDBY)
{
if (Shutdown > NoShutdown)
return CAC_SHUTDOWN; /* shutdown is pending */
@@ -1834,26 +1795,11 @@ canAcceptConnections(BackendType backend_type)
/*
* "Smart shutdown" restrictions are applied only to normal connections,
- * not to autovac workers or bgworkers.
+ * not to autovac workers.
*/
if (!connsAllowed && backend_type == B_BACKEND)
return CAC_SHUTDOWN; /* shutdown is pending */
- /*
- * Don't start too many children.
- *
- * We allow more connections here than we can have backends because some
- * might still be authenticating; they might fail auth, or some existing
- * backend might exit before the auth cycle is completed. The exact
- * MaxBackends limit is enforced when a new backend tries to join the
- * shared-inval backend array.
- *
- * The limit here must match the sizes of the per-child-process arrays;
- * see comments for MaxLivePostmasterChildren().
- */
- if (CountChildren(btmask_all_except(B_DEAD_END_BACKEND)) >= MaxLivePostmasterChildren())
- result = CAC_TOOMANY;
-
return result;
}
@@ -2021,26 +1967,6 @@ process_pm_reload_request(void)
(errmsg("received SIGHUP, reloading configuration files")));
ProcessConfigFile(PGC_SIGHUP);
SignalChildren(SIGHUP, btmask_all_except(B_DEAD_END_BACKEND));
- if (StartupPID != 0)
- signal_child(StartupPID, SIGHUP);
- if (BgWriterPID != 0)
- signal_child(BgWriterPID, SIGHUP);
- if (CheckpointerPID != 0)
- signal_child(CheckpointerPID, SIGHUP);
- if (WalWriterPID != 0)
- signal_child(WalWriterPID, SIGHUP);
- if (WalReceiverPID != 0)
- signal_child(WalReceiverPID, SIGHUP);
- if (WalSummarizerPID != 0)
- signal_child(WalSummarizerPID, SIGHUP);
- if (AutoVacPID != 0)
- signal_child(AutoVacPID, SIGHUP);
- if (PgArchPID != 0)
- signal_child(PgArchPID, SIGHUP);
- if (SysLoggerPID != 0)
- signal_child(SysLoggerPID, SIGHUP);
- if (SlotSyncWorkerPID != 0)
- signal_child(SlotSyncWorkerPID, SIGHUP);
/* Reload authentication config files too */
if (!load_hba())
@@ -2278,15 +2204,15 @@ process_pm_child_exit(void)
while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
{
- bool found;
- dlist_mutable_iter iter;
+ PMChild *pmchild;
/*
* Check if this child was a startup process.
*/
- if (pid == StartupPID)
+ if (StartupPMChild && pid == StartupPMChild->pid)
{
- StartupPID = 0;
+ ReleasePostmasterChildSlot(StartupPMChild);
+ StartupPMChild = NULL;
/*
* Startup process exited in response to a shutdown request (or it
@@ -2339,7 +2265,7 @@ process_pm_child_exit(void)
* restart in that case.
*
* This stanza also handles the case where we sent a SIGQUIT
- * during PM_STARTUP due to some dead_end child crashing: in that
+ * during PM_STARTUP due to some dead-end child crashing: in that
* situation, if the startup process dies on the SIGQUIT, we need
* to transition to PM_WAIT_BACKENDS state which will allow
* PostmasterStateMachine to restart the startup process. (On the
@@ -2397,9 +2323,10 @@ process_pm_child_exit(void)
* one at the next iteration of the postmaster's main loop, if
* necessary. Any other exit condition is treated as a crash.
*/
- if (pid == BgWriterPID)
+ if (BgWriterPMChild && pid == BgWriterPMChild->pid)
{
- BgWriterPID = 0;
+ ReleasePostmasterChildSlot(BgWriterPMChild);
+ BgWriterPMChild = NULL;
if (!EXIT_STATUS_0(exitstatus))
HandleChildCrash(pid, exitstatus,
_("background writer process"));
@@ -2409,9 +2336,10 @@ process_pm_child_exit(void)
/*
* Was it the checkpointer?
*/
- if (pid == CheckpointerPID)
+ if (CheckpointerPMChild && pid == CheckpointerPMChild->pid)
{
- CheckpointerPID = 0;
+ ReleasePostmasterChildSlot(CheckpointerPMChild);
+ CheckpointerPMChild = NULL;
if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
{
/*
@@ -2422,7 +2350,7 @@ process_pm_child_exit(void)
*
* At this point we should have no normal backend children
* left (else we'd not be in PM_SHUTDOWN state) but we might
- * have dead_end children to wait for.
+ * have dead-end children to wait for.
*
* If we have an archiver subprocess, tell it to do a last
* archive cycle and quit. Likewise, if we have walsender
@@ -2431,8 +2359,8 @@ process_pm_child_exit(void)
Assert(Shutdown > NoShutdown);
/* Waken archiver for the last time */
- if (PgArchPID != 0)
- signal_child(PgArchPID, SIGUSR2);
+ if (PgArchPMChild != NULL)
+ signal_child(PgArchPMChild, SIGUSR2);
/*
* Waken walsenders for the last time. No regular backends
@@ -2460,9 +2388,10 @@ process_pm_child_exit(void)
* new one at the next iteration of the postmaster's main loop, if
* necessary. Any other exit condition is treated as a crash.
*/
- if (pid == WalWriterPID)
+ if (WalWriterPMChild && pid == WalWriterPMChild->pid)
{
- WalWriterPID = 0;
+ ReleasePostmasterChildSlot(WalWriterPMChild);
+ WalWriterPMChild = NULL;
if (!EXIT_STATUS_0(exitstatus))
HandleChildCrash(pid, exitstatus,
_("WAL writer process"));
@@ -2475,9 +2404,10 @@ process_pm_child_exit(void)
* backends. (If we need a new wal receiver, we'll start one at the
* next iteration of the postmaster's main loop.)
*/
- if (pid == WalReceiverPID)
+ if (WalReceiverPMChild && pid == WalReceiverPMChild->pid)
{
- WalReceiverPID = 0;
+ ReleasePostmasterChildSlot(WalReceiverPMChild);
+ WalReceiverPMChild = NULL;
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
HandleChildCrash(pid, exitstatus,
_("WAL receiver process"));
@@ -2489,9 +2419,10 @@ process_pm_child_exit(void)
* a new one at the next iteration of the postmaster's main loop, if
* necessary. Any other exit condition is treated as a crash.
*/
- if (pid == WalSummarizerPID)
+ if (WalSummarizerPMChild && pid == WalSummarizerPMChild->pid)
{
- WalSummarizerPID = 0;
+ ReleasePostmasterChildSlot(WalSummarizerPMChild);
+ WalSummarizerPMChild = NULL;
if (!EXIT_STATUS_0(exitstatus))
HandleChildCrash(pid, exitstatus,
_("WAL summarizer process"));
@@ -2504,9 +2435,10 @@ process_pm_child_exit(void)
* loop, if necessary. Any other exit condition is treated as a
* crash.
*/
- if (pid == AutoVacPID)
+ if (AutoVacLauncherPMChild && pid == AutoVacLauncherPMChild->pid)
{
- AutoVacPID = 0;
+ ReleasePostmasterChildSlot(AutoVacLauncherPMChild);
+ AutoVacLauncherPMChild = NULL;
if (!EXIT_STATUS_0(exitstatus))
HandleChildCrash(pid, exitstatus,
_("autovacuum launcher process"));
@@ -2519,9 +2451,10 @@ process_pm_child_exit(void)
* and just try to start a new one on the next cycle of the
* postmaster's main loop, to retry archiving remaining files.
*/
- if (pid == PgArchPID)
+ if (PgArchPMChild && pid == PgArchPMChild->pid)
{
- PgArchPID = 0;
+ ReleasePostmasterChildSlot(PgArchPMChild);
+ PgArchPMChild = NULL;
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
HandleChildCrash(pid, exitstatus,
_("archiver process"));
@@ -2529,11 +2462,15 @@ process_pm_child_exit(void)
}
/* Was it the system logger? If so, try to start a new one */
- if (pid == SysLoggerPID)
+ if (SysLoggerPMChild && pid == SysLoggerPMChild->pid)
{
- SysLoggerPID = 0;
+ ReleasePostmasterChildSlot(SysLoggerPMChild);
+ SysLoggerPMChild = NULL;
+
/* for safety's sake, launch new logger *first* */
- SysLoggerPID = SysLogger_Start();
+ if (Logging_collector)
+ StartSysLogger();
+
if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("system logger process"),
pid, exitstatus);
@@ -2547,9 +2484,10 @@ process_pm_child_exit(void)
* start a new one at the next iteration of the postmaster's main
* loop, if necessary. Any other exit condition is treated as a crash.
*/
- if (pid == SlotSyncWorkerPID)
+ if (SlotSyncWorkerPMChild && pid == SlotSyncWorkerPMChild->pid)
{
- SlotSyncWorkerPID = 0;
+ ReleasePostmasterChildSlot(SlotSyncWorkerPMChild);
+ SlotSyncWorkerPMChild = NULL;
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
HandleChildCrash(pid, exitstatus,
_("slot sync worker process"));
@@ -2559,25 +2497,17 @@ process_pm_child_exit(void)
/*
* Was it a backend or a background worker?
*/
- found = false;
- dlist_foreach_modify(iter, &BackendList)
+ pmchild = FindPostmasterChildByPid(pid);
+ if (pmchild)
{
- Backend *bp = dlist_container(Backend, elem, iter.cur);
-
- if (bp->pid == pid)
- {
- dlist_delete(iter.cur);
- CleanupBackend(bp, exitstatus);
- found = true;
- break;
- }
+ CleanupBackend(pmchild, exitstatus);
}
/*
* We don't know anything about this child process. That's highly
* unexpected, as we do track all the child processes that we fork.
*/
- if (!found)
+ else
{
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
HandleChildCrash(pid, exitstatus, _("untracked child process"));
@@ -2596,17 +2526,21 @@ process_pm_child_exit(void)
/*
* CleanupBackend -- cleanup after terminated backend or background worker.
*
- * Remove all local state associated with backend. The Backend entry has
- * already been unlinked from BackendList, but we will free it here.
+ * Remove all local state associated with the child process and release its
+ * PMChild slot.
*/
static void
-CleanupBackend(Backend *bp,
+CleanupBackend(PMChild *bp,
int exitstatus) /* child's exit status. */
{
char namebuf[MAXPGPATH];
const char *procname;
bool crashed = false;
bool logged = false;
+ pid_t bp_pid;
+ bool bp_bgworker_notify;
+ BackendType bp_bkend_type;
+ RegisteredBgWorker *rw;
/* Construct a process name for the log message */
if (bp->bkend_type == B_BG_WORKER)
@@ -2622,7 +2556,7 @@ CleanupBackend(Backend *bp,
* If a backend dies in an ugly way then we must signal all other backends
* to quickdie. If exit status is zero (normal) or one (FATAL exit), we
* assume everything is all right and proceed to remove the backend from
- * the active backend list.
+ * the active child list.
*/
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
crashed = true;
@@ -2645,25 +2579,28 @@ CleanupBackend(Backend *bp,
#endif
/*
- * If the process attached to shared memory, check that it detached
- * cleanly.
+ * Release the PMChild entry.
+ *
+ * If the process attached to shared memory, this also checks that it
+ * detached cleanly.
*/
- if (bp->bkend_type != B_DEAD_END_BACKEND)
+ bp_pid = bp->pid;
+ bp_bgworker_notify = bp->bgworker_notify;
+ bp_bkend_type = bp->bkend_type;
+ rw = bp->rw;
+ if (!ReleasePostmasterChildSlot(bp))
{
- if (!ReleasePostmasterChildSlot(bp->child_slot))
- {
- /*
- * Uh-oh, the child failed to clean itself up. Treat as a crash
- * after all.
- */
- crashed = true;
- }
+ /*
+ * Uh-oh, the child failed to clean itself up. Treat as a crash after
+ * all.
+ */
+ crashed = true;
}
+ bp = NULL;
if (crashed)
{
- HandleChildCrash(bp->pid, exitstatus, procname);
- pfree(bp);
+ HandleChildCrash(bp_pid, exitstatus, procname);
return;
}
@@ -2674,17 +2611,15 @@ CleanupBackend(Backend *bp,
* gets skipped in the (probably very common) case where the backend has
* never requested any such notifications.
*/
- if (bp->bgworker_notify)
- BackgroundWorkerStopNotifications(bp->pid);
+ if (bp_bgworker_notify)
+ BackgroundWorkerStopNotifications(bp_pid);
/*
* If it was a background worker, also update its RegisteredBgWorker
* entry.
*/
- if (bp->bkend_type == B_BG_WORKER)
+ if (bp_bkend_type == B_BG_WORKER)
{
- RegisteredBgWorker *rw = bp->rw;
-
if (!EXIT_STATUS_0(exitstatus))
{
/* Record timestamp, so we know when to restart the worker. */
@@ -2703,7 +2638,7 @@ CleanupBackend(Backend *bp,
if (!logged)
{
LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
- procname, bp->pid, exitstatus);
+ procname, bp_pid, exitstatus);
logged = true;
}
@@ -2712,9 +2647,7 @@ CleanupBackend(Backend *bp,
}
if (!logged)
- LogChildExit(DEBUG2, procname, bp->pid, exitstatus);
-
- pfree(bp);
+ LogChildExit(DEBUG2, procname, bp_pid, exitstatus);
}
/*
@@ -2724,9 +2657,7 @@ CleanupBackend(Backend *bp,
* The objectives here are to clean up our local state about the child
* process, and to signal all other remaining children to quickdie.
*
- * If it's a backend, the caller has already removed it from the BackendList.
- * If it's an aux process, the corresponding *PID global variable has been
- * reset already.
+ * The caller has already released its PMChild slot.
*/
static void
HandleChildCrash(int pid, int exitstatus, const char *procname)
@@ -2750,63 +2681,34 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
SetQuitSignalReason(PMQUIT_FOR_CRASH);
}
+ /*
+ * Signal all other child processes to exit. The crashed process has
+ * already been removed from ActiveChildList.
+ */
if (take_action)
{
dlist_iter iter;
- dlist_foreach(iter, &BackendList)
+ dlist_foreach(iter, &ActiveChildList)
{
- Backend *bp = dlist_container(Backend, elem, iter.cur);
+ PMChild *bp = dlist_container(PMChild, elem, iter.cur);
+
+ /* Skip the syslogger: we do NOT signal or restart it */
+ if (bp == SysLoggerPMChild)
+ continue;
+
+ if (bp == StartupPMChild)
+ StartupStatus = STARTUP_SIGNALED;
/*
* This backend is still alive. Unless we did so already, tell it
* to commit hara-kiri.
*
- * We could exclude dead_end children here, but at least when
+ * We could exclude dead-end children here, but at least when
* sending SIGABRT it seems better to include them.
*/
- sigquit_child(bp->pid);
+ sigquit_child(bp);
}
-
- if (StartupPID != 0)
- {
- sigquit_child(StartupPID);
- StartupStatus = STARTUP_SIGNALED;
- }
-
- /* Take care of the bgwriter too */
- if (BgWriterPID != 0)
- sigquit_child(BgWriterPID);
-
- /* Take care of the checkpointer too */
- if (CheckpointerPID != 0)
- sigquit_child(CheckpointerPID);
-
- /* Take care of the walwriter too */
- if (WalWriterPID != 0)
- sigquit_child(WalWriterPID);
-
- /* Take care of the walreceiver too */
- if (WalReceiverPID != 0)
- sigquit_child(WalReceiverPID);
-
- /* Take care of the walsummarizer too */
- if (WalSummarizerPID != 0)
- sigquit_child(WalSummarizerPID);
-
- /* Take care of the autovacuum launcher too */
- if (AutoVacPID != 0)
- sigquit_child(AutoVacPID);
-
- /* Take care of the archiver too */
- if (PgArchPID != 0)
- sigquit_child(PgArchPID);
-
- /* Take care of the slot sync worker too */
- if (SlotSyncWorkerPID != 0)
- sigquit_child(SlotSyncWorkerPID);
-
- /* We do NOT restart the syslogger */
}
if (Shutdown != ImmediateShutdown)
@@ -2915,86 +2817,108 @@ PostmasterStateMachine(void)
}
/*
- * If we're ready to do so, signal child processes to shut down. (This
- * isn't a persistent state, but treating it as a distinct pmState allows
- * us to share this code across multiple shutdown code paths.)
+ * In the PM_WAIT_BACKENDS state, wait for all the regular backends and
+ * processes like autovacuum and background workers that are comparable to
+ * backends to exit.
+ *
+ * PM_STOP_BACKENDS is a transient state that means the same as
+ * PM_WAIT_BACKENDS, but we signal the processes first, before waiting for
+ * them. Treating it as a distinct pmState allows us to share this code
+ * across multiple shutdown code paths.
*/
- if (pmState == PM_STOP_BACKENDS)
+ if (pmState == PM_STOP_BACKENDS || pmState == PM_WAIT_BACKENDS)
{
+ BackendTypeMask targetMask = BTYPE_MASK_NONE;
+
/*
- * Forget any pending requests for background workers, since we're no
- * longer willing to launch any new workers. (If additional requests
- * arrive, BackgroundWorkerStateChange will reject them.)
+ * PM_WAIT_BACKENDS state ends when we have no regular backends, no
+ * autovac launcher or workers, and no bgworkers (including
+ * unconnected ones). No walwriter, bgwriter, slot sync worker, or
+ * WAL summarizer either.
*/
- ForgetUnstartedBackgroundWorkers();
-
- /* Signal all backend children except walsenders and dead-end backends */
- SignalChildren(SIGTERM, btmask_all_except2(B_WAL_SENDER, B_DEAD_END_BACKEND));
- /* and the autovac launcher too */
- if (AutoVacPID != 0)
- signal_child(AutoVacPID, SIGTERM);
- /* and the bgwriter too */
- if (BgWriterPID != 0)
- signal_child(BgWriterPID, SIGTERM);
- /* and the walwriter too */
- if (WalWriterPID != 0)
- signal_child(WalWriterPID, SIGTERM);
+ targetMask = btmask_add(targetMask, B_BACKEND);
+ targetMask = btmask_add(targetMask, B_AUTOVAC_LAUNCHER);
+ targetMask = btmask_add(targetMask, B_AUTOVAC_WORKER);
+ targetMask = btmask_add(targetMask, B_BG_WORKER);
+
+ targetMask = btmask_add(targetMask, B_WAL_WRITER);
+ targetMask = btmask_add(targetMask, B_BG_WRITER);
+ targetMask = btmask_add(targetMask, B_SLOTSYNC_WORKER);
+ targetMask = btmask_add(targetMask, B_WAL_SUMMARIZER);
+
/* If we're in recovery, also stop startup and walreceiver procs */
- if (StartupPID != 0)
- signal_child(StartupPID, SIGTERM);
- if (WalReceiverPID != 0)
- signal_child(WalReceiverPID, SIGTERM);
- if (WalSummarizerPID != 0)
- signal_child(WalSummarizerPID, SIGTERM);
- if (SlotSyncWorkerPID != 0)
- signal_child(SlotSyncWorkerPID, SIGTERM);
- /* checkpointer, archiver, stats, and syslogger may continue for now */
-
- /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
- pmState = PM_WAIT_BACKENDS;
- }
+ targetMask = btmask_add(targetMask, B_STARTUP);
+ targetMask = btmask_add(targetMask, B_WAL_RECEIVER);
- /*
- * If we are in a state-machine state that implies waiting for backends to
- * exit, see if they're all gone, and change state if so.
- */
- if (pmState == PM_WAIT_BACKENDS)
- {
/*
- * PM_WAIT_BACKENDS state ends when we have no regular backends
- * (including autovac workers), no bgworkers (including unconnected
- * ones), and no walwriter, autovac launcher, bgwriter or slot sync
- * worker. If we are doing crash recovery or an immediate shutdown
- * then we expect the checkpointer to exit as well, otherwise not. The
- * stats and syslogger processes are disregarded since they are not
- * connected to shared memory; we also disregard dead_end children
- * here. Walsenders and archiver are also disregarded, they will be
- * terminated later after writing the checkpoint record.
+ * If we are doing crash recovery or an immediate shutdown then we
+ * expect the checkpointer to exit as well, otherwise not.
*/
- if (CountChildren(btmask_all_except2(B_WAL_SENDER, B_DEAD_END_BACKEND)) == 0 &&
- StartupPID == 0 &&
- WalReceiverPID == 0 &&
- WalSummarizerPID == 0 &&
- BgWriterPID == 0 &&
- (CheckpointerPID == 0 ||
- (!FatalError && Shutdown < ImmediateShutdown)) &&
- WalWriterPID == 0 &&
- AutoVacPID == 0 &&
- SlotSyncWorkerPID == 0)
+ if (FatalError || Shutdown >= ImmediateShutdown)
+ targetMask = btmask_add(targetMask, B_CHECKPOINTER);
+
+ /*
+ * Walsenders and archiver will continue running; they will be
+ * terminated later after writing the checkpoint record. We also let
+ * dead-end children keep running for now. The syslogger process
+ * exits last.
+ *
+ * This assertion checks that we have covered all backend types,
+ * either by including them in targetMask, or by noting here that they
+ * are allowed to continue running.
+ */
+#ifdef USE_ASSERT_CHECKING
+ {
+ BackendTypeMask remainMask = BTYPE_MASK_NONE;
+
+ remainMask = btmask_add(remainMask, B_WAL_SENDER);
+ remainMask = btmask_add(remainMask, B_ARCHIVER);
+ remainMask = btmask_add(remainMask, B_DEAD_END_BACKEND);
+ remainMask = btmask_add(remainMask, B_LOGGER);
+
+ /* checkpointer may or may not be in targetMask already */
+ remainMask = btmask_add(remainMask, B_CHECKPOINTER);
+
+ /* these are not real postmaster children */
+ remainMask = btmask_add(remainMask, B_INVALID);
+ remainMask = btmask_add(remainMask, B_STANDALONE_BACKEND);
+
+ /* All types should be included in targetMask or remainMask */
+ Assert((remainMask.mask | targetMask.mask) == BTYPE_MASK_ALL.mask);
+ }
+#endif
+
+ /* If we had not yet signaled the processes to exit, do so now */
+ if (pmState == PM_STOP_BACKENDS)
+ {
+ /*
+ * Forget any pending requests for background workers, since we're
+ * no longer willing to launch any new workers. (If additional
+ * requests arrive, BackgroundWorkerStateChange will reject them.)
+ */
+ ForgetUnstartedBackgroundWorkers();
+
+ SignalChildren(SIGTERM, targetMask);
+
+ pmState = PM_WAIT_BACKENDS;
+ }
+
+ /* Are any of the target processes still running? */
+ if (CountChildren(targetMask) == 0)
{
if (Shutdown >= ImmediateShutdown || FatalError)
{
/*
- * Stop any dead_end children and stop creating new ones.
+ * Stop any dead-end children and stop creating new ones.
*/
pmState = PM_WAIT_DEAD_END;
ConfigurePostmasterWaitSet(false);
SignalChildren(SIGQUIT, btmask(B_DEAD_END_BACKEND));
/*
- * We already SIGQUIT'd the archiver and stats processes, if
- * any, when we started immediate shutdown or entered
- * FatalError state.
+ * We already SIGQUIT'd walsenders and the archiver, if any,
+ * when we started immediate shutdown or entered FatalError
+ * state.
*/
}
else
@@ -3006,12 +2930,12 @@ PostmasterStateMachine(void)
*/
Assert(Shutdown > NoShutdown);
/* Start the checkpointer if not running */
- if (CheckpointerPID == 0)
- CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
+ if (CheckpointerPMChild == NULL)
+ CheckpointerPMChild = StartChildProcess(B_CHECKPOINTER);
/* And tell it to shut down */
- if (CheckpointerPID != 0)
+ if (CheckpointerPMChild != NULL)
{
- signal_child(CheckpointerPID, SIGUSR2);
+ signal_child(CheckpointerPMChild, SIGUSR2);
pmState = PM_SHUTDOWN;
}
else
@@ -3031,9 +2955,7 @@ PostmasterStateMachine(void)
ConfigurePostmasterWaitSet(false);
/* Kill the walsenders and archiver too */
- SignalChildren(SIGQUIT, BTYPE_MASK_ALL);
- if (PgArchPID != 0)
- signal_child(PgArchPID, SIGQUIT);
+ SignalChildren(SIGQUIT, btmask_all_except(B_LOGGER));
}
}
}
@@ -3043,43 +2965,44 @@ PostmasterStateMachine(void)
{
/*
* PM_SHUTDOWN_2 state ends when there's no other children than
- * dead_end children left. There shouldn't be any regular backends
+ * dead-end children left. There shouldn't be any regular backends
* left by now anyway; what we're really waiting for is walsenders and
* archiver.
*/
- if (PgArchPID == 0 && CountChildren(btmask_all_except(B_DEAD_END_BACKEND)) == 0)
+ if (CountChildren(btmask_all_except2(B_LOGGER, B_DEAD_END_BACKEND)) == 0)
{
pmState = PM_WAIT_DEAD_END;
ConfigurePostmasterWaitSet(false);
- SignalChildren(SIGTERM, BTYPE_MASK_ALL);
+ SignalChildren(SIGTERM, btmask_all_except(B_LOGGER));
}
}
if (pmState == PM_WAIT_DEAD_END)
{
/*
- * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
- * (ie, no dead_end children remain), and the archiver is gone too.
- *
- * The reason we wait for those two is to protect them against a new
- * postmaster starting conflicting subprocesses; this isn't an
- * ironclad protection, but it at least helps in the
- * shutdown-and-immediately-restart scenario. Note that they have
- * already been sent appropriate shutdown signals, either during a
- * normal state transition leading up to PM_WAIT_DEAD_END, or during
+ * PM_WAIT_DEAD_END state ends when all other children are gone except
+ * for the logger. During normal shutdown, all that remains are
+ * dead-end backends, but in FatalError processing we jump straight
+ * here with more processes remaining. Note that they have already
+ * been sent appropriate shutdown signals, either during a normal
+ * state transition leading up to PM_WAIT_DEAD_END, or during
* FatalError processing.
+ *
+ * The reason we wait is to protect against a new postmaster starting
+ * conflicting subprocesses; this isn't an ironclad protection, but it
+ * at least helps in the shutdown-and-immediately-restart scenario.
*/
- if (dlist_is_empty(&BackendList) && PgArchPID == 0)
+ if (CountChildren(btmask_all_except(B_LOGGER)) == 0)
{
/* These other guys should be dead already */
- Assert(StartupPID == 0);
- Assert(WalReceiverPID == 0);
- Assert(WalSummarizerPID == 0);
- Assert(BgWriterPID == 0);
- Assert(CheckpointerPID == 0);
- Assert(WalWriterPID == 0);
- Assert(AutoVacPID == 0);
- Assert(SlotSyncWorkerPID == 0);
+ Assert(StartupPMChild == NULL);
+ Assert(WalReceiverPMChild == NULL);
+ Assert(WalSummarizerPMChild == NULL);
+ Assert(BgWriterPMChild == NULL);
+ Assert(CheckpointerPMChild == NULL);
+ Assert(WalWriterPMChild == NULL);
+ Assert(AutoVacLauncherPMChild == NULL);
+ Assert(SlotSyncWorkerPMChild == NULL);
/* syslogger is not considered here */
pmState = PM_NO_CHILDREN;
}
@@ -3162,8 +3085,8 @@ PostmasterStateMachine(void)
/* re-create shared memory and semaphores */
CreateSharedMemoryAndSemaphores();
- StartupPID = StartChildProcess(B_STARTUP);
- Assert(StartupPID != 0);
+ StartupPMChild = StartChildProcess(B_STARTUP);
+ Assert(StartupPMChild != NULL);
StartupStatus = STARTUP_RUNNING;
pmState = PM_STARTUP;
/* crash recovery started, reset SIGKILL flag */
@@ -3186,8 +3109,8 @@ static void
LaunchMissingBackgroundProcesses(void)
{
/* Syslogger is active in all states */
- if (SysLoggerPID == 0 && Logging_collector)
- SysLoggerPID = SysLogger_Start();
+ if (SysLoggerPMChild == NULL && Logging_collector)
+ StartSysLogger();
/*
* The checkpointer and the background writer are active from the start,
@@ -3200,30 +3123,30 @@ LaunchMissingBackgroundProcesses(void)
if (pmState == PM_RUN || pmState == PM_RECOVERY ||
pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
{
- if (CheckpointerPID == 0)
- CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
- if (BgWriterPID == 0)
- BgWriterPID = StartChildProcess(B_BG_WRITER);
+ if (CheckpointerPMChild == NULL)
+ CheckpointerPMChild = StartChildProcess(B_CHECKPOINTER);
+ if (BgWriterPMChild == NULL)
+ BgWriterPMChild = StartChildProcess(B_BG_WRITER);
}
/*
* WAL writer is needed only in normal operation (else we cannot be
* writing any new WAL).
*/
- if (WalWriterPID == 0 && pmState == PM_RUN)
- WalWriterPID = StartChildProcess(B_WAL_WRITER);
+ if (WalWriterPMChild == NULL && pmState == PM_RUN)
+ WalWriterPMChild = StartChildProcess(B_WAL_WRITER);
/*
* We don't want autovacuum to run in binary upgrade mode because
* autovacuum might update relfrozenxid for empty tables before the
* physical files are put in place.
*/
- if (!IsBinaryUpgrade && AutoVacPID == 0 &&
+ if (!IsBinaryUpgrade && AutoVacLauncherPMChild == NULL &&
(AutoVacuumingActive() || start_autovac_launcher) &&
pmState == PM_RUN)
{
- AutoVacPID = StartChildProcess(B_AUTOVAC_LAUNCHER);
- if (AutoVacPID != 0)
+ AutoVacLauncherPMChild = StartChildProcess(B_AUTOVAC_LAUNCHER);
+ if (AutoVacLauncherPMChild != NULL)
start_autovac_launcher = false; /* signal processed */
}
@@ -3231,11 +3154,11 @@ LaunchMissingBackgroundProcesses(void)
* If WAL archiving is enabled always, we are allowed to start archiver
* even during recovery.
*/
- if (PgArchPID == 0 &&
+ if (PgArchPMChild == NULL &&
((XLogArchivingActive() && pmState == PM_RUN) ||
(XLogArchivingAlways() && (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) &&
PgArchCanRestart())
- PgArchPID = StartChildProcess(B_ARCHIVER);
+ PgArchPMChild = StartChildProcess(B_ARCHIVER);
/*
* If we need to start a slot sync worker, try to do that now
@@ -3245,42 +3168,42 @@ LaunchMissingBackgroundProcesses(void)
* configured correctly, and it is the first time of worker's launch, or
* enough time has passed since the worker was launched last.
*/
- if (SlotSyncWorkerPID == 0 && pmState == PM_HOT_STANDBY &&
+ if (SlotSyncWorkerPMChild == NULL && pmState == PM_HOT_STANDBY &&
Shutdown <= SmartShutdown && sync_replication_slots &&
ValidateSlotSyncParams(LOG) && SlotSyncWorkerCanRestart())
- SlotSyncWorkerPID = StartChildProcess(B_SLOTSYNC_WORKER);
+ SlotSyncWorkerPMChild = StartChildProcess(B_SLOTSYNC_WORKER);
/*
* If we need to start a WAL receiver, try to do that now
*
- * Note: if WalReceiverPID is already nonzero, it might seem that we
- * should clear WalReceiverRequested. However, there's a race condition
- * if the walreceiver terminates and the startup process immediately
- * requests a new one: it's quite possible to get the signal for the
- * request before reaping the dead walreceiver process. Better to risk
- * launching an extra walreceiver than to miss launching one we need. (The
- * walreceiver code has logic to recognize that it should go away if not
- * needed.)
+ * Note: if a walreceiver process is already running, it might seem that
+ * we should clear WalReceiverRequested. However, there's a race
+ * condition if the walreceiver terminates and the startup process
+ * immediately requests a new one: it's quite possible to get the signal
+ * for the request before reaping the dead walreceiver process. Better to
+ * risk launching an extra walreceiver than to miss launching one we need.
+ * (The walreceiver code has logic to recognize that it should go away if
+ * not needed.)
*/
if (WalReceiverRequested)
{
- if (WalReceiverPID == 0 &&
+ if (WalReceiverPMChild == NULL &&
(pmState == PM_STARTUP || pmState == PM_RECOVERY ||
pmState == PM_HOT_STANDBY) &&
Shutdown <= SmartShutdown)
{
- WalReceiverPID = StartChildProcess(B_WAL_RECEIVER);
- if (WalReceiverPID != 0)
+ WalReceiverPMChild = StartChildProcess(B_WAL_RECEIVER);
+ if (WalReceiverPMChild != 0)
WalReceiverRequested = false;
/* else leave the flag set, so we'll try again later */
}
}
/* If we need to start a WAL summarizer, try to do that now */
- if (summarize_wal && WalSummarizerPID == 0 &&
+ if (summarize_wal && WalSummarizerPMChild == NULL &&
(pmState == PM_RUN || pmState == PM_HOT_STANDBY) &&
Shutdown <= SmartShutdown)
- WalSummarizerPID = StartChildProcess(B_WAL_SUMMARIZER);
+ WalSummarizerPMChild = StartChildProcess(B_WAL_SUMMARIZER);
/* Get other worker processes running, if needed */
if (StartWorkerNeeded || HaveCrashedWorker)
@@ -3304,8 +3227,10 @@ LaunchMissingBackgroundProcesses(void)
* child twice will not cause any problems.
*/
static void
-signal_child(pid_t pid, int signal)
+signal_child(PMChild *pmchild, int signal)
{
+ pid_t pid = pmchild->pid;
+
if (kill(pid, signal) < 0)
elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
#ifdef HAVE_SETSID
@@ -3334,17 +3259,17 @@ signal_child(pid_t pid, int signal)
* to use SIGABRT to collect per-child core dumps.
*/
static void
-sigquit_child(pid_t pid)
+sigquit_child(PMChild *pmchild)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
(send_abort_for_crash ? "SIGABRT" : "SIGQUIT"),
- (int) pid)));
- signal_child(pid, (send_abort_for_crash ? SIGABRT : SIGQUIT));
+ (int) pmchild->pid)));
+ signal_child(pmchild, (send_abort_for_crash ? SIGABRT : SIGQUIT));
}
/*
- * Send a signal to the targeted children (but NOT special children).
+ * Send a signal to the targeted children.
*/
static bool
SignalChildren(int signal, BackendTypeMask targetMask)
@@ -3352,9 +3277,9 @@ SignalChildren(int signal, BackendTypeMask targetMask)
dlist_iter iter;
bool signaled = false;
- dlist_foreach(iter, &BackendList)
+ dlist_foreach(iter, &ActiveChildList)
{
- Backend *bp = dlist_container(Backend, elem, iter.cur);
+ PMChild *bp = dlist_container(PMChild, elem, iter.cur);
/*
* If we need to distinguish between B_BACKEND and B_WAL_SENDER, check
@@ -3374,7 +3299,7 @@ SignalChildren(int signal, BackendTypeMask targetMask)
ereport(DEBUG4,
(errmsg_internal("sending signal %d to %s process %d",
signal, GetBackendTypeDesc(bp->bkend_type), (int) bp->pid)));
- signal_child(bp->pid, signal);
+ signal_child(bp, signal);
signaled = true;
}
return signaled;
@@ -3387,29 +3312,12 @@ SignalChildren(int signal, BackendTypeMask targetMask)
static void
TerminateChildren(int signal)
{
- SignalChildren(signal, BTYPE_MASK_ALL);
- if (StartupPID != 0)
+ SignalChildren(signal, btmask_all_except(B_LOGGER));
+ if (StartupPMChild != NULL)
{
- signal_child(StartupPID, signal);
if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT)
StartupStatus = STARTUP_SIGNALED;
}
- if (BgWriterPID != 0)
- signal_child(BgWriterPID, signal);
- if (CheckpointerPID != 0)
- signal_child(CheckpointerPID, signal);
- if (WalWriterPID != 0)
- signal_child(WalWriterPID, signal);
- if (WalReceiverPID != 0)
- signal_child(WalReceiverPID, signal);
- if (WalSummarizerPID != 0)
- signal_child(WalSummarizerPID, signal);
- if (AutoVacPID != 0)
- signal_child(AutoVacPID, signal);
- if (PgArchPID != 0)
- signal_child(PgArchPID, signal);
- if (SlotSyncWorkerPID != 0)
- signal_child(SlotSyncWorkerPID, signal);
}
/*
@@ -3417,49 +3325,56 @@ TerminateChildren(int signal)
*
* returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
*
- * Note: if you change this code, also consider StartAutovacuumWorker.
+ * Note: if you change this code, also consider StartAutovacuumWorker and
+ * StartBackgroundWorker.
*/
static int
BackendStartup(ClientSocket *client_sock)
{
- Backend *bn; /* for backend cleanup */
+ PMChild *bn = NULL;
pid_t pid;
BackendStartupData startup_data;
+ CAC_state cac;
/*
- * Create backend data structure. Better before the fork() so we can
- * handle failure cleanly.
+ * Allocate and assign the child slot. Note we must do this before
+ * forking, so that we can handle failures (out of memory or child-process
+ * slots) cleanly.
*/
- bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
+ cac = canAcceptConnections(B_BACKEND);
+ if (cac == CAC_OK)
+ {
+ /* Can change later to B_WAL_SENDER */
+ bn = AssignPostmasterChildSlot(B_BACKEND);
+ if (!bn)
+ {
+ /*
+ * Too many regular child processes; launch a dead-end child
+ * process instead.
+ */
+ cac = CAC_TOOMANY;
+ }
+ }
if (!bn)
{
- ereport(LOG,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
- return STATUS_ERROR;
+ bn = AllocDeadEndChild();
+ if (!bn)
+ {
+ ereport(LOG,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+ return STATUS_ERROR;
+ }
}
/* Pass down canAcceptConnections state */
- startup_data.canAcceptConnections = canAcceptConnections(B_BACKEND);
+ startup_data.canAcceptConnections = cac;
bn->rw = NULL;
- /*
- * Unless it's a dead_end child, assign it a child slot number
- */
- if (startup_data.canAcceptConnections == CAC_OK)
- {
- bn->bkend_type = B_BACKEND; /* Can change later to B_WAL_SENDER */
- bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
- }
- else
- {
- bn->bkend_type = B_DEAD_END_BACKEND;
- bn->child_slot = 0;
- }
-
/* Hasn't asked to be notified about any bgworkers yet */
bn->bgworker_notify = false;
+ MyPMChildSlot = bn->child_slot;
pid = postmaster_child_launch(bn->bkend_type,
(char *) &startup_data, sizeof(startup_data),
client_sock);
@@ -3468,9 +3383,7 @@ BackendStartup(ClientSocket *client_sock)
/* in parent, fork failed */
int save_errno = errno;
- if (bn->child_slot != 0)
- (void) ReleasePostmasterChildSlot(bn->child_slot);
- pfree(bn);
+ (void) ReleasePostmasterChildSlot(bn);
errno = save_errno;
ereport(LOG,
(errmsg("could not fork new process for connection: %m")));
@@ -3489,8 +3402,6 @@ BackendStartup(ClientSocket *client_sock)
* of backends.
*/
bn->pid = pid;
- dlist_push_head(&BackendList, &bn->elem);
-
return STATUS_OK;
}
@@ -3588,9 +3499,9 @@ process_pm_pmsignal(void)
* Start the archiver if we're responsible for (re-)archiving received
* files.
*/
- Assert(PgArchPID == 0);
+ Assert(PgArchPMChild == NULL);
if (XLogArchivingAlways())
- PgArchPID = StartChildProcess(B_ARCHIVER);
+ PgArchPMChild = StartChildProcess(B_ARCHIVER);
/*
* If we aren't planning to enter hot standby mode later, treat
@@ -3636,16 +3547,16 @@ process_pm_pmsignal(void)
}
/* Tell syslogger to rotate logfile if requested */
- if (SysLoggerPID != 0)
+ if (SysLoggerPMChild != NULL)
{
if (CheckLogrotateSignal())
{
- signal_child(SysLoggerPID, SIGUSR1);
+ signal_child(SysLoggerPMChild, SIGUSR1);
RemoveLogrotateSignalFiles();
}
else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE))
{
- signal_child(SysLoggerPID, SIGUSR1);
+ signal_child(SysLoggerPMChild, SIGUSR1);
}
}
@@ -3692,7 +3603,7 @@ process_pm_pmsignal(void)
PostmasterStateMachine();
}
- if (StartupPID != 0 &&
+ if (StartupPMChild != NULL &&
(pmState == PM_STARTUP || pmState == PM_RECOVERY ||
pmState == PM_HOT_STANDBY) &&
CheckPromoteSignal())
@@ -3703,7 +3614,7 @@ process_pm_pmsignal(void)
* Leave the promote signal file in place and let the Startup process
* do the unlink.
*/
- signal_child(StartupPID, SIGUSR2);
+ signal_child(StartupPMChild, SIGUSR2);
}
}
@@ -3722,8 +3633,7 @@ dummy_handler(SIGNAL_ARGS)
}
/*
- * Count up number of child processes of specified types (but NOT special
- * children).
+ * Count up number of child processes of specified types.
*/
static int
CountChildren(BackendTypeMask targetMask)
@@ -3731,9 +3641,9 @@ CountChildren(BackendTypeMask targetMask)
dlist_iter iter;
int cnt = 0;
- dlist_foreach(iter, &BackendList)
+ dlist_foreach(iter, &ActiveChildList)
{
- Backend *bp = dlist_container(Backend, elem, iter.cur);
+ PMChild *bp = dlist_container(PMChild, elem, iter.cur);
/*
* If we need to distinguish between B_BACKEND and B_WAL_SENDER, check
@@ -3750,6 +3660,10 @@ CountChildren(BackendTypeMask targetMask)
if (!btmask_contains(targetMask, bp->bkend_type))
continue;
+ ereport(DEBUG4,
+ (errmsg_internal("%s process %d is still running",
+ GetBackendTypeDesc(bp->bkend_type), (int) bp->pid)));
+
cnt++;
}
return cnt;
@@ -3762,18 +3676,36 @@ CountChildren(BackendTypeMask targetMask)
* "type" determines what kind of child will be started. All child types
* initially go to AuxiliaryProcessMain, which will handle common setup.
*
- * Return value of StartChildProcess is subprocess' PID, or 0 if failed
- * to start subprocess.
+ * Return value of StartChildProcess is subprocess' PMChild entry, or NULL on
+ * failure.
*/
-static pid_t
+static PMChild *
StartChildProcess(BackendType type)
{
+ PMChild *pmchild;
pid_t pid;
+ pmchild = AssignPostmasterChildSlot(type);
+ if (!pmchild)
+ {
+ if (type == B_AUTOVAC_WORKER)
+ ereport(LOG,
+ (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+ errmsg("no slot available for new autovacuum worker process")));
+ else
+ {
+ /* shouldn't happen because we allocate enough slots */
+ elog(LOG, "no postmaster child slot available for aux process");
+ }
+ return NULL;
+ }
+
+ MyPMChildSlot = pmchild->child_slot;
pid = postmaster_child_launch(type, NULL, 0, NULL);
if (pid < 0)
{
/* in parent, fork failed */
+ ReleasePostmasterChildSlot(pmchild);
ereport(LOG,
(errmsg("could not fork \"%s\" process: %m", PostmasterChildName(type))));
@@ -3783,13 +3715,31 @@ StartChildProcess(BackendType type)
*/
if (type == B_STARTUP)
ExitPostmaster(1);
- return 0;
+ return NULL;
}
- /*
- * in parent, successful fork
- */
- return pid;
+ /* in parent, successful fork */
+ pmchild->pid = pid;
+ return pmchild;
+}
+
+/*
+ * StartSysLogger -- start the syslogger process
+ */
+void
+StartSysLogger(void)
+{
+ Assert(SysLoggerPMChild == NULL);
+
+ SysLoggerPMChild = AssignPostmasterChildSlot(B_LOGGER);
+ if (!SysLoggerPMChild)
+ elog(PANIC, "no postmaster child slot available for syslogger");
+ SysLoggerPMChild->pid = SysLogger_Start(SysLoggerPMChild->child_slot);
+ if (SysLoggerPMChild->pid == 0)
+ {
+ ReleasePostmasterChildSlot(SysLoggerPMChild);
+ SysLoggerPMChild = NULL;
+ }
}
/*
@@ -3804,7 +3754,7 @@ StartChildProcess(BackendType type)
static void
StartAutovacuumWorker(void)
{
- Backend *bn;
+ PMChild *bn;
/*
* If not in condition to run a process, don't try, but handle it like a
@@ -3815,34 +3765,20 @@ StartAutovacuumWorker(void)
*/
if (canAcceptConnections(B_AUTOVAC_WORKER) == CAC_OK)
{
- bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
+ bn = StartChildProcess(B_AUTOVAC_WORKER);
if (bn)
{
- /* Autovac workers need a child slot */
- bn->bkend_type = B_AUTOVAC_WORKER;
- bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
bn->bgworker_notify = false;
bn->rw = NULL;
-
- bn->pid = StartChildProcess(B_AUTOVAC_WORKER);
- if (bn->pid > 0)
- {
- dlist_push_head(&BackendList, &bn->elem);
- /* all OK */
- return;
- }
-
+ return;
+ }
+ else
+ {
/*
* fork failed, fall through to report -- actual error message was
* logged by StartChildProcess
*/
- (void) ReleasePostmasterChildSlot(bn->child_slot);
- pfree(bn);
}
- else
- ereport(LOG,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
}
/*
@@ -3854,7 +3790,7 @@ StartAutovacuumWorker(void)
* quick succession between the autovac launcher and postmaster in case
* things get ugly.
*/
- if (AutoVacPID != 0)
+ if (AutoVacLauncherPMChild != NULL)
{
AutoVacWorkerFailed();
avlauncher_needs_signal = true;
@@ -3899,41 +3835,24 @@ CreateOptsFile(int argc, char *argv[], char *fullprogname)
/*
- * MaxLivePostmasterChildren
- *
- * This reports the number of entries needed in the per-child-process array
- * (PMChildFlags). It includes regular backends, autovac workers, walsenders
- * and background workers, but not special children nor dead_end children.
- * This allows the array to have a fixed maximum size, to wit the same
- * too-many-children limit enforced by canAcceptConnections(). The exact value
- * isn't too critical as long as it's more than MaxBackends.
- */
-int
-MaxLivePostmasterChildren(void)
-{
- return 2 * (MaxConnections + autovacuum_max_workers + 1 +
- max_wal_senders + max_worker_processes);
-}
-
-/*
* Start a new bgworker.
* Starting time conditions must have been checked already.
*
* Returns true on success, false on failure.
* In either case, update the RegisteredBgWorker's state appropriately.
*
- * This code is heavily based on autovacuum.c, q.v.
+ * NB -- this code very roughly matches BackendStartup.
*/
static bool
-do_start_bgworker(RegisteredBgWorker *rw)
+StartBackgroundWorker(RegisteredBgWorker *rw)
{
- Backend *bn;
+ PMChild *bn;
pid_t worker_pid;
Assert(rw->rw_pid == 0);
/*
- * Allocate and assign the Backend element. Note we must do this before
+ * Allocate and assign the child slot. Note we must do this before
* forking, so that we can handle failures (out of memory or child-process
* slots) cleanly.
*
@@ -3942,27 +3861,32 @@ do_start_bgworker(RegisteredBgWorker *rw)
* tried again right away, most likely we'd find ourselves hitting the
* same resource-exhaustion condition.
*/
- bn = assign_backendlist_entry();
+ bn = AssignPostmasterChildSlot(B_BG_WORKER);
if (bn == NULL)
{
+ ereport(LOG,
+ (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+ errmsg("no slot available for new background worker process")));
rw->rw_crashed_at = GetCurrentTimestamp();
return false;
}
bn->rw = rw;
+ bn->bkend_type = B_BG_WORKER;
+ bn->bgworker_notify = false;
ereport(DEBUG1,
(errmsg_internal("starting background worker process \"%s\"",
rw->rw_worker.bgw_name)));
+ MyPMChildSlot = bn->child_slot;
worker_pid = postmaster_child_launch(B_BG_WORKER, (char *) &rw->rw_worker, sizeof(BackgroundWorker), NULL);
if (worker_pid == -1)
{
/* in postmaster, fork failed ... */
ereport(LOG,
(errmsg("could not fork background worker process: %m")));
- /* undo what assign_backendlist_entry did */
- ReleasePostmasterChildSlot(bn->child_slot);
- pfree(bn);
+ /* undo what AssignPostmasterChildSlot did */
+ ReleasePostmasterChildSlot(bn);
/* mark entry as crashed, so we'll try again later */
rw->rw_crashed_at = GetCurrentTimestamp();
@@ -3973,8 +3897,6 @@ do_start_bgworker(RegisteredBgWorker *rw)
rw->rw_pid = worker_pid;
bn->pid = rw->rw_pid;
ReportBackgroundWorkerPID(rw);
- /* add new worker to lists of backends */
- dlist_push_head(&BackendList, &bn->elem);
return true;
}
@@ -4017,46 +3939,6 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
}
/*
- * Allocate the Backend struct for a connected background worker, but don't
- * add it to the list of backends just yet.
- *
- * On failure, return NULL.
- */
-static Backend *
-assign_backendlist_entry(void)
-{
- Backend *bn;
-
- /*
- * Check that database state allows another connection. Currently the
- * only possible failure is CAC_TOOMANY, so we just log an error message
- * based on that rather than checking the error code precisely.
- */
- if (canAcceptConnections(B_BG_WORKER) != CAC_OK)
- {
- ereport(LOG,
- (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
- errmsg("no slot available for new background worker process")));
- return NULL;
- }
-
- bn = palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
- if (bn == NULL)
- {
- ereport(LOG,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
- return NULL;
- }
-
- bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
- bn->bkend_type = B_BG_WORKER;
- bn->bgworker_notify = false;
-
- return bn;
-}
-
-/*
* If the time is right, start background worker(s).
*
* As a side effect, the bgworker control variables are set or reset
@@ -4160,7 +4042,7 @@ maybe_start_bgworkers(void)
* crashed, but there's no need because the next run of this
* function will do that.
*/
- if (!do_start_bgworker(rw))
+ if (!StartBackgroundWorker(rw))
{
StartWorkerNeeded = true;
return;
@@ -4190,11 +4072,11 @@ bool
PostmasterMarkPIDForWorkerNotify(int pid)
{
dlist_iter iter;
- Backend *bp;
+ PMChild *bp;
- dlist_foreach(iter, &BackendList)
+ dlist_foreach(iter, &ActiveChildList)
{
- bp = dlist_container(Backend, elem, iter.cur);
+ bp = dlist_container(PMChild, elem, iter.cur);
if (bp->pid == pid)
{
bp->bgworker_notify = true;
diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c
index 7951599fa87..7ca24c66634 100644
--- a/src/backend/postmaster/syslogger.c
+++ b/src/backend/postmaster/syslogger.c
@@ -590,7 +590,7 @@ SysLoggerMain(char *startup_data, size_t startup_data_len)
* Postmaster subroutine to start a syslogger subprocess.
*/
int
-SysLogger_Start(void)
+SysLogger_Start(int child_slot)
{
pid_t sysloggerPid;
char *filename;
@@ -598,8 +598,7 @@ SysLogger_Start(void)
SysloggerStartupData startup_data;
#endif /* EXEC_BACKEND */
- if (!Logging_collector)
- return 0;
+ Assert(Logging_collector);
/*
* If first time through, create the pipe which will receive stderr
@@ -695,6 +694,7 @@ SysLogger_Start(void)
pfree(filename);
}
+ MyPMChildSlot = child_slot;
#ifdef EXEC_BACKEND
startup_data.syslogFile = syslogger_fdget(syslogFile);
startup_data.csvlogFile = syslogger_fdget(csvlogFile);
diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c
index c801e9bec51..c764d6af4fc 100644
--- a/src/backend/storage/ipc/pmsignal.c
+++ b/src/backend/storage/ipc/pmsignal.c
@@ -47,11 +47,11 @@
* exited without performing proper shutdown. The per-child-process flags
* have three possible states: UNUSED, ASSIGNED, ACTIVE. An UNUSED slot is
* available for assignment. An ASSIGNED slot is associated with a postmaster
- * child process, but either the process has not touched shared memory yet,
- * or it has successfully cleaned up after itself. A ACTIVE slot means the
- * process is actively using shared memory. The slots are assigned to
- * child processes at random, and postmaster.c is responsible for tracking
- * which one goes with which PID.
+ * child process, but either the process has not touched shared memory yet, or
+ * it has successfully cleaned up after itself. An ACTIVE slot means the
+ * process is actively using shared memory. The slots are assigned to child
+ * processes by postmaster, and pmchild.c is responsible for tracking which
+ * one goes with which PID.
*
* Actually there is a fourth state, WALSENDER. This is just like ACTIVE,
* but carries the extra information that the child is a WAL sender.
@@ -84,13 +84,11 @@ struct PMSignalData
NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;
/*
- * These static variables are valid only in the postmaster. We keep a
- * duplicative private array so that we can trust its state even if some
- * failing child has clobbered the PMSignalData struct in shared memory.
+ * Local copy of PMSignalState->num_child_flags, only valid in the
+ * postmaster. Postmaster keeps a local copy so that it doesn't need to
+ * trust the value in shared memory.
*/
-static int num_child_inuse; /* # of entries in PMChildInUse[] */
-static int next_child_inuse; /* next slot to try to assign */
-static bool *PMChildInUse; /* true if i'th flag slot is assigned */
+static int num_child_flags;
/*
* Signal handler to be notified if postmaster dies.
@@ -155,25 +153,8 @@ PMSignalShmemInit(void)
{
/* initialize all flags to zeroes */
MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize());
- num_child_inuse = MaxLivePostmasterChildren();
- PMSignalState->num_child_flags = num_child_inuse;
-
- /*
- * Also allocate postmaster's private PMChildInUse[] array. We
- * might've already done that in a previous shared-memory creation
- * cycle, in which case free the old array to avoid a leak. (Do it
- * like this to support the possibility that MaxLivePostmasterChildren
- * changed.) In a standalone backend, we do not need this.
- */
- if (PostmasterContext != NULL)
- {
- if (PMChildInUse)
- pfree(PMChildInUse);
- PMChildInUse = (bool *)
- MemoryContextAllocZero(PostmasterContext,
- num_child_inuse * sizeof(bool));
- }
- next_child_inuse = 0;
+ num_child_flags = MaxLivePostmasterChildren();
+ PMSignalState->num_child_flags = num_child_flags;
}
}
@@ -239,56 +220,37 @@ GetQuitSignalReason(void)
/*
- * AssignPostmasterChildSlot - select an unused slot for a new postmaster
- * child process, and set its state to ASSIGNED. Returns a slot number
- * (one to N).
+ * MarkPostmasterChildSlotAssigned - mark the given slot as ASSIGNED for a
+ * new postmaster child process.
*
* Only the postmaster is allowed to execute this routine, so we need no
* special locking.
*/
-int
-AssignPostmasterChildSlot(void)
+void
+MarkPostmasterChildSlotAssigned(int slot)
{
- int slot = next_child_inuse;
- int n;
+ Assert(slot > 0 && slot <= num_child_flags);
+ slot--;
- /*
- * Scan for a free slot. Notice that we trust nothing about the contents
- * of PMSignalState, but use only postmaster-local data for this decision.
- * We track the last slot assigned so as not to waste time repeatedly
- * rescanning low-numbered slots.
- */
- for (n = num_child_inuse; n > 0; n--)
- {
- if (--slot < 0)
- slot = num_child_inuse - 1;
- if (!PMChildInUse[slot])
- {
- PMChildInUse[slot] = true;
- PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
- next_child_inuse = slot;
- return slot + 1;
- }
- }
+ if (PMSignalState->PMChildFlags[slot] != PM_CHILD_UNUSED)
+ elog(FATAL, "postmaster child slot is already in use");
- /* Out of slots ... should never happen, else postmaster.c messed up */
- elog(FATAL, "no free slots in PMChildFlags array");
- return 0; /* keep compiler quiet */
+ PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
}
/*
- * ReleasePostmasterChildSlot - release a slot after death of a postmaster
- * child process. This must be called in the postmaster process.
+ * MarkPostmasterChildSlotUnassigned - release a slot after death of a
+ * postmaster child process. This must be called in the postmaster process.
*
* Returns true if the slot had been in ASSIGNED state (the expected case),
* false otherwise (implying that the child failed to clean itself up).
*/
bool
-ReleasePostmasterChildSlot(int slot)
+MarkPostmasterChildSlotUnassigned(int slot)
{
bool result;
- Assert(slot > 0 && slot <= num_child_inuse);
+ Assert(slot > 0 && slot <= num_child_flags);
slot--;
/*
@@ -298,7 +260,6 @@ ReleasePostmasterChildSlot(int slot)
*/
result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
- PMChildInUse[slot] = false;
return result;
}
@@ -309,7 +270,7 @@ ReleasePostmasterChildSlot(int slot)
bool
IsPostmasterChildWalSender(int slot)
{
- Assert(slot > 0 && slot <= num_child_inuse);
+ Assert(slot > 0 && slot <= num_child_flags);
slot--;
if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 772e6fdbf28..720ef99ee83 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -353,14 +353,9 @@ InitProcess(void)
/*
* Before we start accessing the shared memory in a serious way, mark
* ourselves as an active postmaster child; this is so that the postmaster
- * can detect it if we exit without cleaning up. (XXX autovac launcher
- * currently doesn't participate in this; it probably should.)
- *
- * Slot sync worker also does not participate in it, see comments atop
- * 'struct bkend' in postmaster.c.
+ * can detect it if we exit without cleaning up.
*/
- if (IsUnderPostmaster && !AmAutoVacuumLauncherProcess() &&
- !AmLogicalSlotSyncWorkerProcess())
+ if (IsUnderPostmaster)
RegisterPostmasterChildActive();
/* Decide which list should supply our PGPROC. */
@@ -578,6 +573,9 @@ InitAuxiliaryProcess(void)
if (MyProc != NULL)
elog(ERROR, "you already exist");
+ if (IsUnderPostmaster)
+ RegisterPostmasterChildActive();
+
/*
* We use the ProcStructLock to protect assignment and releasing of
* AuxiliaryProcs entries.
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index aac0b96bbc6..184b8301687 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4189,6 +4189,12 @@ PostgresSingleUserMain(int argc, char *argv[],
/* Initialize MaxBackends */
InitializeMaxBackends();
+ /*
+ * We don't need postmaster child slots in single-user mode, but
+ * initialize them anyway to avoid having special handling.
+ */
+ InitPostmasterChildSlots();
+
/* Initialize size of fast-path lock cache. */
InitializeFastPathLocks();
diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h
index 309a91124bd..f55adc85efc 100644
--- a/src/include/postmaster/bgworker_internals.h
+++ b/src/include/postmaster/bgworker_internals.h
@@ -27,7 +27,7 @@
* List of background workers, private to postmaster.
*
* All workers that are currently running will also have an entry in
- * BackendList.
+ * ActiveChildList.
*/
typedef struct RegisteredBgWorker
{
diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h
index 63c12917cfe..a55e7a79fa4 100644
--- a/src/include/postmaster/postmaster.h
+++ b/src/include/postmaster/postmaster.h
@@ -13,8 +13,44 @@
#ifndef _POSTMASTER_H
#define _POSTMASTER_H
+#include "lib/ilist.h"
#include "miscadmin.h"
+/*
+ * A struct representing an active postmaster child process. This is used
+ * mainly to keep track of how many children we have and send them appropriate
+ * signals when necessary. All postmaster child processes are assigned a
+ * PMChild entry. That includes "normal" client sessions, but also autovacuum
+ * workers, walsenders, background workers, and aux processes. (Note that at
+ * the time of launch, walsenders are labeled B_BACKEND; we relabel them to
+ * B_WAL_SENDER upon noticing they've changed their PMChildFlags entry. Hence
+ * that check must be done before any operation that needs to distinguish
+ * walsenders from normal backends.)
+ *
+ * "dead-end" children are also allocated a PMChild entry: these are children
+ * launched just for the purpose of sending a friendly rejection message to a
+ * would-be client. We must track them because they are attached to shared
+ * memory, but we know they will never become live backends.
+ *
+ * child_slot is an identifier that is unique across all running child
+ * processes. It is used as an index into the PMChildFlags array. Dead-end
+ * children are not assigned a child_slot and have child_slot == 0 (valid
+ * child_slot ids start from 1).
+ */
+typedef struct
+{
+ pid_t pid; /* process id of backend */
+ int child_slot; /* PMChildSlot for this backend, if any */
+ BackendType bkend_type; /* child process flavor, see above */
+ struct RegisteredBgWorker *rw; /* bgworker info, if this is a bgworker */
+ bool bgworker_notify; /* gets bgworker start/stop notifications */
+ dlist_node elem; /* list link in ActiveChildList */
+} PMChild;
+
+#ifdef EXEC_BACKEND
+extern int num_pmchild_slots;
+#endif
+
/* GUC options */
extern PGDLLIMPORT bool EnableSSL;
extern PGDLLIMPORT int SuperuserReservedConnections;
@@ -80,6 +116,15 @@ const char *PostmasterChildName(BackendType child_type);
extern void SubPostmasterMain(int argc, char *argv[]) pg_attribute_noreturn();
#endif
+/* defined in pmchild.c */
+extern dlist_head ActiveChildList;
+
+extern void InitPostmasterChildSlots(void);
+extern PMChild *AssignPostmasterChildSlot(BackendType btype);
+extern PMChild *AllocDeadEndChild(void);
+extern bool ReleasePostmasterChildSlot(PMChild *pmchild);
+extern PMChild *FindPostmasterChildByPid(int pid);
+
/*
* Note: MAX_BACKENDS is limited to 2^18-1 because that's the width reserved
* for buffer references in buf_internals.h. This limitation could be lifted
diff --git a/src/include/postmaster/syslogger.h b/src/include/postmaster/syslogger.h
index 94ea263f2bf..27bd16ae1da 100644
--- a/src/include/postmaster/syslogger.h
+++ b/src/include/postmaster/syslogger.h
@@ -86,7 +86,7 @@ extern PGDLLIMPORT HANDLE syslogPipe[2];
#endif
-extern int SysLogger_Start(void);
+extern int SysLogger_Start(int child_slot);
extern void write_syslogger_file(const char *buffer, int count, int destination);
diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h
index ce4620af1f3..ea554ae895b 100644
--- a/src/include/storage/pmsignal.h
+++ b/src/include/storage/pmsignal.h
@@ -70,8 +70,8 @@ extern void SendPostmasterSignal(PMSignalReason reason);
extern bool CheckPostmasterSignal(PMSignalReason reason);
extern void SetQuitSignalReason(QuitSignalReason reason);
extern QuitSignalReason GetQuitSignalReason(void);
-extern int AssignPostmasterChildSlot(void);
-extern bool ReleasePostmasterChildSlot(int slot);
+extern void MarkPostmasterChildSlotAssigned(int slot);
+extern bool MarkPostmasterChildSlotUnassigned(int slot);
extern bool IsPostmasterChildWalSender(int slot);
extern void RegisterPostmasterChildActive(void);
extern void MarkPostmasterChildWalSender(void);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 50bf81de2bc..5fbf0efe832 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -230,7 +230,6 @@ BTWriteState
BUF_MEM
BYTE
BY_HANDLE_FILE_INFORMATION
-Backend
BackendParameters
BackendStartupData
BackendState
@@ -1932,6 +1931,8 @@ PLyTransformToOb
PLyTupleToOb
PLyUnicode_FromStringAndSize_t
PLy_elog_impl_t
+PMChild
+PMChildPool
PMINIDUMP_CALLBACK_INFORMATION
PMINIDUMP_EXCEPTION_INFORMATION
PMINIDUMP_USER_STREAM_INFORMATION