diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/bootstrap/bootstrap.c | 8 | ||||
-rw-r--r-- | src/backend/postmaster/Makefile | 1 | ||||
-rw-r--r-- | src/backend/postmaster/launch_backend.c | 3 | ||||
-rw-r--r-- | src/backend/postmaster/meson.build | 1 | ||||
-rw-r--r-- | src/backend/postmaster/pmchild.c | 285 | ||||
-rw-r--r-- | src/backend/postmaster/postmaster.c | 912 | ||||
-rw-r--r-- | src/backend/postmaster/syslogger.c | 6 | ||||
-rw-r--r-- | src/backend/storage/ipc/pmsignal.c | 89 | ||||
-rw-r--r-- | src/backend/storage/lmgr/proc.c | 12 | ||||
-rw-r--r-- | src/backend/tcop/postgres.c | 6 | ||||
-rw-r--r-- | src/include/postmaster/bgworker_internals.h | 2 | ||||
-rw-r--r-- | src/include/postmaster/postmaster.h | 45 | ||||
-rw-r--r-- | src/include/postmaster/syslogger.h | 2 | ||||
-rw-r--r-- | src/include/storage/pmsignal.h | 4 | ||||
-rw-r--r-- | src/tools/pgindent/typedefs.list | 3 |
15 files changed, 785 insertions, 594 deletions
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index ed59dfce893..d31a67599c9 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -31,6 +31,7 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "pg_getopt.h" +#include "postmaster/postmaster.h" #include "storage/bufpage.h" #include "storage/ipc.h" #include "storage/proc.h" @@ -309,6 +310,13 @@ BootstrapModeMain(int argc, char *argv[], bool check_only) InitializeMaxBackends(); + /* + * Even though bootstrapping runs in single-process mode, initialize + * postmaster child slots array so that --check can detect running out of + * shared memory or other resources if max_connections is set too high. + */ + InitPostmasterChildSlots(); + InitializeFastPathLocks(); CreateSharedMemoryAndSemaphores(); diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile index db08543d195..0f4435d2d97 100644 --- a/src/backend/postmaster/Makefile +++ b/src/backend/postmaster/Makefile @@ -22,6 +22,7 @@ OBJS = \ interrupt.o \ launch_backend.o \ pgarch.o \ + pmchild.o \ postmaster.o \ startup.o \ syslogger.o \ diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index 423e6120438..6ce75f6f77d 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -118,6 +118,7 @@ typedef struct bool query_id_enabled; int max_safe_fds; int MaxBackends; + int num_pmchild_slots; #ifdef WIN32 HANDLE PostmasterHandle; HANDLE initial_signal_pipe; @@ -735,6 +736,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock, param->max_safe_fds = max_safe_fds; param->MaxBackends = MaxBackends; + param->num_pmchild_slots = num_pmchild_slots; #ifdef WIN32 param->PostmasterHandle = PostmasterHandle; @@ -994,6 +996,7 @@ restore_backend_variables(BackendParameters *param) max_safe_fds = param->max_safe_fds; MaxBackends = param->MaxBackends; + num_pmchild_slots = param->num_pmchild_slots; #ifdef WIN32 PostmasterHandle = param->PostmasterHandle; diff --git a/src/backend/postmaster/meson.build b/src/backend/postmaster/meson.build index 0ea4bbe084e..0e80f209863 100644 --- a/src/backend/postmaster/meson.build +++ b/src/backend/postmaster/meson.build @@ -10,6 +10,7 @@ backend_sources += files( 'interrupt.c', 'launch_backend.c', 'pgarch.c', + 'pmchild.c', 'postmaster.c', 'startup.c', 'syslogger.c', diff --git a/src/backend/postmaster/pmchild.c b/src/backend/postmaster/pmchild.c new file mode 100644 index 00000000000..381cf005a9b --- /dev/null +++ b/src/backend/postmaster/pmchild.c @@ -0,0 +1,285 @@ +/*------------------------------------------------------------------------- + * + * pmchild.c + * Functions for keeping track of postmaster child processes. + * + * Postmaster keeps track of all child processes so that when a process exits, + * it knows what kind of a process it was and can clean up accordingly. Every + * child process is allocated a PMChild struct from a fixed pool of structs. + * The size of the pool is determined by various settings that configure how + * many worker processes and backend connections are allowed, i.e. + * autovacuum_max_workers, max_worker_processes, max_wal_senders, and + * max_connections. + * + * Dead-end backends are handled slightly differently. There is no limit + * on the number of dead-end backends, and they do not need unique IDs, so + * their PMChild structs are allocated dynamically, not from a pool. + * + * The structures and functions in this file are private to the postmaster + * process. But note that there is an array in shared memory, managed by + * pmsignal.c, that mirrors this. + * + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/postmaster/pmchild.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "postmaster/autovacuum.h" +#include "postmaster/postmaster.h" +#include "replication/walsender.h" +#include "storage/pmsignal.h" +#include "storage/proc.h" + +/* + * Freelists for different kinds of child processes. We maintain separate + * pools for each, so that for example launching a lot of regular backends + * cannot prevent autovacuum or an aux process from launching. + */ +typedef struct PMChildPool +{ + int size; /* number of PMChild slots reserved for this + * kind of processes */ + int first_slotno; /* first slot belonging to this pool */ + dlist_head freelist; /* currently unused PMChild entries */ +} PMChildPool; + +static PMChildPool pmchild_pools[BACKEND_NUM_TYPES]; +NON_EXEC_STATIC int num_pmchild_slots = 0; + +/* + * List of active child processes. This includes dead-end children. + */ +dlist_head ActiveChildList; + +/* + * MaxLivePostmasterChildren + * + * This reports the number of postmaster child processes that can be active. + * It includes all children except for dead-end children. This allows the + * array in shared memory (PMChildFlags) to have a fixed maximum size. + */ +int +MaxLivePostmasterChildren(void) +{ + if (num_pmchild_slots == 0) + elog(ERROR, "PM child array not initialized yet"); + return num_pmchild_slots; +} + +/* + * Initialize at postmaster startup + * + * Note: This is not called on crash restart. We rely on PMChild entries to + * remain valid through the restart process. This is important because the + * syslogger survives through the crash restart process, so we must not + * invalidate its PMChild slot. + */ +void +InitPostmasterChildSlots(void) +{ + int slotno; + PMChild *slots; + + /* + * We allow more connections here than we can have backends because some + * might still be authenticating; they might fail auth, or some existing + * backend might exit before the auth cycle is completed. The exact + * MaxConnections limit is enforced when a new backend tries to join the + * PGPROC array. + * + * WAL senders start out as regular backends, so they share the same pool. + */ + pmchild_pools[B_BACKEND].size = 2 * (MaxConnections + max_wal_senders); + + pmchild_pools[B_AUTOVAC_WORKER].size = autovacuum_max_workers; + pmchild_pools[B_BG_WORKER].size = max_worker_processes; + + /* + * There can be only one of each of these running at a time. They each + * get their own pool of just one entry. + */ + pmchild_pools[B_AUTOVAC_LAUNCHER].size = 1; + pmchild_pools[B_SLOTSYNC_WORKER].size = 1; + pmchild_pools[B_ARCHIVER].size = 1; + pmchild_pools[B_BG_WRITER].size = 1; + pmchild_pools[B_CHECKPOINTER].size = 1; + pmchild_pools[B_STARTUP].size = 1; + pmchild_pools[B_WAL_RECEIVER].size = 1; + pmchild_pools[B_WAL_SUMMARIZER].size = 1; + pmchild_pools[B_WAL_WRITER].size = 1; + pmchild_pools[B_LOGGER].size = 1; + + /* The rest of the pmchild_pools are left at zero size */ + + /* Count the total number of slots */ + num_pmchild_slots = 0; + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + num_pmchild_slots += pmchild_pools[i].size; + + /* Initialize them */ + slots = palloc(num_pmchild_slots * sizeof(PMChild)); + slotno = 0; + for (int btype = 0; btype < BACKEND_NUM_TYPES; btype++) + { + pmchild_pools[btype].first_slotno = slotno + 1; + dlist_init(&pmchild_pools[btype].freelist); + + for (int j = 0; j < pmchild_pools[btype].size; j++) + { + slots[slotno].pid = 0; + slots[slotno].child_slot = slotno + 1; + slots[slotno].bkend_type = B_INVALID; + slots[slotno].rw = NULL; + slots[slotno].bgworker_notify = false; + dlist_push_tail(&pmchild_pools[btype].freelist, &slots[slotno].elem); + slotno++; + } + } + Assert(slotno == num_pmchild_slots); + + /* Initialize other structures */ + dlist_init(&ActiveChildList); +} + +/* + * Allocate a PMChild entry for a postmaster child process of given type. + * + * The entry is taken from the right pool for the type. + * + * pmchild->child_slot in the returned struct is unique among all active child + * processes. + */ +PMChild * +AssignPostmasterChildSlot(BackendType btype) +{ + dlist_head *freelist; + PMChild *pmchild; + + if (pmchild_pools[btype].size == 0) + elog(ERROR, "cannot allocate a PMChild slot for backend type %d", btype); + + freelist = &pmchild_pools[btype].freelist; + if (dlist_is_empty(freelist)) + return NULL; + + pmchild = dlist_container(PMChild, elem, dlist_pop_head_node(freelist)); + pmchild->pid = 0; + pmchild->bkend_type = btype; + pmchild->rw = NULL; + pmchild->bgworker_notify = true; + + /* + * pmchild->child_slot for each entry was initialized when the array of + * slots was allocated. Sanity check it. + */ + if (!(pmchild->child_slot >= pmchild_pools[btype].first_slotno && + pmchild->child_slot < pmchild_pools[btype].first_slotno + pmchild_pools[btype].size)) + { + elog(ERROR, "pmchild freelist for backend type %d is corrupt", + pmchild->bkend_type); + } + + dlist_push_head(&ActiveChildList, &pmchild->elem); + + /* Update the status in the shared memory array */ + MarkPostmasterChildSlotAssigned(pmchild->child_slot); + + elog(DEBUG2, "assigned pm child slot %d for %s", + pmchild->child_slot, PostmasterChildName(btype)); + + return pmchild; +} + +/* + * Allocate a PMChild struct for a dead-end backend. Dead-end children are + * not assigned a child_slot number. The struct is palloc'd; returns NULL if + * out of memory. + */ +PMChild * +AllocDeadEndChild(void) +{ + PMChild *pmchild; + + elog(DEBUG2, "allocating dead-end child"); + + pmchild = (PMChild *) palloc_extended(sizeof(PMChild), MCXT_ALLOC_NO_OOM); + if (pmchild) + { + pmchild->pid = 0; + pmchild->child_slot = 0; + pmchild->bkend_type = B_DEAD_END_BACKEND; + pmchild->rw = NULL; + pmchild->bgworker_notify = false; + + dlist_push_head(&ActiveChildList, &pmchild->elem); + } + + return pmchild; +} + +/* + * Release a PMChild slot, after the child process has exited. + * + * Returns true if the child detached cleanly from shared memory, false + * otherwise (see MarkPostmasterChildSlotUnassigned). + */ +bool +ReleasePostmasterChildSlot(PMChild *pmchild) +{ + dlist_delete(&pmchild->elem); + if (pmchild->bkend_type == B_DEAD_END_BACKEND) + { + elog(DEBUG2, "releasing dead-end backend"); + pfree(pmchild); + return true; + } + else + { + PMChildPool *pool; + + elog(DEBUG2, "releasing pm child slot %d", pmchild->child_slot); + + /* WAL senders start out as regular backends, and share the pool */ + if (pmchild->bkend_type == B_WAL_SENDER) + pool = &pmchild_pools[B_BACKEND]; + else + pool = &pmchild_pools[pmchild->bkend_type]; + + /* sanity check that we return the entry to the right pool */ + if (!(pmchild->child_slot >= pool->first_slotno && + pmchild->child_slot < pool->first_slotno + pool->size)) + { + elog(ERROR, "pmchild freelist for backend type %d is corrupt", + pmchild->bkend_type); + } + + dlist_push_head(&pool->freelist, &pmchild->elem); + return MarkPostmasterChildSlotUnassigned(pmchild->child_slot); + } +} + +/* + * Find the PMChild entry of a running child process by PID. + */ +PMChild * +FindPostmasterChildByPid(int pid) +{ + dlist_iter iter; + + dlist_foreach(iter, &ActiveChildList) + { + PMChild *bp = dlist_container(PMChild, elem, iter.cur); + + if (bp->pid == pid) + return bp; + } + return NULL; +} diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index b5300949843..4129c71efad 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -139,9 +139,7 @@ typedef struct StaticAssertDecl(BACKEND_NUM_TYPES < 32, "too many backend types for uint32"); static const BackendTypeMask BTYPE_MASK_ALL = {(1 << BACKEND_NUM_TYPES) - 1}; -#if 0 /* unused */ static const BackendTypeMask BTYPE_MASK_NONE = {0}; -#endif static inline BackendTypeMask btmask(BackendType t) @@ -151,14 +149,12 @@ btmask(BackendType t) return mask; } -#if 0 /* unused */ static inline BackendTypeMask btmask_add(BackendTypeMask mask, BackendType t) { mask.mask |= 1 << t; return mask; } -#endif static inline BackendTypeMask btmask_del(BackendTypeMask mask, BackendType t) @@ -192,48 +188,9 @@ btmask_contains(BackendTypeMask mask, BackendType t) return (mask.mask & (1 << t)) != 0; } -/* - * List of active backends (or child processes anyway; we don't actually - * know whether a given child has become a backend or is still in the - * authorization phase). This is used mainly to keep track of how many - * children we have and send them appropriate signals when necessary. - * - * As shown in the above set of backend types, this list includes not only - * "normal" client sessions, but also autovacuum workers, walsenders, and - * background workers. (Note that at the time of launch, walsenders are - * labeled B_BACKEND; we relabel them to B_WAL_SENDER - * upon noticing they've changed their PMChildFlags entry. Hence that check - * must be done before any operation that needs to distinguish walsenders - * from normal backends.) - * - * Also, "dead_end" children are in it: these are children launched just for - * the purpose of sending a friendly rejection message to a would-be client. - * We must track them because they are attached to shared memory, but we know - * they will never become live backends. dead_end children are not assigned a - * PMChildSlot. dead_end children have bkend_type B_DEAD_END_BACKEND. - * - * "Special" children such as the startup, bgwriter, autovacuum launcher, and - * slot sync worker tasks are not in this list. They are tracked via StartupPID - * and other pid_t variables below. (Thus, there can't be more than one of any - * given "special" child process type. We use BackendList entries for any - * child process there can be more than one of.) - */ -typedef struct bkend -{ - pid_t pid; /* process id of backend */ - int child_slot; /* PMChildSlot for this backend, if any */ - BackendType bkend_type; /* child process flavor, see above */ - RegisteredBgWorker *rw; /* bgworker info, if this is a bgworker */ - bool bgworker_notify; /* gets bgworker start/stop notifications */ - dlist_node elem; /* list link in BackendList */ -} Backend; - -static dlist_head BackendList = DLIST_STATIC_INIT(BackendList); BackgroundWorker *MyBgworkerEntry = NULL; - - /* The socket number we are listening for connections on */ int PostPortNumber = DEF_PGPORT; @@ -285,17 +242,17 @@ bool remove_temp_files_after_crash = true; bool send_abort_for_crash = false; bool send_abort_for_kill = false; -/* PIDs of special child processes; 0 when not running */ -static pid_t StartupPID = 0, - BgWriterPID = 0, - CheckpointerPID = 0, - WalWriterPID = 0, - WalReceiverPID = 0, - WalSummarizerPID = 0, - AutoVacPID = 0, - PgArchPID = 0, - SysLoggerPID = 0, - SlotSyncWorkerPID = 0; +/* special child processes; NULL when not running */ +static PMChild *StartupPMChild = NULL, + *BgWriterPMChild = NULL, + *CheckpointerPMChild = NULL, + *WalWriterPMChild = NULL, + *WalReceiverPMChild = NULL, + *WalSummarizerPMChild = NULL, + *AutoVacLauncherPMChild = NULL, + *PgArchPMChild = NULL, + *SysLoggerPMChild = NULL, + *SlotSyncWorkerPMChild = NULL; /* Startup process's status */ typedef enum @@ -341,13 +298,13 @@ static bool FatalError = false; /* T if recovering from backend crash */ * * Normal child backends can only be launched when we are in PM_RUN or * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.) - * In other states we handle connection requests by launching "dead_end" + * In other states we handle connection requests by launching "dead-end" * child processes, which will simply send the client an error message and - * quit. (We track these in the BackendList so that we can know when they + * quit. (We track these in the ActiveChildList so that we can know when they * are all gone; this is important because they're still connected to shared * memory, and would interfere with an attempt to destroy the shmem segment, * possibly leading to SHMALL failure when we try to make a new one.) - * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children + * In PM_WAIT_DEAD_END state we are waiting for all the dead-end children * to drain out of the system, and therefore stop accepting connection * requests at all until the last existing child has quit (which hopefully * will not be very long). @@ -372,7 +329,7 @@ typedef enum * ckpt */ PM_SHUTDOWN_2, /* waiting for archiver and walsenders to * finish */ - PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */ + PM_WAIT_DEAD_END, /* waiting for dead-end children to exit */ PM_NO_CHILDREN, /* all important children have exited */ } PMState; @@ -449,7 +406,7 @@ static void process_pm_child_exit(void); static void process_pm_reload_request(void); static void process_pm_shutdown_request(void); static void dummy_handler(SIGNAL_ARGS); -static void CleanupBackend(Backend *bp, int exitstatus); +static void CleanupBackend(PMChild *bp, int exitstatus); static void HandleChildCrash(int pid, int exitstatus, const char *procname); static void LogChildExit(int lev, const char *procname, int pid, int exitstatus); @@ -460,17 +417,18 @@ static int ServerLoop(void); static int BackendStartup(ClientSocket *client_sock); static void report_fork_failure_to_client(ClientSocket *client_sock, int errnum); static CAC_state canAcceptConnections(BackendType backend_type); -static void signal_child(pid_t pid, int signal); -static void sigquit_child(pid_t pid); +static void signal_child(PMChild *pmchild, int signal); +static void sigquit_child(PMChild *pmchild); static bool SignalChildren(int signal, BackendTypeMask targetMask); static void TerminateChildren(int signal); static int CountChildren(BackendTypeMask targetMask); -static Backend *assign_backendlist_entry(void); static void LaunchMissingBackgroundProcesses(void); static void maybe_start_bgworkers(void); static bool CreateOptsFile(int argc, char *argv[], char *fullprogname); -static pid_t StartChildProcess(BackendType type); +static PMChild *StartChildProcess(BackendType type); +static void StartSysLogger(void); static void StartAutovacuumWorker(void); +static bool StartBackgroundWorker(RegisteredBgWorker *rw); static void InitPostmasterDeathWatchHandle(void); #ifdef WIN32 @@ -948,9 +906,11 @@ PostmasterMain(int argc, char *argv[]) /* * Now that loadable modules have had their chance to alter any GUCs, - * calculate MaxBackends. + * calculate MaxBackends and initialize the machinery to track child + * processes. */ InitializeMaxBackends(); + InitPostmasterChildSlots(); /* * Calculate the size of the PGPROC fast-path lock arrays. @@ -1079,7 +1039,8 @@ PostmasterMain(int argc, char *argv[]) /* * If enabled, start up syslogger collection subprocess */ - SysLoggerPID = SysLogger_Start(); + if (Logging_collector) + StartSysLogger(); /* * Reset whereToSendOutput from DestDebug (its starting state) to @@ -1381,16 +1342,16 @@ PostmasterMain(int argc, char *argv[]) AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING); /* Start bgwriter and checkpointer so they can help with recovery */ - if (CheckpointerPID == 0) - CheckpointerPID = StartChildProcess(B_CHECKPOINTER); - if (BgWriterPID == 0) - BgWriterPID = StartChildProcess(B_BG_WRITER); + if (CheckpointerPMChild == NULL) + CheckpointerPMChild = StartChildProcess(B_CHECKPOINTER); + if (BgWriterPMChild == NULL) + BgWriterPMChild = StartChildProcess(B_BG_WRITER); /* * We're ready to rock and roll... */ - StartupPID = StartChildProcess(B_STARTUP); - Assert(StartupPID != 0); + StartupPMChild = StartChildProcess(B_STARTUP); + Assert(StartupPMChild != NULL); StartupStatus = STARTUP_RUNNING; pmState = PM_STARTUP; @@ -1720,8 +1681,8 @@ ServerLoop(void) if (avlauncher_needs_signal) { avlauncher_needs_signal = false; - if (AutoVacPID != 0) - kill(AutoVacPID, SIGUSR2); + if (AutoVacLauncherPMChild != NULL) + kill(AutoVacLauncherPMChild->pid, SIGUSR2); } #ifdef HAVE_PTHREAD_IS_THREADED_NP @@ -1803,23 +1764,23 @@ ServerLoop(void) /* * canAcceptConnections --- check to see if database state allows connections - * of the specified type. backend_type can be B_BACKEND, B_AUTOVAC_WORKER, or - * B_BG_WORKER. (Note that we don't yet know whether a normal B_BACKEND - * connection might turn into a walsender.) + * of the specified type. backend_type can be B_BACKEND or B_AUTOVAC_WORKER. + * (Note that we don't yet know whether a normal B_BACKEND connection might + * turn into a walsender.) */ static CAC_state canAcceptConnections(BackendType backend_type) { CAC_state result = CAC_OK; + Assert(backend_type == B_BACKEND || backend_type == B_AUTOVAC_WORKER); + /* * Can't start backends when in startup/shutdown/inconsistent recovery * state. We treat autovac workers the same as user backends for this - * purpose. However, bgworkers are excluded from this test; we expect - * bgworker_should_start_now() decided whether the DB state allows them. + * purpose. */ - if (pmState != PM_RUN && pmState != PM_HOT_STANDBY && - backend_type != B_BG_WORKER) + if (pmState != PM_RUN && pmState != PM_HOT_STANDBY) { if (Shutdown > NoShutdown) return CAC_SHUTDOWN; /* shutdown is pending */ @@ -1834,26 +1795,11 @@ canAcceptConnections(BackendType backend_type) /* * "Smart shutdown" restrictions are applied only to normal connections, - * not to autovac workers or bgworkers. + * not to autovac workers. */ if (!connsAllowed && backend_type == B_BACKEND) return CAC_SHUTDOWN; /* shutdown is pending */ - /* - * Don't start too many children. - * - * We allow more connections here than we can have backends because some - * might still be authenticating; they might fail auth, or some existing - * backend might exit before the auth cycle is completed. The exact - * MaxBackends limit is enforced when a new backend tries to join the - * shared-inval backend array. - * - * The limit here must match the sizes of the per-child-process arrays; - * see comments for MaxLivePostmasterChildren(). - */ - if (CountChildren(btmask_all_except(B_DEAD_END_BACKEND)) >= MaxLivePostmasterChildren()) - result = CAC_TOOMANY; - return result; } @@ -2021,26 +1967,6 @@ process_pm_reload_request(void) (errmsg("received SIGHUP, reloading configuration files"))); ProcessConfigFile(PGC_SIGHUP); SignalChildren(SIGHUP, btmask_all_except(B_DEAD_END_BACKEND)); - if (StartupPID != 0) - signal_child(StartupPID, SIGHUP); - if (BgWriterPID != 0) - signal_child(BgWriterPID, SIGHUP); - if (CheckpointerPID != 0) - signal_child(CheckpointerPID, SIGHUP); - if (WalWriterPID != 0) - signal_child(WalWriterPID, SIGHUP); - if (WalReceiverPID != 0) - signal_child(WalReceiverPID, SIGHUP); - if (WalSummarizerPID != 0) - signal_child(WalSummarizerPID, SIGHUP); - if (AutoVacPID != 0) - signal_child(AutoVacPID, SIGHUP); - if (PgArchPID != 0) - signal_child(PgArchPID, SIGHUP); - if (SysLoggerPID != 0) - signal_child(SysLoggerPID, SIGHUP); - if (SlotSyncWorkerPID != 0) - signal_child(SlotSyncWorkerPID, SIGHUP); /* Reload authentication config files too */ if (!load_hba()) @@ -2278,15 +2204,15 @@ process_pm_child_exit(void) while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0) { - bool found; - dlist_mutable_iter iter; + PMChild *pmchild; /* * Check if this child was a startup process. */ - if (pid == StartupPID) + if (StartupPMChild && pid == StartupPMChild->pid) { - StartupPID = 0; + ReleasePostmasterChildSlot(StartupPMChild); + StartupPMChild = NULL; /* * Startup process exited in response to a shutdown request (or it @@ -2339,7 +2265,7 @@ process_pm_child_exit(void) * restart in that case. * * This stanza also handles the case where we sent a SIGQUIT - * during PM_STARTUP due to some dead_end child crashing: in that + * during PM_STARTUP due to some dead-end child crashing: in that * situation, if the startup process dies on the SIGQUIT, we need * to transition to PM_WAIT_BACKENDS state which will allow * PostmasterStateMachine to restart the startup process. (On the @@ -2397,9 +2323,10 @@ process_pm_child_exit(void) * one at the next iteration of the postmaster's main loop, if * necessary. Any other exit condition is treated as a crash. */ - if (pid == BgWriterPID) + if (BgWriterPMChild && pid == BgWriterPMChild->pid) { - BgWriterPID = 0; + ReleasePostmasterChildSlot(BgWriterPMChild); + BgWriterPMChild = NULL; if (!EXIT_STATUS_0(exitstatus)) HandleChildCrash(pid, exitstatus, _("background writer process")); @@ -2409,9 +2336,10 @@ process_pm_child_exit(void) /* * Was it the checkpointer? */ - if (pid == CheckpointerPID) + if (CheckpointerPMChild && pid == CheckpointerPMChild->pid) { - CheckpointerPID = 0; + ReleasePostmasterChildSlot(CheckpointerPMChild); + CheckpointerPMChild = NULL; if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN) { /* @@ -2422,7 +2350,7 @@ process_pm_child_exit(void) * * At this point we should have no normal backend children * left (else we'd not be in PM_SHUTDOWN state) but we might - * have dead_end children to wait for. + * have dead-end children to wait for. * * If we have an archiver subprocess, tell it to do a last * archive cycle and quit. Likewise, if we have walsender @@ -2431,8 +2359,8 @@ process_pm_child_exit(void) Assert(Shutdown > NoShutdown); /* Waken archiver for the last time */ - if (PgArchPID != 0) - signal_child(PgArchPID, SIGUSR2); + if (PgArchPMChild != NULL) + signal_child(PgArchPMChild, SIGUSR2); /* * Waken walsenders for the last time. No regular backends @@ -2460,9 +2388,10 @@ process_pm_child_exit(void) * new one at the next iteration of the postmaster's main loop, if * necessary. Any other exit condition is treated as a crash. */ - if (pid == WalWriterPID) + if (WalWriterPMChild && pid == WalWriterPMChild->pid) { - WalWriterPID = 0; + ReleasePostmasterChildSlot(WalWriterPMChild); + WalWriterPMChild = NULL; if (!EXIT_STATUS_0(exitstatus)) HandleChildCrash(pid, exitstatus, _("WAL writer process")); @@ -2475,9 +2404,10 @@ process_pm_child_exit(void) * backends. (If we need a new wal receiver, we'll start one at the * next iteration of the postmaster's main loop.) */ - if (pid == WalReceiverPID) + if (WalReceiverPMChild && pid == WalReceiverPMChild->pid) { - WalReceiverPID = 0; + ReleasePostmasterChildSlot(WalReceiverPMChild); + WalReceiverPMChild = NULL; if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) HandleChildCrash(pid, exitstatus, _("WAL receiver process")); @@ -2489,9 +2419,10 @@ process_pm_child_exit(void) * a new one at the next iteration of the postmaster's main loop, if * necessary. Any other exit condition is treated as a crash. */ - if (pid == WalSummarizerPID) + if (WalSummarizerPMChild && pid == WalSummarizerPMChild->pid) { - WalSummarizerPID = 0; + ReleasePostmasterChildSlot(WalSummarizerPMChild); + WalSummarizerPMChild = NULL; if (!EXIT_STATUS_0(exitstatus)) HandleChildCrash(pid, exitstatus, _("WAL summarizer process")); @@ -2504,9 +2435,10 @@ process_pm_child_exit(void) * loop, if necessary. Any other exit condition is treated as a * crash. */ - if (pid == AutoVacPID) + if (AutoVacLauncherPMChild && pid == AutoVacLauncherPMChild->pid) { - AutoVacPID = 0; + ReleasePostmasterChildSlot(AutoVacLauncherPMChild); + AutoVacLauncherPMChild = NULL; if (!EXIT_STATUS_0(exitstatus)) HandleChildCrash(pid, exitstatus, _("autovacuum launcher process")); @@ -2519,9 +2451,10 @@ process_pm_child_exit(void) * and just try to start a new one on the next cycle of the * postmaster's main loop, to retry archiving remaining files. */ - if (pid == PgArchPID) + if (PgArchPMChild && pid == PgArchPMChild->pid) { - PgArchPID = 0; + ReleasePostmasterChildSlot(PgArchPMChild); + PgArchPMChild = NULL; if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) HandleChildCrash(pid, exitstatus, _("archiver process")); @@ -2529,11 +2462,15 @@ process_pm_child_exit(void) } /* Was it the system logger? If so, try to start a new one */ - if (pid == SysLoggerPID) + if (SysLoggerPMChild && pid == SysLoggerPMChild->pid) { - SysLoggerPID = 0; + ReleasePostmasterChildSlot(SysLoggerPMChild); + SysLoggerPMChild = NULL; + /* for safety's sake, launch new logger *first* */ - SysLoggerPID = SysLogger_Start(); + if (Logging_collector) + StartSysLogger(); + if (!EXIT_STATUS_0(exitstatus)) LogChildExit(LOG, _("system logger process"), pid, exitstatus); @@ -2547,9 +2484,10 @@ process_pm_child_exit(void) * start a new one at the next iteration of the postmaster's main * loop, if necessary. Any other exit condition is treated as a crash. */ - if (pid == SlotSyncWorkerPID) + if (SlotSyncWorkerPMChild && pid == SlotSyncWorkerPMChild->pid) { - SlotSyncWorkerPID = 0; + ReleasePostmasterChildSlot(SlotSyncWorkerPMChild); + SlotSyncWorkerPMChild = NULL; if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) HandleChildCrash(pid, exitstatus, _("slot sync worker process")); @@ -2559,25 +2497,17 @@ process_pm_child_exit(void) /* * Was it a backend or a background worker? */ - found = false; - dlist_foreach_modify(iter, &BackendList) + pmchild = FindPostmasterChildByPid(pid); + if (pmchild) { - Backend *bp = dlist_container(Backend, elem, iter.cur); - - if (bp->pid == pid) - { - dlist_delete(iter.cur); - CleanupBackend(bp, exitstatus); - found = true; - break; - } + CleanupBackend(pmchild, exitstatus); } /* * We don't know anything about this child process. That's highly * unexpected, as we do track all the child processes that we fork. */ - if (!found) + else { if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) HandleChildCrash(pid, exitstatus, _("untracked child process")); @@ -2596,17 +2526,21 @@ process_pm_child_exit(void) /* * CleanupBackend -- cleanup after terminated backend or background worker. * - * Remove all local state associated with backend. The Backend entry has - * already been unlinked from BackendList, but we will free it here. + * Remove all local state associated with the child process and release its + * PMChild slot. */ static void -CleanupBackend(Backend *bp, +CleanupBackend(PMChild *bp, int exitstatus) /* child's exit status. */ { char namebuf[MAXPGPATH]; const char *procname; bool crashed = false; bool logged = false; + pid_t bp_pid; + bool bp_bgworker_notify; + BackendType bp_bkend_type; + RegisteredBgWorker *rw; /* Construct a process name for the log message */ if (bp->bkend_type == B_BG_WORKER) @@ -2622,7 +2556,7 @@ CleanupBackend(Backend *bp, * If a backend dies in an ugly way then we must signal all other backends * to quickdie. If exit status is zero (normal) or one (FATAL exit), we * assume everything is all right and proceed to remove the backend from - * the active backend list. + * the active child list. */ if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) crashed = true; @@ -2645,25 +2579,28 @@ CleanupBackend(Backend *bp, #endif /* - * If the process attached to shared memory, check that it detached - * cleanly. + * Release the PMChild entry. + * + * If the process attached to shared memory, this also checks that it + * detached cleanly. */ - if (bp->bkend_type != B_DEAD_END_BACKEND) + bp_pid = bp->pid; + bp_bgworker_notify = bp->bgworker_notify; + bp_bkend_type = bp->bkend_type; + rw = bp->rw; + if (!ReleasePostmasterChildSlot(bp)) { - if (!ReleasePostmasterChildSlot(bp->child_slot)) - { - /* - * Uh-oh, the child failed to clean itself up. Treat as a crash - * after all. - */ - crashed = true; - } + /* + * Uh-oh, the child failed to clean itself up. Treat as a crash after + * all. + */ + crashed = true; } + bp = NULL; if (crashed) { - HandleChildCrash(bp->pid, exitstatus, procname); - pfree(bp); + HandleChildCrash(bp_pid, exitstatus, procname); return; } @@ -2674,17 +2611,15 @@ CleanupBackend(Backend *bp, * gets skipped in the (probably very common) case where the backend has * never requested any such notifications. */ - if (bp->bgworker_notify) - BackgroundWorkerStopNotifications(bp->pid); + if (bp_bgworker_notify) + BackgroundWorkerStopNotifications(bp_pid); /* * If it was a background worker, also update its RegisteredBgWorker * entry. */ - if (bp->bkend_type == B_BG_WORKER) + if (bp_bkend_type == B_BG_WORKER) { - RegisteredBgWorker *rw = bp->rw; - if (!EXIT_STATUS_0(exitstatus)) { /* Record timestamp, so we know when to restart the worker. */ @@ -2703,7 +2638,7 @@ CleanupBackend(Backend *bp, if (!logged) { LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG, - procname, bp->pid, exitstatus); + procname, bp_pid, exitstatus); logged = true; } @@ -2712,9 +2647,7 @@ CleanupBackend(Backend *bp, } if (!logged) - LogChildExit(DEBUG2, procname, bp->pid, exitstatus); - - pfree(bp); + LogChildExit(DEBUG2, procname, bp_pid, exitstatus); } /* @@ -2724,9 +2657,7 @@ CleanupBackend(Backend *bp, * The objectives here are to clean up our local state about the child * process, and to signal all other remaining children to quickdie. * - * If it's a backend, the caller has already removed it from the BackendList. - * If it's an aux process, the corresponding *PID global variable has been - * reset already. + * The caller has already released its PMChild slot. */ static void HandleChildCrash(int pid, int exitstatus, const char *procname) @@ -2750,63 +2681,34 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) SetQuitSignalReason(PMQUIT_FOR_CRASH); } + /* + * Signal all other child processes to exit. The crashed process has + * already been removed from ActiveChildList. + */ if (take_action) { dlist_iter iter; - dlist_foreach(iter, &BackendList) + dlist_foreach(iter, &ActiveChildList) { - Backend *bp = dlist_container(Backend, elem, iter.cur); + PMChild *bp = dlist_container(PMChild, elem, iter.cur); + + /* We do NOT restart the syslogger */ + if (bp == SysLoggerPMChild) + continue; + + if (bp == StartupPMChild) + StartupStatus = STARTUP_SIGNALED; /* * This backend is still alive. Unless we did so already, tell it * to commit hara-kiri. * - * We could exclude dead_end children here, but at least when + * We could exclude dead-end children here, but at least when * sending SIGABRT it seems better to include them. */ - sigquit_child(bp->pid); + sigquit_child(bp); } - - if (StartupPID != 0) - { - sigquit_child(StartupPID); - StartupStatus = STARTUP_SIGNALED; - } - - /* Take care of the bgwriter too */ - if (BgWriterPID != 0) - sigquit_child(BgWriterPID); - - /* Take care of the checkpointer too */ - if (CheckpointerPID != 0) - sigquit_child(CheckpointerPID); - - /* Take care of the walwriter too */ - if (WalWriterPID != 0) - sigquit_child(WalWriterPID); - - /* Take care of the walreceiver too */ - if (WalReceiverPID != 0) - sigquit_child(WalReceiverPID); - - /* Take care of the walsummarizer too */ - if (WalSummarizerPID != 0) - sigquit_child(WalSummarizerPID); - - /* Take care of the autovacuum launcher too */ - if (AutoVacPID != 0) - sigquit_child(AutoVacPID); - - /* Take care of the archiver too */ - if (PgArchPID != 0) - sigquit_child(PgArchPID); - - /* Take care of the slot sync worker too */ - if (SlotSyncWorkerPID != 0) - sigquit_child(SlotSyncWorkerPID); - - /* We do NOT restart the syslogger */ } if (Shutdown != ImmediateShutdown) @@ -2915,86 +2817,108 @@ PostmasterStateMachine(void) } /* - * If we're ready to do so, signal child processes to shut down. (This - * isn't a persistent state, but treating it as a distinct pmState allows - * us to share this code across multiple shutdown code paths.) + * In the PM_WAIT_BACKENDS state, wait for all the regular backends and + * procesess like autovacuum and background workers that are comparable to + * backends to exit. + * + * PM_STOP_BACKENDS is a transient state that means the same as + * PM_WAIT_BACKENDS, but we signal the processes first, before waiting for + * them. Treating it as a distinct pmState allows us to share this code + * across multiple shutdown code paths. */ - if (pmState == PM_STOP_BACKENDS) + if (pmState == PM_STOP_BACKENDS || pmState == PM_WAIT_BACKENDS) { + BackendTypeMask targetMask = BTYPE_MASK_NONE; + /* - * Forget any pending requests for background workers, since we're no - * longer willing to launch any new workers. (If additional requests - * arrive, BackgroundWorkerStateChange will reject them.) + * PM_WAIT_BACKENDS state ends when we have no regular backends, no + * autovac launcher or workers, and no bgworkers (including + * unconnected ones). No walwriter, bgwriter, slot sync worker, or + * WAL summarizer either. */ - ForgetUnstartedBackgroundWorkers(); - - /* Signal all backend children except walsenders and dead-end backends */ - SignalChildren(SIGTERM, btmask_all_except2(B_WAL_SENDER, B_DEAD_END_BACKEND)); - /* and the autovac launcher too */ - if (AutoVacPID != 0) - signal_child(AutoVacPID, SIGTERM); - /* and the bgwriter too */ - if (BgWriterPID != 0) - signal_child(BgWriterPID, SIGTERM); - /* and the walwriter too */ - if (WalWriterPID != 0) - signal_child(WalWriterPID, SIGTERM); + targetMask = btmask_add(targetMask, B_BACKEND); + targetMask = btmask_add(targetMask, B_AUTOVAC_LAUNCHER); + targetMask = btmask_add(targetMask, B_AUTOVAC_WORKER); + targetMask = btmask_add(targetMask, B_BG_WORKER); + + targetMask = btmask_add(targetMask, B_WAL_WRITER); + targetMask = btmask_add(targetMask, B_BG_WRITER); + targetMask = btmask_add(targetMask, B_SLOTSYNC_WORKER); + targetMask = btmask_add(targetMask, B_WAL_SUMMARIZER); + /* If we're in recovery, also stop startup and walreceiver procs */ - if (StartupPID != 0) - signal_child(StartupPID, SIGTERM); - if (WalReceiverPID != 0) - signal_child(WalReceiverPID, SIGTERM); - if (WalSummarizerPID != 0) - signal_child(WalSummarizerPID, SIGTERM); - if (SlotSyncWorkerPID != 0) - signal_child(SlotSyncWorkerPID, SIGTERM); - /* checkpointer, archiver, stats, and syslogger may continue for now */ - - /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */ - pmState = PM_WAIT_BACKENDS; - } + targetMask = btmask_add(targetMask, B_STARTUP); + targetMask = btmask_add(targetMask, B_WAL_RECEIVER); - /* - * If we are in a state-machine state that implies waiting for backends to - * exit, see if they're all gone, and change state if so. - */ - if (pmState == PM_WAIT_BACKENDS) - { /* - * PM_WAIT_BACKENDS state ends when we have no regular backends - * (including autovac workers), no bgworkers (including unconnected - * ones), and no walwriter, autovac launcher, bgwriter or slot sync - * worker. If we are doing crash recovery or an immediate shutdown - * then we expect the checkpointer to exit as well, otherwise not. The - * stats and syslogger processes are disregarded since they are not - * connected to shared memory; we also disregard dead_end children - * here. Walsenders and archiver are also disregarded, they will be - * terminated later after writing the checkpoint record. + * If we are doing crash recovery or an immediate shutdown then we + * expect the checkpointer to exit as well, otherwise not. */ - if (CountChildren(btmask_all_except2(B_WAL_SENDER, B_DEAD_END_BACKEND)) == 0 && - StartupPID == 0 && - WalReceiverPID == 0 && - WalSummarizerPID == 0 && - BgWriterPID == 0 && - (CheckpointerPID == 0 || - (!FatalError && Shutdown < ImmediateShutdown)) && - WalWriterPID == 0 && - AutoVacPID == 0 && - SlotSyncWorkerPID == 0) + if (FatalError || Shutdown >= ImmediateShutdown) + targetMask = btmask_add(targetMask, B_CHECKPOINTER); + + /* + * Walsenders and archiver will continue running; they will be + * terminated later after writing the checkpoint record. We also let + * dead-end children to keep running for now. The syslogger process + * exits last. + * + * This assertion checks that we have covered all backend types, + * either by including them in targetMask, or by noting here that they + * are allowed to continue running. + */ +#ifdef USE_ASSERT_CHECKING + { + BackendTypeMask remainMask = BTYPE_MASK_NONE; + + remainMask = btmask_add(remainMask, B_WAL_SENDER); + remainMask = btmask_add(remainMask, B_ARCHIVER); + remainMask = btmask_add(remainMask, B_DEAD_END_BACKEND); + remainMask = btmask_add(remainMask, B_LOGGER); + + /* checkpointer may or may not be in targetMask already */ + remainMask = btmask_add(remainMask, B_CHECKPOINTER); + + /* these are not real postmaster children */ + remainMask = btmask_add(remainMask, B_INVALID); + remainMask = btmask_add(remainMask, B_STANDALONE_BACKEND); + + /* All types should be included in targetMask or remainMask */ + Assert((remainMask.mask | targetMask.mask) == BTYPE_MASK_ALL.mask); + } +#endif + + /* If we had not yet signaled the processes to exit, do so now */ + if (pmState == PM_STOP_BACKENDS) + { + /* + * Forget any pending requests for background workers, since we're + * no longer willing to launch any new workers. (If additional + * requests arrive, BackgroundWorkerStateChange will reject them.) + */ + ForgetUnstartedBackgroundWorkers(); + + SignalChildren(SIGTERM, targetMask); + + pmState = PM_WAIT_BACKENDS; + } + + /* Are any of the target processes still running? */ + if (CountChildren(targetMask) == 0) { if (Shutdown >= ImmediateShutdown || FatalError) { /* - * Stop any dead_end children and stop creating new ones. + * Stop any dead-end children and stop creating new ones. */ pmState = PM_WAIT_DEAD_END; ConfigurePostmasterWaitSet(false); SignalChildren(SIGQUIT, btmask(B_DEAD_END_BACKEND)); /* - * We already SIGQUIT'd the archiver and stats processes, if - * any, when we started immediate shutdown or entered - * FatalError state. + * We already SIGQUIT'd walsenders and the archiver, if any, + * when we started immediate shutdown or entered FatalError + * state. */ } else @@ -3006,12 +2930,12 @@ PostmasterStateMachine(void) */ Assert(Shutdown > NoShutdown); /* Start the checkpointer if not running */ - if (CheckpointerPID == 0) - CheckpointerPID = StartChildProcess(B_CHECKPOINTER); + if (CheckpointerPMChild == NULL) + CheckpointerPMChild = StartChildProcess(B_CHECKPOINTER); /* And tell it to shut down */ - if (CheckpointerPID != 0) + if (CheckpointerPMChild != NULL) { - signal_child(CheckpointerPID, SIGUSR2); + signal_child(CheckpointerPMChild, SIGUSR2); pmState = PM_SHUTDOWN; } else @@ -3031,9 +2955,7 @@ PostmasterStateMachine(void) ConfigurePostmasterWaitSet(false); /* Kill the walsenders and archiver too */ - SignalChildren(SIGQUIT, BTYPE_MASK_ALL); - if (PgArchPID != 0) - signal_child(PgArchPID, SIGQUIT); + SignalChildren(SIGQUIT, btmask_all_except(B_LOGGER)); } } } @@ -3043,43 +2965,44 @@ PostmasterStateMachine(void) { /* * PM_SHUTDOWN_2 state ends when there's no other children than - * dead_end children left. There shouldn't be any regular backends + * dead-end children left. There shouldn't be any regular backends * left by now anyway; what we're really waiting for is walsenders and * archiver. */ - if (PgArchPID == 0 && CountChildren(btmask_all_except(B_DEAD_END_BACKEND)) == 0) + if (CountChildren(btmask_all_except2(B_LOGGER, B_DEAD_END_BACKEND)) == 0) { pmState = PM_WAIT_DEAD_END; ConfigurePostmasterWaitSet(false); - SignalChildren(SIGTERM, BTYPE_MASK_ALL); + SignalChildren(SIGTERM, btmask_all_except(B_LOGGER)); } } if (pmState == PM_WAIT_DEAD_END) { /* - * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty - * (ie, no dead_end children remain), and the archiver is gone too. - * - * The reason we wait for those two is to protect them against a new - * postmaster starting conflicting subprocesses; this isn't an - * ironclad protection, but it at least helps in the - * shutdown-and-immediately-restart scenario. Note that they have - * already been sent appropriate shutdown signals, either during a - * normal state transition leading up to PM_WAIT_DEAD_END, or during + * PM_WAIT_DEAD_END state ends when all other children are gone except + * for the logger. During normal shutdown, all that remains are + * dead-end backends, but in FatalError processing we jump straight + * here with more processes remaining. Note that they have already + * been sent appropriate shutdown signals, either during a normal + * state transition leading up to PM_WAIT_DEAD_END, or during * FatalError processing. + * + * The reason we wait is to protect against a new postmaster starting + * conflicting subprocesses; this isn't an ironclad protection, but it + * at least helps in the shutdown-and-immediately-restart scenario. */ - if (dlist_is_empty(&BackendList) && PgArchPID == 0) + if (CountChildren(btmask_all_except(B_LOGGER)) == 0) { /* These other guys should be dead already */ - Assert(StartupPID == 0); - Assert(WalReceiverPID == 0); - Assert(WalSummarizerPID == 0); - Assert(BgWriterPID == 0); - Assert(CheckpointerPID == 0); - Assert(WalWriterPID == 0); - Assert(AutoVacPID == 0); - Assert(SlotSyncWorkerPID == 0); + Assert(StartupPMChild == NULL); + Assert(WalReceiverPMChild == NULL); + Assert(WalSummarizerPMChild == NULL); + Assert(BgWriterPMChild == NULL); + Assert(CheckpointerPMChild == NULL); + Assert(WalWriterPMChild == NULL); + Assert(AutoVacLauncherPMChild == NULL); + Assert(SlotSyncWorkerPMChild == NULL); /* syslogger is not considered here */ pmState = PM_NO_CHILDREN; } @@ -3162,8 +3085,8 @@ PostmasterStateMachine(void) /* re-create shared memory and semaphores */ CreateSharedMemoryAndSemaphores(); - StartupPID = StartChildProcess(B_STARTUP); - Assert(StartupPID != 0); + StartupPMChild = StartChildProcess(B_STARTUP); + Assert(StartupPMChild != NULL); StartupStatus = STARTUP_RUNNING; pmState = PM_STARTUP; /* crash recovery started, reset SIGKILL flag */ @@ -3186,8 +3109,8 @@ static void LaunchMissingBackgroundProcesses(void) { /* Syslogger is active in all states */ - if (SysLoggerPID == 0 && Logging_collector) - SysLoggerPID = SysLogger_Start(); + if (SysLoggerPMChild == NULL && Logging_collector) + StartSysLogger(); /* * The checkpointer and the background writer are active from the start, @@ -3200,30 +3123,30 @@ LaunchMissingBackgroundProcesses(void) if (pmState == PM_RUN || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_STARTUP) { - if (CheckpointerPID == 0) - CheckpointerPID = StartChildProcess(B_CHECKPOINTER); - if (BgWriterPID == 0) - BgWriterPID = StartChildProcess(B_BG_WRITER); + if (CheckpointerPMChild == NULL) + CheckpointerPMChild = StartChildProcess(B_CHECKPOINTER); + if (BgWriterPMChild == NULL) + BgWriterPMChild = StartChildProcess(B_BG_WRITER); } /* * WAL writer is needed only in normal operation (else we cannot be * writing any new WAL). */ - if (WalWriterPID == 0 && pmState == PM_RUN) - WalWriterPID = StartChildProcess(B_WAL_WRITER); + if (WalWriterPMChild == NULL && pmState == PM_RUN) + WalWriterPMChild = StartChildProcess(B_WAL_WRITER); /* * We don't want autovacuum to run in binary upgrade mode because * autovacuum might update relfrozenxid for empty tables before the * physical files are put in place. */ - if (!IsBinaryUpgrade && AutoVacPID == 0 && + if (!IsBinaryUpgrade && AutoVacLauncherPMChild == NULL && (AutoVacuumingActive() || start_autovac_launcher) && pmState == PM_RUN) { - AutoVacPID = StartChildProcess(B_AUTOVAC_LAUNCHER); - if (AutoVacPID != 0) + AutoVacLauncherPMChild = StartChildProcess(B_AUTOVAC_LAUNCHER); + if (AutoVacLauncherPMChild != NULL) start_autovac_launcher = false; /* signal processed */ } @@ -3231,11 +3154,11 @@ LaunchMissingBackgroundProcesses(void) * If WAL archiving is enabled always, we are allowed to start archiver * even during recovery. */ - if (PgArchPID == 0 && + if (PgArchPMChild == NULL && ((XLogArchivingActive() && pmState == PM_RUN) || (XLogArchivingAlways() && (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && PgArchCanRestart()) - PgArchPID = StartChildProcess(B_ARCHIVER); + PgArchPMChild = StartChildProcess(B_ARCHIVER); /* * If we need to start a slot sync worker, try to do that now @@ -3245,42 +3168,42 @@ LaunchMissingBackgroundProcesses(void) * configured correctly, and it is the first time of worker's launch, or * enough time has passed since the worker was launched last. */ - if (SlotSyncWorkerPID == 0 && pmState == PM_HOT_STANDBY && + if (SlotSyncWorkerPMChild == NULL && pmState == PM_HOT_STANDBY && Shutdown <= SmartShutdown && sync_replication_slots && ValidateSlotSyncParams(LOG) && SlotSyncWorkerCanRestart()) - SlotSyncWorkerPID = StartChildProcess(B_SLOTSYNC_WORKER); + SlotSyncWorkerPMChild = StartChildProcess(B_SLOTSYNC_WORKER); /* * If we need to start a WAL receiver, try to do that now * - * Note: if WalReceiverPID is already nonzero, it might seem that we - * should clear WalReceiverRequested. However, there's a race condition - * if the walreceiver terminates and the startup process immediately - * requests a new one: it's quite possible to get the signal for the - * request before reaping the dead walreceiver process. Better to risk - * launching an extra walreceiver than to miss launching one we need. (The - * walreceiver code has logic to recognize that it should go away if not - * needed.) + * Note: if a walreceiver process is already running, it might seem that + * we should clear WalReceiverRequested. However, there's a race + * condition if the walreceiver terminates and the startup process + * immediately requests a new one: it's quite possible to get the signal + * for the request before reaping the dead walreceiver process. Better to + * risk launching an extra walreceiver than to miss launching one we need. + * (The walreceiver code has logic to recognize that it should go away if + * not needed.) */ if (WalReceiverRequested) { - if (WalReceiverPID == 0 && + if (WalReceiverPMChild == NULL && (pmState == PM_STARTUP || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY) && Shutdown <= SmartShutdown) { - WalReceiverPID = StartChildProcess(B_WAL_RECEIVER); - if (WalReceiverPID != 0) + WalReceiverPMChild = StartChildProcess(B_WAL_RECEIVER); + if (WalReceiverPMChild != 0) WalReceiverRequested = false; /* else leave the flag set, so we'll try again later */ } } /* If we need to start a WAL summarizer, try to do that now */ - if (summarize_wal && WalSummarizerPID == 0 && + if (summarize_wal && WalSummarizerPMChild == NULL && (pmState == PM_RUN || pmState == PM_HOT_STANDBY) && Shutdown <= SmartShutdown) - WalSummarizerPID = StartChildProcess(B_WAL_SUMMARIZER); + WalSummarizerPMChild = StartChildProcess(B_WAL_SUMMARIZER); /* Get other worker processes running, if needed */ if (StartWorkerNeeded || HaveCrashedWorker) @@ -3304,8 +3227,10 @@ LaunchMissingBackgroundProcesses(void) * child twice will not cause any problems. */ static void -signal_child(pid_t pid, int signal) +signal_child(PMChild *pmchild, int signal) { + pid_t pid = pmchild->pid; + if (kill(pid, signal) < 0) elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal); #ifdef HAVE_SETSID @@ -3334,17 +3259,17 @@ signal_child(pid_t pid, int signal) * to use SIGABRT to collect per-child core dumps. */ static void -sigquit_child(pid_t pid) +sigquit_child(PMChild *pmchild) { ereport(DEBUG2, (errmsg_internal("sending %s to process %d", (send_abort_for_crash ? "SIGABRT" : "SIGQUIT"), - (int) pid))); - signal_child(pid, (send_abort_for_crash ? SIGABRT : SIGQUIT)); + (int) pmchild->pid))); + signal_child(pmchild, (send_abort_for_crash ? SIGABRT : SIGQUIT)); } /* - * Send a signal to the targeted children (but NOT special children). + * Send a signal to the targeted children. */ static bool SignalChildren(int signal, BackendTypeMask targetMask) @@ -3352,9 +3277,9 @@ SignalChildren(int signal, BackendTypeMask targetMask) dlist_iter iter; bool signaled = false; - dlist_foreach(iter, &BackendList) + dlist_foreach(iter, &ActiveChildList) { - Backend *bp = dlist_container(Backend, elem, iter.cur); + PMChild *bp = dlist_container(PMChild, elem, iter.cur); /* * If we need to distinguish between B_BACKEND and B_WAL_SENDER, check @@ -3374,7 +3299,7 @@ SignalChildren(int signal, BackendTypeMask targetMask) ereport(DEBUG4, (errmsg_internal("sending signal %d to %s process %d", signal, GetBackendTypeDesc(bp->bkend_type), (int) bp->pid))); - signal_child(bp->pid, signal); + signal_child(bp, signal); signaled = true; } return signaled; @@ -3387,29 +3312,12 @@ SignalChildren(int signal, BackendTypeMask targetMask) static void TerminateChildren(int signal) { - SignalChildren(signal, BTYPE_MASK_ALL); - if (StartupPID != 0) + SignalChildren(signal, btmask_all_except(B_LOGGER)); + if (StartupPMChild != NULL) { - signal_child(StartupPID, signal); if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT) StartupStatus = STARTUP_SIGNALED; } - if (BgWriterPID != 0) - signal_child(BgWriterPID, signal); - if (CheckpointerPID != 0) - signal_child(CheckpointerPID, signal); - if (WalWriterPID != 0) - signal_child(WalWriterPID, signal); - if (WalReceiverPID != 0) - signal_child(WalReceiverPID, signal); - if (WalSummarizerPID != 0) - signal_child(WalSummarizerPID, signal); - if (AutoVacPID != 0) - signal_child(AutoVacPID, signal); - if (PgArchPID != 0) - signal_child(PgArchPID, signal); - if (SlotSyncWorkerPID != 0) - signal_child(SlotSyncWorkerPID, signal); } /* @@ -3417,49 +3325,56 @@ TerminateChildren(int signal) * * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise. * - * Note: if you change this code, also consider StartAutovacuumWorker. + * Note: if you change this code, also consider StartAutovacuumWorker and + * StartBackgroundWorker. */ static int BackendStartup(ClientSocket *client_sock) { - Backend *bn; /* for backend cleanup */ + PMChild *bn = NULL; pid_t pid; BackendStartupData startup_data; + CAC_state cac; /* - * Create backend data structure. Better before the fork() so we can - * handle failure cleanly. + * Allocate and assign the child slot. Note we must do this before + * forking, so that we can handle failures (out of memory or child-process + * slots) cleanly. */ - bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM); + cac = canAcceptConnections(B_BACKEND); + if (cac == CAC_OK) + { + /* Can change later to B_WAL_SENDER */ + bn = AssignPostmasterChildSlot(B_BACKEND); + if (!bn) + { + /* + * Too many regular child processes; launch a dead-end child + * process instead. + */ + cac = CAC_TOOMANY; + } + } if (!bn) { - ereport(LOG, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - return STATUS_ERROR; + bn = AllocDeadEndChild(); + if (!bn) + { + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + return STATUS_ERROR; + } } /* Pass down canAcceptConnections state */ - startup_data.canAcceptConnections = canAcceptConnections(B_BACKEND); + startup_data.canAcceptConnections = cac; bn->rw = NULL; - /* - * Unless it's a dead_end child, assign it a child slot number - */ - if (startup_data.canAcceptConnections == CAC_OK) - { - bn->bkend_type = B_BACKEND; /* Can change later to B_WAL_SENDER */ - bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); - } - else - { - bn->bkend_type = B_DEAD_END_BACKEND; - bn->child_slot = 0; - } - /* Hasn't asked to be notified about any bgworkers yet */ bn->bgworker_notify = false; + MyPMChildSlot = bn->child_slot; pid = postmaster_child_launch(bn->bkend_type, (char *) &startup_data, sizeof(startup_data), client_sock); @@ -3468,9 +3383,7 @@ BackendStartup(ClientSocket *client_sock) /* in parent, fork failed */ int save_errno = errno; - if (bn->child_slot != 0) - (void) ReleasePostmasterChildSlot(bn->child_slot); - pfree(bn); + (void) ReleasePostmasterChildSlot(bn); errno = save_errno; ereport(LOG, (errmsg("could not fork new process for connection: %m"))); @@ -3489,8 +3402,6 @@ BackendStartup(ClientSocket *client_sock) * of backends. */ bn->pid = pid; - dlist_push_head(&BackendList, &bn->elem); - return STATUS_OK; } @@ -3588,9 +3499,9 @@ process_pm_pmsignal(void) * Start the archiver if we're responsible for (re-)archiving received * files. */ - Assert(PgArchPID == 0); + Assert(PgArchPMChild == NULL); if (XLogArchivingAlways()) - PgArchPID = StartChildProcess(B_ARCHIVER); + PgArchPMChild = StartChildProcess(B_ARCHIVER); /* * If we aren't planning to enter hot standby mode later, treat @@ -3636,16 +3547,16 @@ process_pm_pmsignal(void) } /* Tell syslogger to rotate logfile if requested */ - if (SysLoggerPID != 0) + if (SysLoggerPMChild != NULL) { if (CheckLogrotateSignal()) { - signal_child(SysLoggerPID, SIGUSR1); + signal_child(SysLoggerPMChild, SIGUSR1); RemoveLogrotateSignalFiles(); } else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE)) { - signal_child(SysLoggerPID, SIGUSR1); + signal_child(SysLoggerPMChild, SIGUSR1); } } @@ -3692,7 +3603,7 @@ process_pm_pmsignal(void) PostmasterStateMachine(); } - if (StartupPID != 0 && + if (StartupPMChild != NULL && (pmState == PM_STARTUP || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY) && CheckPromoteSignal()) @@ -3703,7 +3614,7 @@ process_pm_pmsignal(void) * Leave the promote signal file in place and let the Startup process * do the unlink. */ - signal_child(StartupPID, SIGUSR2); + signal_child(StartupPMChild, SIGUSR2); } } @@ -3722,8 +3633,7 @@ dummy_handler(SIGNAL_ARGS) } /* - * Count up number of child processes of specified types (but NOT special - * children). + * Count up number of child processes of specified types. */ static int CountChildren(BackendTypeMask targetMask) @@ -3731,9 +3641,9 @@ CountChildren(BackendTypeMask targetMask) dlist_iter iter; int cnt = 0; - dlist_foreach(iter, &BackendList) + dlist_foreach(iter, &ActiveChildList) { - Backend *bp = dlist_container(Backend, elem, iter.cur); + PMChild *bp = dlist_container(PMChild, elem, iter.cur); /* * If we need to distinguish between B_BACKEND and B_WAL_SENDER, check @@ -3750,6 +3660,10 @@ CountChildren(BackendTypeMask targetMask) if (!btmask_contains(targetMask, bp->bkend_type)) continue; + ereport(DEBUG4, + (errmsg_internal("%s process %d is still running", + GetBackendTypeDesc(bp->bkend_type), (int) bp->pid))); + cnt++; } return cnt; @@ -3762,18 +3676,36 @@ CountChildren(BackendTypeMask targetMask) * "type" determines what kind of child will be started. All child types * initially go to AuxiliaryProcessMain, which will handle common setup. * - * Return value of StartChildProcess is subprocess' PID, or 0 if failed - * to start subprocess. + * Return value of StartChildProcess is subprocess' PMChild entry, or NULL on + * failure. */ -static pid_t +static PMChild * StartChildProcess(BackendType type) { + PMChild *pmchild; pid_t pid; + pmchild = AssignPostmasterChildSlot(type); + if (!pmchild) + { + if (type == B_AUTOVAC_WORKER) + ereport(LOG, + (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), + errmsg("no slot available for new autovacuum worker process"))); + else + { + /* shouldn't happen because we allocate enough slots */ + elog(LOG, "no postmaster child slot available for aux process"); + } + return NULL; + } + + MyPMChildSlot = pmchild->child_slot; pid = postmaster_child_launch(type, NULL, 0, NULL); if (pid < 0) { /* in parent, fork failed */ + ReleasePostmasterChildSlot(pmchild); ereport(LOG, (errmsg("could not fork \"%s\" process: %m", PostmasterChildName(type)))); @@ -3783,13 +3715,31 @@ StartChildProcess(BackendType type) */ if (type == B_STARTUP) ExitPostmaster(1); - return 0; + return NULL; } - /* - * in parent, successful fork - */ - return pid; + /* in parent, successful fork */ + pmchild->pid = pid; + return pmchild; +} + +/* + * StartSysLogger -- start the syslogger process + */ +void +StartSysLogger(void) +{ + Assert(SysLoggerPMChild == NULL); + + SysLoggerPMChild = AssignPostmasterChildSlot(B_LOGGER); + if (!SysLoggerPMChild) + elog(PANIC, "no postmaster child slot available for syslogger"); + SysLoggerPMChild->pid = SysLogger_Start(SysLoggerPMChild->child_slot); + if (SysLoggerPMChild->pid == 0) + { + ReleasePostmasterChildSlot(SysLoggerPMChild); + SysLoggerPMChild = NULL; + } } /* @@ -3804,7 +3754,7 @@ StartChildProcess(BackendType type) static void StartAutovacuumWorker(void) { - Backend *bn; + PMChild *bn; /* * If not in condition to run a process, don't try, but handle it like a @@ -3815,34 +3765,20 @@ StartAutovacuumWorker(void) */ if (canAcceptConnections(B_AUTOVAC_WORKER) == CAC_OK) { - bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM); + bn = StartChildProcess(B_AUTOVAC_WORKER); if (bn) { - /* Autovac workers need a child slot */ - bn->bkend_type = B_AUTOVAC_WORKER; - bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); bn->bgworker_notify = false; bn->rw = NULL; - - bn->pid = StartChildProcess(B_AUTOVAC_WORKER); - if (bn->pid > 0) - { - dlist_push_head(&BackendList, &bn->elem); - /* all OK */ - return; - } - + return; + } + else + { /* * fork failed, fall through to report -- actual error message was * logged by StartChildProcess */ - (void) ReleasePostmasterChildSlot(bn->child_slot); - pfree(bn); } - else - ereport(LOG, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); } /* @@ -3854,7 +3790,7 @@ StartAutovacuumWorker(void) * quick succession between the autovac launcher and postmaster in case * things get ugly. */ - if (AutoVacPID != 0) + if (AutoVacLauncherPMChild != NULL) { AutoVacWorkerFailed(); avlauncher_needs_signal = true; @@ -3899,41 +3835,24 @@ CreateOptsFile(int argc, char *argv[], char *fullprogname) /* - * MaxLivePostmasterChildren - * - * This reports the number of entries needed in the per-child-process array - * (PMChildFlags). It includes regular backends, autovac workers, walsenders - * and background workers, but not special children nor dead_end children. - * This allows the array to have a fixed maximum size, to wit the same - * too-many-children limit enforced by canAcceptConnections(). The exact value - * isn't too critical as long as it's more than MaxBackends. - */ -int -MaxLivePostmasterChildren(void) -{ - return 2 * (MaxConnections + autovacuum_max_workers + 1 + - max_wal_senders + max_worker_processes); -} - -/* * Start a new bgworker. * Starting time conditions must have been checked already. * * Returns true on success, false on failure. * In either case, update the RegisteredBgWorker's state appropriately. * - * This code is heavily based on autovacuum.c, q.v. + * NB -- this code very roughly matches BackendStartup. */ static bool -do_start_bgworker(RegisteredBgWorker *rw) +StartBackgroundWorker(RegisteredBgWorker *rw) { - Backend *bn; + PMChild *bn; pid_t worker_pid; Assert(rw->rw_pid == 0); /* - * Allocate and assign the Backend element. Note we must do this before + * Allocate and assign the child slot. Note we must do this before * forking, so that we can handle failures (out of memory or child-process * slots) cleanly. * @@ -3942,27 +3861,32 @@ do_start_bgworker(RegisteredBgWorker *rw) * tried again right away, most likely we'd find ourselves hitting the * same resource-exhaustion condition. */ - bn = assign_backendlist_entry(); + bn = AssignPostmasterChildSlot(B_BG_WORKER); if (bn == NULL) { + ereport(LOG, + (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), + errmsg("no slot available for new background worker process"))); rw->rw_crashed_at = GetCurrentTimestamp(); return false; } bn->rw = rw; + bn->bkend_type = B_BG_WORKER; + bn->bgworker_notify = false; ereport(DEBUG1, (errmsg_internal("starting background worker process \"%s\"", rw->rw_worker.bgw_name))); + MyPMChildSlot = bn->child_slot; worker_pid = postmaster_child_launch(B_BG_WORKER, (char *) &rw->rw_worker, sizeof(BackgroundWorker), NULL); if (worker_pid == -1) { /* in postmaster, fork failed ... */ ereport(LOG, (errmsg("could not fork background worker process: %m"))); - /* undo what assign_backendlist_entry did */ - ReleasePostmasterChildSlot(bn->child_slot); - pfree(bn); + /* undo what AssignPostmasterChildSlot did */ + ReleasePostmasterChildSlot(bn); /* mark entry as crashed, so we'll try again later */ rw->rw_crashed_at = GetCurrentTimestamp(); @@ -3973,8 +3897,6 @@ do_start_bgworker(RegisteredBgWorker *rw) rw->rw_pid = worker_pid; bn->pid = rw->rw_pid; ReportBackgroundWorkerPID(rw); - /* add new worker to lists of backends */ - dlist_push_head(&BackendList, &bn->elem); return true; } @@ -4017,46 +3939,6 @@ bgworker_should_start_now(BgWorkerStartTime start_time) } /* - * Allocate the Backend struct for a connected background worker, but don't - * add it to the list of backends just yet. - * - * On failure, return NULL. - */ -static Backend * -assign_backendlist_entry(void) -{ - Backend *bn; - - /* - * Check that database state allows another connection. Currently the - * only possible failure is CAC_TOOMANY, so we just log an error message - * based on that rather than checking the error code precisely. - */ - if (canAcceptConnections(B_BG_WORKER) != CAC_OK) - { - ereport(LOG, - (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), - errmsg("no slot available for new background worker process"))); - return NULL; - } - - bn = palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM); - if (bn == NULL) - { - ereport(LOG, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - return NULL; - } - - bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); - bn->bkend_type = B_BG_WORKER; - bn->bgworker_notify = false; - - return bn; -} - -/* * If the time is right, start background worker(s). * * As a side effect, the bgworker control variables are set or reset @@ -4160,7 +4042,7 @@ maybe_start_bgworkers(void) * crashed, but there's no need because the next run of this * function will do that. */ - if (!do_start_bgworker(rw)) + if (!StartBackgroundWorker(rw)) { StartWorkerNeeded = true; return; @@ -4190,11 +4072,11 @@ bool PostmasterMarkPIDForWorkerNotify(int pid) { dlist_iter iter; - Backend *bp; + PMChild *bp; - dlist_foreach(iter, &BackendList) + dlist_foreach(iter, &ActiveChildList) { - bp = dlist_container(Backend, elem, iter.cur); + bp = dlist_container(PMChild, elem, iter.cur); if (bp->pid == pid) { bp->bgworker_notify = true; diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c index 7951599fa87..7ca24c66634 100644 --- a/src/backend/postmaster/syslogger.c +++ b/src/backend/postmaster/syslogger.c @@ -590,7 +590,7 @@ SysLoggerMain(char *startup_data, size_t startup_data_len) * Postmaster subroutine to start a syslogger subprocess. */ int -SysLogger_Start(void) +SysLogger_Start(int child_slot) { pid_t sysloggerPid; char *filename; @@ -598,8 +598,7 @@ SysLogger_Start(void) SysloggerStartupData startup_data; #endif /* EXEC_BACKEND */ - if (!Logging_collector) - return 0; + Assert(Logging_collector); /* * If first time through, create the pipe which will receive stderr @@ -695,6 +694,7 @@ SysLogger_Start(void) pfree(filename); } + MyPMChildSlot = child_slot; #ifdef EXEC_BACKEND startup_data.syslogFile = syslogger_fdget(syslogFile); startup_data.csvlogFile = syslogger_fdget(csvlogFile); diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c index c801e9bec51..c764d6af4fc 100644 --- a/src/backend/storage/ipc/pmsignal.c +++ b/src/backend/storage/ipc/pmsignal.c @@ -47,11 +47,11 @@ * exited without performing proper shutdown. The per-child-process flags * have three possible states: UNUSED, ASSIGNED, ACTIVE. An UNUSED slot is * available for assignment. An ASSIGNED slot is associated with a postmaster - * child process, but either the process has not touched shared memory yet, - * or it has successfully cleaned up after itself. A ACTIVE slot means the - * process is actively using shared memory. The slots are assigned to - * child processes at random, and postmaster.c is responsible for tracking - * which one goes with which PID. + * child process, but either the process has not touched shared memory yet, or + * it has successfully cleaned up after itself. An ACTIVE slot means the + * process is actively using shared memory. The slots are assigned to child + * processes by postmaster, and pmchild.c is responsible for tracking which + * one goes with which PID. * * Actually there is a fourth state, WALSENDER. This is just like ACTIVE, * but carries the extra information that the child is a WAL sender. @@ -84,13 +84,11 @@ struct PMSignalData NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL; /* - * These static variables are valid only in the postmaster. We keep a - * duplicative private array so that we can trust its state even if some - * failing child has clobbered the PMSignalData struct in shared memory. + * Local copy of PMSignalState->num_child_flags, only valid in the + * postmaster. Postmaster keeps a local copy so that it doesn't need to + * trust the value in shared memory. */ -static int num_child_inuse; /* # of entries in PMChildInUse[] */ -static int next_child_inuse; /* next slot to try to assign */ -static bool *PMChildInUse; /* true if i'th flag slot is assigned */ +static int num_child_flags; /* * Signal handler to be notified if postmaster dies. @@ -155,25 +153,8 @@ PMSignalShmemInit(void) { /* initialize all flags to zeroes */ MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize()); - num_child_inuse = MaxLivePostmasterChildren(); - PMSignalState->num_child_flags = num_child_inuse; - - /* - * Also allocate postmaster's private PMChildInUse[] array. We - * might've already done that in a previous shared-memory creation - * cycle, in which case free the old array to avoid a leak. (Do it - * like this to support the possibility that MaxLivePostmasterChildren - * changed.) In a standalone backend, we do not need this. - */ - if (PostmasterContext != NULL) - { - if (PMChildInUse) - pfree(PMChildInUse); - PMChildInUse = (bool *) - MemoryContextAllocZero(PostmasterContext, - num_child_inuse * sizeof(bool)); - } - next_child_inuse = 0; + num_child_flags = MaxLivePostmasterChildren(); + PMSignalState->num_child_flags = num_child_flags; } } @@ -239,56 +220,37 @@ GetQuitSignalReason(void) /* - * AssignPostmasterChildSlot - select an unused slot for a new postmaster - * child process, and set its state to ASSIGNED. Returns a slot number - * (one to N). + * MarkPostmasterChildSlotAssigned - mark the given slot as ASSIGNED for a + * new postmaster child process. * * Only the postmaster is allowed to execute this routine, so we need no * special locking. */ -int -AssignPostmasterChildSlot(void) +void +MarkPostmasterChildSlotAssigned(int slot) { - int slot = next_child_inuse; - int n; + Assert(slot > 0 && slot <= num_child_flags); + slot--; - /* - * Scan for a free slot. Notice that we trust nothing about the contents - * of PMSignalState, but use only postmaster-local data for this decision. - * We track the last slot assigned so as not to waste time repeatedly - * rescanning low-numbered slots. - */ - for (n = num_child_inuse; n > 0; n--) - { - if (--slot < 0) - slot = num_child_inuse - 1; - if (!PMChildInUse[slot]) - { - PMChildInUse[slot] = true; - PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED; - next_child_inuse = slot; - return slot + 1; - } - } + if (PMSignalState->PMChildFlags[slot] != PM_CHILD_UNUSED) + elog(FATAL, "postmaster child slot is already in use"); - /* Out of slots ... should never happen, else postmaster.c messed up */ - elog(FATAL, "no free slots in PMChildFlags array"); - return 0; /* keep compiler quiet */ + PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED; } /* - * ReleasePostmasterChildSlot - release a slot after death of a postmaster - * child process. This must be called in the postmaster process. + * MarkPostmasterChildSlotUnassigned - release a slot after death of a + * postmaster child process. This must be called in the postmaster process. * * Returns true if the slot had been in ASSIGNED state (the expected case), * false otherwise (implying that the child failed to clean itself up). */ bool -ReleasePostmasterChildSlot(int slot) +MarkPostmasterChildSlotUnassigned(int slot) { bool result; - Assert(slot > 0 && slot <= num_child_inuse); + Assert(slot > 0 && slot <= num_child_flags); slot--; /* @@ -298,7 +260,6 @@ ReleasePostmasterChildSlot(int slot) */ result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED); PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED; - PMChildInUse[slot] = false; return result; } @@ -309,7 +270,7 @@ ReleasePostmasterChildSlot(int slot) bool IsPostmasterChildWalSender(int slot) { - Assert(slot > 0 && slot <= num_child_inuse); + Assert(slot > 0 && slot <= num_child_flags); slot--; if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER) diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 772e6fdbf28..720ef99ee83 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -353,14 +353,9 @@ InitProcess(void) /* * Before we start accessing the shared memory in a serious way, mark * ourselves as an active postmaster child; this is so that the postmaster - * can detect it if we exit without cleaning up. (XXX autovac launcher - * currently doesn't participate in this; it probably should.) - * - * Slot sync worker also does not participate in it, see comments atop - * 'struct bkend' in postmaster.c. + * can detect it if we exit without cleaning up. */ - if (IsUnderPostmaster && !AmAutoVacuumLauncherProcess() && - !AmLogicalSlotSyncWorkerProcess()) + if (IsUnderPostmaster) RegisterPostmasterChildActive(); /* Decide which list should supply our PGPROC. */ @@ -578,6 +573,9 @@ InitAuxiliaryProcess(void) if (MyProc != NULL) elog(ERROR, "you already exist"); + if (IsUnderPostmaster) + RegisterPostmasterChildActive(); + /* * We use the ProcStructLock to protect assignment and releasing of * AuxiliaryProcs entries. diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index aac0b96bbc6..184b8301687 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -4189,6 +4189,12 @@ PostgresSingleUserMain(int argc, char *argv[], /* Initialize MaxBackends */ InitializeMaxBackends(); + /* + * We don't need postmaster child slots in single-user mode, but + * initialize them anyway to avoid having special handling. + */ + InitPostmasterChildSlots(); + /* Initialize size of fast-path lock cache. */ InitializeFastPathLocks(); diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h index 309a91124bd..f55adc85efc 100644 --- a/src/include/postmaster/bgworker_internals.h +++ b/src/include/postmaster/bgworker_internals.h @@ -27,7 +27,7 @@ * List of background workers, private to postmaster. * * All workers that are currently running will also have an entry in - * BackendList. + * ActiveChildList. */ typedef struct RegisteredBgWorker { diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index 63c12917cfe..a55e7a79fa4 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -13,8 +13,44 @@ #ifndef _POSTMASTER_H #define _POSTMASTER_H +#include "lib/ilist.h" #include "miscadmin.h" +/* + * A struct representing an active postmaster child process. This is used + * mainly to keep track of how many children we have and send them appropriate + * signals when necessary. All postmaster child processes are assigned a + * PMChild entry. That includes "normal" client sessions, but also autovacuum + * workers, walsenders, background workers, and aux processes. (Note that at + * the time of launch, walsenders are labeled B_BACKEND; we relabel them to + * B_WAL_SENDER upon noticing they've changed their PMChildFlags entry. Hence + * that check must be done before any operation that needs to distinguish + * walsenders from normal backends.) + * + * "dead-end" children are also allocated a PMChild entry: these are children + * launched just for the purpose of sending a friendly rejection message to a + * would-be client. We must track them because they are attached to shared + * memory, but we know they will never become live backends. + * + * child_slot is an identifier that is unique across all running child + * processes. It is used as an index into the PMChildFlags array. dead-end + * children are not assigned a child_slot and have child_slot == 0 (valid + * child_slot ids start from 1). + */ +typedef struct +{ + pid_t pid; /* process id of backend */ + int child_slot; /* PMChildSlot for this backend, if any */ + BackendType bkend_type; /* child process flavor, see above */ + struct RegisteredBgWorker *rw; /* bgworker info, if this is a bgworker */ + bool bgworker_notify; /* gets bgworker start/stop notifications */ + dlist_node elem; /* list link in ActiveChildList */ +} PMChild; + +#ifdef EXEC_BACKEND +extern int num_pmchild_slots; +#endif + /* GUC options */ extern PGDLLIMPORT bool EnableSSL; extern PGDLLIMPORT int SuperuserReservedConnections; @@ -80,6 +116,15 @@ const char *PostmasterChildName(BackendType child_type); extern void SubPostmasterMain(int argc, char *argv[]) pg_attribute_noreturn(); #endif +/* defined in pmchild.c */ +extern dlist_head ActiveChildList; + +extern void InitPostmasterChildSlots(void); +extern PMChild *AssignPostmasterChildSlot(BackendType btype); +extern PMChild *AllocDeadEndChild(void); +extern bool ReleasePostmasterChildSlot(PMChild *pmchild); +extern PMChild *FindPostmasterChildByPid(int pid); + /* * Note: MAX_BACKENDS is limited to 2^18-1 because that's the width reserved * for buffer references in buf_internals.h. This limitation could be lifted diff --git a/src/include/postmaster/syslogger.h b/src/include/postmaster/syslogger.h index 94ea263f2bf..27bd16ae1da 100644 --- a/src/include/postmaster/syslogger.h +++ b/src/include/postmaster/syslogger.h @@ -86,7 +86,7 @@ extern PGDLLIMPORT HANDLE syslogPipe[2]; #endif -extern int SysLogger_Start(void); +extern int SysLogger_Start(int child_slot); extern void write_syslogger_file(const char *buffer, int count, int destination); diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index ce4620af1f3..ea554ae895b 100644 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -70,8 +70,8 @@ extern void SendPostmasterSignal(PMSignalReason reason); extern bool CheckPostmasterSignal(PMSignalReason reason); extern void SetQuitSignalReason(QuitSignalReason reason); extern QuitSignalReason GetQuitSignalReason(void); -extern int AssignPostmasterChildSlot(void); -extern bool ReleasePostmasterChildSlot(int slot); +extern void MarkPostmasterChildSlotAssigned(int slot); +extern bool MarkPostmasterChildSlotUnassigned(int slot); extern bool IsPostmasterChildWalSender(int slot); extern void RegisterPostmasterChildActive(void); extern void MarkPostmasterChildWalSender(void); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 50bf81de2bc..5fbf0efe832 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -230,7 +230,6 @@ BTWriteState BUF_MEM BYTE BY_HANDLE_FILE_INFORMATION -Backend BackendParameters BackendStartupData BackendState @@ -1932,6 +1931,8 @@ PLyTransformToOb PLyTupleToOb PLyUnicode_FromStringAndSize_t PLy_elog_impl_t +PMChild +PMChildPool PMINIDUMP_CALLBACK_INFORMATION PMINIDUMP_EXCEPTION_INFORMATION PMINIDUMP_USER_STREAM_INFORMATION |