diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/postmaster/pgstat.c | 198 | ||||
-rw-r--r-- | src/include/pgstat.h | 91 |
2 files changed, 186 insertions, 103 deletions
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 438d8a7b589..432b2e28831 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -2687,30 +2687,65 @@ pgstat_initialize(void) * * Initialize this backend's entry in the PgBackendStatus array. * Called from InitPostgres. - * MyDatabaseId, session userid, and application_name must be set - * (hence, this cannot be combined with pgstat_initialize). + * Apart from auxiliary processes, MyBackendId, MyDatabaseId, + * session userid, and application_name must be set for a + * backend (hence, this cannot be combined with pgstat_initialize). + * Note also that we must be inside a transaction if this isn't an aux + * process, as we may need to do encoding conversion on some strings. * ---------- */ void pgstat_bestart(void) { - TimestampTz proc_start_timestamp; - Oid userid; - SockAddr clientaddr; - volatile PgBackendStatus *beentry; + volatile PgBackendStatus *vbeentry = MyBEEntry; + PgBackendStatus lbeentry; +#ifdef USE_SSL + PgBackendSSLStatus lsslstatus; +#endif + + /* pgstats state must be initialized from pgstat_initialize() */ + Assert(vbeentry != NULL); /* - * To minimize the time spent modifying the PgBackendStatus entry, fetch - * all the needed data first. + * To minimize the time spent modifying the PgBackendStatus entry, and + * avoid risk of errors inside the critical section, we first copy the + * shared-memory struct to a local variable, then modify the data in the + * local variable, then copy the local variable back to shared memory. + * Only the last step has to be inside the critical section. * + * Most of the data we copy from shared memory is just going to be + * overwritten, but the struct's not so large that it's worth the + * maintenance hassle to copy only the needful fields. + */ + memcpy(&lbeentry, + (char *) vbeentry, + sizeof(PgBackendStatus)); + + /* This struct can just start from zeroes each time, though */ +#ifdef USE_SSL + memset(&lsslstatus, 0, sizeof(lsslstatus)); +#endif + + /* + * Now fill in all the fields of lbeentry, except for strings that are + * out-of-line data. Those have to be handled separately, below. + */ + lbeentry.st_procpid = MyProcPid; + + /* * If we have a MyProcPort, use its session start time (for consistency, * and to save a kernel call). */ if (MyProcPort) - proc_start_timestamp = MyProcPort->SessionStartTime; + lbeentry.st_proc_start_timestamp = MyProcPort->SessionStartTime; else - proc_start_timestamp = GetCurrentTimestamp(); - userid = GetSessionUserId(); + lbeentry.st_proc_start_timestamp = GetCurrentTimestamp(); + + lbeentry.st_activity_start_timestamp = 0; + lbeentry.st_state_start_timestamp = 0; + lbeentry.st_xact_start_timestamp = 0; + lbeentry.st_databaseid = MyDatabaseId; + lbeentry.st_userid = GetSessionUserId(); /* * We may not have a MyProcPort (eg, if this is the autovacuum process). @@ -2718,62 +2753,68 @@ pgstat_bestart(void) * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port. */ if (MyProcPort) - memcpy(&clientaddr, &MyProcPort->raddr, sizeof(clientaddr)); + memcpy(&lbeentry.st_clientaddr, &MyProcPort->raddr, + sizeof(lbeentry.st_clientaddr)); else - MemSet(&clientaddr, 0, sizeof(clientaddr)); + MemSet(&lbeentry.st_clientaddr, 0, sizeof(lbeentry.st_clientaddr)); - /* - * Initialize my status entry, following the protocol of bumping - * st_changecount before and after; and make sure it's even afterwards. We - * use a volatile pointer here to ensure the compiler doesn't try to get - * cute. - */ - beentry = MyBEEntry; - do - { - pgstat_increment_changecount_before(beentry); - } while ((beentry->st_changecount & 1) == 0); - - beentry->st_procpid = MyProcPid; - beentry->st_proc_start_timestamp = proc_start_timestamp; - beentry->st_activity_start_timestamp = 0; - beentry->st_state_start_timestamp = 0; - beentry->st_xact_start_timestamp = 0; - beentry->st_databaseid = MyDatabaseId; - beentry->st_userid = userid; - beentry->st_clientaddr = clientaddr; - if (MyProcPort && MyProcPort->remote_hostname) - strlcpy(beentry->st_clienthostname, MyProcPort->remote_hostname, - NAMEDATALEN); - else - beentry->st_clienthostname[0] = '\0'; #ifdef USE_SSL if (MyProcPort && MyProcPort->ssl != NULL) { - beentry->st_ssl = true; - beentry->st_sslstatus->ssl_bits = be_tls_get_cipher_bits(MyProcPort); - beentry->st_sslstatus->ssl_compression = be_tls_get_compression(MyProcPort); - be_tls_get_version(MyProcPort, beentry->st_sslstatus->ssl_version, NAMEDATALEN); - be_tls_get_cipher(MyProcPort, beentry->st_sslstatus->ssl_cipher, NAMEDATALEN); - be_tls_get_peerdn_name(MyProcPort, beentry->st_sslstatus->ssl_clientdn, NAMEDATALEN); + lbeentry.st_ssl = true; + lsslstatus.ssl_bits = be_tls_get_cipher_bits(MyProcPort); + lsslstatus.ssl_compression = be_tls_get_compression(MyProcPort); + be_tls_get_version(MyProcPort, lsslstatus.ssl_version, NAMEDATALEN); + be_tls_get_cipher(MyProcPort, lsslstatus.ssl_cipher, NAMEDATALEN); + be_tls_get_peerdn_name(MyProcPort, lsslstatus.ssl_clientdn, NAMEDATALEN); } else { - beentry->st_ssl = false; + lbeentry.st_ssl = false; } #else - beentry->st_ssl = false; + lbeentry.st_ssl = false; #endif - beentry->st_waiting = false; - beentry->st_state = STATE_UNDEFINED; - beentry->st_appname[0] = '\0'; - beentry->st_activity[0] = '\0'; + + lbeentry.st_waiting = false; + lbeentry.st_state = STATE_UNDEFINED; + + /* + * We're ready to enter the critical section that fills the shared-memory + * status entry. We follow the protocol of bumping st_changecount before + * and after; and make sure it's even afterwards. We use a volatile + * pointer here to ensure the compiler doesn't try to get cute. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(vbeentry); + + /* make sure we'll memcpy the same st_changecount back */ + lbeentry.st_changecount = vbeentry->st_changecount; + + memcpy((char *) vbeentry, + &lbeentry, + sizeof(PgBackendStatus)); + + /* + * We can write the out-of-line strings and structs using the pointers + * that are in lbeentry; this saves some de-volatilizing messiness. + */ + lbeentry.st_appname[0] = '\0'; + if (MyProcPort && MyProcPort->remote_hostname) + strlcpy(lbeentry.st_clienthostname, MyProcPort->remote_hostname, + NAMEDATALEN); + else + lbeentry.st_clienthostname[0] = '\0'; + lbeentry.st_activity[0] = '\0'; /* Also make sure the last byte in each string area is always 0 */ - beentry->st_clienthostname[NAMEDATALEN - 1] = '\0'; - beentry->st_appname[NAMEDATALEN - 1] = '\0'; - beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0'; + lbeentry.st_appname[NAMEDATALEN - 1] = '\0'; + lbeentry.st_clienthostname[NAMEDATALEN - 1] = '\0'; + lbeentry.st_activity[pgstat_track_activity_query_size - 1] = '\0'; - pgstat_increment_changecount_after(beentry); +#ifdef USE_SSL + memcpy(lbeentry.st_sslstatus, &lsslstatus, sizeof(PgBackendSSLStatus)); +#endif + + PGSTAT_END_WRITE_ACTIVITY(vbeentry); /* Update app name to current GUC setting */ if (application_name) @@ -2808,11 +2849,11 @@ pgstat_beshutdown_hook(int code, Datum arg) * before and after. We use a volatile pointer here to ensure the * compiler doesn't try to get cute. */ - pgstat_increment_changecount_before(beentry); + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); beentry->st_procpid = 0; /* mark invalid */ - pgstat_increment_changecount_after(beentry); + PGSTAT_END_WRITE_ACTIVITY(beentry); } @@ -2849,7 +2890,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) * non-disabled state. As our final update, change the state and * clear fields we will not be updating anymore. */ - pgstat_increment_changecount_before(beentry); + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); beentry->st_state = STATE_DISABLED; beentry->st_state_start_timestamp = 0; beentry->st_activity[0] = '\0'; @@ -2857,14 +2898,14 @@ pgstat_report_activity(BackendState state, const char *cmd_str) /* st_xact_start_timestamp and st_waiting are also disabled */ beentry->st_xact_start_timestamp = 0; beentry->st_waiting = false; - pgstat_increment_changecount_after(beentry); + PGSTAT_END_WRITE_ACTIVITY(beentry); } return; } /* - * To minimize the time spent modifying the entry, fetch all the needed - * data first. + * To minimize the time spent modifying the entry, and avoid risk of + * errors inside the critical section, fetch all the needed data first. */ start_timestamp = GetCurrentStatementStartTimestamp(); if (cmd_str != NULL) @@ -2877,7 +2918,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) /* * Now update the status entry */ - pgstat_increment_changecount_before(beentry); + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); beentry->st_state = state; beentry->st_state_start_timestamp = current_timestamp; @@ -2889,7 +2930,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) beentry->st_activity_start_timestamp = start_timestamp; } - pgstat_increment_changecount_after(beentry); + PGSTAT_END_WRITE_ACTIVITY(beentry); } /* ---------- @@ -2915,12 +2956,12 @@ pgstat_report_appname(const char *appname) * st_changecount before and after. We use a volatile pointer here to * ensure the compiler doesn't try to get cute. */ - pgstat_increment_changecount_before(beentry); + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); memcpy((char *) beentry->st_appname, appname, len); beentry->st_appname[len] = '\0'; - pgstat_increment_changecount_after(beentry); + PGSTAT_END_WRITE_ACTIVITY(beentry); } /* @@ -2940,9 +2981,11 @@ pgstat_report_xact_timestamp(TimestampTz tstamp) * st_changecount before and after. We use a volatile pointer here to * ensure the compiler doesn't try to get cute. */ - pgstat_increment_changecount_before(beentry); + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_xact_start_timestamp = tstamp; - pgstat_increment_changecount_after(beentry); + + PGSTAT_END_WRITE_ACTIVITY(beentry); } /* ---------- @@ -3034,14 +3077,19 @@ pgstat_read_current_status(void) int before_changecount; int after_changecount; - pgstat_save_changecount_before(beentry, before_changecount); + pgstat_begin_read_activity(beentry, before_changecount); localentry->backendStatus.st_procpid = beentry->st_procpid; + /* Skip all the data-copying work if entry is not in use */ if (localentry->backendStatus.st_procpid > 0) { memcpy(&localentry->backendStatus, (char *) beentry, sizeof(PgBackendStatus)); /* + * For each PgBackendStatus field that is a pointer, copy the + * pointed-to data, then adjust the local copy of the pointer + * field to point at the local copy of the data. + * * strcpy is safe even if the string is modified concurrently, * because there's always a \0 at the end of the buffer. */ @@ -3051,7 +3099,6 @@ pgstat_read_current_status(void) localentry->backendStatus.st_clienthostname = localclienthostname; strcpy(localactivity, (char *) beentry->st_activity); localentry->backendStatus.st_activity = localactivity; - localentry->backendStatus.st_ssl = beentry->st_ssl; #ifdef USE_SSL if (beentry->st_ssl) { @@ -3061,9 +3108,10 @@ pgstat_read_current_status(void) #endif } - pgstat_save_changecount_after(beentry, after_changecount); - if (before_changecount == after_changecount && - (before_changecount & 1) == 0) + pgstat_end_read_activity(beentry, after_changecount); + + if (pgstat_read_activity_complete(before_changecount, + after_changecount)) break; /* Make sure we can break out of loop if stuck... */ @@ -3140,14 +3188,14 @@ pgstat_get_backend_current_activity(int pid, bool checkUser) int before_changecount; int after_changecount; - pgstat_save_changecount_before(vbeentry, before_changecount); + pgstat_begin_read_activity(vbeentry, before_changecount); found = (vbeentry->st_procpid == pid); - pgstat_save_changecount_after(vbeentry, after_changecount); + pgstat_end_read_activity(vbeentry, after_changecount); - if (before_changecount == after_changecount && - (before_changecount & 1) == 0) + if (pgstat_read_activity_complete(before_changecount, + after_changecount)) break; /* Make sure we can break out of loop if stuck... */ diff --git a/src/include/pgstat.h b/src/include/pgstat.h index fed4d2413c4..b020c81ac1d 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -752,11 +752,12 @@ typedef struct PgBackendStatus * the copy is valid; otherwise start over. This makes updates cheap * while reads are potentially expensive, but that's the tradeoff we want. * - * The above protocol needs the memory barriers to ensure that the - * apparent order of execution is as it desires. Otherwise, for example, - * the CPU might rearrange the code so that st_changecount is incremented - * twice before the modification on a machine with weak memory ordering. - * This surprising result can lead to bugs. + * The above protocol needs memory barriers to ensure that the apparent + * order of execution is as it desires. Otherwise, for example, the CPU + * might rearrange the code so that st_changecount is incremented twice + * before the modification on a machine with weak memory ordering. Hence, + * use the macros defined below for manipulating st_changecount, rather + * than touching it directly. */ int st_changecount; @@ -793,42 +794,76 @@ typedef struct PgBackendStatus } PgBackendStatus; /* - * Macros to load and store st_changecount with the memory barriers. + * Macros to load and store st_changecount with appropriate memory barriers. * - * pgstat_increment_changecount_before() and - * pgstat_increment_changecount_after() need to be called before and after - * PgBackendStatus entries are modified, respectively. This makes sure that - * st_changecount is incremented around the modification. + * Use PGSTAT_BEGIN_WRITE_ACTIVITY() before, and PGSTAT_END_WRITE_ACTIVITY() + * after, modifying the current process's PgBackendStatus data. Note that, + * since there is no mechanism for cleaning up st_changecount after an error, + * THESE MACROS FORM A CRITICAL SECTION. Any error between them will be + * promoted to PANIC, causing a database restart to clean up shared memory! + * Hence, keep the critical section as short and straight-line as possible. + * Aside from being safer, that minimizes the window in which readers will + * have to loop. * - * Also pgstat_save_changecount_before() and pgstat_save_changecount_after() - * need to be called before and after PgBackendStatus entries are copied into - * private memory, respectively. + * Reader logic should follow this sketch: + * + * for (;;) + * { + * int before_ct, after_ct; + * + * pgstat_begin_read_activity(beentry, before_ct); + * ... copy beentry data to local memory ... + * pgstat_end_read_activity(beentry, after_ct); + * if (pgstat_read_activity_complete(before_ct, after_ct)) + * break; + * CHECK_FOR_INTERRUPTS(); + * } + * + * For extra safety, we generally use volatile beentry pointers, although + * the memory barriers should theoretically be sufficient. */ -#define pgstat_increment_changecount_before(beentry) \ - do { \ - beentry->st_changecount++; \ +#define PGSTAT_BEGIN_WRITE_ACTIVITY(beentry) \ + do { \ + START_CRIT_SECTION(); \ + (beentry)->st_changecount++; \ pg_write_barrier(); \ } while (0) -#define pgstat_increment_changecount_after(beentry) \ - do { \ +#define PGSTAT_END_WRITE_ACTIVITY(beentry) \ + do { \ pg_write_barrier(); \ - beentry->st_changecount++; \ - Assert((beentry->st_changecount & 1) == 0); \ + (beentry)->st_changecount++; \ + Assert(((beentry)->st_changecount & 1) == 0); \ + END_CRIT_SECTION(); \ } while (0) -#define pgstat_save_changecount_before(beentry, save_changecount) \ - do { \ - save_changecount = beentry->st_changecount; \ - pg_read_barrier(); \ +#define pgstat_begin_read_activity(beentry, before_changecount) \ + do { \ + (before_changecount) = (beentry)->st_changecount; \ + pg_read_barrier(); \ } while (0) -#define pgstat_save_changecount_after(beentry, save_changecount) \ - do { \ - pg_read_barrier(); \ - save_changecount = beentry->st_changecount; \ +#define pgstat_end_read_activity(beentry, after_changecount) \ + do { \ + pg_read_barrier(); \ + (after_changecount) = (beentry)->st_changecount; \ } while (0) +#define pgstat_read_activity_complete(before_changecount, after_changecount) \ + ((before_changecount) == (after_changecount) && \ + ((before_changecount) & 1) == 0) + +/* deprecated names for above macros; these are gone in v12 */ +#define pgstat_increment_changecount_before(beentry) \ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry) +#define pgstat_increment_changecount_after(beentry) \ + PGSTAT_END_WRITE_ACTIVITY(beentry) +#define pgstat_save_changecount_before(beentry, save_changecount) \ + pgstat_begin_read_activity(beentry, save_changecount) +#define pgstat_save_changecount_after(beentry, save_changecount) \ + pgstat_end_read_activity(beentry, save_changecount) + + /* ---------- * LocalPgBackendStatus * |