diff options
author | Andres Freund <andres@anarazel.de> | 2022-04-06 21:29:46 -0700 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2022-04-06 21:29:46 -0700 |
commit | 5891c7a8ed8f2d3d577e7eea34dacff12d7b6bbd (patch) | |
tree | 909f20fa511d5fde6463c58403bb82508c35cfab /src/backend/postmaster/autovacuum.c | |
parent | be902e26510788c70a874ea54bad753b723d018f (diff) | |
download | postgresql-5891c7a8ed8f2d3d577e7eea34dacff12d7b6bbd.tar.gz postgresql-5891c7a8ed8f2d3d577e7eea34dacff12d7b6bbd.zip |
pgstat: store statistics in shared memory.
Previously the statistics collector received statistics updates via UDP and
shared statistics data by writing them out to temporary files regularly. These
files can reach tens of megabytes and are written out up to twice a
second. This has repeatedly prevented us from adding additional useful
statistics.
Now statistics are stored in shared memory. Statistics for variable-numbered
objects are stored in a dshash hashtable (backed by dynamic shared
memory). Fixed-numbered stats are stored in plain shared memory.
The header for pgstat.c contains an overview of the architecture.
The stats collector is not needed anymore, remove it.
By utilizing the transactional statistics drop infrastructure introduced in a
prior commit, statistics entries cannot "leak" anymore. Previously leaked
statistics were dropped by pgstat_vacuum_stat(), called from [auto-]vacuum. On
systems with many small relations pgstat_vacuum_stat() could be quite
expensive.
Now that replicas drop statistics entries for dropped objects, it is not
necessary anymore to reset stats when starting from a cleanly shut down
replica.
Subsequent commits will perform some further code cleanup, adapt docs and add
tests.
Bumps PGSTAT_FILE_FORMAT_ID.
Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Thomas Munro <thomas.munro@gmail.com>
Reviewed-By: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-By: "David G. Johnston" <david.g.johnston@gmail.com>
Reviewed-By: Tomas Vondra <tomas.vondra@2ndquadrant.com> (in a much earlier version)
Reviewed-By: Arthur Zakirov <a.zakirov@postgrespro.ru> (in a much earlier version)
Reviewed-By: Antonin Houska <ah@cybertec.at> (in a much earlier version)
Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.de
Discussion: https://postgr.es/m/20220308205351.2xcn6k4x5yivcxyd@alap3.anarazel.de
Discussion: https://postgr.es/m/20210319235115.y3wz7hpnnrshdyv6@alap3.anarazel.de
Diffstat (limited to 'src/backend/postmaster/autovacuum.c')
-rw-r--r-- | src/backend/postmaster/autovacuum.c | 197 |
1 file changed, 31 insertions, 166 deletions
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index c6d30fa5271..f36c40e852f 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -44,11 +44,12 @@ * Note that there can be more than one worker in a database concurrently. * They will store the table they are currently vacuuming in shared memory, so * that other workers avoid being blocked waiting for the vacuum lock for that - * table. They will also reload the pgstats data just before vacuuming each - * table, to avoid vacuuming a table that was just finished being vacuumed by - * another worker and thus is no longer noted in shared memory. However, - * there is a window (caused by pgstat delay) on which a worker may choose a - * table that was already vacuumed; this is a bug in the current design. + * table. They will also fetch the last time the table was vacuumed from + * pgstats just before vacuuming each table, to avoid vacuuming a table that + * was just finished being vacuumed by another worker and thus is no longer + * noted in shared memory. However, there is a small window (due to not yet + * holding the relation lock) during which a worker may choose a table that was + * already vacuumed; this is a bug in the current design. 
* * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -129,9 +130,6 @@ int autovacuum_vac_cost_limit; int Log_autovacuum_min_duration = 600000; -/* how long to keep pgstat data in the launcher, in milliseconds */ -#define STATS_READ_DELAY 1000 - /* the minimum allowed time between two awakenings of the launcher */ #define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */ #define MAX_AUTOVAC_SLEEPTIME 300 /* seconds */ @@ -342,15 +340,11 @@ static void autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy); static AutoVacOpts *extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc); -static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared, - PgStat_StatDBEntry *shared, - PgStat_StatDBEntry *dbentry); static void perform_work_item(AutoVacuumWorkItem *workitem); static void autovac_report_activity(autovac_table *tab); static void autovac_report_workitem(AutoVacuumWorkItem *workitem, const char *nspname, const char *relname); static void avl_sigusr2_handler(SIGNAL_ARGS); -static void autovac_refresh_stats(void); @@ -555,12 +549,6 @@ AutoVacLauncherMain(int argc, char *argv[]) DatabaseListCxt = NULL; dlist_init(&DatabaseList); - /* - * Make sure pgstat also considers our stat data as gone. Note: we - * mustn't use autovac_refresh_stats here. - */ - pgstat_clear_snapshot(); - /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); @@ -612,6 +600,12 @@ AutoVacLauncherMain(int argc, char *argv[]) PGC_SUSET, PGC_S_OVERRIDE); /* + * Even when system is configured to use a different fetch consistency, + * for autovac we always want fresh stats. + */ + SetConfigOption("stats_fetch_consistency", "none", PGC_SUSET, PGC_S_OVERRIDE); + + /* * In emergency mode, just start a worker (unless shutdown was requested) * and go away. 
*/ @@ -963,9 +957,6 @@ rebuild_database_list(Oid newdb) HTAB *dbhash; dlist_iter iter; - /* use fresh stats */ - autovac_refresh_stats(); - newcxt = AllocSetContextCreate(AutovacMemCxt, "Autovacuum database list", ALLOCSET_DEFAULT_SIZES); @@ -1184,9 +1175,6 @@ do_start_worker(void) ALLOCSET_DEFAULT_SIZES); oldcxt = MemoryContextSwitchTo(tmpcxt); - /* use fresh stats */ - autovac_refresh_stats(); - /* Get a list of databases */ dblist = get_database_list(); @@ -1643,6 +1631,12 @@ AutoVacWorkerMain(int argc, char *argv[]) PGC_SUSET, PGC_S_OVERRIDE); /* + * Even when system is configured to use a different fetch consistency, + * for autovac we always want fresh stats. + */ + SetConfigOption("stats_fetch_consistency", "none", PGC_SUSET, PGC_S_OVERRIDE); + + /* * Get the info about the database we're going to work on. */ LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); @@ -1966,8 +1960,6 @@ do_autovacuum(void) HASHCTL ctl; HTAB *table_toast_map; ListCell *volatile cell; - PgStat_StatDBEntry *shared; - PgStat_StatDBEntry *dbentry; BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; @@ -1986,23 +1978,10 @@ do_autovacuum(void) ALLOCSET_DEFAULT_SIZES); MemoryContextSwitchTo(AutovacMemCxt); - /* - * may be NULL if we couldn't find an entry (only happens if we are - * forcing a vacuum for anti-wrap purposes). - */ - dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId); - /* Start a transaction so our commands have one to play into. */ StartTransactionCommand(); /* - * Clean up any dead statistics entries for this DB. We always want to do - * this exactly once per DB-processing cycle, even if we find nothing - * worth vacuuming in the database. - */ - pgstat_vacuum_stat(); - - /* * Compute the multixact age for which freezing is urgent. This is * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. 
@@ -2039,9 +2018,6 @@ do_autovacuum(void) /* StartTransactionCommand changed elsewhere */ MemoryContextSwitchTo(AutovacMemCxt); - /* The database hash where pgstat keeps shared relations */ - shared = pgstat_fetch_stat_dbentry(InvalidOid); - classRel = table_open(RelationRelationId, AccessShareLock); /* create a copy so we can use it after closing pg_class */ @@ -2119,8 +2095,8 @@ do_autovacuum(void) /* Fetch reloptions and the pgstat entry for this table */ relopts = extract_autovac_opts(tuple, pg_class_desc); - tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared, - shared, dbentry); + tabentry = pgstat_fetch_stat_tabentry_ext(classForm->relisshared, + relid); /* Check if it needs vacuum or analyze */ relation_needs_vacanalyze(relid, relopts, classForm, tabentry, @@ -2203,8 +2179,8 @@ do_autovacuum(void) } /* Fetch the pgstat entry for this table */ - tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared, - shared, dbentry); + tabentry = pgstat_fetch_stat_tabentry_ext(classForm->relisshared, + relid); relation_needs_vacanalyze(relid, relopts, classForm, tabentry, effective_multixact_freeze_max_age, @@ -2418,12 +2394,8 @@ do_autovacuum(void) /* * Check whether pgstat data still says we need to vacuum this table. * It could have changed if something else processed the table while - * we weren't looking. - * - * Note: we have a special case in pgstat code to ensure that the - * stats we read are as up-to-date as possible, to avoid the problem - * that somebody just finished vacuuming this table. The window to - * the race condition is not closed but it is very small. + * we weren't looking. This doesn't entirely close the race condition, + * but it is very small. 
*/ MemoryContextSwitchTo(AutovacMemCxt); tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc, @@ -2768,29 +2740,6 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc) return av; } -/* - * get_pgstat_tabentry_relid - * - * Fetch the pgstat entry of a table, either local to a database or shared. - */ -static PgStat_StatTabEntry * -get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared, - PgStat_StatDBEntry *dbentry) -{ - PgStat_StatTabEntry *tabentry = NULL; - - if (isshared) - { - if (PointerIsValid(shared)) - tabentry = hash_search(shared->tables, &relid, - HASH_FIND, NULL); - } - else if (PointerIsValid(dbentry)) - tabentry = hash_search(dbentry->tables, &relid, - HASH_FIND, NULL); - - return tabentry; -} /* * table_recheck_autovac @@ -2812,7 +2761,6 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, autovac_table *tab = NULL; bool wraparound; AutoVacOpts *avopts; - static bool reuse_stats = false; /* fetch the relation's relcache entry */ classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid)); @@ -2836,35 +2784,6 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, avopts = &hentry->ar_reloptions; } - /* - * Reuse the stats to recheck whether a relation needs to be vacuumed or - * analyzed if it was reloaded before and has not been cleared yet. This - * is necessary to avoid frequent refresh of stats, especially when there - * are very large number of relations and the refresh can cause lots of - * overhead. - * - * If we determined that a relation needs to be vacuumed or analyzed, - * based on the old stats, we refresh stats and recheck the necessity - * again. Because a relation may have already been vacuumed or analyzed by - * someone since the last reload of stats. 
- */ - if (reuse_stats) - { - recheck_relation_needs_vacanalyze(relid, avopts, classForm, - effective_multixact_freeze_max_age, - &dovacuum, &doanalyze, &wraparound); - - /* Quick exit if a relation doesn't need to be vacuumed or analyzed */ - if (!doanalyze && !dovacuum) - { - heap_freetuple(classTup); - return NULL; - } - } - - /* Use fresh stats and recheck again */ - autovac_refresh_stats(); - recheck_relation_needs_vacanalyze(relid, avopts, classForm, effective_multixact_freeze_max_age, &dovacuum, &doanalyze, &wraparound); @@ -2962,21 +2881,6 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, tab->at_dobalance = !(avopts && (avopts->vacuum_cost_limit > 0 || avopts->vacuum_cost_delay > 0)); - - /* - * When we decide to do vacuum or analyze, the existing stats cannot - * be reused in the next cycle because it's cleared at the end of - * vacuum or analyze (by AtEOXact_PgStat()). - */ - reuse_stats = false; - } - else - { - /* - * If neither vacuum nor analyze is necessary, the existing stats is - * not cleared and can be reused in the next cycle. - */ - reuse_stats = true; } heap_freetuple(classTup); @@ -3001,17 +2905,10 @@ recheck_relation_needs_vacanalyze(Oid relid, bool *wraparound) { PgStat_StatTabEntry *tabentry; - PgStat_StatDBEntry *shared = NULL; - PgStat_StatDBEntry *dbentry = NULL; - - if (classForm->relisshared) - shared = pgstat_fetch_stat_dbentry(InvalidOid); - else - dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId); /* fetch the pgstat table entry */ - tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared, - shared, dbentry); + tabentry = pgstat_fetch_stat_tabentry_ext(classForm->relisshared, + relid); relation_needs_vacanalyze(relid, avopts, classForm, tabentry, effective_multixact_freeze_max_age, @@ -3169,11 +3066,11 @@ relation_needs_vacanalyze(Oid relid, } /* - * If we found the table in the stats hash, and autovacuum is currently - * enabled, make a threshold-based decision whether to vacuum and/or - * analyze. 
If autovacuum is currently disabled, we must be here for - * anti-wraparound vacuuming only, so don't vacuum (or analyze) anything - * that's not being forced. + * If we found stats for the table, and autovacuum is currently enabled, + * make a threshold-based decision whether to vacuum and/or analyze. If + * autovacuum is currently disabled, we must be here for anti-wraparound + * vacuuming only, so don't vacuum (or analyze) anything that's not being + * forced. */ if (PointerIsValid(tabentry) && AutoVacuumingActive()) { @@ -3472,35 +3369,3 @@ AutoVacuumShmemInit(void) else Assert(found); } - -/* - * autovac_refresh_stats - * Refresh pgstats data for an autovacuum process - * - * Cause the next pgstats read operation to obtain fresh data, but throttle - * such refreshing in the autovacuum launcher. This is mostly to avoid - * rereading the pgstats files too many times in quick succession when there - * are many databases. - * - * Note: we avoid throttling in the autovac worker, as it would be - * counterproductive in the recheck logic. - */ -static void -autovac_refresh_stats(void) -{ - if (IsAutoVacuumLauncherProcess()) - { - static TimestampTz last_read = 0; - TimestampTz current_time; - - current_time = GetCurrentTimestamp(); - - if (!TimestampDifferenceExceeds(last_read, current_time, - STATS_READ_DELAY)) - return; - - last_read = current_time; - } - - pgstat_clear_snapshot(); -} |