diff options
Diffstat (limited to 'src/backend/commands/vacuum.c')
-rw-r--r-- | src/backend/commands/vacuum.c | 98 |
1 files changed, 82 insertions, 16 deletions
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 9606569617a..224c34f6e78 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -20,6 +20,8 @@ */ #include "postgres.h" +#include <math.h> + #include "access/clog.h" #include "access/genam.h" #include "access/heapam.h" @@ -62,7 +64,7 @@ static BufferAccessStrategy vac_strategy; static List *get_rel_oids(Oid relid, const RangeVar *vacrel); static void vac_truncate_clog(TransactionId frozenXID); static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, - bool for_wraparound, bool *scanned_all); + bool for_wraparound); /* @@ -219,12 +221,10 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, foreach(cur, relations) { Oid relid = lfirst_oid(cur); - bool scanned_all = false; if (vacstmt->options & VACOPT_VACUUM) { - if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound, - &scanned_all)) + if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound)) continue; } @@ -241,7 +241,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, PushActiveSnapshot(GetTransactionSnapshot()); } - analyze_rel(relid, vacstmt, vac_strategy, !scanned_all); + analyze_rel(relid, vacstmt, vac_strategy); if (use_own_xacts) { @@ -454,6 +454,79 @@ vacuum_set_xid_limits(int freeze_min_age, /* + * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples + * + * If we scanned the whole relation then we should just use the count of + * live tuples seen; but if we did not, we should not trust the count + * unreservedly, especially not in VACUUM, which may have scanned a quite + * nonrandom subset of the table. When we have only partial information, + * we take the old value of pg_class.reltuples as a measurement of the + * tuple density in the unscanned pages. + * + * This routine is shared by VACUUM and ANALYZE. + */ +double +vac_estimate_reltuples(Relation relation, bool is_analyze, + BlockNumber total_pages, + BlockNumber scanned_pages, + double scanned_tuples) +{ + BlockNumber old_rel_pages = relation->rd_rel->relpages; + double old_rel_tuples = relation->rd_rel->reltuples; + double old_density; + double new_density; + double multiplier; + double updated_density; + + /* If we did scan the whole table, just use the count as-is */ + if (scanned_pages >= total_pages) + return scanned_tuples; + + /* + * If scanned_pages is zero but total_pages isn't, keep the existing + * value of reltuples. + */ + if (scanned_pages == 0) + return old_rel_tuples; + + /* + * If old value of relpages is zero, old density is indeterminate; we + * can't do much except scale up scanned_tuples to match total_pages. + */ + if (old_rel_pages == 0) + return floor((scanned_tuples / scanned_pages) * total_pages + 0.5); + + /* + * Okay, we've covered the corner cases. The normal calculation is to + * convert the old measurement to a density (tuples per page), then + * update the density using an exponential-moving-average approach, + * and finally compute reltuples as updated_density * total_pages. + * + * For ANALYZE, the moving average multiplier is just the fraction of + * the table's pages we scanned. This is equivalent to assuming + * that the tuple density in the unscanned pages didn't change. Of + * course, it probably did, if the new density measurement is different. + * But over repeated cycles, the value of reltuples will converge towards + * the correct value, if repeated measurements show the same new density. + * + * For VACUUM, the situation is a bit different: we have looked at a + * nonrandom sample of pages, but we know for certain that the pages we + * didn't look at are precisely the ones that haven't changed lately. + * Thus, there is a reasonable argument for doing exactly the same thing + * as for the ANALYZE case, that is use the old density measurement as + * the value for the unscanned pages. + * + * This logic could probably use further refinement. + */ + old_density = old_rel_tuples / old_rel_pages; + new_density = scanned_tuples / scanned_pages; + multiplier = (double) scanned_pages / (double) total_pages; + updated_density = old_density + (new_density - old_density) * multiplier; + return floor(updated_density * total_pages + 0.5); +} + + +/* * vac_update_relstats() -- update statistics for one relation * * Update the whole-relation statistics that are kept in its pg_class @@ -480,7 +553,7 @@ vacuum_set_xid_limits(int freeze_min_age, * somebody vacuuming pg_class might think they could delete a tuple * marked with xmin = our xid. * - * This routine is shared by VACUUM and stand-alone ANALYZE. + * This routine is shared by VACUUM and ANALYZE. */ void vac_update_relstats(Relation relation, @@ -758,14 +831,10 @@ vac_truncate_clog(TransactionId frozenXID) * many small transactions. Otherwise, two-phase locking would require * us to lock the entire database during one pass of the vacuum cleaner. * - * We'll return true in *scanned_all if the vacuum scanned all heap - * pages, and updated pg_class. - * * At entry and exit, we are not inside a transaction. */ static bool -vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, - bool *scanned_all) +vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound) { LOCKMODE lmode; Relation onerel; @@ -775,9 +844,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, int save_sec_context; int save_nestlevel; - if (scanned_all) - *scanned_all = false; - /* Begin a transaction for vacuuming this relation */ StartTransactionCommand(); @@ -971,7 +1037,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, vacstmt->freeze_min_age, vacstmt->freeze_table_age); } else - lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all); + lazy_vacuum_rel(onerel, vacstmt, vac_strategy); /* Roll back any GUC changes executed by index functions */ AtEOXact_GUC(false, save_nestlevel); @@ -997,7 +1063,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, * totally unimportant for toast relations. */ if (toast_relid != InvalidOid) - vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL); + vacuum_rel(toast_relid, vacstmt, false, for_wraparound); /* * Now release the session-level lock on the master table. |