diff options
-rw-r--r-- | contrib/pgstattuple/pgstatapprox.c |  2
-rw-r--r-- | src/backend/commands/analyze.c     | 19
-rw-r--r-- | src/backend/commands/vacuum.c      | 46
-rw-r--r-- | src/backend/commands/vacuumlazy.c  |  2
-rw-r--r-- | src/include/commands/vacuum.h      |  2
5 files changed, 27 insertions, 44 deletions
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index 3cfbc086490..474c3bd517f 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -184,7 +184,7 @@ statapprox_heap(Relation rel, output_type *stat) stat->table_len = (uint64) nblocks * BLCKSZ; - stat->tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned, + stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned, stat->tuple_count + misc_count); /* diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 5f21fcb5f40..ef93fb4d172 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -1249,19 +1249,22 @@ acquire_sample_rows(Relation onerel, int elevel, qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows); /* - * Estimate total numbers of rows in relation. For live rows, use - * vac_estimate_reltuples; for dead rows, we have no source of old - * information, so we have to assume the density is the same in unseen - * pages as in the pages we scanned. + * Estimate total numbers of live and dead rows in relation, extrapolating + * on the assumption that the average tuple density in pages we didn't + * scan is the same as in the pages we did scan. Since what we scanned is + * a random sample of the pages in the relation, this should be a good + * assumption. 
*/ - *totalrows = vac_estimate_reltuples(onerel, true, - totalblocks, - bs.m, - liverows); if (bs.m > 0) + { + *totalrows = floor((liverows / bs.m) * totalblocks + 0.5); *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5); + } else + { + *totalrows = 0.0; *totaldeadrows = 0.0; + } /* * Emit some interesting relation info diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 7aca69a0ba0..b50c554c517 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -766,16 +766,14 @@ vacuum_set_xid_limits(Relation rel, * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples * * If we scanned the whole relation then we should just use the count of - * live tuples seen; but if we did not, we should not trust the count - * unreservedly, especially not in VACUUM, which may have scanned a quite - * nonrandom subset of the table. When we have only partial information, - * we take the old value of pg_class.reltuples as a measurement of the + * live tuples seen; but if we did not, we should not blindly extrapolate + * from that number, since VACUUM may have scanned a quite nonrandom + * subset of the table. When we have only partial information, we take + * the old value of pg_class.reltuples as a measurement of the * tuple density in the unscanned pages. - * - * This routine is shared by VACUUM and ANALYZE. 
*/ double -vac_estimate_reltuples(Relation relation, bool is_analyze, +vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples) @@ -783,9 +781,8 @@ vac_estimate_reltuples(Relation relation, bool is_analyze, BlockNumber old_rel_pages = relation->rd_rel->relpages; double old_rel_tuples = relation->rd_rel->reltuples; double old_density; - double new_density; - double multiplier; - double updated_density; + double unscanned_pages; + double total_tuples; /* If we did scan the whole table, just use the count as-is */ if (scanned_pages >= total_pages) @@ -809,31 +806,14 @@ vac_estimate_reltuples(Relation relation, bool is_analyze, /* * Okay, we've covered the corner cases. The normal calculation is to - * convert the old measurement to a density (tuples per page), then update - * the density using an exponential-moving-average approach, and finally - * compute reltuples as updated_density * total_pages. - * - * For ANALYZE, the moving average multiplier is just the fraction of the - * table's pages we scanned. This is equivalent to assuming that the - * tuple density in the unscanned pages didn't change. Of course, it - * probably did, if the new density measurement is different. But over - * repeated cycles, the value of reltuples will converge towards the - * correct value, if repeated measurements show the same new density. - * - * For VACUUM, the situation is a bit different: we have looked at a - * nonrandom sample of pages, but we know for certain that the pages we - * didn't look at are precisely the ones that haven't changed lately. - * Thus, there is a reasonable argument for doing exactly the same thing - * as for the ANALYZE case, that is use the old density measurement as the - * value for the unscanned pages. - * - * This logic could probably use further refinement. 
+ * convert the old measurement to a density (tuples per page), then + * estimate the number of tuples in the unscanned pages using that figure, + * and finally add on the number of tuples in the scanned pages. */ old_density = old_rel_tuples / old_rel_pages; - new_density = scanned_tuples / scanned_pages; - multiplier = (double) scanned_pages / (double) total_pages; - updated_density = old_density + (new_density - old_density) * multiplier; - return floor(updated_density * total_pages + 0.5); + unscanned_pages = (double) total_pages - (double) scanned_pages; + total_tuples = old_density * unscanned_pages + scanned_tuples; + return floor(total_tuples + 0.5); } diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index cf7f5e11629..9ac84e8293a 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -1286,7 +1286,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, vacrelstats->new_dead_tuples = nkeep; /* now we can compute the new value for pg_class.reltuples */ - vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false, + vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, nblocks, vacrelstats->tupcount_pages, num_tuples); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 797b6dfec8d..85d472f0a54 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -162,7 +162,7 @@ extern void vacuum(int options, List *relations, VacuumParams *params, extern void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel); extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode); -extern double vac_estimate_reltuples(Relation relation, bool is_analyze, +extern double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples); |