Diffstat (limited to 'src/backend/commands/vacuum.c')
-rw-r--r--  src/backend/commands/vacuum.c  46
1 file changed, 13 insertions, 33 deletions
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 7aca69a0ba0..b50c554c517 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -766,16 +766,14 @@ vacuum_set_xid_limits(Relation rel,
  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
  *
  * If we scanned the whole relation then we should just use the count of
- * live tuples seen; but if we did not, we should not trust the count
- * unreservedly, especially not in VACUUM, which may have scanned a quite
- * nonrandom subset of the table.  When we have only partial information,
- * we take the old value of pg_class.reltuples as a measurement of the
+ * live tuples seen; but if we did not, we should not blindly extrapolate
+ * from that number, since VACUUM may have scanned a quite nonrandom
+ * subset of the table.  When we have only partial information, we take
+ * the old value of pg_class.reltuples as a measurement of the
  * tuple density in the unscanned pages.
- *
- * This routine is shared by VACUUM and ANALYZE.
  */
 double
-vac_estimate_reltuples(Relation relation, bool is_analyze,
+vac_estimate_reltuples(Relation relation,
                        BlockNumber total_pages,
                        BlockNumber scanned_pages,
                        double scanned_tuples)
@@ -783,9 +781,8 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
     BlockNumber old_rel_pages = relation->rd_rel->relpages;
     double      old_rel_tuples = relation->rd_rel->reltuples;
     double      old_density;
-    double      new_density;
-    double      multiplier;
-    double      updated_density;
+    double      unscanned_pages;
+    double      total_tuples;
 
     /* If we did scan the whole table, just use the count as-is */
     if (scanned_pages >= total_pages)
@@ -809,31 +806,14 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
 
     /*
      * Okay, we've covered the corner cases.  The normal calculation is to
-     * convert the old measurement to a density (tuples per page), then update
-     * the density using an exponential-moving-average approach, and finally
-     * compute reltuples as updated_density * total_pages.
-     *
-     * For ANALYZE, the moving average multiplier is just the fraction of the
-     * table's pages we scanned.  This is equivalent to assuming that the
-     * tuple density in the unscanned pages didn't change.  Of course, it
-     * probably did, if the new density measurement is different.  But over
-     * repeated cycles, the value of reltuples will converge towards the
-     * correct value, if repeated measurements show the same new density.
-     *
-     * For VACUUM, the situation is a bit different: we have looked at a
-     * nonrandom sample of pages, but we know for certain that the pages we
-     * didn't look at are precisely the ones that haven't changed lately.
-     * Thus, there is a reasonable argument for doing exactly the same thing
-     * as for the ANALYZE case, that is use the old density measurement as the
-     * value for the unscanned pages.
-     *
-     * This logic could probably use further refinement.
+     * convert the old measurement to a density (tuples per page), then
+     * estimate the number of tuples in the unscanned pages using that figure,
+     * and finally add on the number of tuples in the scanned pages.
      */
     old_density = old_rel_tuples / old_rel_pages;
-    new_density = scanned_tuples / scanned_pages;
-    multiplier = (double) scanned_pages / (double) total_pages;
-    updated_density = old_density + (new_density - old_density) * multiplier;
-    return floor(updated_density * total_pages + 0.5);
+    unscanned_pages = (double) total_pages - (double) scanned_pages;
+    total_tuples = old_density * unscanned_pages + scanned_tuples;
+    return floor(total_tuples + 0.5);
 }
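
Note on the arithmetic: the removed moving-average formula and the new unscanned-pages formula are algebraically the same computation, since updated_density * total_pages = old_density * (total_pages - scanned_pages) + new_density * scanned_pages, and new_density * scanned_pages is just scanned_tuples. The standalone sketch below (not part of the commit; the helper names estimate_old and estimate_new are invented for illustration) runs both versions on a hypothetical table and prints the same estimate for each:

#include <math.h>
#include <stdio.h>

/* Removed formula: exponential moving average of tuple density. */
static double
estimate_old(double old_rel_tuples, double old_rel_pages,
             double total_pages, double scanned_pages,
             double scanned_tuples)
{
    double old_density = old_rel_tuples / old_rel_pages;
    double new_density = scanned_tuples / scanned_pages;
    double multiplier = scanned_pages / total_pages;
    double updated_density = old_density +
        (new_density - old_density) * multiplier;

    return floor(updated_density * total_pages + 0.5);
}

/* New formula: old density for unscanned pages, exact count for scanned. */
static double
estimate_new(double old_rel_tuples, double old_rel_pages,
             double total_pages, double scanned_pages,
             double scanned_tuples)
{
    double old_density = old_rel_tuples / old_rel_pages;
    double unscanned_pages = total_pages - scanned_pages;

    return floor(old_density * unscanned_pages + scanned_tuples + 0.5);
}

int
main(void)
{
    /* Hypothetical table: 1000 pages at 100 tuples/page per pg_class. */
    double old_rel_pages = 1000.0, old_rel_tuples = 100000.0;

    /* VACUUM visited 10 pages and found only 5 live tuples on each. */
    double total_pages = 1000.0, scanned_pages = 10.0, scanned_tuples = 50.0;

    printf("old: %.0f\n",
           estimate_old(old_rel_tuples, old_rel_pages,
                        total_pages, scanned_pages, scanned_tuples));
    printf("new: %.0f\n",
           estimate_new(old_rel_tuples, old_rel_pages,
                        total_pages, scanned_pages, scanned_tuples));
    return 0;
}

Built with cc sketch.c -lm, both estimators print 99050 for this input. Up to floating-point rounding, the patch therefore changes no VACUUM behavior; it drops the is_analyze parameter and the comment about the routine being shared with ANALYZE, and states the same computation more directly.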