about summary refs log tree commit diff
path: root/src/backend/commands/vacuum.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/commands/vacuum.c')
-rw-r--r-- src/backend/commands/vacuum.c 98
1 files changed, 82 insertions, 16 deletions
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 9606569617a..224c34f6e78 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -20,6 +20,8 @@
*/
#include "postgres.h"
+#include <math.h>
+
#include "access/clog.h"
#include "access/genam.h"
#include "access/heapam.h"
@@ -62,7 +64,7 @@ static BufferAccessStrategy vac_strategy;
static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
static void vac_truncate_clog(TransactionId frozenXID);
static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
- bool for_wraparound, bool *scanned_all);
+ bool for_wraparound);
/*
@@ -219,12 +221,10 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
foreach(cur, relations)
{
Oid relid = lfirst_oid(cur);
- bool scanned_all = false;
if (vacstmt->options & VACOPT_VACUUM)
{
- if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
- &scanned_all))
+ if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
continue;
}
@@ -241,7 +241,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
PushActiveSnapshot(GetTransactionSnapshot());
}
- analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
+ analyze_rel(relid, vacstmt, vac_strategy);
if (use_own_xacts)
{
@@ -454,6 +454,79 @@ vacuum_set_xid_limits(int freeze_min_age,
/*
+ * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
+ *
+ * If we scanned the whole relation then we should just use the count of
+ * live tuples seen; but if we did not, we should not trust the count
+ * unreservedly, especially not in VACUUM, which may have scanned a quite
+ * nonrandom subset of the table. When we have only partial information,
+ * we take the old value of pg_class.reltuples as a measurement of the
+ * tuple density in the unscanned pages.
+ *
+ * This routine is shared by VACUUM and ANALYZE.
+ */
+double
+vac_estimate_reltuples(Relation relation, bool is_analyze, /* is_analyze is not referenced in the body */
+ BlockNumber total_pages, /* page count the estimate is scaled to */
+ BlockNumber scanned_pages, /* pages the caller actually examined */
+ double scanned_tuples) /* live tuples counted in the scanned pages */
+{
+ BlockNumber old_rel_pages = relation->rd_rel->relpages; /* previously stored relpages */
+ double old_rel_tuples = relation->rd_rel->reltuples; /* previously stored reltuples */
+ double old_density; /* tuples per page implied by the old stats */
+ double new_density; /* tuples per page observed in this scan */
+ double multiplier; /* fraction of pages scanned; weight given to new_density */
+ double updated_density; /* old_density moved toward new_density by multiplier */
+
+ /* If we did scan the whole table, just use the count as-is */
+ if (scanned_pages >= total_pages)
+ return scanned_tuples;
+
+ /*
+ * If scanned_pages is zero but total_pages isn't, keep the existing
+ * value of reltuples.
+ */
+ if (scanned_pages == 0)
+ return old_rel_tuples; /* nothing new was measured */
+
+ /*
+ * If old value of relpages is zero, old density is indeterminate; we
+ * can't do much except scale up scanned_tuples to match total_pages.
+ */
+ if (old_rel_pages == 0)
+ return floor((scanned_tuples / scanned_pages) * total_pages + 0.5); /* floor(x + 0.5): round to nearest whole number */
+
+ /*
+ * Okay, we've covered the corner cases. The normal calculation is to
+ * convert the old measurement to a density (tuples per page), then
+ * update the density using an exponential-moving-average approach,
+ * and finally compute reltuples as updated_density * total_pages.
+ *
+ * For ANALYZE, the moving average multiplier is just the fraction of
+ * the table's pages we scanned. This is equivalent to assuming
+ * that the tuple density in the unscanned pages didn't change. Of
+ * course, it probably did, if the new density measurement is different.
+ * But over repeated cycles, the value of reltuples will converge towards
+ * the correct value, if repeated measurements show the same new density.
+ *
+ * For VACUUM, the situation is a bit different: we have looked at a
+ * nonrandom sample of pages, but we know for certain that the pages we
+ * didn't look at are precisely the ones that haven't changed lately.
+ * Thus, there is a reasonable argument for doing exactly the same thing
+ * as for the ANALYZE case, that is use the old density measurement as
+ * the value for the unscanned pages.
+ *
+ * This logic could probably use further refinement.
+ */
+ old_density = old_rel_tuples / old_rel_pages;
+ new_density = scanned_tuples / scanned_pages;
+ multiplier = (double) scanned_pages / (double) total_pages;
+ updated_density = old_density + (new_density - old_density) * multiplier; /* == old_density * (1 - multiplier) + new_density * multiplier */
+ return floor(updated_density * total_pages + 0.5); /* scale density back to a whole-table tuple count, rounded */
+}
+
+
+/*
* vac_update_relstats() -- update statistics for one relation
*
* Update the whole-relation statistics that are kept in its pg_class
@@ -480,7 +553,7 @@ vacuum_set_xid_limits(int freeze_min_age,
* somebody vacuuming pg_class might think they could delete a tuple
* marked with xmin = our xid.
*
- * This routine is shared by VACUUM and stand-alone ANALYZE.
+ * This routine is shared by VACUUM and ANALYZE.
*/
void
vac_update_relstats(Relation relation,
@@ -758,14 +831,10 @@ vac_truncate_clog(TransactionId frozenXID)
* many small transactions. Otherwise, two-phase locking would require
* us to lock the entire database during one pass of the vacuum cleaner.
*
- * We'll return true in *scanned_all if the vacuum scanned all heap
- * pages, and updated pg_class.
- *
* At entry and exit, we are not inside a transaction.
*/
static bool
-vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
- bool *scanned_all)
+vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
{
LOCKMODE lmode;
Relation onerel;
@@ -775,9 +844,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
int save_sec_context;
int save_nestlevel;
- if (scanned_all)
- *scanned_all = false;
-
/* Begin a transaction for vacuuming this relation */
StartTransactionCommand();
@@ -971,7 +1037,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
vacstmt->freeze_min_age, vacstmt->freeze_table_age);
}
else
- lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
+ lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
/* Roll back any GUC changes executed by index functions */
AtEOXact_GUC(false, save_nestlevel);
@@ -997,7 +1063,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
* totally unimportant for toast relations.
*/
if (toast_relid != InvalidOid)
- vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
+ vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
/*
* Now release the session-level lock on the master table.