aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands/analyze.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/commands/analyze.c')
-rw-r--r--src/backend/commands/analyze.c358
1 files changed, 286 insertions, 72 deletions
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 6d13eeb5ec1..0739db99f50 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.143 2009/12/09 21:57:50 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.144 2009/12/29 20:11:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -18,11 +18,13 @@
#include "access/heapam.h"
#include "access/transam.h"
+#include "access/tupconvert.h"
#include "access/tuptoaster.h"
#include "access/xact.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
+#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_namespace.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
@@ -55,6 +57,7 @@ typedef struct
BlockNumber t; /* current block number */
int m; /* blocks selected so far */
} BlockSamplerData;
+
typedef BlockSamplerData *BlockSampler;
/* Per-index data for ANALYZE */
@@ -78,6 +81,8 @@ static MemoryContext anl_context = NULL;
static BufferAccessStrategy vac_strategy;
+static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
+ bool update_reltuples, bool inh);
static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
int samplesize);
static bool BlockSampler_HasMore(BlockSampler bs);
@@ -93,7 +98,11 @@ static double random_fract(void);
static double init_selection_state(int n);
static double get_next_S(double t, int n, double *stateptr);
static int compare_rows(const void *a, const void *b);
-static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
+static int acquire_inherited_sample_rows(Relation onerel,
+ HeapTuple *rows, int targrows,
+ double *totalrows, double *totaldeadrows);
+static void update_attstats(Oid relid, bool inh,
+ int natts, VacAttrStats **vacattrstats);
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
@@ -116,27 +125,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy, bool update_reltuples)
{
Relation onerel;
- int attr_cnt,
- tcnt,
- i,
- ind;
- Relation *Irel;
- int nindexes;
- bool hasindex;
- bool analyzableindex;
- VacAttrStats **vacattrstats;
- AnlIndexData *indexdata;
- int targrows,
- numrows;
- double totalrows,
- totaldeadrows;
- HeapTuple *rows;
- PGRUsage ru0;
- TimestampTz starttime = 0;
- Oid save_userid;
- int save_sec_context;
- int save_nestlevel;
+ /* Set up static variables */
if (vacstmt->options & VACOPT_VERBOSE)
elevel = INFO;
else
@@ -145,15 +135,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
vac_strategy = bstrategy;
/*
- * Use the current context for storing analysis info. vacuum.c ensures
- * that this context will be cleared when I return, thus releasing the
- * memory allocated here.
- */
- anl_context = CurrentMemoryContext;
-
- /*
- * Check for user-requested abort. Note we want this to be inside a
- * transaction, so xact.c doesn't issue useless WARNING.
+ * Check for user-requested abort.
*/
CHECK_FOR_INTERRUPTS();
@@ -230,10 +212,91 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
return;
}
- ereport(elevel,
- (errmsg("analyzing \"%s.%s\"",
- get_namespace_name(RelationGetNamespace(onerel)),
- RelationGetRelationName(onerel))));
+ /*
+ * OK, let's do it. First let other backends know I'm in ANALYZE.
+ */
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ MyProc->vacuumFlags |= PROC_IN_ANALYZE;
+ LWLockRelease(ProcArrayLock);
+
+ /*
+ * Do the normal non-recursive ANALYZE.
+ */
+ do_analyze_rel(onerel, vacstmt, update_reltuples, false);
+
+ /*
+ * If there are child tables, do recursive ANALYZE.
+ */
+ if (onerel->rd_rel->relhassubclass)
+ do_analyze_rel(onerel, vacstmt, false, true);
+
+ /*
+ * Close source relation now, but keep lock so that no one deletes it
+ * before we commit. (If someone did, they'd fail to clean up the entries
+ * we made in pg_statistic. Also, releasing the lock before commit would
+ * expose us to concurrent-update failures in update_attstats.)
+ */
+ relation_close(onerel, NoLock);
+
+ /*
+ * Reset my PGPROC flag. Note: we need this here, and not in vacuum_rel,
+ * because the vacuum flag is cleared by the end-of-xact code.
+ */
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ MyProc->vacuumFlags &= ~PROC_IN_ANALYZE;
+ LWLockRelease(ProcArrayLock);
+}
+
+/*
+ * do_analyze_rel() -- analyze one relation, recursively or not
+ */
+static void
+do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
+ bool update_reltuples, bool inh)
+{
+ int attr_cnt,
+ tcnt,
+ i,
+ ind;
+ Relation *Irel;
+ int nindexes;
+ bool hasindex;
+ bool analyzableindex;
+ VacAttrStats **vacattrstats;
+ AnlIndexData *indexdata;
+ int targrows,
+ numrows;
+ double totalrows,
+ totaldeadrows;
+ HeapTuple *rows;
+ PGRUsage ru0;
+ TimestampTz starttime = 0;
+ MemoryContext caller_context;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+
+ if (inh)
+ ereport(elevel,
+ (errmsg("analyzing \"%s.%s\" inheritance tree",
+ get_namespace_name(RelationGetNamespace(onerel)),
+ RelationGetRelationName(onerel))));
+ else
+ ereport(elevel,
+ (errmsg("analyzing \"%s.%s\"",
+ get_namespace_name(RelationGetNamespace(onerel)),
+ RelationGetRelationName(onerel))));
+
+ /*
+ * Set up a working context so that we can easily free whatever junk
+ * gets created.
+ */
+ anl_context = AllocSetContextCreate(CurrentMemoryContext,
+ "Analyze",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ caller_context = MemoryContextSwitchTo(anl_context);
/*
* Switch to the table owner's userid, so that any index functions are run
@@ -245,11 +308,6 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
save_sec_context | SECURITY_RESTRICTED_OPERATION);
save_nestlevel = NewGUCNestLevel();
- /* let others know what I'm doing */
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
- MyProc->vacuumFlags |= PROC_IN_ANALYZE;
- LWLockRelease(ProcArrayLock);
-
/* measure elapsed time iff autovacuum logging requires it */
if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
{
@@ -304,9 +362,17 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
/*
* Open all indexes of the relation, and see if there are any analyzable
* columns in the indexes. We do not analyze index columns if there was
- * an explicit column list in the ANALYZE command, however.
+ * an explicit column list in the ANALYZE command, however. If we are
+ * doing a recursive scan, we don't want to touch the parent's indexes
+ * at all.
*/
- vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
+ if (!inh)
+ vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
+ else
+ {
+ Irel = NULL;
+ nindexes = 0;
+ }
hasindex = (nindexes > 0);
indexdata = NULL;
analyzableindex = false;
@@ -399,8 +465,12 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
* Acquire the sample rows
*/
rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
- numrows = acquire_sample_rows(onerel, rows, targrows,
- &totalrows, &totaldeadrows);
+ if (inh)
+ numrows = acquire_inherited_sample_rows(onerel, rows, targrows,
+ &totalrows, &totaldeadrows);
+ else
+ numrows = acquire_sample_rows(onerel, rows, targrows,
+ &totalrows, &totaldeadrows);
/*
* Compute the statistics. Temporary results during the calculations for
@@ -452,13 +522,14 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
* previous statistics for the target columns. (If there are stats in
* pg_statistic for columns we didn't process, we leave them alone.)
*/
- update_attstats(relid, attr_cnt, vacattrstats);
+ update_attstats(RelationGetRelid(onerel), inh,
+ attr_cnt, vacattrstats);
for (ind = 0; ind < nindexes; ind++)
{
AnlIndexData *thisdata = &indexdata[ind];
- update_attstats(RelationGetRelid(Irel[ind]),
+ update_attstats(RelationGetRelid(Irel[ind]), false,
thisdata->attr_cnt, thisdata->vacattrstats);
}
}
@@ -537,27 +608,16 @@ cleanup:
pg_rusage_show(&ru0))));
}
- /*
- * Close source relation now, but keep lock so that no one deletes it
- * before we commit. (If someone did, they'd fail to clean up the entries
- * we made in pg_statistic. Also, releasing the lock before commit would
- * expose us to concurrent-update failures in update_attstats.)
- */
- relation_close(onerel, NoLock);
-
- /*
- * Reset my PGPROC flag. Note: we need this here, and not in vacuum_rel,
- * because the vacuum flag is cleared by the end-of-xact code.
- */
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
- MyProc->vacuumFlags &= ~PROC_IN_ANALYZE;
- LWLockRelease(ProcArrayLock);
-
/* Roll back any GUC changes executed by index functions */
AtEOXact_GUC(false, save_nestlevel);
/* Restore userid and security context */
SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ /* Restore current context and release memory */
+ MemoryContextSwitchTo(caller_context);
+ MemoryContextDelete(anl_context);
+ anl_context = NULL;
}
/*
@@ -877,6 +937,15 @@ BlockSampler_Next(BlockSampler bs)
/*
* acquire_sample_rows -- acquire a random sample of rows from the table
*
+ * Selected rows are returned in the caller-allocated array rows[], which
+ * must have at least targrows entries.
+ * The actual number of rows selected is returned as the function result.
+ * We also estimate the total numbers of live and dead rows in the table,
+ * and return them into *totalrows and *totaldeadrows, respectively.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ *
* As of May 2004 we use a new two-stage method: Stage one selects up
* to targrows random blocks (or all blocks, if there aren't so many).
* Stage two scans these blocks and uses the Vitter algorithm to create
@@ -892,17 +961,11 @@ BlockSampler_Next(BlockSampler bs)
* the number of different blocks represented by the sample tends to be
* too small. We can live with that for now. Improvements are welcome.
*
- * We also estimate the total numbers of live and dead rows in the table,
- * and return them into *totalrows and *totaldeadrows, respectively.
- *
* An important property of this sampling method is that because we do
* look at a statistically unbiased set of blocks, we should get
* unbiased estimates of the average numbers of live and dead rows per
* block. The previous sampling method put too much credence in the row
* density near the start of the table.
- *
- * The returned list of tuples is in order by physical position in the table.
- * (We will rely on this later to derive correlation estimates.)
*/
static int
acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
@@ -918,7 +981,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
BlockSamplerData bs;
double rstate;
- Assert(targrows > 1);
+ Assert(targrows > 0);
totalblocks = RelationGetNumberOfBlocks(onerel);
@@ -1277,6 +1340,155 @@ compare_rows(const void *a, const void *b)
/*
+ * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
+ *
+ * This has the same API as acquire_sample_rows, except that rows are
+ * collected from all inheritance children as well as the specified table.
+ * We fail and return zero if there are no inheritance children.
+ */
+static int
+acquire_inherited_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+ double *totalrows, double *totaldeadrows)
+{
+ List *tableOIDs;
+ Relation *rels;
+ double *relblocks;
+ double totalblocks;
+ int numrows,
+ nrels,
+ i;
+ ListCell *lc;
+
+ /*
+ * Find all members of inheritance set. We only need AccessShareLock on
+ * the children.
+ */
+ tableOIDs = find_all_inheritors(RelationGetRelid(onerel), AccessShareLock);
+
+ /*
+ * Check that there's at least one descendant, else fail. This could
+ * happen despite analyze_rel's relhassubclass check, if table once had a
+ * child but no longer does.
+ */
+ if (list_length(tableOIDs) < 2)
+ {
+ /*
+ * XXX It would be desirable to clear relhassubclass here, but we
+ * don't have adequate lock to do that safely.
+ */
+ return 0;
+ }
+
+ /*
+ * Count the blocks in all the relations. The result could overflow
+ * BlockNumber, so we use double arithmetic.
+ */
+ rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
+ relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
+ totalblocks = 0;
+ nrels = 0;
+ foreach(lc, tableOIDs)
+ {
+ Oid childOID = lfirst_oid(lc);
+ Relation childrel;
+
+ /* We already got the needed lock */
+ childrel = heap_open(childOID, NoLock);
+
+ /* Ignore if temp table of another backend */
+ if (RELATION_IS_OTHER_TEMP(childrel))
+ {
+ /* ... but release the lock on it */
+ Assert(childrel != onerel);
+ heap_close(childrel, AccessShareLock);
+ continue;
+ }
+
+ rels[nrels] = childrel;
+ relblocks[nrels] = (double) RelationGetNumberOfBlocks(childrel);
+ totalblocks += relblocks[nrels];
+ nrels++;
+ }
+
+ /*
+ * Now sample rows from each relation, proportionally to its fraction
+ * of the total block count. (This might be less than desirable if the
+ * child rels have radically different free-space percentages, but it's
+ * not clear that it's worth working harder.)
+ */
+ numrows = 0;
+ *totalrows = 0;
+ *totaldeadrows = 0;
+ for (i = 0; i < nrels; i++)
+ {
+ Relation childrel = rels[i];
+ double childblocks = relblocks[i];
+
+ if (childblocks > 0)
+ {
+ int childtargrows;
+
+ childtargrows = (int) rint(targrows * childblocks / totalblocks);
+ /* Make sure we don't overrun due to roundoff error */
+ childtargrows = Min(childtargrows, targrows - numrows);
+ if (childtargrows > 0)
+ {
+ int childrows;
+ double trows,
+ tdrows;
+
+ /* Fetch a random sample of the child's rows */
+ childrows = acquire_sample_rows(childrel,
+ rows + numrows,
+ childtargrows,
+ &trows,
+ &tdrows);
+
+ /* We may need to convert from child's rowtype to parent's */
+ if (childrows > 0 &&
+ !equalTupleDescs(RelationGetDescr(childrel),
+ RelationGetDescr(onerel)))
+ {
+ TupleConversionMap *map;
+
+ map = convert_tuples_by_name(RelationGetDescr(childrel),
+ RelationGetDescr(onerel),
+ gettext_noop("could not convert row type"));
+ if (map != NULL)
+ {
+ int j;
+
+ for (j = 0; j < childrows; j++)
+ {
+ HeapTuple newtup;
+
+ newtup = do_convert_tuple(rows[numrows + j], map);
+ heap_freetuple(rows[numrows + j]);
+ rows[numrows + j] = newtup;
+ }
+ free_conversion_map(map);
+ }
+ }
+
+ /* And add to counts */
+ numrows += childrows;
+ *totalrows += trows;
+ *totaldeadrows += tdrows;
+ }
+ }
+
+ /*
+ * Note: we cannot release the child-table locks, since we may have
+ * pointers to their TOAST tables in the sampled rows.
+ */
+ heap_close(childrel, NoLock);
+ }
+
+ return numrows;
+}
+
+
+/*
* update_attstats() -- update attribute statistics for one relation
*
* Statistics are stored in several places: the pg_class row for the
@@ -1299,7 +1511,7 @@ compare_rows(const void *a, const void *b)
* by taking a self-exclusive lock on the relation in analyze_rel().
*/
static void
-update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
{
Relation sd;
int attno;
@@ -1337,6 +1549,7 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
i = 0;
values[i++] = ObjectIdGetDatum(relid); /* starelid */
values[i++] = Int16GetDatum(stats->attr->attnum); /* staattnum */
+ values[i++] = BoolGetDatum(inh); /* stainherit */
values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */
values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */
values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */
@@ -1393,10 +1606,11 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
}
/* Is there already a pg_statistic tuple for this attribute? */
- oldtup = SearchSysCache(STATRELATT,
+ oldtup = SearchSysCache(STATRELATTINH,
ObjectIdGetDatum(relid),
Int16GetDatum(stats->attr->attnum),
- 0, 0);
+ BoolGetDatum(inh),
+ 0);
if (HeapTupleIsValid(oldtup))
{