diff options
Diffstat (limited to 'src/backend/commands/analyze.c')
-rw-r--r-- | src/backend/commands/analyze.c | 358 |
1 files changed, 286 insertions, 72 deletions
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 6d13eeb5ec1..0739db99f50 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.143 2009/12/09 21:57:50 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.144 2009/12/29 20:11:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,11 +18,13 @@ #include "access/heapam.h" #include "access/transam.h" +#include "access/tupconvert.h" #include "access/tuptoaster.h" #include "access/xact.h" #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/pg_inherits_fn.h" #include "catalog/pg_namespace.h" #include "commands/dbcommands.h" #include "commands/vacuum.h" @@ -55,6 +57,7 @@ typedef struct BlockNumber t; /* current block number */ int m; /* blocks selected so far */ } BlockSamplerData; + typedef BlockSamplerData *BlockSampler; /* Per-index data for ANALYZE */ @@ -78,6 +81,8 @@ static MemoryContext anl_context = NULL; static BufferAccessStrategy vac_strategy; +static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, + bool update_reltuples, bool inh); static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize); static bool BlockSampler_HasMore(BlockSampler bs); @@ -93,7 +98,11 @@ static double random_fract(void); static double init_selection_state(int n); static double get_next_S(double t, int n, double *stateptr); static int compare_rows(const void *a, const void *b); -static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats); +static int acquire_inherited_sample_rows(Relation onerel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); +static void update_attstats(Oid relid, bool inh, + int natts, VacAttrStats **vacattrstats); static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); @@ -116,27 +125,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy, bool update_reltuples) { Relation onerel; - int attr_cnt, - tcnt, - i, - ind; - Relation *Irel; - int nindexes; - bool hasindex; - bool analyzableindex; - VacAttrStats **vacattrstats; - AnlIndexData *indexdata; - int targrows, - numrows; - double totalrows, - totaldeadrows; - HeapTuple *rows; - PGRUsage ru0; - TimestampTz starttime = 0; - Oid save_userid; - int save_sec_context; - int save_nestlevel; + /* Set up static variables */ if (vacstmt->options & VACOPT_VERBOSE) elevel = INFO; else @@ -145,15 +135,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, vac_strategy = bstrategy; /* - * Use the current context for storing analysis info. vacuum.c ensures - * that this context will be cleared when I return, thus releasing the - * memory allocated here. - */ - anl_context = CurrentMemoryContext; - - /* - * Check for user-requested abort. Note we want this to be inside a - * transaction, so xact.c doesn't issue useless WARNING. + * Check for user-requested abort. */ CHECK_FOR_INTERRUPTS(); @@ -230,10 +212,91 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, return; } - ereport(elevel, - (errmsg("analyzing \"%s.%s\"", - get_namespace_name(RelationGetNamespace(onerel)), - RelationGetRelationName(onerel)))); + /* + * OK, let's do it. First let other backends know I'm in ANALYZE. + */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->vacuumFlags |= PROC_IN_ANALYZE; + LWLockRelease(ProcArrayLock); + + /* + * Do the normal non-recursive ANALYZE. + */ + do_analyze_rel(onerel, vacstmt, update_reltuples, false); + + /* + * If there are child tables, do recursive ANALYZE. + */ + if (onerel->rd_rel->relhassubclass) + do_analyze_rel(onerel, vacstmt, false, true); + + /* + * Close source relation now, but keep lock so that no one deletes it + * before we commit. (If someone did, they'd fail to clean up the entries + * we made in pg_statistic. Also, releasing the lock before commit would + * expose us to concurrent-update failures in update_attstats.) + */ + relation_close(onerel, NoLock); + + /* + * Reset my PGPROC flag. Note: we need this here, and not in vacuum_rel, + * because the vacuum flag is cleared by the end-of-xact code. + */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->vacuumFlags &= ~PROC_IN_ANALYZE; + LWLockRelease(ProcArrayLock); +} + +/* + * do_analyze_rel() -- analyze one relation, recursively or not + */ +static void +do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, + bool update_reltuples, bool inh) +{ + int attr_cnt, + tcnt, + i, + ind; + Relation *Irel; + int nindexes; + bool hasindex; + bool analyzableindex; + VacAttrStats **vacattrstats; + AnlIndexData *indexdata; + int targrows, + numrows; + double totalrows, + totaldeadrows; + HeapTuple *rows; + PGRUsage ru0; + TimestampTz starttime = 0; + MemoryContext caller_context; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + + if (inh) + ereport(elevel, + (errmsg("analyzing \"%s.%s\" inheritance tree", + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)))); + else + ereport(elevel, + (errmsg("analyzing \"%s.%s\"", + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)))); + + /* + * Set up a working context so that we can easily free whatever junk + * gets created. + */ + anl_context = AllocSetContextCreate(CurrentMemoryContext, + "Analyze", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + caller_context = MemoryContextSwitchTo(anl_context); /* * Switch to the table owner's userid, so that any index functions are run @@ -245,11 +308,6 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, save_sec_context | SECURITY_RESTRICTED_OPERATION); save_nestlevel = NewGUCNestLevel(); - /* let others know what I'm doing */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->vacuumFlags |= PROC_IN_ANALYZE; - LWLockRelease(ProcArrayLock); - /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) { @@ -304,9 +362,17 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, /* * Open all indexes of the relation, and see if there are any analyzable * columns in the indexes. We do not analyze index columns if there was - * an explicit column list in the ANALYZE command, however. + * an explicit column list in the ANALYZE command, however. If we are + * doing a recursive scan, we don't want to touch the parent's indexes + * at all. */ - vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel); + if (!inh) + vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel); + else + { + Irel = NULL; + nindexes = 0; + } hasindex = (nindexes > 0); indexdata = NULL; analyzableindex = false; @@ -399,8 +465,12 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, * Acquire the sample rows */ rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); - numrows = acquire_sample_rows(onerel, rows, targrows, - &totalrows, &totaldeadrows); + if (inh) + numrows = acquire_inherited_sample_rows(onerel, rows, targrows, + &totalrows, &totaldeadrows); + else + numrows = acquire_sample_rows(onerel, rows, targrows, + &totalrows, &totaldeadrows); /* * Compute the statistics. Temporary results during the calculations for @@ -452,13 +522,14 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, * previous statistics for the target columns. (If there are stats in * pg_statistic for columns we didn't process, we leave them alone.) */ - update_attstats(relid, attr_cnt, vacattrstats); + update_attstats(RelationGetRelid(onerel), inh, + attr_cnt, vacattrstats); for (ind = 0; ind < nindexes; ind++) { AnlIndexData *thisdata = &indexdata[ind]; - update_attstats(RelationGetRelid(Irel[ind]), + update_attstats(RelationGetRelid(Irel[ind]), false, thisdata->attr_cnt, thisdata->vacattrstats); } } @@ -537,27 +608,16 @@ cleanup: pg_rusage_show(&ru0)))); } - /* - * Close source relation now, but keep lock so that no one deletes it - * before we commit. (If someone did, they'd fail to clean up the entries - * we made in pg_statistic. Also, releasing the lock before commit would - * expose us to concurrent-update failures in update_attstats.) - */ - relation_close(onerel, NoLock); - - /* - * Reset my PGPROC flag. Note: we need this here, and not in vacuum_rel, - * because the vacuum flag is cleared by the end-of-xact code. - */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->vacuumFlags &= ~PROC_IN_ANALYZE; - LWLockRelease(ProcArrayLock); - /* Roll back any GUC changes executed by index functions */ AtEOXact_GUC(false, save_nestlevel); /* Restore userid and security context */ SetUserIdAndSecContext(save_userid, save_sec_context); + + /* Restore current context and release memory */ + MemoryContextSwitchTo(caller_context); + MemoryContextDelete(anl_context); + anl_context = NULL; } /* @@ -877,6 +937,15 @@ BlockSampler_Next(BlockSampler bs) /* * acquire_sample_rows -- acquire a random sample of rows from the table * + * Selected rows are returned in the caller-allocated array rows[], which + * must have at least targrows entries. + * The actual number of rows selected is returned as the function result. + * We also estimate the total numbers of live and dead rows in the table, + * and return them into *totalrows and *totaldeadrows, respectively. + * + * The returned list of tuples is in order by physical position in the table. + * (We will rely on this later to derive correlation estimates.) + * * As of May 2004 we use a new two-stage method: Stage one selects up * to targrows random blocks (or all blocks, if there aren't so many). * Stage two scans these blocks and uses the Vitter algorithm to create @@ -892,17 +961,11 @@ BlockSampler_Next(BlockSampler bs) * the number of different blocks represented by the sample tends to be * too small. We can live with that for now. Improvements are welcome. * - * We also estimate the total numbers of live and dead rows in the table, - * and return them into *totalrows and *totaldeadrows, respectively. - * * An important property of this sampling method is that because we do * look at a statistically unbiased set of blocks, we should get * unbiased estimates of the average numbers of live and dead rows per * block. The previous sampling method put too much credence in the row * density near the start of the table. - * - * The returned list of tuples is in order by physical position in the table. - * (We will rely on this later to derive correlation estimates.) */ static int acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, @@ -918,7 +981,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, BlockSamplerData bs; double rstate; - Assert(targrows > 1); + Assert(targrows > 0); totalblocks = RelationGetNumberOfBlocks(onerel); @@ -1277,6 +1340,155 @@ compare_rows(const void *a, const void *b) /* + * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree + * + * This has the same API as acquire_sample_rows, except that rows are + * collected from all inheritance children as well as the specified table. + * We fail and return zero if there are no inheritance children. + */ +static int +acquire_inherited_sample_rows(Relation onerel, HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows) +{ + List *tableOIDs; + Relation *rels; + double *relblocks; + double totalblocks; + int numrows, + nrels, + i; + ListCell *lc; + + /* + * Find all members of inheritance set. We only need AccessShareLock on + * the children. + */ + tableOIDs = find_all_inheritors(RelationGetRelid(onerel), AccessShareLock); + + /* + * Check that there's at least one descendant, else fail. This could + * happen despite analyze_rel's relhassubclass check, if table once had a + * child but no longer does. + */ + if (list_length(tableOIDs) < 2) + { + /* + * XXX It would be desirable to clear relhassubclass here, but we + * don't have adequate lock to do that safely. + */ + return 0; + } + + /* + * Count the blocks in all the relations. The result could overflow + * BlockNumber, so we use double arithmetic. + */ + rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation)); + relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double)); + totalblocks = 0; + nrels = 0; + foreach(lc, tableOIDs) + { + Oid childOID = lfirst_oid(lc); + Relation childrel; + + /* We already got the needed lock */ + childrel = heap_open(childOID, NoLock); + + /* Ignore if temp table of another backend */ + if (RELATION_IS_OTHER_TEMP(childrel)) + { + /* ... but release the lock on it */ + Assert(childrel != onerel); + heap_close(childrel, AccessShareLock); + continue; + } + + rels[nrels] = childrel; + relblocks[nrels] = (double) RelationGetNumberOfBlocks(childrel); + totalblocks += relblocks[nrels]; + nrels++; + } + + /* + * Now sample rows from each relation, proportionally to its fraction + * of the total block count. (This might be less than desirable if the + * child rels have radically different free-space percentages, but it's + * not clear that it's worth working harder.) + */ + numrows = 0; + *totalrows = 0; + *totaldeadrows = 0; + for (i = 0; i < nrels; i++) + { + Relation childrel = rels[i]; + double childblocks = relblocks[i]; + + if (childblocks > 0) + { + int childtargrows; + + childtargrows = (int) rint(targrows * childblocks / totalblocks); + /* Make sure we don't overrun due to roundoff error */ + childtargrows = Min(childtargrows, targrows - numrows); + if (childtargrows > 0) + { + int childrows; + double trows, + tdrows; + + /* Fetch a random sample of the child's rows */ + childrows = acquire_sample_rows(childrel, + rows + numrows, + childtargrows, + &trows, + &tdrows); + + /* We may need to convert from child's rowtype to parent's */ + if (childrows > 0 && + !equalTupleDescs(RelationGetDescr(childrel), + RelationGetDescr(onerel))) + { + TupleConversionMap *map; + + map = convert_tuples_by_name(RelationGetDescr(childrel), + RelationGetDescr(onerel), + gettext_noop("could not convert row type")); + if (map != NULL) + { + int j; + + for (j = 0; j < childrows; j++) + { + HeapTuple newtup; + + newtup = do_convert_tuple(rows[numrows + j], map); + heap_freetuple(rows[numrows + j]); + rows[numrows + j] = newtup; + } + free_conversion_map(map); + } + } + + /* And add to counts */ + numrows += childrows; + *totalrows += trows; + *totaldeadrows += tdrows; + } + } + + /* + * Note: we cannot release the child-table locks, since we may have + * pointers to their TOAST tables in the sampled rows. + */ + heap_close(childrel, NoLock); + } + + return numrows; +} + + +/* * update_attstats() -- update attribute statistics for one relation * * Statistics are stored in several places: the pg_class row for the @@ -1299,7 +1511,7 @@ compare_rows(const void *a, const void *b) * by taking a self-exclusive lock on the relation in analyze_rel(). */ static void -update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats) +update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats) { Relation sd; int attno; @@ -1337,6 +1549,7 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats) i = 0; values[i++] = ObjectIdGetDatum(relid); /* starelid */ values[i++] = Int16GetDatum(stats->attr->attnum); /* staattnum */ + values[i++] = BoolGetDatum(inh); /* stainherit */ values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */ values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */ values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */ @@ -1393,10 +1606,11 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats) } /* Is there already a pg_statistic tuple for this attribute? */ - oldtup = SearchSysCache(STATRELATT, + oldtup = SearchSysCache(STATRELATTINH, ObjectIdGetDatum(relid), Int16GetDatum(stats->attr->attnum), - 0, 0); + BoolGetDatum(inh), + 0); if (HeapTupleIsValid(oldtup)) { |