diff options
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/ruleutils.c | 81 | ||||
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 181 | ||||
-rw-r--r-- | src/backend/utils/cache/relcache.c | 79 | ||||
-rw-r--r-- | src/backend/utils/cache/syscache.c | 23 |
4 files changed, 357 insertions, 7 deletions
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 5c823250bc2..81c91039e40 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -35,6 +35,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" #include "commands/defrem.h" @@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno, const Oid *excludeOps, bool attrsOnly, bool showTblSpc, int prettyFlags, bool missing_ok); +static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok); static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags, bool attrsOnly); static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand, @@ -1422,6 +1424,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, } /* + * pg_get_statisticsextdef + * Get the definition of an extended statistics object + */ +Datum +pg_get_statisticsextdef(PG_FUNCTION_ARGS) +{ + Oid statextid = PG_GETARG_OID(0); + char *res; + + res = pg_get_statisticsext_worker(statextid, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +/* + * Internal workhorse to decompile an extended statistics object. 
+ */ +static char * +pg_get_statisticsext_worker(Oid statextid, bool missing_ok) +{ + Form_pg_statistic_ext statextrec; + Form_pg_class pgclassrec; + HeapTuple statexttup; + HeapTuple pgclasstup; + StringInfoData buf; + int colno; + + statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid)); + + if (!HeapTupleIsValid(statexttup)) + { + if (missing_ok) + return NULL; + elog(ERROR, "cache lookup failed for extended statistics %u", statextid); + } + + statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup); + + pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(statextrec->starelid)); + + if (!HeapTupleIsValid(pgclasstup)) + { + ReleaseSysCache(statexttup); + elog(ERROR, "cache lookup failed for relation %u", statextrec->starelid); + } + + pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup); + + initStringInfo(&buf); + + appendStringInfo(&buf, "CREATE STATISTICS %s ON (", + quote_identifier(NameStr(statextrec->staname))); + + for (colno = 0; colno < statextrec->stakeys.dim1; colno++) + { + AttrNumber attnum = statextrec->stakeys.values[colno]; + char *attname; + + if (colno > 0) + appendStringInfoString(&buf, ", "); + + attname = get_relid_attribute_name(statextrec->starelid, attnum); + + appendStringInfoString(&buf, quote_identifier(attname)); + } + + appendStringInfo(&buf, ") FROM %s", + quote_identifier(NameStr(pgclassrec->relname))); + + ReleaseSysCache(statexttup); + ReleaseSysCache(pgclasstup); + + return buf.data; +} + +/* * pg_get_partkeydef * * Returns the partition key specification, ie, the following: diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index f8b28fe0e61..cc24c8aeb56 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -110,6 +110,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_type.h" #include "executor/executor.h" #include "mb/pg_wchar.h" @@ 
-126,6 +127,7 @@ #include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parsetree.h" +#include "statistics/statistics.h" #include "utils/builtins.h" #include "utils/bytea.h" #include "utils/date.h" @@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator, static double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2, RelOptInfo *inner_rel); +static bool estimate_multivariate_ndistinct(PlannerInfo *root, + RelOptInfo *rel, List **varinfos, double *ndistinct); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -3398,25 +3402,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, { GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos); RelOptInfo *rel = varinfo1->rel; - double reldistinct = varinfo1->ndistinct; + double reldistinct = 1; double relmaxndistinct = reldistinct; int relvarcount = 1; List *newvarinfos = NIL; + List *relvarinfos = NIL; /* - * Get the product of numdistinct estimates of the Vars for this rel. - * Also, construct new varinfos list of remaining Vars. + * Split the list of varinfos in two - one for the current rel, + * one for remaining Vars on other rels. */ + relvarinfos = lcons(varinfo1, relvarinfos); for_each_cell(l, lnext(list_head(varinfos))) { GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); if (varinfo2->rel == varinfo1->rel) { - reldistinct *= varinfo2->ndistinct; - if (relmaxndistinct < varinfo2->ndistinct) - relmaxndistinct = varinfo2->ndistinct; - relvarcount++; + /* varinfos on current rel */ + relvarinfos = lcons(varinfo2, relvarinfos); } else { @@ -3426,6 +3430,43 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, } /* + * Get the numdistinct estimate for the Vars of this rel. 
We + * iteratively search for multivariate n-distinct with maximum number + * of vars; assuming that each var group is independent of the others, + * we multiply them together. Any remaining relvarinfos after + * no more multivariate matches are found are assumed independent too, + * so their individual ndistinct estimates are multiplied also. + */ + while (relvarinfos) + { + double mvndistinct; + + if (estimate_multivariate_ndistinct(root, rel, &relvarinfos, + &mvndistinct)) + { + reldistinct *= mvndistinct; + if (relmaxndistinct < mvndistinct) + relmaxndistinct = mvndistinct; + relvarcount++; /* inaccurate, but doesn't matter */ + } + else + { + foreach (l, relvarinfos) + { + GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); + + reldistinct *= varinfo2->ndistinct; + if (relmaxndistinct < varinfo2->ndistinct) + relmaxndistinct = varinfo2->ndistinct; + relvarcount++; + } + + /* we're done with this relation */ + relvarinfos = NIL; + } + } + + /* * Sanity check --- don't divide by zero if empty relation. */ Assert(rel->reloptkind == RELOPT_BASEREL); @@ -3668,6 +3709,132 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) */ /* + * Find applicable ndistinct statistics for the given list of VarInfos (which + * must all belong to the given rel), and update *ndistinct to the estimate of + * the MVNDistinctItem that best matches. If a match is found, *varinfos is + * updated to remove the list of matched varinfos. + * + * Varinfos that aren't for simple Vars are ignored. + * + * Return TRUE if we're able to find a match, FALSE otherwise. 
+ */ +static bool +estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, + List **varinfos, double *ndistinct) +{ + ListCell *lc; + Bitmapset *attnums = NULL; + int nmatches; + Oid statOid = InvalidOid; + MVNDistinct *stats; + Bitmapset *matched = NULL; + + /* bail out immediately if the table has no extended statistics */ + if (!rel->statlist) + return false; + + /* Determine the attnums we're looking for */ + foreach(lc, *varinfos) + { + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc); + + Assert(varinfo->rel == rel); + + if (IsA(varinfo->var, Var)) + { + attnums = bms_add_member(attnums, + ((Var *) varinfo->var)->varattno); + } + } + + /* look for the ndistinct statistics matching the most vars */ + nmatches = 1; /* we require at least two matches */ + foreach(lc, rel->statlist) + { + StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); + Bitmapset *shared; + + /* skip statistics of other kinds */ + if (info->kind != STATS_EXT_NDISTINCT) + continue; + + /* compute attnums shared by the vars and the statistic */ + shared = bms_intersect(info->keys, attnums); + + /* + * Does this statistic match more columns than the current + * best statistic? If so, use this one instead. + * + * XXX This should break ties using name of the statistic, or + * something like that, to make the outcome stable. + */ + if (bms_num_members(shared) > nmatches) + { + statOid = info->statOid; + nmatches = bms_num_members(shared); + matched = shared; + } + } + + /* No match? */ + if (statOid == InvalidOid) + return false; + Assert(nmatches > 1 && matched != NULL); + + stats = statext_ndistinct_load(statOid); + + /* + * If we have a match, search it for the specific item that matches (there + * must be one), and construct the output values. 
+ */ + if (stats) + { + int i; + List *newlist = NIL; + MVNDistinctItem *item = NULL; + + /* Find the specific item that exactly matches the combination */ + for (i = 0; i < stats->nitems; i++) + { + MVNDistinctItem *tmpitem = &stats->items[i]; + + if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL) + { + item = tmpitem; + break; + } + } + + /* make sure we found an item */ + if (!item) + elog(ERROR, "corrupt MVNDistinct entry"); + + /* Form the output varinfo list, keeping only unmatched ones */ + foreach(lc, *varinfos) + { + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc); + AttrNumber attnum; + + if (!IsA(varinfo->var, Var)) + { + newlist = lappend(newlist, varinfo); + continue; + } + + attnum = ((Var *) varinfo->var)->varattno; + if (!bms_is_member(attnum, matched)) + newlist = lappend(newlist, varinfo); + } + + *varinfos = newlist; + *ndistinct = item->ndistinct; + return true; + } + + return false; +} + +/* * convert_to_scalar * Convert non-NULL values of the indicated types to the comparison * scale needed by scalarineqsel(). diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index ce55fc52777..a6b60c67caa 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -56,6 +56,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_shseclabel.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_trigger.h" @@ -4452,6 +4453,82 @@ RelationGetIndexList(Relation relation) } /* + * RelationGetStatExtList + * get a list of OIDs of extended statistics on this relation + * + * The statistics list is created only if someone requests it, in a way + * similar to RelationGetIndexList(). We scan pg_statistic_ext to find + * relevant statistics, and add the list to the relcache entry so that we + * won't have to compute it again. 
Note that shared cache inval of a + * relcache entry will delete the old list and set rd_statvalid to false, + * so that we must recompute the statistics list on next request. This + * handles creation or deletion of a statistic. + * + * The returned list is guaranteed to be sorted in order by OID, although + * this is not currently needed. + * + * Since shared cache inval causes the relcache's copy of the list to go away, + * we return a copy of the list palloc'd in the caller's context. The caller + * may list_free() the returned list after scanning it. This is necessary + * since the caller will typically be doing syscache lookups on the relevant + * statistics, and syscache lookup could cause SI messages to be processed! + */ +List * +RelationGetStatExtList(Relation relation) +{ + Relation indrel; + SysScanDesc indscan; + ScanKeyData skey; + HeapTuple htup; + List *result; + List *oldlist; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. */ + if (relation->rd_statvalid != 0) + return list_copy(relation->rd_statlist); + + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* Prepare to scan pg_statistic_ext for entries having starelid = this rel. */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + indrel = heap_open(StatisticExtRelationId, AccessShareLock); + indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(indscan))) + /* TODO maybe include only already built statistics? 
*/ + result = insert_ordered_oid(result, HeapTupleGetOid(htup)); + + systable_endscan(indscan); + + heap_close(indrel, AccessShareLock); + + /* Now save a copy of the completed list in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_statlist; + relation->rd_statlist = list_copy(result); + + relation->rd_statvalid = true; + MemoryContextSwitchTo(oldcxt); + + /* Don't leak the old list, if there is one */ + list_free(oldlist); + + return result; +} + +/* * insert_ordered_oid * Insert a new Oid into a sorted list of Oids, preserving ordering * @@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared) rel->rd_pkattr = NULL; rel->rd_idattr = NULL; rel->rd_pubactions = NULL; + rel->rd_statvalid = false; + rel->rd_statlist = NIL; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index d5a376406fe..d8c823f42b5 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -61,6 +61,7 @@ #include "catalog/pg_shseclabel.h" #include "catalog/pg_replication_origin.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_subscription_rel.h" #include "catalog/pg_tablespace.h" @@ -726,6 +727,28 @@ static const struct cachedesc cacheinfo[] = { }, 32 }, + {StatisticExtRelationId, /* STATEXTNAMENSP */ + StatisticExtNameIndexId, + 2, + { + Anum_pg_statistic_ext_staname, + Anum_pg_statistic_ext_stanamespace, + 0, + 0 + }, + 4 + }, + {StatisticExtRelationId, /* STATEXTOID */ + StatisticExtOidIndexId, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 4 + }, {StatisticRelationId, /* STATRELATTINH */ StatisticRelidAttnumInhIndexId, 3, |