aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils')
-rw-r--r--src/backend/utils/adt/ruleutils.c81
-rw-r--r--src/backend/utils/adt/selfuncs.c181
-rw-r--r--src/backend/utils/cache/relcache.c79
-rw-r--r--src/backend/utils/cache/syscache.c23
4 files changed, 357 insertions, 7 deletions
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 5c823250bc2..81c91039e40 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -35,6 +35,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
@@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
const Oid *excludeOps,
bool attrsOnly, bool showTblSpc,
int prettyFlags, bool missing_ok);
+static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok);
static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
bool attrsOnly);
static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@@ -1422,6 +1424,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
}
/*
+ * pg_get_statisticsextdef
+ * Get the definition of an extended statistics object
+ */
+Datum
+pg_get_statisticsextdef(PG_FUNCTION_ARGS)
+{
+	Oid			stxoid = PG_GETARG_OID(0);
+	char	   *definition;
+
+	/*
+	 * Ask the worker with missing_ok = true, so an unknown OID comes back as
+	 * a NULL pointer, which we turn into an SQL NULL.
+	 */
+	definition = pg_get_statisticsext_worker(stxoid, true);
+
+	if (definition != NULL)
+		PG_RETURN_TEXT_P(string_to_text(definition));
+
+	PG_RETURN_NULL();
+}
+
+/*
+ * Internal workhorse to decompile an extended statistics object.
+ *
+ * Looks up the pg_statistic_ext entry with OID statextid and the pg_class
+ * entry of the relation it is defined on, and returns a string of the form
+ *		CREATE STATISTICS name ON (col [, ...]) FROM relname
+ * built in a StringInfo buffer.
+ *
+ * If the statistics entry cannot be found, returns NULL when missing_ok is
+ * true and raises an error otherwise.  A missing pg_class entry is always
+ * reported as an error.
+ */
+static char *
+pg_get_statisticsext_worker(Oid statextid, bool missing_ok)
+{
+	Form_pg_statistic_ext statextrec;
+	Form_pg_class pgclassrec;
+	HeapTuple	statexttup;
+	HeapTuple	pgclasstup;
+	StringInfoData buf;
+	Oid			relid;
+	int			colno;
+
+	statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
+
+	if (!HeapTupleIsValid(statexttup))
+	{
+		if (missing_ok)
+			return NULL;
+		elog(ERROR, "cache lookup failed for extended statistics %u", statextid);
+	}
+
+	statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
+
+	/* remember the relation OID so it stays usable after releasing the tuple */
+	relid = statextrec->starelid;
+
+	pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+
+	/* verify the pg_class lookup itself, not the statistics tuple again */
+	if (!HeapTupleIsValid(pgclasstup))
+	{
+		ReleaseSysCache(statexttup);
+		elog(ERROR, "cache lookup failed for relation %u", relid);
+	}
+
+	pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup);
+
+	initStringInfo(&buf);
+
+	appendStringInfo(&buf, "CREATE STATISTICS %s ON (",
+					 quote_identifier(NameStr(statextrec->staname)));
+
+	/* emit the comma-separated list of column names covered by the object */
+	for (colno = 0; colno < statextrec->stakeys.dim1; colno++)
+	{
+		AttrNumber	attnum = statextrec->stakeys.values[colno];
+		char	   *attname;
+
+		if (colno > 0)
+			appendStringInfoString(&buf, ", ");
+
+		attname = get_relid_attribute_name(relid, attnum);
+
+		appendStringInfoString(&buf, quote_identifier(attname));
+	}
+
+	appendStringInfo(&buf, ") FROM %s",
+					 quote_identifier(NameStr(pgclassrec->relname)));
+
+	ReleaseSysCache(statexttup);
+	ReleaseSysCache(pgclasstup);
+
+	return buf.data;
+}
+
+/*
* pg_get_partkeydef
*
* Returns the partition key specification, ie, the following:
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index f8b28fe0e61..cc24c8aeb56 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -110,6 +110,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h"
@@ -126,6 +127,7 @@
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
+#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/date.h"
@@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator,
static double eqjoinsel_semi(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2,
RelOptInfo *inner_rel);
+static bool estimate_multivariate_ndistinct(PlannerInfo *root,
+ RelOptInfo *rel, List **varinfos, double *ndistinct);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
@@ -3398,25 +3402,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
{
GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
RelOptInfo *rel = varinfo1->rel;
- double reldistinct = varinfo1->ndistinct;
+ double reldistinct = 1;
double relmaxndistinct = reldistinct;
int relvarcount = 1;
List *newvarinfos = NIL;
+ List *relvarinfos = NIL;
/*
- * Get the product of numdistinct estimates of the Vars for this rel.
- * Also, construct new varinfos list of remaining Vars.
+ * Split the list of varinfos in two - one for the current rel,
+ * one for remaining Vars on other rels.
*/
+ relvarinfos = lcons(varinfo1, relvarinfos);
for_each_cell(l, lnext(list_head(varinfos)))
{
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
if (varinfo2->rel == varinfo1->rel)
{
- reldistinct *= varinfo2->ndistinct;
- if (relmaxndistinct < varinfo2->ndistinct)
- relmaxndistinct = varinfo2->ndistinct;
- relvarcount++;
+ /* varinfos on current rel */
+ relvarinfos = lcons(varinfo2, relvarinfos);
}
else
{
@@ -3426,6 +3430,43 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
}
/*
+ * Get the numdistinct estimate for the Vars of this rel. We
+ * iteratively search for multivariate n-distinct with maximum number
+ * of vars; assuming that each var group is independent of the others,
+ * we multiply them together. Any remaining relvarinfos after
+ * no more multivariate matches are found are assumed independent too,
+ * so their individual ndistinct estimates are multiplied also.
+ */
+ while (relvarinfos)
+ {
+ double mvndistinct;
+
+ if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+ &mvndistinct))
+ {
+ reldistinct *= mvndistinct;
+ if (relmaxndistinct < mvndistinct)
+ relmaxndistinct = mvndistinct;
+ relvarcount++; /* inaccurate, but doesn't matter */
+ }
+ else
+ {
+ foreach (l, relvarinfos)
+ {
+ GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+
+ reldistinct *= varinfo2->ndistinct;
+ if (relmaxndistinct < varinfo2->ndistinct)
+ relmaxndistinct = varinfo2->ndistinct;
+ relvarcount++;
+ }
+
+ /* we're done with this relation */
+ relvarinfos = NIL;
+ }
+ }
+
+ /*
* Sanity check --- don't divide by zero if empty relation.
*/
Assert(rel->reloptkind == RELOPT_BASEREL);
@@ -3668,6 +3709,132 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
*/
/*
+ * Find applicable ndistinct statistics for the given list of VarInfos (which
+ * must all belong to the given rel), and set *ndistinct to the estimate of
+ * the MVNDistinctItem that best matches.  If a match is found, *varinfos is
+ * replaced by a list holding only the varinfos that were not matched.
+ *
+ * Varinfos that aren't for simple Vars are ignored when matching and are
+ * always kept in the output list.
+ *
+ * Return true if we're able to find a match, false otherwise; on false,
+ * neither *varinfos nor *ndistinct is modified.
+ */
+static bool
+estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
+								List **varinfos, double *ndistinct)
+{
+	ListCell   *cell;
+	Bitmapset  *wanted = NULL;
+	Bitmapset  *matched = NULL;
+	Oid			best_stat = InvalidOid;
+	int			best_count = 1;		/* we require at least two matches */
+	MVNDistinct *stats;
+	MVNDistinctItem *item = NULL;
+	List	   *remaining = NIL;
+	int			i;
+
+	/* nothing to do when the table has no extended statistics at all */
+	if (!rel->statlist)
+		return false;
+
+	/* Collect the attribute numbers of all plain Vars we were handed */
+	foreach(cell, *varinfos)
+	{
+		GroupVarInfo *vinfo = (GroupVarInfo *) lfirst(cell);
+
+		Assert(vinfo->rel == rel);
+
+		if (!IsA(vinfo->var, Var))
+			continue;
+
+		wanted = bms_add_member(wanted, ((Var *) vinfo->var)->varattno);
+	}
+
+	/*
+	 * Pick the ndistinct statistic that covers the most of those attributes.
+	 * Only a strictly better candidate replaces the current one, so among
+	 * equally good statistics the first one in rel->statlist wins.
+	 *
+	 * XXX This should break ties using name of the statistic, or something
+	 * like that, to make the outcome stable.
+	 */
+	foreach(cell, rel->statlist)
+	{
+		StatisticExtInfo *candidate = (StatisticExtInfo *) lfirst(cell);
+		Bitmapset  *overlap;
+		int			noverlap;
+
+		/* statistics of other kinds are of no use here */
+		if (candidate->kind != STATS_EXT_NDISTINCT)
+			continue;
+
+		/* attnums common to the wanted Vars and this statistic */
+		overlap = bms_intersect(candidate->keys, wanted);
+		noverlap = bms_num_members(overlap);
+
+		if (noverlap > best_count)
+		{
+			best_stat = candidate->statOid;
+			best_count = noverlap;
+			matched = overlap;
+		}
+	}
+
+	/* No match? */
+	if (best_stat == InvalidOid)
+		return false;
+	Assert(best_count > 1 && matched != NULL);
+
+	stats = statext_ndistinct_load(best_stat);
+	if (!stats)
+		return false;
+
+	/*
+	 * Search the loaded statistic for the item whose attribute set is
+	 * exactly the matched combination (there must be one).
+	 */
+	for (i = 0; i < stats->nitems; i++)
+	{
+		MVNDistinctItem *probe = &stats->items[i];
+
+		if (bms_subset_compare(probe->attrs, matched) == BMS_EQUAL)
+		{
+			item = probe;
+			break;
+		}
+	}
+
+	/* make sure we found an item */
+	if (!item)
+		elog(ERROR, "corrupt MVNDistinct entry");
+
+	/*
+	 * Form the output varinfo list: non-Var entries are always kept, Var
+	 * entries only when their attribute was not matched.
+	 */
+	foreach(cell, *varinfos)
+	{
+		GroupVarInfo *vinfo = (GroupVarInfo *) lfirst(cell);
+
+		if (!IsA(vinfo->var, Var) ||
+			!bms_is_member(((Var *) vinfo->var)->varattno, matched))
+			remaining = lappend(remaining, vinfo);
+	}
+
+	*varinfos = remaining;
+	*ndistinct = item->ndistinct;
+	return true;
+}
+
+/*
* convert_to_scalar
* Convert non-NULL values of the indicated types to the comparison
* scale needed by scalarineqsel().
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index ce55fc52777..a6b60c67caa 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -56,6 +56,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_shseclabel.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
@@ -4452,6 +4453,82 @@ RelationGetIndexList(Relation relation)
}
/*
+ * RelationGetStatExtList
+ * get a list of OIDs of extended statistics on this relation
+ *
+ * The statistics list is created only if someone requests it, in a way
+ * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
+ * relevant statistics, and add the list to the relcache entry so that we
+ * won't have to compute it again. Note that shared cache inval of a
+ * relcache entry will delete the old list and set rd_statvalid to 0,
+ * so that we must recompute the statistics list on next request. This
+ * handles creation or deletion of a statistic.
+ *
+ * The returned list is guaranteed to be sorted in order by OID, although
+ * this is not currently needed.
+ *
+ * Since shared cache inval causes the relcache's copy of the list to go away,
+ * we return a copy of the list palloc'd in the caller's context. The caller
+ * may list_free() the returned list after scanning it. This is necessary
+ * since the caller will typically be doing syscache lookups on the relevant
+ * statistics, and syscache lookup could cause SI messages to be processed!
+ */
+List *
+RelationGetStatExtList(Relation relation)
+{
+	Relation	statrel;
+	SysScanDesc scan;
+	ScanKeyData key;
+	HeapTuple	tup;
+	List	   *statoids = NIL;
+	List	   *stale;
+	MemoryContext callercxt;
+
+	/* Fast path: hand out a copy of the list cached in the relcache entry. */
+	if (relation->rd_statvalid)
+		return list_copy(relation->rd_statlist);
+
+	/*
+	 * The list to be returned is accumulated in the current memory context
+	 * during the scan; only once the scan has completed is a copy stored in
+	 * the relcache entry.  That way an error partway through cannot leak
+	 * memory in the cache context.
+	 */
+
+	/* Scan pg_statistic_ext for entries having starelid = this rel. */
+	ScanKeyInit(&key,
+				Anum_pg_statistic_ext_starelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(relation)));
+
+	statrel = heap_open(StatisticExtRelationId, AccessShareLock);
+	scan = systable_beginscan(statrel, StatisticExtRelidIndexId, true,
+							  NULL, 1, &key);
+
+	for (;;)
+	{
+		tup = systable_getnext(scan);
+		if (!HeapTupleIsValid(tup))
+			break;
+
+		/* TODO maybe include only already built statistics? */
+		statoids = insert_ordered_oid(statoids, HeapTupleGetOid(tup));
+	}
+
+	systable_endscan(scan);
+	heap_close(statrel, AccessShareLock);
+
+	/* Install a cache-context copy of the finished list in the entry. */
+	callercxt = MemoryContextSwitchTo(CacheMemoryContext);
+	stale = relation->rd_statlist;
+	relation->rd_statlist = list_copy(statoids);
+	relation->rd_statvalid = true;
+	MemoryContextSwitchTo(callercxt);
+
+	/* Free the list that was previously attached, if there was one. */
+	list_free(stale);
+
+	return statoids;
+}
+
+/*
* insert_ordered_oid
* Insert a new Oid into a sorted list of Oids, preserving ordering
*
@@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared)
rel->rd_pkattr = NULL;
rel->rd_idattr = NULL;
rel->rd_pubactions = NULL;
+ rel->rd_statvalid = false;
+ rel->rd_statlist = NIL;
rel->rd_createSubid = InvalidSubTransactionId;
rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
rel->rd_amcache = NULL;
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index d5a376406fe..d8c823f42b5 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -61,6 +61,7 @@
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
@@ -726,6 +727,28 @@ static const struct cachedesc cacheinfo[] = {
},
32
},
+ {StatisticExtRelationId, /* STATEXTNAMENSP */
+ StatisticExtNameIndexId,
+ 2,
+ {
+ Anum_pg_statistic_ext_staname,
+ Anum_pg_statistic_ext_stanamespace,
+ 0,
+ 0
+ },
+ 4
+ },
+ {StatisticExtRelationId, /* STATEXTOID */
+ StatisticExtOidIndexId,
+ 1,
+ {
+ ObjectIdAttributeNumber,
+ 0,
+ 0,
+ 0
+ },
+ 4
+ },
{StatisticRelationId, /* STATRELATTINH */
StatisticRelidAttnumInhIndexId,
3,