aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/selfuncs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r--src/backend/utils/adt/selfuncs.c413
1 files changed, 371 insertions, 42 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 2348d4a772a..7e41bc56418 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3430,6 +3430,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
* If examine_variable is able to deduce anything about the GROUP BY
* expression, treat it as a single variable even if it's really more
* complicated.
+ *
+ * XXX This has the consequence that if there's a statistics object on
+ * the expression, we don't split it into individual Vars. This affects
+ * our selection of statistics in estimate_multivariate_ndistinct,
+ * because it's probably better to use a more accurate estimate for
+ * each expression and treat them as independent, than to combine
+ * estimates for the extracted variables when we don't know how that
+ * relates to the expressions.
*/
examine_variable(root, groupexpr, 0, &vardata);
if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
@@ -3880,50 +3888,77 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
List **varinfos, double *ndistinct)
{
ListCell *lc;
- Bitmapset *attnums = NULL;
- int nmatches;
+ int nmatches_vars;
+ int nmatches_exprs;
Oid statOid = InvalidOid;
MVNDistinct *stats;
- Bitmapset *matched = NULL;
+ StatisticExtInfo *matched_info = NULL;
/* bail out immediately if the table has no extended statistics */
if (!rel->statlist)
return false;
- /* Determine the attnums we're looking for */
- foreach(lc, *varinfos)
- {
- GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
- AttrNumber attnum;
-
- Assert(varinfo->rel == rel);
-
- if (!IsA(varinfo->var, Var))
- continue;
-
- attnum = ((Var *) varinfo->var)->varattno;
-
- if (!AttrNumberIsForUserDefinedAttr(attnum))
- continue;
-
- attnums = bms_add_member(attnums, attnum);
- }
-
/* look for the ndistinct statistics matching the most vars */
- nmatches = 1; /* we require at least two matches */
+ nmatches_vars = 0; /* we require at least two matches */
+ nmatches_exprs = 0;
foreach(lc, rel->statlist)
{
+ ListCell *lc2;
StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
- Bitmapset *shared;
- int nshared;
+ int nshared_vars = 0;
+ int nshared_exprs = 0;
/* skip statistics of other kinds */
if (info->kind != STATS_EXT_NDISTINCT)
continue;
- /* compute attnums shared by the vars and the statistics object */
- shared = bms_intersect(info->keys, attnums);
- nshared = bms_num_members(shared);
+ /*
+ * Determine how many expressions (and variables in non-matched
+ * expressions) match. We'll then use these numbers to pick the
+ * statistics object that best matches the clauses.
+ */
+ foreach(lc2, *varinfos)
+ {
+ ListCell *lc3;
+ GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
+ AttrNumber attnum;
+
+ Assert(varinfo->rel == rel);
+
+ /* simple Var, search in statistics keys directly */
+ if (IsA(varinfo->var, Var))
+ {
+ attnum = ((Var *) varinfo->var)->varattno;
+
+ /*
+ * Ignore system attributes - we don't support statistics on
+ * them, so can't match them (and it'd fail as the values are
+ * negative).
+ */
+ if (!AttrNumberIsForUserDefinedAttr(attnum))
+ continue;
+
+ if (bms_is_member(attnum, info->keys))
+ nshared_vars++;
+
+ continue;
+ }
+
+ /* expression - see if it's in the statistics */
+ foreach(lc3, info->exprs)
+ {
+ Node *expr = (Node *) lfirst(lc3);
+
+ if (equal(varinfo->var, expr))
+ {
+ nshared_exprs++;
+ break;
+ }
+ }
+ }
+
+ if (nshared_vars + nshared_exprs < 2)
+ continue;
/*
* Does this statistics object match more columns than the currently
@@ -3932,18 +3967,21 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
* XXX This should break ties using name of the object, or something
* like that, to make the outcome stable.
*/
- if (nshared > nmatches)
+ if ((nshared_exprs > nmatches_exprs) ||
+ (((nshared_exprs == nmatches_exprs)) && (nshared_vars > nmatches_vars)))
{
statOid = info->statOid;
- nmatches = nshared;
- matched = shared;
+ nmatches_vars = nshared_vars;
+ nmatches_exprs = nshared_exprs;
+ matched_info = info;
}
}
/* No match? */
if (statOid == InvalidOid)
return false;
- Assert(nmatches > 1 && matched != NULL);
+
+ Assert(nmatches_vars + nmatches_exprs > 1);
stats = statext_ndistinct_load(statOid);
@@ -3956,20 +3994,135 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
int i;
List *newlist = NIL;
MVNDistinctItem *item = NULL;
+ ListCell *lc2;
+ Bitmapset *matched = NULL;
+ AttrNumber attnum_offset;
+
+ /*
+ * How much do we need to offset the attnums? If there are no
+ * expressions, no offset is needed. Otherwise offset enough to move
+ * the lowest one (which is the negated number of expressions) to 1.
+ */
+ if (matched_info->exprs)
+ attnum_offset = (list_length(matched_info->exprs) + 1);
+ else
+ attnum_offset = 0;
+
+ /* see what actually matched */
+ foreach(lc2, *varinfos)
+ {
+ ListCell *lc3;
+ int idx;
+ bool found = false;
+
+ GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
+
+ /*
+ * Process a simple Var expression, by matching it to keys
+ * directly. If there's a matching expression, we'll try
+ * matching it later.
+ */
+ if (IsA(varinfo->var, Var))
+ {
+ AttrNumber attnum = ((Var *) varinfo->var)->varattno;
+
+ /*
+ * Ignore expressions on system attributes. Can't rely on
+ * the bms check for negative values.
+ */
+ if (!AttrNumberIsForUserDefinedAttr(attnum))
+ continue;
+
+ /* Is the variable covered by the statistics? */
+ if (!bms_is_member(attnum, matched_info->keys))
+ continue;
+
+ attnum = attnum + attnum_offset;
+
+ /* ensure sufficient offset */
+ Assert(AttrNumberIsForUserDefinedAttr(attnum));
+
+ matched = bms_add_member(matched, attnum);
+
+ found = true;
+ }
+
+ /*
+ * XXX Maybe we should allow searching the expressions even if we
+ * found an attribute matching the expression? That would handle
+ * trivial expressions like "(a)" but it seems fairly useless.
+ */
+ if (found)
+ continue;
+
+ /* expression - see if it's in the statistics */
+ idx = 0;
+ foreach(lc3, matched_info->exprs)
+ {
+ Node *expr = (Node *) lfirst(lc3);
+
+ if (equal(varinfo->var, expr))
+ {
+ AttrNumber attnum = -(idx + 1);
+
+ attnum = attnum + attnum_offset;
+
+ /* ensure sufficient offset */
+ Assert(AttrNumberIsForUserDefinedAttr(attnum));
+
+ matched = bms_add_member(matched, attnum);
+
+ /* there should be just one matching expression */
+ break;
+ }
+
+ idx++;
+ }
+ }
/* Find the specific item that exactly matches the combination */
for (i = 0; i < stats->nitems; i++)
{
+ int j;
MVNDistinctItem *tmpitem = &stats->items[i];
- if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
+ if (tmpitem->nattributes != bms_num_members(matched))
+ continue;
+
+ /* assume it's the right item */
+ item = tmpitem;
+
+ /* check that all item attributes/expressions fit the match */
+ for (j = 0; j < tmpitem->nattributes; j++)
{
- item = tmpitem;
- break;
+ AttrNumber attnum = tmpitem->attributes[j];
+
+ /*
+ * Thanks to how we constructed the matched bitmap above, we
+ * can just offset all attnums the same way.
+ */
+ attnum = attnum + attnum_offset;
+
+ if (!bms_is_member(attnum, matched))
+ {
+ /* nah, it's not this item */
+ item = NULL;
+ break;
+ }
}
+
+ /*
+ * If the item has all the matched attributes, we know it's the
+ * right one - there can't be a better one matching more.
+ */
+ if (item)
+ break;
}
- /* make sure we found an item */
+ /*
+ * Make sure we found an item. There has to be one, because ndistinct
+ * statistics includes all combinations of attributes.
+ */
if (!item)
elog(ERROR, "corrupt MVNDistinct entry");
@@ -3977,18 +4130,63 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
foreach(lc, *varinfos)
{
GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
- AttrNumber attnum;
+ ListCell *lc3;
+ bool found = false;
- if (!IsA(varinfo->var, Var))
+ /*
+ * Let's look at plain variables first, because it's the most
+ * common case and the check is quite cheap. We can simply get the
+ * attnum and check (with an offset) matched bitmap.
+ */
+ if (IsA(varinfo->var, Var))
{
- newlist = lappend(newlist, varinfo);
+ AttrNumber attnum = ((Var *) varinfo->var)->varattno;
+
+ /*
+ * If it's a system attribute, we're done. We don't support
+ * extended statistics on system attributes, so it's clearly
+ * not matched. Just keep the expression and continue.
+ */
+ if (!AttrNumberIsForUserDefinedAttr(attnum))
+ {
+ newlist = lappend(newlist, varinfo);
+ continue;
+ }
+
+ /* apply the same offset as above */
+ attnum += attnum_offset;
+
+ /* if it's not matched, keep the varinfo */
+ if (!bms_is_member(attnum, matched))
+ newlist = lappend(newlist, varinfo);
+
+ /* The rest of the loop deals with complex expressions. */
continue;
}
- attnum = ((Var *) varinfo->var)->varattno;
+ /*
+ * Process complex expressions, not just simple Vars.
+ *
+ * First, we search for an exact match of an expression. If we
+ * find one, we can just discard the whole GroupVarInfo, with all
+ * the variables we extracted from it.
+ *
+ * Otherwise we inspect the individual vars, and try matching them
+ * to variables in the item.
+ */
+ foreach(lc3, matched_info->exprs)
+ {
+ Node *expr = (Node *) lfirst(lc3);
+
+ if (equal(varinfo->var, expr))
+ {
+ found = true;
+ break;
+ }
+ }
- if (AttrNumberIsForUserDefinedAttr(attnum) &&
- bms_is_member(attnum, matched))
+ /* found exact match, skip */
+ if (found)
continue;
newlist = lappend(newlist, varinfo);
@@ -4690,6 +4888,13 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
*join_is_reversed = false;
}
+/*
+ * ReleaseDummy
+ *		Release callback for a stats tuple obtained from extended statistics.
+ *
+ * statext_expressions_load copies the tuple, so just pfree it.  This is
+ * installed as vardata->freefunc by examine_variable when statsTuple is
+ * taken from the result of statext_expressions_load.
+ */
+static void
+ReleaseDummy(HeapTuple tuple)
+{
+ pfree(tuple);
+}
+
/*
* examine_variable
* Try to look up statistical data about an expression.
@@ -4830,6 +5035,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
* operator we are estimating for. FIXME later.
*/
ListCell *ilist;
+ ListCell *slist;
foreach(ilist, onerel->indexlist)
{
@@ -4986,6 +5192,129 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
if (vardata->statsTuple)
break;
}
+
+ /*
+ * Search extended statistics for one with a matching expression.
+ * There might be multiple ones, so just grab the first one. In the
+ * future, we might consider the statistics target (and pick the most
+ * accurate statistics) and maybe some other parameters.
+ */
+ foreach(slist, onerel->statlist)
+ {
+ StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
+ ListCell *expr_item;
+ int pos;
+
+ /*
+ * Stop once we've found statistics for the expression (either
+ * from extended stats, or for an index in the preceding loop).
+ */
+ if (vardata->statsTuple)
+ break;
+
+ /* skip stats without per-expression stats */
+ if (info->kind != STATS_EXT_EXPRESSIONS)
+ continue;
+
+ pos = 0;
+ foreach(expr_item, info->exprs)
+ {
+ Node *expr = (Node *) lfirst(expr_item);
+
+ Assert(expr);
+
+ /* strip RelabelType before comparing it */
+ if (expr && IsA(expr, RelabelType))
+ expr = (Node *) ((RelabelType *) expr)->arg;
+
+ /* found a match, see if we can extract pg_statistic row */
+ if (equal(node, expr))
+ {
+ HeapTuple t = statext_expressions_load(info->statOid, pos);
+
+ /* Get statistics object's table for permission check */
+ RangeTblEntry *rte;
+ Oid userid;
+
+ vardata->statsTuple = t;
+
+ /*
+ * XXX Not sure if we should cache the tuple somewhere.
+ * Now we just create a new copy every time.
+ */
+ vardata->freefunc = ReleaseDummy;
+
+ rte = planner_rt_fetch(onerel->relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ /*
+ * Use checkAsUser if it's set, in case we're accessing
+ * the table via a view.
+ */
+ userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+ /*
+ * For simplicity, we insist on the whole table being
+ * selectable, rather than trying to identify which
+ * column(s) the statistics depends on. Also require all
+ * rows to be selectable --- there must be no
+ * securityQuals from security barrier views or RLS
+ * policies.
+ */
+ vardata->acl_ok =
+ rte->securityQuals == NIL &&
+ (pg_class_aclcheck(rte->relid, userid,
+ ACL_SELECT) == ACLCHECK_OK);
+
+ /*
+ * If the user doesn't have permissions to access an
+ * inheritance child relation, check the permissions of
+ * the table actually mentioned in the query, since most
+ * likely the user does have that permission. Note that
+ * whole-table select privilege on the parent doesn't
+ * quite guarantee that the user could read all columns of
+ * the child. But in practice it's unlikely that any
+ * interesting security violation could result from
+ * allowing access to the expression stats, so we allow it
+ * anyway. See similar code in examine_simple_variable()
+ * for additional comments.
+ */
+ if (!vardata->acl_ok &&
+ root->append_rel_array != NULL)
+ {
+ AppendRelInfo *appinfo;
+ Index varno = onerel->relid;
+
+ appinfo = root->append_rel_array[varno];
+ while (appinfo &&
+ planner_rt_fetch(appinfo->parent_relid,
+ root)->rtekind == RTE_RELATION)
+ {
+ varno = appinfo->parent_relid;
+ appinfo = root->append_rel_array[varno];
+ }
+ if (varno != onerel->relid)
+ {
+ /* Repeat access check on this rel */
+ rte = planner_rt_fetch(varno, root);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+ vardata->acl_ok =
+ rte->securityQuals == NIL &&
+ (pg_class_aclcheck(rte->relid,
+ userid,
+ ACL_SELECT) == ACLCHECK_OK);
+ }
+ }
+
+ break;
+ }
+
+ pos++;
+ }
+ }
}
}