aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2021-04-20 11:32:02 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2021-04-20 11:32:02 -0400
commit7bfba4f1933003716d432d29d4d228bcf28e2e70 (patch)
tree3ce9f1591698c88d727ed3de330ab2abf935220f
parentbf5d1f1e0052100b716f7b7b0934aefb28f72e73 (diff)
downloadpostgresql-7bfba4f1933003716d432d29d4d228bcf28e2e70.tar.gz
postgresql-7bfba4f1933003716d432d29d4d228bcf28e2e70.zip
Fix planner failure in some cases of sorting by an aggregate.
An oversight introduced by the incremental-sort patches caused "could not find pathkey item to sort" errors in some situations where a sort key involves an aggregate or window function. The basic problem here is that find_em_expr_usable_for_sorting_rel isn't properly modeling what prepare_sort_from_pathkeys will do later. Rather than hoping we can keep those functions in sync, let's refactor so that they actually share the code for identifying a suitable sort expression. With this refactoring, tlist.c's tlist_member_ignore_relabel is unused. I removed it in HEAD but left it in place in v13, in case any extensions are using it. Per report from Luc Vlaming. Back-patch to v13 where the problem arose. James Coleman and Tom Lane Discussion: https://postgr.es/m/91f3ec99-85a4-fa55-ea74-33f85a5c651f@swarm64.com
-rw-r--r--src/backend/optimizer/path/equivclass.c255
-rw-r--r--src/backend/optimizer/plan/createplan.c118
-rw-r--r--src/include/optimizer/paths.h8
-rw-r--r--src/test/regress/expected/incremental_sort.out26
-rw-r--r--src/test/regress/sql/incremental_sort.sql7
5 files changed, 262 insertions, 152 deletions
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 4a9c0388661..7da261c0312 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -39,6 +39,7 @@
static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
Expr *expr, Relids relids, Relids nullable_relids,
bool is_child, Oid datatype);
+static bool is_exprlist_member(Expr *node, List *exprs);
static void generate_base_implied_equalities_const(PlannerInfo *root,
EquivalenceClass *ec);
static void generate_base_implied_equalities_no_const(PlannerInfo *root,
@@ -771,6 +772,167 @@ get_eclass_for_sort_expr(PlannerInfo *root,
}
/*
+ * find_ec_member_matching_expr
+ * Locate an EquivalenceClass member matching the given expr, if any;
+ * return NULL if no match.
+ *
+ * "Matching" is defined as "equal after stripping RelabelTypes".
+ * This is used for identifying sort expressions, and we need to allow
+ * binary-compatible relabeling for some cases involving binary-compatible
+ * sort operators.
+ *
+ * Child EC members are ignored unless they belong to given 'relids'.
+ */
+EquivalenceMember *
+find_ec_member_matching_expr(EquivalenceClass *ec,
+ Expr *expr,
+ Relids relids)
+{
+ ListCell *lc;
+
+ /* We ignore binary-compatible relabeling on both ends */
+ while (expr && IsA(expr, RelabelType))
+ expr = ((RelabelType *) expr)->arg;
+
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
+ Expr *emexpr;
+
+ /*
+ * We shouldn't be trying to sort by an equivalence class that
+ * contains a constant, so no need to consider such cases any further.
+ */
+ if (em->em_is_const)
+ continue;
+
+ /*
+ * Ignore child members unless they belong to the requested rel.
+ */
+ if (em->em_is_child &&
+ !bms_is_subset(em->em_relids, relids))
+ continue;
+
+ /*
+ * Match if same expression (after stripping relabel).
+ */
+ emexpr = em->em_expr;
+ while (emexpr && IsA(emexpr, RelabelType))
+ emexpr = ((RelabelType *) emexpr)->arg;
+
+ if (equal(emexpr, expr))
+ return em;
+ }
+
+ return NULL;
+}
+
+/*
+ * find_computable_ec_member
+ * Locate an EquivalenceClass member that can be computed from the
+ * expressions appearing in "exprs"; return NULL if no match.
+ *
+ * "exprs" can be either a list of bare expression trees, or a list of
+ * TargetEntry nodes. Either way, it should contain Vars and possibly
+ * Aggrefs and WindowFuncs, which are matched to the corresponding elements
+ * of the EquivalenceClass's expressions.
+ *
+ * Unlike find_ec_member_matching_expr, there's no special provision here
+ * for binary-compatible relabeling. This is intentional: if we have to
+ * compute an expression in this way, setrefs.c is going to insist on exact
+ * matches of Vars to the source tlist.
+ *
+ * Child EC members are ignored unless they belong to given 'relids'.
+ * Also, non-parallel-safe expressions are ignored if 'require_parallel_safe'.
+ *
+ * Note: some callers pass root == NULL for notational reasons. This is OK
+ * when require_parallel_safe is false.
+ */
+EquivalenceMember *
+find_computable_ec_member(PlannerInfo *root,
+ EquivalenceClass *ec,
+ List *exprs,
+ Relids relids,
+ bool require_parallel_safe)
+{
+ ListCell *lc;
+
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
+ List *exprvars;
+ ListCell *lc2;
+
+ /*
+ * We shouldn't be trying to sort by an equivalence class that
+ * contains a constant, so no need to consider such cases any further.
+ */
+ if (em->em_is_const)
+ continue;
+
+ /*
+ * Ignore child members unless they belong to the requested rel.
+ */
+ if (em->em_is_child &&
+ !bms_is_subset(em->em_relids, relids))
+ continue;
+
+ /*
+ * Match if all Vars and quasi-Vars are available in "exprs".
+ */
+ exprvars = pull_var_clause((Node *) em->em_expr,
+ PVC_INCLUDE_AGGREGATES |
+ PVC_INCLUDE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+ foreach(lc2, exprvars)
+ {
+ if (!is_exprlist_member(lfirst(lc2), exprs))
+ break;
+ }
+ list_free(exprvars);
+ if (lc2)
+ continue; /* we hit a non-available Var */
+
+ /*
+ * If requested, reject expressions that are not parallel-safe. We
+ * check this last because it's a rather expensive test.
+ */
+ if (require_parallel_safe &&
+ !is_parallel_safe(root, (Node *) em->em_expr))
+ continue;
+
+ return em; /* found usable expression */
+ }
+
+ return NULL;
+}
+
+/*
+ * is_exprlist_member
+ * Subroutine for find_computable_ec_member: is "node" in "exprs"?
+ *
+ * Per the requirements of that function, "exprs" might or might not have
+ * TargetEntry superstructure.
+ */
+static bool
+is_exprlist_member(Expr *node, List *exprs)
+{
+ ListCell *lc;
+
+ foreach(lc, exprs)
+ {
+ Expr *expr = (Expr *) lfirst(lc);
+
+ if (expr && IsA(expr, TargetEntry))
+ expr = ((TargetEntry *) expr)->expr;
+
+ if (equal(node, expr))
+ return true;
+ }
+ return false;
+}
+
+/*
* Find an equivalence class member expression, all of whose Vars, come from
* the indicated relation.
*/
@@ -800,71 +962,78 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
}
/*
- * Find an equivalence class member expression that can be safely used to build
- * a sort node using the provided relation. The rules are a subset of those
- * applied in prepare_sort_from_pathkeys since that function deals with sorts
- * that must be delayed until the last stages of query execution, while here
- * we only care about proactive sorts.
+ * Find an equivalence class member expression that can be used to build
+ * a sort node using the provided relation; return NULL if no candidate.
+ *
+ * To succeed, we must find an EC member that prepare_sort_from_pathkeys knows
+ * how to sort on, given the rel's reltarget as input. There are also a few
+ * additional constraints based on the fact that the desired sort will be done
+ * within the scan/join part of the plan. Also, non-parallel-safe expressions
+ * are ignored if 'require_parallel_safe'.
*/
Expr *
find_em_expr_usable_for_sorting_rel(PlannerInfo *root, EquivalenceClass *ec,
RelOptInfo *rel, bool require_parallel_safe)
{
- ListCell *lc_em;
+ PathTarget *target = rel->reltarget;
+ EquivalenceMember *em;
+ ListCell *lc;
/*
- * If there is more than one equivalence member matching these
- * requirements we'll be content to choose any one of them.
+ * Reject volatile ECs immediately; such sorts must always be postponed.
*/
- foreach(lc_em, ec->ec_members)
- {
- EquivalenceMember *em = lfirst(lc_em);
- Expr *em_expr = em->em_expr;
+ if (ec->ec_has_volatile)
+ return NULL;
- /*
- * We shouldn't be trying to sort by an equivalence class that
- * contains a constant, so no need to consider such cases any further.
- */
- if (em->em_is_const)
- continue;
+ /*
+ * Try to find an EM directly matching some reltarget member.
+ */
+ foreach(lc, target->exprs)
+ {
+ Expr *targetexpr = (Expr *) lfirst(lc);
- /*
- * Any Vars in the equivalence class member need to come from this
- * relation. This is a superset of prepare_sort_from_pathkeys ignoring
- * child members unless they belong to the rel being sorted.
- */
- if (!bms_is_subset(em->em_relids, rel->relids))
+ em = find_ec_member_matching_expr(ec, targetexpr, rel->relids);
+ if (!em)
continue;
/*
- * If requested, reject expressions that are not parallel-safe.
+ * Reject expressions involving set-returning functions, as those
+ * can't be computed early either. (Note: this test and the following
+ * one are effectively checking properties of targetexpr, so there's
+ * no point in asking whether some other EC member would be better.)
*/
- if (require_parallel_safe && !is_parallel_safe(root, (Node *) em_expr))
+ if (IS_SRF_CALL((Node *) em->em_expr))
continue;
/*
- * Disallow SRFs so that all of them can be evaluated at the correct
- * time as determined by make_sort_input_target.
+ * If requested, reject expressions that are not parallel-safe. We
+ * check this last because it's a rather expensive test.
*/
- if (IS_SRF_CALL((Node *) em_expr))
+ if (require_parallel_safe &&
+ !is_parallel_safe(root, (Node *) em->em_expr))
continue;
- /*
- * As long as the expression isn't volatile then
- * prepare_sort_from_pathkeys is able to generate a new target entry,
- * so there's no need to verify that one already exists.
- *
- * While prepare_sort_from_pathkeys has to be concerned about matching
- * up a volatile expression to the proper tlist entry, it doesn't seem
- * valuable here to expend the work trying to find a match in the
- * target's exprs since such a sort will have to be postponed anyway.
- */
- if (!ec->ec_has_volatile)
- return em->em_expr;
+ return em->em_expr;
}
- /* We didn't find any suitable equivalence class expression */
- return NULL;
+ /*
+ * Try to find a expression computable from the reltarget.
+ */
+ em = find_computable_ec_member(root, ec, target->exprs, rel->relids,
+ require_parallel_safe);
+ if (!em)
+ return NULL;
+
+ /*
+ * Reject expressions involving set-returning functions, as those can't be
+ * computed early either. (There's no point in looking for another EC
+ * member in this case; since SRFs can't appear in WHERE, they cannot
+ * belong to multi-member ECs.)
+ */
+ if (IS_SRF_CALL((Node *) em->em_expr))
+ return NULL;
+
+ return em->em_expr;
}
/*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 84f2d186d95..f7cd738eeac 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -259,9 +259,6 @@ static Plan *prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
Oid **p_sortOperators,
Oid **p_collations,
bool **p_nullsFirst);
-static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec,
- TargetEntry *tle,
- Relids relids);
static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
Relids relids);
static IncrementalSort *make_incrementalsort_from_pathkeys(Plan *lefttree,
@@ -1993,7 +1990,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags)
flags | CP_SMALL_TLIST);
/*
- * make_sort_from_pathkeys() indirectly calls find_ec_member_for_tle(),
+ * make_sort_from_pathkeys indirectly calls find_ec_member_matching_expr,
* which will ignore any child EC members that don't belong to the given
* relids. Thus, if this sort path is based on a child relation, we must
* pass its relids.
@@ -5844,9 +5841,6 @@ make_incrementalsort(Plan *lefttree, int numCols, int nPresortedCols,
*
* Returns the node which is to be the input to the Sort (either lefttree,
* or a Result stacked atop lefttree).
- *
- * Note: Restrictions on what expressions are safely sortable may also need to
- * be added to find_em_expr_usable_for_sorting_rel.
*/
static Plan *
prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
@@ -5916,7 +5910,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
tle = get_tle_by_resno(tlist, reqColIdx[numsortkeys]);
if (tle)
{
- em = find_ec_member_for_tle(ec, tle, relids);
+ em = find_ec_member_matching_expr(ec, tle->expr, relids);
if (em)
{
/* found expr at right place in tlist */
@@ -5947,7 +5941,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
foreach(j, tlist)
{
tle = (TargetEntry *) lfirst(j);
- em = find_ec_member_for_tle(ec, tle, relids);
+ em = find_ec_member_matching_expr(ec, tle->expr, relids);
if (em)
{
/* found expr already in tlist */
@@ -5961,56 +5955,12 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
if (!tle)
{
/*
- * No matching tlist item; look for a computable expression. Note
- * that we treat Aggrefs as if they were variables; this is
- * necessary when attempting to sort the output from an Agg node
- * for use in a WindowFunc (since grouping_planner will have
- * treated the Aggrefs as variables, too). Likewise, if we find a
- * WindowFunc in a sort expression, treat it as a variable.
+ * No matching tlist item; look for a computable expression.
*/
- Expr *sortexpr = NULL;
-
- foreach(j, ec->ec_members)
- {
- EquivalenceMember *em = (EquivalenceMember *) lfirst(j);
- List *exprvars;
- ListCell *k;
-
- /*
- * We shouldn't be trying to sort by an equivalence class that
- * contains a constant, so no need to consider such cases any
- * further.
- */
- if (em->em_is_const)
- continue;
-
- /*
- * Ignore child members unless they belong to the rel being
- * sorted.
- */
- if (em->em_is_child &&
- !bms_is_subset(em->em_relids, relids))
- continue;
-
- sortexpr = em->em_expr;
- exprvars = pull_var_clause((Node *) sortexpr,
- PVC_INCLUDE_AGGREGATES |
- PVC_INCLUDE_WINDOWFUNCS |
- PVC_INCLUDE_PLACEHOLDERS);
- foreach(k, exprvars)
- {
- if (!tlist_member_ignore_relabel(lfirst(k), tlist))
- break;
- }
- list_free(exprvars);
- if (!k)
- {
- pk_datatype = em->em_datatype;
- break; /* found usable expression */
- }
- }
- if (!j)
+ em = find_computable_ec_member(NULL, ec, tlist, relids, false);
+ if (!em)
elog(ERROR, "could not find pathkey item to sort");
+ pk_datatype = em->em_datatype;
/*
* Do we need to insert a Result node?
@@ -6030,7 +5980,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
/*
* Add resjunk entry to input's tlist
*/
- tle = makeTargetEntry(sortexpr,
+ tle = makeTargetEntry(copyObject(em->em_expr),
list_length(tlist) + 1,
NULL,
true);
@@ -6070,56 +6020,6 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
}
/*
- * find_ec_member_for_tle
- * Locate an EquivalenceClass member matching the given TLE, if any
- *
- * Child EC members are ignored unless they belong to given 'relids'.
- */
-static EquivalenceMember *
-find_ec_member_for_tle(EquivalenceClass *ec,
- TargetEntry *tle,
- Relids relids)
-{
- Expr *tlexpr;
- ListCell *lc;
-
- /* We ignore binary-compatible relabeling on both ends */
- tlexpr = tle->expr;
- while (tlexpr && IsA(tlexpr, RelabelType))
- tlexpr = ((RelabelType *) tlexpr)->arg;
-
- foreach(lc, ec->ec_members)
- {
- EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
- Expr *emexpr;
-
- /*
- * We shouldn't be trying to sort by an equivalence class that
- * contains a constant, so no need to consider such cases any further.
- */
- if (em->em_is_const)
- continue;
-
- /*
- * Ignore child members unless they belong to the rel being sorted.
- */
- if (em->em_is_child &&
- !bms_is_subset(em->em_relids, relids))
- continue;
-
- /* Match if same expression (after stripping relabel) */
- emexpr = em->em_expr;
- while (emexpr && IsA(emexpr, RelabelType))
- emexpr = ((RelabelType *) emexpr)->arg;
-
- if (equal(emexpr, tlexpr))
- return em;
- }
-
- return NULL;
-}
-
-/*
* make_sort_from_pathkeys
* Create sort plan to sort according to given pathkeys
*
@@ -6558,7 +6458,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
foreach(j, plan->targetlist)
{
tle = (TargetEntry *) lfirst(j);
- em = find_ec_member_for_tle(ec, tle, NULL);
+ em = find_ec_member_matching_expr(ec, tle->expr, NULL);
if (em)
{
/* found expr already in tlist */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index ea405d4e154..9254ee088b4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -135,6 +135,14 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
Index sortref,
Relids rel,
bool create_it);
+extern EquivalenceMember *find_ec_member_matching_expr(EquivalenceClass *ec,
+ Expr *expr,
+ Relids relids);
+extern EquivalenceMember *find_computable_ec_member(PlannerInfo *root,
+ EquivalenceClass *ec,
+ List *exprs,
+ Relids relids,
+ bool require_parallel_safe);
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
extern Expr *find_em_expr_usable_for_sorting_rel(PlannerInfo *root,
EquivalenceClass *ec,
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 68ca321163b..b3ef5f16829 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1577,6 +1577,32 @@ order by 1, 2;
-> Function Scan on generate_series
(7 rows)
+-- Parallel sort with an aggregate that can be safely generated in parallel,
+-- but we can't sort by partial aggregate values.
+explain (costs off) select count(*)
+from tenk1 t1
+join tenk1 t2 on t1.unique1 = t2.unique2
+join tenk1 t3 on t2.unique1 = t3.unique1
+order by count(*);
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: (count(*))
+ -> Finalize Aggregate
+ -> Gather
+ Workers Planned: 2
+ -> Partial Aggregate
+ -> Parallel Hash Join
+ Hash Cond: (t2.unique1 = t3.unique1)
+ -> Parallel Hash Join
+ Hash Cond: (t1.unique1 = t2.unique2)
+ -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t1
+ -> Parallel Hash
+ -> Parallel Index Scan using tenk1_unique2 on tenk1 t2
+ -> Parallel Hash
+ -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t3
+(15 rows)
+
-- Parallel sort but with expression (correlated subquery) that
-- is prohibited in parallel plans.
explain (costs off) select distinct
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index 81429164d45..d8768a6b54d 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -257,6 +257,13 @@ from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
explain (costs off) select sub.unique1, md5(stringu1)
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
order by 1, 2;
+-- Parallel sort with an aggregate that can be safely generated in parallel,
+-- but we can't sort by partial aggregate values.
+explain (costs off) select count(*)
+from tenk1 t1
+join tenk1 t2 on t1.unique1 = t2.unique2
+join tenk1 t3 on t2.unique1 = t3.unique1
+order by count(*);
-- Parallel sort but with expression (correlated subquery) that
-- is prohibited in parallel plans.
explain (costs off) select distinct