aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan/createplan.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2019-04-05 19:20:30 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2019-04-05 19:20:43 -0400
commit959d00e9dbe4cfcf4a63bb655ac2c29a5e579246 (patch)
tree4c260f752780d317d1f2ebab8aae553dc4dc8236 /src/backend/optimizer/plan/createplan.c
parent9f06d79ef831ffa333f908f6d3debdb654292414 (diff)
downloadpostgresql-959d00e9dbe4cfcf4a63bb655ac2c29a5e579246.tar.gz
postgresql-959d00e9dbe4cfcf4a63bb655ac2c29a5e579246.zip
Use Append rather than MergeAppend for scanning ordered partitions.
If we need ordered output from a scan of a partitioned table, but the ordering matches the partition ordering, then we don't need to use a MergeAppend to combine the pre-ordered per-partition scan results: a plain Append will produce the same results. This both saves useless comparison work inside the MergeAppend proper, and allows us to start returning tuples after istarting up just the first child node not all of them. However, all is not peaches and cream, because if some of the child nodes have high startup costs then there will be big discontinuities in the tuples-returned-versus-elapsed-time curve. The planner's cost model cannot handle that (yet, anyway). If we model the Append's startup cost as being just the first child's startup cost, we may drastically underestimate the cost of fetching slightly more tuples than are available from the first child. Since we've had bad experiences with over-optimistic choices of "fast start" plans for ORDER BY LIMIT queries, that seems scary. As a klugy workaround, set the startup cost estimate for an ordered Append to be the sum of its children's startup costs (as MergeAppend would). This doesn't really describe reality, but it's less likely to cause a bad plan choice than an underestimated startup cost would. In practice, the cases where we really care about this optimization will have child plans that are IndexScans with zero startup cost, so that the overly conservative estimate is still just zero. David Rowley, reviewed by Julien Rouhaud and Antonin Houska Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/plan/createplan.c')
-rw-r--r--src/backend/optimizer/plan/createplan.c126
1 files changed, 97 insertions, 29 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index cc222cb06cf..efe073a3eeb 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -205,8 +205,6 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual
Index scanrelid, char *enrname);
static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
Index scanrelid, int wtParam);
-static Append *make_append(List *appendplans, int first_partial_plan,
- List *tlist, PartitionPruneInfo *partpruneinfo);
static RecursiveUnion *make_recursive_union(List *tlist,
Plan *lefttree,
Plan *righttree,
@@ -1060,10 +1058,16 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
{
Append *plan;
List *tlist = build_path_tlist(root, &best_path->path);
+ List *pathkeys = best_path->path.pathkeys;
List *subplans = NIL;
ListCell *subpaths;
RelOptInfo *rel = best_path->path.parent;
PartitionPruneInfo *partpruneinfo = NULL;
+ int nodenumsortkeys = 0;
+ AttrNumber *nodeSortColIdx = NULL;
+ Oid *nodeSortOperators = NULL;
+ Oid *nodeCollations = NULL;
+ bool *nodeNullsFirst = NULL;
/*
* The subpaths list could be empty, if every child was proven empty by
@@ -1089,6 +1093,37 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
return plan;
}
+ /*
+ * Otherwise build an Append plan. Note that if there's just one child,
+ * the Append is pretty useless; but we wait till setrefs.c to get rid of
+ * it. Doing so here doesn't work because the varno of the child scan
+ * plan won't match the parent-rel Vars it'll be asked to emit.
+ *
+ * We don't have the actual creation of the Append node split out into a
+ * separate make_xxx function. This is because we want to run
+ * prepare_sort_from_pathkeys on it before we do so on the individual
+ * child plans, to make cross-checking the sort info easier.
+ */
+ plan = makeNode(Append);
+ plan->plan.targetlist = tlist;
+ plan->plan.qual = NIL;
+ plan->plan.lefttree = NULL;
+ plan->plan.righttree = NULL;
+
+ if (pathkeys != NIL)
+ {
+ /* Compute sort column info, and adjust the Append's tlist as needed */
+ (void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys,
+ best_path->path.parent->relids,
+ NULL,
+ true,
+ &nodenumsortkeys,
+ &nodeSortColIdx,
+ &nodeSortOperators,
+ &nodeCollations,
+ &nodeNullsFirst);
+ }
+
/* Build the plan for each child */
foreach(subpaths, best_path->subpaths)
{
@@ -1098,6 +1133,63 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
/* Must insist that all children return the same tlist */
subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST);
+ /*
+ * For ordered Appends, we must insert a Sort node if subplan isn't
+ * sufficiently ordered.
+ */
+ if (pathkeys != NIL)
+ {
+ int numsortkeys;
+ AttrNumber *sortColIdx;
+ Oid *sortOperators;
+ Oid *collations;
+ bool *nullsFirst;
+
+ /*
+ * Compute sort column info, and adjust subplan's tlist as needed.
+ * We must apply prepare_sort_from_pathkeys even to subplans that
+ * don't need an explicit sort, to make sure they are returning
+ * the same sort key columns the Append expects.
+ */
+ subplan = prepare_sort_from_pathkeys(subplan, pathkeys,
+ subpath->parent->relids,
+ nodeSortColIdx,
+ false,
+ &numsortkeys,
+ &sortColIdx,
+ &sortOperators,
+ &collations,
+ &nullsFirst);
+
+ /*
+ * Check that we got the same sort key information. We just
+ * Assert that the sortops match, since those depend only on the
+ * pathkeys; but it seems like a good idea to check the sort
+ * column numbers explicitly, to ensure the tlists match up.
+ */
+ Assert(numsortkeys == nodenumsortkeys);
+ if (memcmp(sortColIdx, nodeSortColIdx,
+ numsortkeys * sizeof(AttrNumber)) != 0)
+ elog(ERROR, "Append child's targetlist doesn't match Append");
+ Assert(memcmp(sortOperators, nodeSortOperators,
+ numsortkeys * sizeof(Oid)) == 0);
+ Assert(memcmp(collations, nodeCollations,
+ numsortkeys * sizeof(Oid)) == 0);
+ Assert(memcmp(nullsFirst, nodeNullsFirst,
+ numsortkeys * sizeof(bool)) == 0);
+
+ /* Now, insert a Sort node if subplan isn't sufficiently ordered */
+ if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ {
+ Sort *sort = make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, sort, best_path->limit_tuples);
+ subplan = (Plan *) sort;
+ }
+ }
+
subplans = lappend(subplans, subplan);
}
@@ -1133,15 +1225,9 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
prunequal);
}
- /*
- * And build the Append plan. Note that if there's just one child, the
- * Append is pretty useless; but we wait till setrefs.c to get rid of it.
- * Doing so here doesn't work because the varno of the child scan plan
- * won't match the parent-rel Vars it'll be asked to emit.
- */
-
- plan = make_append(subplans, best_path->first_partial_path,
- tlist, partpruneinfo);
+ plan->appendplans = subplans;
+ plan->first_partial_plan = best_path->first_partial_path;
+ plan->part_prune_info = partpruneinfo;
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -1266,7 +1352,6 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
if (best_path->path.param_info)
{
-
List *prmquals = best_path->path.param_info->ppi_clauses;
prmquals = extract_actual_clauses(prmquals, false);
@@ -5300,23 +5385,6 @@ make_foreignscan(List *qptlist,
return node;
}
-static Append *
-make_append(List *appendplans, int first_partial_plan,
- List *tlist, PartitionPruneInfo *partpruneinfo)
-{
- Append *node = makeNode(Append);
- Plan *plan = &node->plan;
-
- plan->targetlist = tlist;
- plan->qual = NIL;
- plan->lefttree = NULL;
- plan->righttree = NULL;
- node->appendplans = appendplans;
- node->first_partial_plan = first_partial_plan;
- node->part_prune_info = partpruneinfo;
- return node;
-}
-
static RecursiveUnion *
make_recursive_union(List *tlist,
Plan *lefttree,