diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2019-04-05 19:20:30 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2019-04-05 19:20:43 -0400 |
commit | 959d00e9dbe4cfcf4a63bb655ac2c29a5e579246 (patch) | |
tree | 4c260f752780d317d1f2ebab8aae553dc4dc8236 /src/backend/optimizer/plan/createplan.c | |
parent | 9f06d79ef831ffa333f908f6d3debdb654292414 (diff) | |
download | postgresql-959d00e9dbe4cfcf4a63bb655ac2c29a5e579246.tar.gz postgresql-959d00e9dbe4cfcf4a63bb655ac2c29a5e579246.zip |
Use Append rather than MergeAppend for scanning ordered partitions.
If we need ordered output from a scan of a partitioned table, but
the ordering matches the partition ordering, then we don't need to
use a MergeAppend to combine the pre-ordered per-partition scan
results: a plain Append will produce the same results. This
both saves useless comparison work inside the MergeAppend proper,
and allows us to start returning tuples after istarting up just
the first child node not all of them.
However, all is not peaches and cream, because if some of the
child nodes have high startup costs then there will be big
discontinuities in the tuples-returned-versus-elapsed-time curve.
The planner's cost model cannot handle that (yet, anyway).
If we model the Append's startup cost as being just the first
child's startup cost, we may drastically underestimate the cost
of fetching slightly more tuples than are available from the first
child. Since we've had bad experiences with over-optimistic choices
of "fast start" plans for ORDER BY LIMIT queries, that seems scary.
As a klugy workaround, set the startup cost estimate for an ordered
Append to be the sum of its children's startup costs (as MergeAppend
would). This doesn't really describe reality, but it's less likely
to cause a bad plan choice than an underestimated startup cost would.
In practice, the cases where we really care about this optimization
will have child plans that are IndexScans with zero startup cost,
so that the overly conservative estimate is still just zero.
David Rowley, reviewed by Julien Rouhaud and Antonin Houska
Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/plan/createplan.c')
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 126 |
1 files changed, 97 insertions, 29 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index cc222cb06cf..efe073a3eeb 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -205,8 +205,6 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual Index scanrelid, char *enrname); static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual, Index scanrelid, int wtParam); -static Append *make_append(List *appendplans, int first_partial_plan, - List *tlist, PartitionPruneInfo *partpruneinfo); static RecursiveUnion *make_recursive_union(List *tlist, Plan *lefttree, Plan *righttree, @@ -1060,10 +1058,16 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) { Append *plan; List *tlist = build_path_tlist(root, &best_path->path); + List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; PartitionPruneInfo *partpruneinfo = NULL; + int nodenumsortkeys = 0; + AttrNumber *nodeSortColIdx = NULL; + Oid *nodeSortOperators = NULL; + Oid *nodeCollations = NULL; + bool *nodeNullsFirst = NULL; /* * The subpaths list could be empty, if every child was proven empty by @@ -1089,6 +1093,37 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) return plan; } + /* + * Otherwise build an Append plan. Note that if there's just one child, + * the Append is pretty useless; but we wait till setrefs.c to get rid of + * it. Doing so here doesn't work because the varno of the child scan + * plan won't match the parent-rel Vars it'll be asked to emit. + * + * We don't have the actual creation of the Append node split out into a + * separate make_xxx function. This is because we want to run + * prepare_sort_from_pathkeys on it before we do so on the individual + * child plans, to make cross-checking the sort info easier. + */ + plan = makeNode(Append); + plan->plan.targetlist = tlist; + plan->plan.qual = NIL; + plan->plan.lefttree = NULL; + plan->plan.righttree = NULL; + + if (pathkeys != NIL) + { + /* Compute sort column info, and adjust the Append's tlist as needed */ + (void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys, + best_path->path.parent->relids, + NULL, + true, + &nodenumsortkeys, + &nodeSortColIdx, + &nodeSortOperators, + &nodeCollations, + &nodeNullsFirst); + } + /* Build the plan for each child */ foreach(subpaths, best_path->subpaths) { @@ -1098,6 +1133,63 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) /* Must insist that all children return the same tlist */ subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST); + /* + * For ordered Appends, we must insert a Sort node if subplan isn't + * sufficiently ordered. + */ + if (pathkeys != NIL) + { + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* + * Compute sort column info, and adjust subplan's tlist as needed. + * We must apply prepare_sort_from_pathkeys even to subplans that + * don't need an explicit sort, to make sure they are returning + * the same sort key columns the Append expects. + */ + subplan = prepare_sort_from_pathkeys(subplan, pathkeys, + subpath->parent->relids, + nodeSortColIdx, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &collations, + &nullsFirst); + + /* + * Check that we got the same sort key information. We just + * Assert that the sortops match, since those depend only on the + * pathkeys; but it seems like a good idea to check the sort + * column numbers explicitly, to ensure the tlists match up. + */ + Assert(numsortkeys == nodenumsortkeys); + if (memcmp(sortColIdx, nodeSortColIdx, + numsortkeys * sizeof(AttrNumber)) != 0) + elog(ERROR, "Append child's targetlist doesn't match Append"); + Assert(memcmp(sortOperators, nodeSortOperators, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(collations, nodeCollations, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(nullsFirst, nodeNullsFirst, + numsortkeys * sizeof(bool)) == 0); + + /* Now, insert a Sort node if subplan isn't sufficiently ordered */ + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + Sort *sort = make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, sort, best_path->limit_tuples); + subplan = (Plan *) sort; + } + } + subplans = lappend(subplans, subplan); } @@ -1133,15 +1225,9 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) prunequal); } - /* - * And build the Append plan. Note that if there's just one child, the - * Append is pretty useless; but we wait till setrefs.c to get rid of it. - * Doing so here doesn't work because the varno of the child scan plan - * won't match the parent-rel Vars it'll be asked to emit. - */ - - plan = make_append(subplans, best_path->first_partial_path, - tlist, partpruneinfo); + plan->appendplans = subplans; + plan->first_partial_plan = best_path->first_partial_path; + plan->part_prune_info = partpruneinfo; copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1266,7 +1352,6 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) if (best_path->path.param_info) { - List *prmquals = best_path->path.param_info->ppi_clauses; prmquals = extract_actual_clauses(prmquals, false); @@ -5300,23 +5385,6 @@ make_foreignscan(List *qptlist, return node; } -static Append * -make_append(List *appendplans, int first_partial_plan, - List *tlist, PartitionPruneInfo *partpruneinfo) -{ - Append *node = makeNode(Append); - Plan *plan = &node->plan; - - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; - plan->righttree = NULL; - node->appendplans = appendplans; - node->first_partial_plan = first_partial_plan; - node->part_prune_info = partpruneinfo; - return node; -} - static RecursiveUnion * make_recursive_union(List *tlist, Plan *lefttree, |