diff options
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/executor/execProcnode.c | 13 | ||||
-rw-r--r-- | src/backend/nodes/outfuncs.c | 1 | ||||
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 237 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 81 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinrels.c | 2 | ||||
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 164 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 126 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 4 | ||||
-rw-r--r-- | src/backend/optimizer/prep/prepunion.c | 7 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 30 | ||||
-rw-r--r-- | src/backend/partitioning/partbounds.c | 64 |
11 files changed, 638 insertions, 91 deletions
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 4ab290360ac..c227282975a 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -840,6 +840,19 @@ ExecSetTupleBound(int64 tuples_needed, PlanState *child_node) sortState->bound = tuples_needed; } } + else if (IsA(child_node, AppendState)) + { + /* + * If it is an Append, we can apply the bound to any nodes that are + * children of the Append, since the Append surely need read no more + * than that many tuples from any one input. + */ + AppendState *aState = (AppendState *) child_node; + int i; + + for (i = 0; i < aState->as_nplans; i++) + ExecSetTupleBound(tuples_needed, aState->appendplans[i]); + } else if (IsA(child_node, MergeAppendState)) { /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 3282be0e4bd..82ca6826ab1 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1847,6 +1847,7 @@ _outAppendPath(StringInfo str, const AppendPath *node) WRITE_NODE_FIELD(partitioned_rels); WRITE_NODE_FIELD(subpaths); WRITE_INT_FIELD(first_partial_path); + WRITE_FLOAT_FIELD(limit_tuples, "%.0f"); } static void diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 727da333388..af05bb7511e 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -44,6 +44,7 @@ #include "optimizer/tlist.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" +#include "partitioning/partbounds.h" #include "partitioning/partprune.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" @@ -96,15 +97,16 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); -static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, - List *live_childrels, - List *all_child_pathkeys, - List *partitioned_rels); +static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys, + List *partitioned_rels); static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); static void accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths); +static Path *get_singleton_append_subpath(Path *path); static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); @@ -1520,7 +1522,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, */ if (subpaths_valid) add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, - NULL, 0, false, + NIL, NULL, 0, false, partitioned_rels, -1)); /* @@ -1562,7 +1564,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, /* Generate a partial append path. */ appendpath = create_append_path(root, rel, NIL, partial_subpaths, - NULL, parallel_workers, + NIL, NULL, parallel_workers, enable_parallel_append, partitioned_rels, -1); @@ -1612,19 +1614,19 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, appendpath = create_append_path(root, rel, pa_nonpartial_subpaths, pa_partial_subpaths, - NULL, parallel_workers, true, + NIL, NULL, parallel_workers, true, partitioned_rels, partial_rows); add_partial_path(rel, (Path *) appendpath); } /* - * Also build unparameterized MergeAppend paths based on the collected + * Also build unparameterized ordered append paths based on the collected * list of child pathkeys. */ if (subpaths_valid) - generate_mergeappend_paths(root, rel, live_childrels, - all_child_pathkeys, - partitioned_rels); + generate_orderedappend_paths(root, rel, live_childrels, + all_child_pathkeys, + partitioned_rels); /* * Build Append paths for each parameterization seen among the child rels. @@ -1674,7 +1676,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, if (subpaths_valid) add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, - required_outer, 0, false, + NIL, required_outer, 0, false, partitioned_rels, -1)); } @@ -1703,8 +1705,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, continue; appendpath = create_append_path(root, rel, NIL, list_make1(path), - NULL, path->parallel_workers, - true, + NIL, NULL, + path->parallel_workers, true, partitioned_rels, partial_rows); add_partial_path(rel, (Path *) appendpath); } @@ -1712,17 +1714,21 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, } /* - * generate_mergeappend_paths - * Generate MergeAppend paths for an append relation + * generate_orderedappend_paths + * Generate ordered append paths for an append relation * - * Generate a path for each ordering (pathkey list) appearing in + * Usually we generate MergeAppend paths here, but there are some special + * cases where we can generate simple Append paths, because the subpaths + * can provide tuples in the required order already. + * + * We generate a path for each ordering (pathkey list) appearing in * all_child_pathkeys. * * We consider both cheapest-startup and cheapest-total cases, ie, for each * interesting ordering, collect all the cheapest startup subpaths and all the - * cheapest total paths, and build a MergeAppend path for each case. + * cheapest total paths, and build a suitable path for each case. * - * We don't currently generate any parameterized MergeAppend paths. While + * We don't currently generate any parameterized ordered paths here. While * it would not take much more code here to do so, it's very unclear that it * is worth the planning cycles to investigate such paths: there's little * use for an ordered path on the inside of a nestloop. In fact, it's likely @@ -1731,17 +1737,52 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, * and a parameterized MergeAppend is going to be more expensive than the * corresponding parameterized Append path. If we ever try harder to support * parameterized mergejoin plans, it might be worth adding support for - * parameterized MergeAppends to feed such joins. (See notes in + * parameterized paths here to feed such joins. (See notes in * optimizer/README for why that might not ever happen, though.) */ static void -generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, - List *live_childrels, - List *all_child_pathkeys, - List *partitioned_rels) +generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels, + List *all_child_pathkeys, + List *partitioned_rels) { ListCell *lcp; + List *partition_pathkeys = NIL; + List *partition_pathkeys_desc = NIL; + bool partition_pathkeys_partial = true; + bool partition_pathkeys_desc_partial = true; + + /* + * Some partitioned table setups may allow us to use an Append node + * instead of a MergeAppend. This is possible in cases such as RANGE + * partitioned tables where it's guaranteed that an earlier partition must + * contain rows which come earlier in the sort order. To detect whether + * this is relevant, build pathkey descriptions of the partition ordering, + * for both forward and reverse scans. + */ + if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) && + partitions_are_ordered(rel->boundinfo, rel->nparts)) + { + partition_pathkeys = build_partition_pathkeys(root, rel, + ForwardScanDirection, + &partition_pathkeys_partial); + + partition_pathkeys_desc = build_partition_pathkeys(root, rel, + BackwardScanDirection, + &partition_pathkeys_desc_partial); + + /* + * You might think we should truncate_useless_pathkeys here, but + * allowing partition keys which are a subset of the query's pathkeys + * can often be useful. For example, consider a table partitioned by + * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child + * paths that can produce the a, b, c ordering (perhaps via indexes on + * (a, b, c)) then it works to consider the appendrel output as + * ordered by a, b, c. + */ + } + /* Now consider each interesting sort ordering */ foreach(lcp, all_child_pathkeys) { List *pathkeys = (List *) lfirst(lcp); @@ -1749,6 +1790,27 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, List *total_subpaths = NIL; bool startup_neq_total = false; ListCell *lcr; + bool match_partition_order; + bool match_partition_order_desc; + + /* + * Determine if this sort ordering matches any partition pathkeys we + * have, for both ascending and descending partition order. If the + * partition pathkeys happen to be contained in pathkeys then it still + * works, as described above, providing that the partition pathkeys + * are complete and not just a prefix of the partition keys. (In such + * cases we'll be relying on the child paths to have sorted the + * lower-order columns of the required pathkeys.) + */ + match_partition_order = + pathkeys_contained_in(pathkeys, partition_pathkeys) || + (!partition_pathkeys_partial && + pathkeys_contained_in(partition_pathkeys, pathkeys)); + + match_partition_order_desc = !match_partition_order && + (pathkeys_contained_in(pathkeys, partition_pathkeys_desc) || + (!partition_pathkeys_desc_partial && + pathkeys_contained_in(partition_pathkeys_desc, pathkeys))); /* Select the child paths for this ordering... */ foreach(lcr, live_childrels) @@ -1791,26 +1853,94 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, if (cheapest_startup != cheapest_total) startup_neq_total = true; - accumulate_append_subpath(cheapest_startup, - &startup_subpaths, NULL); - accumulate_append_subpath(cheapest_total, - &total_subpaths, NULL); + /* + * Collect the appropriate child paths. The required logic varies + * for the Append and MergeAppend cases. + */ + if (match_partition_order) + { + /* + * We're going to make a plain Append path. We don't need + * most of what accumulate_append_subpath would do, but we do + * want to cut out child Appends or MergeAppends if they have + * just a single subpath (and hence aren't doing anything + * useful). + */ + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lappend(startup_subpaths, cheapest_startup); + total_subpaths = lappend(total_subpaths, cheapest_total); + } + else if (match_partition_order_desc) + { + /* + * As above, but we need to reverse the order of the children, + * because nodeAppend.c doesn't know anything about reverse + * ordering and will scan the children in the order presented. + */ + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lcons(cheapest_startup, startup_subpaths); + total_subpaths = lcons(cheapest_total, total_subpaths); + } + else + { + /* + * Otherwise, rely on accumulate_append_subpath to collect the + * child paths for the MergeAppend. + */ + accumulate_append_subpath(cheapest_startup, + &startup_subpaths, NULL); + accumulate_append_subpath(cheapest_total, + &total_subpaths, NULL); + } } - /* ... and build the MergeAppend paths */ - add_path(rel, (Path *) create_merge_append_path(root, - rel, - startup_subpaths, - pathkeys, - NULL, - partitioned_rels)); - if (startup_neq_total) + /* ... and build the Append or MergeAppend paths */ + if (match_partition_order || match_partition_order_desc) + { + /* We only need Append */ + add_path(rel, (Path *) create_append_path(root, + rel, + startup_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + partitioned_rels, + -1)); + if (startup_neq_total) + add_path(rel, (Path *) create_append_path(root, + rel, + total_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + partitioned_rels, + -1)); + } + else + { + /* We need MergeAppend */ add_path(rel, (Path *) create_merge_append_path(root, rel, - total_subpaths, + startup_subpaths, pathkeys, NULL, partitioned_rels)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys, + NULL, + partitioned_rels)); + } } } @@ -1901,7 +2031,6 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, * omitting a sort step, which seems fine: if the parent is to be an Append, * its result would be unsorted anyway, while if the parent is to be a * MergeAppend, there's no point in a separate sort on a child. - * its result would be unsorted anyway. * * Normally, either path is a partial path and subpaths is a list of partial * paths, or else path is a non-partial plan and subpaths is a list of those. @@ -1952,6 +2081,36 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths) } /* + * get_singleton_append_subpath + * Returns the single subpath of an Append/MergeAppend, or just + * return 'path' if it's not a single sub-path Append/MergeAppend. + * + * Note: 'path' must not be a parallel-aware path. + */ +static Path * +get_singleton_append_subpath(Path *path) +{ + Assert(!path->parallel_aware); + + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + if (list_length(apath->subpaths) == 1) + return (Path *) linitial(apath->subpaths); + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *mpath = (MergeAppendPath *) path; + + if (list_length(mpath->subpaths) == 1) + return (Path *) linitial(mpath->subpaths); + } + + return path; +} + +/* * set_dummy_rel_pathlist * Build a dummy path for a relation that's been excluded by constraints * @@ -1975,7 +2134,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel) /* Set up the dummy path */ add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, - rel->lateral_relids, + NIL, rel->lateral_relids, 0, false, NIL, -1)); /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 4b9be13f08e..afd32884a25 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1878,27 +1878,83 @@ cost_append(AppendPath *apath) apath->path.startup_cost = 0; apath->path.total_cost = 0; + apath->path.rows = 0; if (apath->subpaths == NIL) return; if (!apath->path.parallel_aware) { - Path *subpath = (Path *) linitial(apath->subpaths); + List *pathkeys = apath->path.pathkeys; - /* - * Startup cost of non-parallel-aware Append is the startup cost of - * first subpath. - */ - apath->path.startup_cost = subpath->startup_cost; + if (pathkeys == NIL) + { + Path *subpath = (Path *) linitial(apath->subpaths); - /* Compute rows and costs as sums of subplan rows and costs. */ - foreach(l, apath->subpaths) + /* + * For an unordered, non-parallel-aware Append we take the startup + * cost as the startup cost of the first subpath. + */ + apath->path.startup_cost = subpath->startup_cost; + + /* Compute rows and costs as sums of subplan rows and costs. */ + foreach(l, apath->subpaths) + { + Path *subpath = (Path *) lfirst(l); + + apath->path.rows += subpath->rows; + apath->path.total_cost += subpath->total_cost; + } + } + else { - Path *subpath = (Path *) lfirst(l); + /* + * For an ordered, non-parallel-aware Append we take the startup + * cost as the sum of the subpath startup costs. This ensures + * that we don't underestimate the startup cost when a query's + * LIMIT is such that several of the children have to be run to + * satisfy it. This might be overkill --- another plausible hack + * would be to take the Append's startup cost as the maximum of + * the child startup costs. But we don't want to risk believing + * that an ORDER BY LIMIT query can be satisfied at small cost + * when the first child has small startup cost but later ones + * don't. (If we had the ability to deal with nonlinear cost + * interpolation for partial retrievals, we would not need to be + * so conservative about this.) + * + * This case is also different from the above in that we have to + * account for possibly injecting sorts into subpaths that aren't + * natively ordered. + */ + foreach(l, apath->subpaths) + { + Path *subpath = (Path *) lfirst(l); + Path sort_path; /* dummy for result of cost_sort */ - apath->path.rows += subpath->rows; - apath->path.total_cost += subpath->total_cost; + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + /* + * We'll need to insert a Sort node, so include costs for + * that. We can use the parent's LIMIT if any, since we + * certainly won't pull more than that many tuples from + * any child. + */ + cost_sort(&sort_path, + NULL, /* doesn't currently need root */ + pathkeys, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + apath->limit_tuples); + subpath = &sort_path; + } + + apath->path.rows += subpath->rows; + apath->path.startup_cost += subpath->startup_cost; + apath->path.total_cost += subpath->total_cost; + } } } else /* parallel-aware */ @@ -1906,6 +1962,9 @@ cost_append(AppendPath *apath) int i = 0; double parallel_divisor = get_parallel_divisor(&apath->path); + /* Parallel-aware Append never produces ordered output. */ + Assert(apath->path.pathkeys == NIL); + /* Calculate startup cost. */ foreach(l, apath->subpaths) { diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 34cc7dacdf0..e66cf328bea 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1261,7 +1261,7 @@ mark_dummy_rel(RelOptInfo *rel) /* Set up the dummy path */ add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, - rel->lateral_relids, + NIL, rel->lateral_relids, 0, false, NIL, -1)); /* Set or update cheapest_total_path and related fields */ diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 56d839bb31c..68b2204b3b4 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -18,16 +18,21 @@ #include "postgres.h" #include "access/stratnum.h" +#include "catalog/pg_opfamily.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "partitioning/partbounds.h" #include "utils/lsyscache.h" static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); +static bool matches_boolean_partition_clause(RestrictInfo *rinfo, + RelOptInfo *partrel, + int partkeycol); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); @@ -547,6 +552,165 @@ build_index_pathkeys(PlannerInfo *root, } /* + * partkey_is_bool_constant_for_query + * + * If a partition key column is constrained to have a constant value by the + * query's WHERE conditions, then it's irrelevant for sort-order + * considerations. Usually that means we have a restriction clause + * WHERE partkeycol = constant, which gets turned into an EquivalenceClass + * containing a constant, which is recognized as redundant by + * build_partition_pathkeys(). But if the partition key column is a + * boolean variable (or expression), then we are not going to see such a + * WHERE clause, because expression preprocessing will have simplified it + * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going + * to have a matching EquivalenceClass (unless the query also contains + * "ORDER BY partkeycol"). To allow such cases to work the same as they would + * for non-boolean values, this function is provided to detect whether the + * specified partition key column matches a boolean restriction clause. + */ +static bool +partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol) +{ + PartitionScheme partscheme = partrel->part_scheme; + ListCell *lc; + + /* If the partkey isn't boolean, we can't possibly get a match */ + if (!IsBooleanOpfamily(partscheme->partopfamily[partkeycol])) + return false; + + /* Check each restriction clause for the partitioned rel */ + foreach(lc, partrel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* Ignore pseudoconstant quals, they won't match */ + if (rinfo->pseudoconstant) + continue; + + /* See if we can match the clause's expression to the partkey column */ + if (matches_boolean_partition_clause(rinfo, partrel, partkeycol)) + return true; + } + + return false; +} + +/* + * matches_boolean_partition_clause + * Determine if the boolean clause described by rinfo matches + * partrel's partkeycol-th partition key column. + * + * "Matches" can be either an exact match (equivalent to partkey = true), + * or a NOT above an exact match (equivalent to partkey = false). + */ +static bool +matches_boolean_partition_clause(RestrictInfo *rinfo, + RelOptInfo *partrel, int partkeycol) +{ + Node *clause = (Node *) rinfo->clause; + Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]); + + /* Direct match? */ + if (equal(partexpr, clause)) + return true; + /* NOT clause? */ + else if (is_notclause(clause)) + { + Node *arg = (Node *) get_notclausearg((Expr *) clause); + + if (equal(partexpr, arg)) + return true; + } + + return false; +} + +/* + * build_partition_pathkeys + * Build a pathkeys list that describes the ordering induced by the + * partitions of partrel, under either forward or backward scan + * as per scandir. + * + * Caller must have checked that the partitions are properly ordered, + * as detected by partitions_are_ordered(). + * + * Sets *partialkeys to true if pathkeys were only built for a prefix of the + * partition key, or false if the pathkeys include all columns of the + * partition key. + */ +List * +build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, + ScanDirection scandir, bool *partialkeys) +{ + List *retval = NIL; + PartitionScheme partscheme = partrel->part_scheme; + int i; + + Assert(partscheme != NULL); + Assert(partitions_are_ordered(partrel->boundinfo, partrel->nparts)); + /* For now, we can only cope with baserels */ + Assert(IS_SIMPLE_REL(partrel)); + + for (i = 0; i < partscheme->partnatts; i++) + { + PathKey *cpathkey; + Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]); + + /* + * Try to make a canonical pathkey for this partkey. + * + * We're considering a baserel scan, so nullable_relids should be + * NULL. Also, we assume the PartitionDesc lists any NULL partition + * last, so we treat the scan like a NULLS LAST index: we have + * nulls_first for backwards scan only. + */ + cpathkey = make_pathkey_from_sortinfo(root, + keyCol, + NULL, + partscheme->partopfamily[i], + partscheme->partopcintype[i], + partscheme->partcollation[i], + ScanDirectionIsBackward(scandir), + ScanDirectionIsBackward(scandir), + 0, + partrel->relids, + false); + + + if (cpathkey) + { + /* + * We found the sort key in an EquivalenceClass, so it's relevant + * for this query. Add it to list, unless it's redundant. + */ + if (!pathkey_is_redundant(cpathkey, retval)) + retval = lappend(retval, cpathkey); + } + else + { + /* + * Boolean partition keys might be redundant even if they do not + * appear in an EquivalenceClass, because of our special treatment + * of boolean equality conditions --- see the comment for + * partkey_is_bool_constant_for_query(). If that applies, we can + * continue to examine lower-order partition keys. Otherwise, the + * sort key is not an interesting sort order for this query, so we + * should stop considering partition columns; any lower-order sort + * keys won't be useful either. + */ + if (!partkey_is_bool_constant_for_query(partrel, i)) + { + *partialkeys = true; + return retval; + } + } + } + + *partialkeys = false; + return retval; +} + +/* * build_expression_pathkey * Build a pathkeys list that describes an ordering by a single expression * using the given sort operator. diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index cc222cb06cf..efe073a3eeb 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -205,8 +205,6 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual Index scanrelid, char *enrname); static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual, Index scanrelid, int wtParam); -static Append *make_append(List *appendplans, int first_partial_plan, - List *tlist, PartitionPruneInfo *partpruneinfo); static RecursiveUnion *make_recursive_union(List *tlist, Plan *lefttree, Plan *righttree, @@ -1060,10 +1058,16 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) { Append *plan; List *tlist = build_path_tlist(root, &best_path->path); + List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; PartitionPruneInfo *partpruneinfo = NULL; + int nodenumsortkeys = 0; + AttrNumber *nodeSortColIdx = NULL; + Oid *nodeSortOperators = NULL; + Oid *nodeCollations = NULL; + bool *nodeNullsFirst = NULL; /* * The subpaths list could be empty, if every child was proven empty by @@ -1089,6 +1093,37 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) return plan; } + /* + * Otherwise build an Append plan. Note that if there's just one child, + * the Append is pretty useless; but we wait till setrefs.c to get rid of + * it. Doing so here doesn't work because the varno of the child scan + * plan won't match the parent-rel Vars it'll be asked to emit. + * + * We don't have the actual creation of the Append node split out into a + * separate make_xxx function. This is because we want to run + * prepare_sort_from_pathkeys on it before we do so on the individual + * child plans, to make cross-checking the sort info easier. + */ + plan = makeNode(Append); + plan->plan.targetlist = tlist; + plan->plan.qual = NIL; + plan->plan.lefttree = NULL; + plan->plan.righttree = NULL; + + if (pathkeys != NIL) + { + /* Compute sort column info, and adjust the Append's tlist as needed */ + (void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys, + best_path->path.parent->relids, + NULL, + true, + &nodenumsortkeys, + &nodeSortColIdx, + &nodeSortOperators, + &nodeCollations, + &nodeNullsFirst); + } + /* Build the plan for each child */ foreach(subpaths, best_path->subpaths) { @@ -1098,6 +1133,63 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) /* Must insist that all children return the same tlist */ subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST); + /* + * For ordered Appends, we must insert a Sort node if subplan isn't + * sufficiently ordered. + */ + if (pathkeys != NIL) + { + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* + * Compute sort column info, and adjust subplan's tlist as needed. + * We must apply prepare_sort_from_pathkeys even to subplans that + * don't need an explicit sort, to make sure they are returning + * the same sort key columns the Append expects. + */ + subplan = prepare_sort_from_pathkeys(subplan, pathkeys, + subpath->parent->relids, + nodeSortColIdx, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &collations, + &nullsFirst); + + /* + * Check that we got the same sort key information. We just + * Assert that the sortops match, since those depend only on the + * pathkeys; but it seems like a good idea to check the sort + * column numbers explicitly, to ensure the tlists match up. + */ + Assert(numsortkeys == nodenumsortkeys); + if (memcmp(sortColIdx, nodeSortColIdx, + numsortkeys * sizeof(AttrNumber)) != 0) + elog(ERROR, "Append child's targetlist doesn't match Append"); + Assert(memcmp(sortOperators, nodeSortOperators, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(collations, nodeCollations, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(nullsFirst, nodeNullsFirst, + numsortkeys * sizeof(bool)) == 0); + + /* Now, insert a Sort node if subplan isn't sufficiently ordered */ + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + Sort *sort = make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, sort, best_path->limit_tuples); + subplan = (Plan *) sort; + } + } + subplans = lappend(subplans, subplan); } @@ -1133,15 +1225,9 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) prunequal); } - /* - * And build the Append plan. Note that if there's just one child, the - * Append is pretty useless; but we wait till setrefs.c to get rid of it. - * Doing so here doesn't work because the varno of the child scan plan - * won't match the parent-rel Vars it'll be asked to emit. - */ - - plan = make_append(subplans, best_path->first_partial_path, - tlist, partpruneinfo); + plan->appendplans = subplans; + plan->first_partial_plan = best_path->first_partial_path; + plan->part_prune_info = partpruneinfo; copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1266,7 +1352,6 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) if (best_path->path.param_info) { - List *prmquals = best_path->path.param_info->ppi_clauses; prmquals = extract_actual_clauses(prmquals, false); @@ -5300,23 +5385,6 @@ make_foreignscan(List *qptlist, return node; } -static Append * -make_append(List *appendplans, int first_partial_plan, - List *tlist, PartitionPruneInfo *partpruneinfo) -{ - Append *node = makeNode(Append); - Plan *plan = &node->plan; - - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; - plan->righttree = NULL; - node->appendplans = appendplans; - node->first_partial_plan = first_partial_plan; - node->part_prune_info = partpruneinfo; - return node; -} - static RecursiveUnion * make_recursive_union(List *tlist, Plan *lefttree, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index e2cdc83613d..cbd3fb8e0ea 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1721,7 +1721,8 @@ inheritance_planner(PlannerInfo *root) /* Make a dummy path, cf set_dummy_rel_pathlist() */ dummy_path = (Path *) create_append_path(NULL, final_rel, NIL, NIL, - NULL, 0, false, NIL, -1); + NIL, NULL, 0, false, + NIL, -1); /* These lists must be nonempty to make a valid ModifyTable node */ subpaths = list_make1(dummy_path); @@ -4003,6 +4004,7 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel, paths, NIL, + NIL, NULL, 0, false, diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index eb815c2f126..c1def3823fa 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -617,7 +617,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, * Append the child results together. */ path = (Path *) create_append_path(root, result_rel, pathlist, NIL, - NULL, 0, false, NIL, -1); + NIL, NULL, 0, false, NIL, -1); /* * For UNION ALL, we just need the Append path. For UNION, need to add @@ -672,7 +672,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, ppath = (Path *) create_append_path(root, result_rel, NIL, partial_pathlist, - NULL, parallel_workers, enable_parallel_append, + NIL, NULL, + parallel_workers, enable_parallel_append, NIL, -1); ppath = (Path *) create_gather_path(root, result_rel, ppath, @@ -783,7 +784,7 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, * Append the child results together. */ path = (Path *) create_append_path(root, result_rel, pathlist, NIL, - NULL, 0, false, NIL, -1); + NIL, NULL, 0, false, NIL, -1); /* Identify the grouping semantics */ groupList = generate_setop_grouplist(op, tlist); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 1ea89ff54c9..36aee35d462 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1203,12 +1203,13 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, * pathnode. * * Note that we must handle subpaths = NIL, representing a dummy access path. + * Also, there are callers that pass root = NULL. */ AppendPath * create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, - Relids required_outer, + List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, List *partitioned_rels, double rows) { @@ -1242,6 +1243,7 @@ create_append_path(PlannerInfo *root, pathnode->path.parallel_aware = parallel_aware; pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = parallel_workers; + pathnode->path.pathkeys = pathkeys; pathnode->partitioned_rels = list_copy(partitioned_rels); /* @@ -1255,6 +1257,13 @@ create_append_path(PlannerInfo *root, */ if (pathnode->path.parallel_aware) { + /* + * We mustn't fiddle with the order of subpaths when the Append has + * pathkeys. The order they're listed in is critical to keeping the + * pathkeys valid. + */ + Assert(pathkeys == NIL); + subpaths = list_qsort(subpaths, append_total_cost_compare); partial_subpaths = list_qsort(partial_subpaths, append_startup_cost_compare); @@ -1262,6 +1271,15 @@ create_append_path(PlannerInfo *root, pathnode->first_partial_path = list_length(subpaths); pathnode->subpaths = list_concat(subpaths, partial_subpaths); + /* + * Apply query-wide LIMIT if known and path is for sole base relation. + * (Handling this at this low level is a bit klugy.) + */ + if (root != NULL && bms_equal(rel->relids, root->all_baserels)) + pathnode->limit_tuples = root->limit_tuples; + else + pathnode->limit_tuples = -1.0; + foreach(l, pathnode->subpaths) { Path *subpath = (Path *) lfirst(l); @@ -1278,8 +1296,9 @@ create_append_path(PlannerInfo *root, /* * If there's exactly one child path, the Append is a no-op and will be * discarded later (in setrefs.c); therefore, we can inherit the child's - * size, cost, and pathkeys if any. Otherwise, it's unsorted, and we must - * do the normal costsize calculation. + * size and cost, as well as its pathkeys if any (overriding whatever the + * caller might've said). Otherwise, we must do the normal costsize + * calculation. */ if (list_length(pathnode->subpaths) == 1) { @@ -1291,10 +1310,7 @@ create_append_path(PlannerInfo *root, pathnode->path.pathkeys = child->pathkeys; } else - { - pathnode->path.pathkeys = NIL; /* unsorted if more than 1 subpath */ cost_append(pathnode); - } /* If the caller provided a row estimate, override the computed value. */ if (rows >= 0) @@ -3759,7 +3775,7 @@ reparameterize_path(PlannerInfo *root, Path *path, } return (Path *) create_append_path(root, rel, childpaths, partialpaths, - required_outer, + apath->path.pathkeys, required_outer, apath->path.parallel_workers, apath->path.parallel_aware, apath->partitioned_rels, diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index bdd0d238542..c8770ccfee0 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -862,6 +862,70 @@ partition_bounds_copy(PartitionBoundInfo src, } /* + * partitions_are_ordered + * Determine whether the partitions described by 'boundinfo' are ordered, + * that is partitions appearing earlier in the PartitionDesc sequence + * contain partition keys strictly less than those appearing later. + * Also, if NULL values are possible, they must come in the last + * partition defined in the PartitionDesc. + * + * If out of order, or there is insufficient info to know the order, + * then we return false. + */ +bool +partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts) +{ + Assert(boundinfo != NULL); + + switch (boundinfo->strategy) + { + case PARTITION_STRATEGY_RANGE: + + /* + * RANGE-type partitioning guarantees that the partitions can be + * scanned in the order that they're defined in the PartitionDesc + * to provide sequential, non-overlapping ranges of tuples. + * However, if a DEFAULT partition exists then it doesn't work, as + * that could contain tuples from either below or above the + * defined range, or tuples belonging to gaps between partitions. + */ + if (!partition_bound_has_default(boundinfo)) + return true; + break; + + case PARTITION_STRATEGY_LIST: + + /* + * LIST partitioning can also guarantee ordering, but only if the + * partitions don't accept interleaved values. We could likely + * check for this by looping over the PartitionBound's indexes + * array to check that the indexes are in order. For now, let's + * just keep it simple and just accept LIST partitioning when + * there's no DEFAULT partition, exactly one value per partition, + * and optionally a NULL partition that does not accept any other + * values. Such a NULL partition will come last in the + * PartitionDesc, and the other partitions will be properly + * ordered. This is a cheap test to make as it does not require + * any per-partition processing. Maybe we'd like to handle more + * complex cases in the future. + */ + if (partition_bound_has_default(boundinfo)) + return false; + + if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo) + == nparts) + return true; + break; + + default: + /* HASH, or some other strategy */ + break; + } + + return false; +} + +/* * check_new_partition_bound * * Checks if the new partition's bound overlaps any of the existing partitions |