Diffstat (limited to 'src/backend')
-rw-r--r--  src/backend/executor/execProcnode.c      |  13
-rw-r--r--  src/backend/nodes/outfuncs.c             |   1
-rw-r--r--  src/backend/optimizer/path/allpaths.c    | 237
-rw-r--r--  src/backend/optimizer/path/costsize.c    |  81
-rw-r--r--  src/backend/optimizer/path/joinrels.c    |   2
-rw-r--r--  src/backend/optimizer/path/pathkeys.c    | 164
-rw-r--r--  src/backend/optimizer/plan/createplan.c  | 126
-rw-r--r--  src/backend/optimizer/plan/planner.c     |   4
-rw-r--r--  src/backend/optimizer/prep/prepunion.c   |   7
-rw-r--r--  src/backend/optimizer/util/pathnode.c    |  30
-rw-r--r--  src/backend/partitioning/partbounds.c    |  64
11 files changed, 638 insertions(+), 91 deletions(-)
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 4ab290360ac..c227282975a 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -840,6 +840,19 @@ ExecSetTupleBound(int64 tuples_needed, PlanState *child_node)
sortState->bound = tuples_needed;
}
}
+ else if (IsA(child_node, AppendState))
+ {
+ /*
+ * If it is an Append, we can apply the bound to any nodes that are
+ * children of the Append, since the Append surely need read no more
+ * than that many tuples from any one input.
+ */
+ AppendState *aState = (AppendState *) child_node;
+ int i;
+
+ for (i = 0; i < aState->as_nplans; i++)
+ ExecSetTupleBound(tuples_needed, aState->appendplans[i]);
+ }
else if (IsA(child_node, MergeAppendState))
{
/*
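The new AppendState case mirrors the existing MergeAppendState one: a bound
such as LIMIT 10 can be pushed down to every child, since the Append never
needs more than that many tuples from any single input. A minimal standalone
sketch of that recursion, with toy types rather than PostgreSQL executor
nodes:

    typedef struct ToyNode
    {
        int             nchildren;
        struct ToyNode **children;
        long            bound;      /* tuple bound; -1 if unknown */
    } ToyNode;

    static void
    toy_set_bound(ToyNode *node, long bound)
    {
        /*
         * Each child may stop after producing 'bound' tuples, because the
         * parent will never pull more than that from any one input.
         */
        node->bound = bound;
        for (int i = 0; i < node->nchildren; i++)
            toy_set_bound(node->children[i], bound);
    }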
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 3282be0e4bd..82ca6826ab1 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1847,6 +1847,7 @@ _outAppendPath(StringInfo str, const AppendPath *node)
WRITE_NODE_FIELD(partitioned_rels);
WRITE_NODE_FIELD(subpaths);
WRITE_INT_FIELD(first_partial_path);
+ WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
}
static void
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 727da333388..af05bb7511e 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -44,6 +44,7 @@
#include "optimizer/tlist.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
+#include "partitioning/partbounds.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
@@ -96,15 +97,16 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
-static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
- List *live_childrels,
- List *all_child_pathkeys,
- List *partitioned_rels);
+static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *live_childrels,
+ List *all_child_pathkeys,
+ List *partitioned_rels);
static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
RelOptInfo *rel,
Relids required_outer);
static void accumulate_append_subpath(Path *path,
List **subpaths, List **special_subpaths);
+static Path *get_singleton_append_subpath(Path *path);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
@@ -1520,7 +1522,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
*/
if (subpaths_valid)
add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
- NULL, 0, false,
+ NIL, NULL, 0, false,
partitioned_rels, -1));
/*
@@ -1562,7 +1564,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
/* Generate a partial append path. */
appendpath = create_append_path(root, rel, NIL, partial_subpaths,
- NULL, parallel_workers,
+ NIL, NULL, parallel_workers,
enable_parallel_append,
partitioned_rels, -1);
@@ -1612,19 +1614,19 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
pa_partial_subpaths,
- NULL, parallel_workers, true,
+ NIL, NULL, parallel_workers, true,
partitioned_rels, partial_rows);
add_partial_path(rel, (Path *) appendpath);
}
/*
- * Also build unparameterized MergeAppend paths based on the collected
+ * Also build unparameterized ordered append paths based on the collected
* list of child pathkeys.
*/
if (subpaths_valid)
- generate_mergeappend_paths(root, rel, live_childrels,
- all_child_pathkeys,
- partitioned_rels);
+ generate_orderedappend_paths(root, rel, live_childrels,
+ all_child_pathkeys,
+ partitioned_rels);
/*
* Build Append paths for each parameterization seen among the child rels.
@@ -1674,7 +1676,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
if (subpaths_valid)
add_path(rel, (Path *)
create_append_path(root, rel, subpaths, NIL,
- required_outer, 0, false,
+ NIL, required_outer, 0, false,
partitioned_rels, -1));
}
@@ -1703,8 +1705,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
continue;
appendpath = create_append_path(root, rel, NIL, list_make1(path),
- NULL, path->parallel_workers,
- true,
+ NIL, NULL,
+ path->parallel_workers, true,
partitioned_rels, partial_rows);
add_partial_path(rel, (Path *) appendpath);
}
@@ -1712,17 +1714,21 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
}
/*
- * generate_mergeappend_paths
- * Generate MergeAppend paths for an append relation
+ * generate_orderedappend_paths
+ * Generate ordered append paths for an append relation
*
- * Generate a path for each ordering (pathkey list) appearing in
+ * Usually we generate MergeAppend paths here, but there are some special
+ * cases where we can generate simple Append paths, because the subpaths
+ * can provide tuples in the required order already.
+ *
+ * We generate a path for each ordering (pathkey list) appearing in
* all_child_pathkeys.
*
* We consider both cheapest-startup and cheapest-total cases, ie, for each
* interesting ordering, collect all the cheapest startup subpaths and all the
- * cheapest total paths, and build a MergeAppend path for each case.
+ * cheapest total paths, and build a suitable path for each case.
*
- * We don't currently generate any parameterized MergeAppend paths. While
+ * We don't currently generate any parameterized ordered paths here. While
* it would not take much more code here to do so, it's very unclear that it
* is worth the planning cycles to investigate such paths: there's little
* use for an ordered path on the inside of a nestloop. In fact, it's likely
@@ -1731,17 +1737,52 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
* and a parameterized MergeAppend is going to be more expensive than the
* corresponding parameterized Append path. If we ever try harder to support
* parameterized mergejoin plans, it might be worth adding support for
- * parameterized MergeAppends to feed such joins. (See notes in
+ * parameterized paths here to feed such joins. (See notes in
* optimizer/README for why that might not ever happen, though.)
*/
static void
-generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
- List *live_childrels,
- List *all_child_pathkeys,
- List *partitioned_rels)
+generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *live_childrels,
+ List *all_child_pathkeys,
+ List *partitioned_rels)
{
ListCell *lcp;
+ List *partition_pathkeys = NIL;
+ List *partition_pathkeys_desc = NIL;
+ bool partition_pathkeys_partial = true;
+ bool partition_pathkeys_desc_partial = true;
+
+ /*
+ * Some partitioned table setups may allow us to use an Append node
+ * instead of a MergeAppend. This is possible in cases such as RANGE
+ * partitioned tables where it's guaranteed that an earlier partition must
+ * contain rows which come earlier in the sort order. To detect whether
+ * this is relevant, build pathkey descriptions of the partition ordering,
+ * for both forward and reverse scans.
+ */
+ if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) &&
+ partitions_are_ordered(rel->boundinfo, rel->nparts))
+ {
+ partition_pathkeys = build_partition_pathkeys(root, rel,
+ ForwardScanDirection,
+ &partition_pathkeys_partial);
+
+ partition_pathkeys_desc = build_partition_pathkeys(root, rel,
+ BackwardScanDirection,
+ &partition_pathkeys_desc_partial);
+
+ /*
+ * You might think we should truncate_useless_pathkeys here, but
+ * allowing partition keys which are a subset of the query's pathkeys
+ * can often be useful. For example, consider a table partitioned by
+ * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child
+ * paths that can produce the a, b, c ordering (perhaps via indexes on
+ * (a, b, c)) then it works to consider the appendrel output as
+ * ordered by a, b, c.
+ */
+ }
+ /* Now consider each interesting sort ordering */
foreach(lcp, all_child_pathkeys)
{
List *pathkeys = (List *) lfirst(lcp);
@@ -1749,6 +1790,27 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
List *total_subpaths = NIL;
bool startup_neq_total = false;
ListCell *lcr;
+ bool match_partition_order;
+ bool match_partition_order_desc;
+
+ /*
+ * Determine if this sort ordering matches any partition pathkeys we
+ * have, for both ascending and descending partition order. If the
+ * partition pathkeys happen to be contained in pathkeys then it still
+ * works, as described above, providing that the partition pathkeys
+ * are complete and not just a prefix of the partition keys. (In such
+ * cases we'll be relying on the child paths to have sorted the
+ * lower-order columns of the required pathkeys.)
+ */
+ match_partition_order =
+ pathkeys_contained_in(pathkeys, partition_pathkeys) ||
+ (!partition_pathkeys_partial &&
+ pathkeys_contained_in(partition_pathkeys, pathkeys));
+
+ match_partition_order_desc = !match_partition_order &&
+ (pathkeys_contained_in(pathkeys, partition_pathkeys_desc) ||
+ (!partition_pathkeys_desc_partial &&
+ pathkeys_contained_in(partition_pathkeys_desc, pathkeys)));
/* Select the child paths for this ordering... */
foreach(lcr, live_childrels)
@@ -1791,26 +1853,94 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
if (cheapest_startup != cheapest_total)
startup_neq_total = true;
- accumulate_append_subpath(cheapest_startup,
- &startup_subpaths, NULL);
- accumulate_append_subpath(cheapest_total,
- &total_subpaths, NULL);
+ /*
+ * Collect the appropriate child paths. The required logic varies
+ * for the Append and MergeAppend cases.
+ */
+ if (match_partition_order)
+ {
+ /*
+ * We're going to make a plain Append path. We don't need
+ * most of what accumulate_append_subpath would do, but we do
+ * want to cut out child Appends or MergeAppends if they have
+ * just a single subpath (and hence aren't doing anything
+ * useful).
+ */
+ cheapest_startup = get_singleton_append_subpath(cheapest_startup);
+ cheapest_total = get_singleton_append_subpath(cheapest_total);
+
+ startup_subpaths = lappend(startup_subpaths, cheapest_startup);
+ total_subpaths = lappend(total_subpaths, cheapest_total);
+ }
+ else if (match_partition_order_desc)
+ {
+ /*
+ * As above, but we need to reverse the order of the children,
+ * because nodeAppend.c doesn't know anything about reverse
+ * ordering and will scan the children in the order presented.
+ */
+ cheapest_startup = get_singleton_append_subpath(cheapest_startup);
+ cheapest_total = get_singleton_append_subpath(cheapest_total);
+
+ startup_subpaths = lcons(cheapest_startup, startup_subpaths);
+ total_subpaths = lcons(cheapest_total, total_subpaths);
+ }
+ else
+ {
+ /*
+ * Otherwise, rely on accumulate_append_subpath to collect the
+ * child paths for the MergeAppend.
+ */
+ accumulate_append_subpath(cheapest_startup,
+ &startup_subpaths, NULL);
+ accumulate_append_subpath(cheapest_total,
+ &total_subpaths, NULL);
+ }
}
- /* ... and build the MergeAppend paths */
- add_path(rel, (Path *) create_merge_append_path(root,
- rel,
- startup_subpaths,
- pathkeys,
- NULL,
- partitioned_rels));
- if (startup_neq_total)
+ /* ... and build the Append or MergeAppend paths */
+ if (match_partition_order || match_partition_order_desc)
+ {
+ /* We only need Append */
+ add_path(rel, (Path *) create_append_path(root,
+ rel,
+ startup_subpaths,
+ NIL,
+ pathkeys,
+ NULL,
+ 0,
+ false,
+ partitioned_rels,
+ -1));
+ if (startup_neq_total)
+ add_path(rel, (Path *) create_append_path(root,
+ rel,
+ total_subpaths,
+ NIL,
+ pathkeys,
+ NULL,
+ 0,
+ false,
+ partitioned_rels,
+ -1));
+ }
+ else
+ {
+ /* We need MergeAppend */
add_path(rel, (Path *) create_merge_append_path(root,
rel,
- total_subpaths,
+ startup_subpaths,
pathkeys,
NULL,
partitioned_rels));
+ if (startup_neq_total)
+ add_path(rel, (Path *) create_merge_append_path(root,
+ rel,
+ total_subpaths,
+ pathkeys,
+ NULL,
+ partitioned_rels));
+ }
}
}
@@ -1901,7 +2031,6 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
* omitting a sort step, which seems fine: if the parent is to be an Append,
* its result would be unsorted anyway, while if the parent is to be a
* MergeAppend, there's no point in a separate sort on a child.
- * its result would be unsorted anyway.
*
* Normally, either path is a partial path and subpaths is a list of partial
* paths, or else path is a non-partial plan and subpaths is a list of those.
@@ -1952,6 +2081,36 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
}
/*
+ * get_singleton_append_subpath
+ * Returns the single subpath of an Append/MergeAppend, or just
+ *		returns 'path' if it's not a single sub-path Append/MergeAppend.
+ *
+ * Note: 'path' must not be a parallel-aware path.
+ */
+static Path *
+get_singleton_append_subpath(Path *path)
+{
+ Assert(!path->parallel_aware);
+
+ if (IsA(path, AppendPath))
+ {
+ AppendPath *apath = (AppendPath *) path;
+
+ if (list_length(apath->subpaths) == 1)
+ return (Path *) linitial(apath->subpaths);
+ }
+ else if (IsA(path, MergeAppendPath))
+ {
+ MergeAppendPath *mpath = (MergeAppendPath *) path;
+
+ if (list_length(mpath->subpaths) == 1)
+ return (Path *) linitial(mpath->subpaths);
+ }
+
+ return path;
+}
+
+/*
* set_dummy_rel_pathlist
* Build a dummy path for a relation that's been excluded by constraints
*
@@ -1975,7 +2134,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
/* Set up the dummy path */
add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
- rel->lateral_relids,
+ NIL, rel->lateral_relids,
0, false, NIL, -1));
/*
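The ordering test above reduces to pathkey-list containment, where
pathkeys_contained_in(keys1, keys2) asks whether keys1 is a leading prefix
of keys2. For example, with a table partitioned by RANGE (a) and a query
ORDER BY a, b: the partition pathkeys (a) are complete and form a prefix of
the query pathkeys (a, b), so a plain Append works provided each child emits
rows already ordered by (a, b). A minimal sketch of that prefix test, with
pathkeys reduced to plain column numbers (a deliberate simplification of
real PathKey lists):

    #include <stdbool.h>

    /* true if keys1 is a leading prefix of keys2 */
    static bool
    toy_keys_contained_in(const int *keys1, int nkeys1,
                          const int *keys2, int nkeys2)
    {
        if (nkeys1 > nkeys2)
            return false;
        for (int i = 0; i < nkeys1; i++)
            if (keys1[i] != keys2[i])
                return false;
        return true;
    }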
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 4b9be13f08e..afd32884a25 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1878,27 +1878,83 @@ cost_append(AppendPath *apath)
apath->path.startup_cost = 0;
apath->path.total_cost = 0;
+ apath->path.rows = 0;
if (apath->subpaths == NIL)
return;
if (!apath->path.parallel_aware)
{
- Path *subpath = (Path *) linitial(apath->subpaths);
+ List *pathkeys = apath->path.pathkeys;
- /*
- * Startup cost of non-parallel-aware Append is the startup cost of
- * first subpath.
- */
- apath->path.startup_cost = subpath->startup_cost;
+ if (pathkeys == NIL)
+ {
+ Path *subpath = (Path *) linitial(apath->subpaths);
- /* Compute rows and costs as sums of subplan rows and costs. */
- foreach(l, apath->subpaths)
+ /*
+ * For an unordered, non-parallel-aware Append we take the startup
+ * cost as the startup cost of the first subpath.
+ */
+ apath->path.startup_cost = subpath->startup_cost;
+
+ /* Compute rows and costs as sums of subplan rows and costs. */
+ foreach(l, apath->subpaths)
+ {
+ Path *subpath = (Path *) lfirst(l);
+
+ apath->path.rows += subpath->rows;
+ apath->path.total_cost += subpath->total_cost;
+ }
+ }
+ else
{
- Path *subpath = (Path *) lfirst(l);
+ /*
+ * For an ordered, non-parallel-aware Append we take the startup
+ * cost as the sum of the subpath startup costs. This ensures
+ * that we don't underestimate the startup cost when a query's
+ * LIMIT is such that several of the children have to be run to
+ * satisfy it. This might be overkill --- another plausible hack
+ * would be to take the Append's startup cost as the maximum of
+ * the child startup costs. But we don't want to risk believing
+ * that an ORDER BY LIMIT query can be satisfied at small cost
+ * when the first child has small startup cost but later ones
+ * don't. (If we had the ability to deal with nonlinear cost
+ * interpolation for partial retrievals, we would not need to be
+ * so conservative about this.)
+ *
+ * This case is also different from the above in that we have to
+ * account for possibly injecting sorts into subpaths that aren't
+ * natively ordered.
+ */
+ foreach(l, apath->subpaths)
+ {
+ Path *subpath = (Path *) lfirst(l);
+ Path sort_path; /* dummy for result of cost_sort */
- apath->path.rows += subpath->rows;
- apath->path.total_cost += subpath->total_cost;
+ if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ {
+ /*
+ * We'll need to insert a Sort node, so include costs for
+ * that. We can use the parent's LIMIT if any, since we
+ * certainly won't pull more than that many tuples from
+ * any child.
+ */
+ cost_sort(&sort_path,
+ NULL, /* doesn't currently need root */
+ pathkeys,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ subpath = &sort_path;
+ }
+
+ apath->path.rows += subpath->rows;
+ apath->path.startup_cost += subpath->startup_cost;
+ apath->path.total_cost += subpath->total_cost;
+ }
}
}
else /* parallel-aware */
@@ -1906,6 +1962,9 @@ cost_append(AppendPath *apath)
int i = 0;
double parallel_divisor = get_parallel_divisor(&apath->path);
+ /* Parallel-aware Append never produces ordered output. */
+ Assert(apath->path.pathkeys == NIL);
+
/* Calculate startup cost. */
foreach(l, apath->subpaths)
{
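A worked example of the two startup-cost rules above, using made-up child
costs of (startup, total) = (0, 100), (5, 80) and (0, 120), and assuming no
injected sorts:

    #include <stdio.h>

    int
    main(void)
    {
        double  startup[] = {0.0, 5.0, 0.0};
        double  total[] = {100.0, 80.0, 120.0};
        double  ordered_startup = 0.0,
                sum_total = 0.0;

        for (int i = 0; i < 3; i++)
        {
            ordered_startup += startup[i];  /* ordered: sum of startups */
            sum_total += total[i];          /* both: sum of totals */
        }
        /* unordered Append: startup cost of the first child only */
        printf("unordered: startup=%.1f total=%.1f\n",
               startup[0], sum_total);
        /* ordered Append: conservative sum over all children */
        printf("ordered:   startup=%.1f total=%.1f\n",
               ordered_startup, sum_total);
        return 0;
    }

This prints startup costs of 0.0 and 5.0 respectively, reflecting the
conservative assumption that an ORDER BY ... LIMIT query might have to start
up several children before the limit is satisfied.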
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 34cc7dacdf0..e66cf328bea 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -1261,7 +1261,7 @@ mark_dummy_rel(RelOptInfo *rel)
/* Set up the dummy path */
add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
- rel->lateral_relids,
+ NIL, rel->lateral_relids,
0, false, NIL, -1));
/* Set or update cheapest_total_path and related fields */
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 56d839bb31c..68b2204b3b4 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -18,16 +18,21 @@
#include "postgres.h"
#include "access/stratnum.h"
+#include "catalog/pg_opfamily.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/plannodes.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
+#include "partitioning/partbounds.h"
#include "utils/lsyscache.h"
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
+static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
+ RelOptInfo *partrel,
+ int partkeycol);
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
@@ -547,6 +552,165 @@ build_index_pathkeys(PlannerInfo *root,
}
/*
+ * partkey_is_bool_constant_for_query
+ *
+ * If a partition key column is constrained to have a constant value by the
+ * query's WHERE conditions, then it's irrelevant for sort-order
+ * considerations. Usually that means we have a restriction clause
+ * WHERE partkeycol = constant, which gets turned into an EquivalenceClass
+ * containing a constant, which is recognized as redundant by
+ * build_partition_pathkeys(). But if the partition key column is a
+ * boolean variable (or expression), then we are not going to see such a
+ * WHERE clause, because expression preprocessing will have simplified it
+ * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going
+ * to have a matching EquivalenceClass (unless the query also contains
+ * "ORDER BY partkeycol"). To allow such cases to work the same as they would
+ * for non-boolean values, this function is provided to detect whether the
+ * specified partition key column matches a boolean restriction clause.
+ */
+static bool
+partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol)
+{
+ PartitionScheme partscheme = partrel->part_scheme;
+ ListCell *lc;
+
+ /* If the partkey isn't boolean, we can't possibly get a match */
+ if (!IsBooleanOpfamily(partscheme->partopfamily[partkeycol]))
+ return false;
+
+ /* Check each restriction clause for the partitioned rel */
+ foreach(lc, partrel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Ignore pseudoconstant quals, they won't match */
+ if (rinfo->pseudoconstant)
+ continue;
+
+ /* See if we can match the clause's expression to the partkey column */
+ if (matches_boolean_partition_clause(rinfo, partrel, partkeycol))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * matches_boolean_partition_clause
+ * Determine if the boolean clause described by rinfo matches
+ * partrel's partkeycol-th partition key column.
+ *
+ * "Matches" can be either an exact match (equivalent to partkey = true),
+ * or a NOT above an exact match (equivalent to partkey = false).
+ */
+static bool
+matches_boolean_partition_clause(RestrictInfo *rinfo,
+ RelOptInfo *partrel, int partkeycol)
+{
+ Node *clause = (Node *) rinfo->clause;
+ Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]);
+
+ /* Direct match? */
+ if (equal(partexpr, clause))
+ return true;
+ /* NOT clause? */
+ else if (is_notclause(clause))
+ {
+ Node *arg = (Node *) get_notclausearg((Expr *) clause);
+
+ if (equal(partexpr, arg))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * build_partition_pathkeys
+ * Build a pathkeys list that describes the ordering induced by the
+ * partitions of partrel, under either forward or backward scan
+ * as per scandir.
+ *
+ * Caller must have checked that the partitions are properly ordered,
+ * as detected by partitions_are_ordered().
+ *
+ * Sets *partialkeys to true if pathkeys were only built for a prefix of the
+ * partition key, or false if the pathkeys include all columns of the
+ * partition key.
+ */
+List *
+build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
+ ScanDirection scandir, bool *partialkeys)
+{
+ List *retval = NIL;
+ PartitionScheme partscheme = partrel->part_scheme;
+ int i;
+
+ Assert(partscheme != NULL);
+ Assert(partitions_are_ordered(partrel->boundinfo, partrel->nparts));
+ /* For now, we can only cope with baserels */
+ Assert(IS_SIMPLE_REL(partrel));
+
+ for (i = 0; i < partscheme->partnatts; i++)
+ {
+ PathKey *cpathkey;
+ Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]);
+
+ /*
+ * Try to make a canonical pathkey for this partkey.
+ *
+ * We're considering a baserel scan, so nullable_relids should be
+ * NULL. Also, we assume the PartitionDesc lists any NULL partition
+ * last, so we treat the scan like a NULLS LAST index: we have
+ * nulls_first for backwards scan only.
+ */
+ cpathkey = make_pathkey_from_sortinfo(root,
+ keyCol,
+ NULL,
+ partscheme->partopfamily[i],
+ partscheme->partopcintype[i],
+ partscheme->partcollation[i],
+ ScanDirectionIsBackward(scandir),
+ ScanDirectionIsBackward(scandir),
+ 0,
+ partrel->relids,
+ false);
+
+ if (cpathkey)
+ {
+ /*
+ * We found the sort key in an EquivalenceClass, so it's relevant
+ * for this query. Add it to list, unless it's redundant.
+ */
+ if (!pathkey_is_redundant(cpathkey, retval))
+ retval = lappend(retval, cpathkey);
+ }
+ else
+ {
+ /*
+ * Boolean partition keys might be redundant even if they do not
+ * appear in an EquivalenceClass, because of our special treatment
+ * of boolean equality conditions --- see the comment for
+ * partkey_is_bool_constant_for_query(). If that applies, we can
+ * continue to examine lower-order partition keys. Otherwise, the
+ * sort key is not an interesting sort order for this query, so we
+ * should stop considering partition columns; any lower-order sort
+ * keys won't be useful either.
+ */
+ if (!partkey_is_bool_constant_for_query(partrel, i))
+ {
+ *partialkeys = true;
+ return retval;
+ }
+ }
+ }
+
+ *partialkeys = false;
+ return retval;
+}
+
+/*
* build_expression_pathkey
* Build a pathkeys list that describes an ordering by a single expression
* using the given sort operator.
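The boolean special case handles queries like WHERE active on a table
partitioned by (active, id): expression preprocessing reduces "active =
true" to just "active", so no constant-bearing EquivalenceClass is built,
yet the column is effectively constant and pathkeys on (id) alone still
describe the output order. A toy version of the two "matches" cases, with
expressions reduced to strings (the column names are hypothetical):

    #include <stdbool.h>
    #include <string.h>

    static bool
    toy_matches_bool_clause(const char *partexpr, const char *clause)
    {
        if (strcmp(partexpr, clause) == 0)
            return true;                    /* WHERE partkeycol */
        if (strncmp(clause, "NOT ", 4) == 0 &&
            strcmp(partexpr, clause + 4) == 0)
            return true;                    /* WHERE NOT partkeycol */
        return false;
    }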
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index cc222cb06cf..efe073a3eeb 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -205,8 +205,6 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual
Index scanrelid, char *enrname);
static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
Index scanrelid, int wtParam);
-static Append *make_append(List *appendplans, int first_partial_plan,
- List *tlist, PartitionPruneInfo *partpruneinfo);
static RecursiveUnion *make_recursive_union(List *tlist,
Plan *lefttree,
Plan *righttree,
@@ -1060,10 +1058,16 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
{
Append *plan;
List *tlist = build_path_tlist(root, &best_path->path);
+ List *pathkeys = best_path->path.pathkeys;
List *subplans = NIL;
ListCell *subpaths;
RelOptInfo *rel = best_path->path.parent;
PartitionPruneInfo *partpruneinfo = NULL;
+ int nodenumsortkeys = 0;
+ AttrNumber *nodeSortColIdx = NULL;
+ Oid *nodeSortOperators = NULL;
+ Oid *nodeCollations = NULL;
+ bool *nodeNullsFirst = NULL;
/*
* The subpaths list could be empty, if every child was proven empty by
@@ -1089,6 +1093,37 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
return plan;
}
+ /*
+ * Otherwise build an Append plan. Note that if there's just one child,
+ * the Append is pretty useless; but we wait till setrefs.c to get rid of
+ * it. Doing so here doesn't work because the varno of the child scan
+ * plan won't match the parent-rel Vars it'll be asked to emit.
+ *
+ * We don't have the actual creation of the Append node split out into a
+ * separate make_xxx function. This is because we want to run
+ * prepare_sort_from_pathkeys on it before we do so on the individual
+ * child plans, to make cross-checking the sort info easier.
+ */
+ plan = makeNode(Append);
+ plan->plan.targetlist = tlist;
+ plan->plan.qual = NIL;
+ plan->plan.lefttree = NULL;
+ plan->plan.righttree = NULL;
+
+ if (pathkeys != NIL)
+ {
+ /* Compute sort column info, and adjust the Append's tlist as needed */
+ (void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys,
+ best_path->path.parent->relids,
+ NULL,
+ true,
+ &nodenumsortkeys,
+ &nodeSortColIdx,
+ &nodeSortOperators,
+ &nodeCollations,
+ &nodeNullsFirst);
+ }
+
/* Build the plan for each child */
foreach(subpaths, best_path->subpaths)
{
@@ -1098,6 +1133,63 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
/* Must insist that all children return the same tlist */
subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST);
+ /*
+ * For ordered Appends, we must insert a Sort node if subplan isn't
+ * sufficiently ordered.
+ */
+ if (pathkeys != NIL)
+ {
+ int numsortkeys;
+ AttrNumber *sortColIdx;
+ Oid *sortOperators;
+ Oid *collations;
+ bool *nullsFirst;
+
+ /*
+ * Compute sort column info, and adjust subplan's tlist as needed.
+ * We must apply prepare_sort_from_pathkeys even to subplans that
+ * don't need an explicit sort, to make sure they are returning
+ * the same sort key columns the Append expects.
+ */
+ subplan = prepare_sort_from_pathkeys(subplan, pathkeys,
+ subpath->parent->relids,
+ nodeSortColIdx,
+ false,
+ &numsortkeys,
+ &sortColIdx,
+ &sortOperators,
+ &collations,
+ &nullsFirst);
+
+ /*
+ * Check that we got the same sort key information. We just
+ * Assert that the sortops match, since those depend only on the
+ * pathkeys; but it seems like a good idea to check the sort
+ * column numbers explicitly, to ensure the tlists match up.
+ */
+ Assert(numsortkeys == nodenumsortkeys);
+ if (memcmp(sortColIdx, nodeSortColIdx,
+ numsortkeys * sizeof(AttrNumber)) != 0)
+ elog(ERROR, "Append child's targetlist doesn't match Append");
+ Assert(memcmp(sortOperators, nodeSortOperators,
+ numsortkeys * sizeof(Oid)) == 0);
+ Assert(memcmp(collations, nodeCollations,
+ numsortkeys * sizeof(Oid)) == 0);
+ Assert(memcmp(nullsFirst, nodeNullsFirst,
+ numsortkeys * sizeof(bool)) == 0);
+
+ /* Now, insert a Sort node if subplan isn't sufficiently ordered */
+ if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ {
+ Sort *sort = make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, sort, best_path->limit_tuples);
+ subplan = (Plan *) sort;
+ }
+ }
+
subplans = lappend(subplans, subplan);
}
@@ -1133,15 +1225,9 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
prunequal);
}
- /*
- * And build the Append plan. Note that if there's just one child, the
- * Append is pretty useless; but we wait till setrefs.c to get rid of it.
- * Doing so here doesn't work because the varno of the child scan plan
- * won't match the parent-rel Vars it'll be asked to emit.
- */
-
- plan = make_append(subplans, best_path->first_partial_path,
- tlist, partpruneinfo);
+ plan->appendplans = subplans;
+ plan->first_partial_plan = best_path->first_partial_path;
+ plan->part_prune_info = partpruneinfo;
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -1266,7 +1352,6 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
if (best_path->path.param_info)
{
-
List *prmquals = best_path->path.param_info->ppi_clauses;
prmquals = extract_actual_clauses(prmquals, false);
@@ -5300,23 +5385,6 @@ make_foreignscan(List *qptlist,
return node;
}
-static Append *
-make_append(List *appendplans, int first_partial_plan,
- List *tlist, PartitionPruneInfo *partpruneinfo)
-{
- Append *node = makeNode(Append);
- Plan *plan = &node->plan;
-
- plan->targetlist = tlist;
- plan->qual = NIL;
- plan->lefttree = NULL;
- plan->righttree = NULL;
- node->appendplans = appendplans;
- node->first_partial_plan = first_partial_plan;
- node->part_prune_info = partpruneinfo;
- return node;
-}
-
static RecursiveUnion *
make_recursive_union(List *tlist,
Plan *lefttree,
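The net effect is a plan shape like the following (hypothetical table t
partitioned by RANGE (a), where only partition t1 has a usable index on a):

    Limit
      -> Append
           -> Index Scan using t1_a_idx on t1
           -> Sort
                Sort Key: t2.a
                -> Seq Scan on t2

Each child either produces the required ordering natively or gets an
explicit Sort (labeled with the Append's limit_tuples), and because the
partitions themselves are ordered, the Append can simply concatenate the
children instead of merging them.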
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e2cdc83613d..cbd3fb8e0ea 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1721,7 +1721,8 @@ inheritance_planner(PlannerInfo *root)
/* Make a dummy path, cf set_dummy_rel_pathlist() */
dummy_path = (Path *) create_append_path(NULL, final_rel, NIL, NIL,
- NULL, 0, false, NIL, -1);
+ NIL, NULL, 0, false,
+ NIL, -1);
/* These lists must be nonempty to make a valid ModifyTable node */
subpaths = list_make1(dummy_path);
@@ -4003,6 +4004,7 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel,
paths,
NIL,
+ NIL,
NULL,
0,
false,
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index eb815c2f126..c1def3823fa 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -617,7 +617,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
* Append the child results together.
*/
path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
- NULL, 0, false, NIL, -1);
+ NIL, NULL, 0, false, NIL, -1);
/*
* For UNION ALL, we just need the Append path. For UNION, need to add
@@ -672,7 +672,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
ppath = (Path *)
create_append_path(root, result_rel, NIL, partial_pathlist,
- NULL, parallel_workers, enable_parallel_append,
+ NIL, NULL,
+ parallel_workers, enable_parallel_append,
NIL, -1);
ppath = (Path *)
create_gather_path(root, result_rel, ppath,
@@ -783,7 +784,7 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
* Append the child results together.
*/
path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
- NULL, 0, false, NIL, -1);
+ NIL, NULL, 0, false, NIL, -1);
/* Identify the grouping semantics */
groupList = generate_setop_grouplist(op, tlist);
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 1ea89ff54c9..36aee35d462 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1203,12 +1203,13 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
* pathnode.
*
* Note that we must handle subpaths = NIL, representing a dummy access path.
+ * Also, there are callers that pass root = NULL.
*/
AppendPath *
create_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths, List *partial_subpaths,
- Relids required_outer,
+ List *pathkeys, Relids required_outer,
int parallel_workers, bool parallel_aware,
List *partitioned_rels, double rows)
{
@@ -1242,6 +1243,7 @@ create_append_path(PlannerInfo *root,
pathnode->path.parallel_aware = parallel_aware;
pathnode->path.parallel_safe = rel->consider_parallel;
pathnode->path.parallel_workers = parallel_workers;
+ pathnode->path.pathkeys = pathkeys;
pathnode->partitioned_rels = list_copy(partitioned_rels);
/*
@@ -1255,6 +1257,13 @@ create_append_path(PlannerInfo *root,
*/
if (pathnode->path.parallel_aware)
{
+ /*
+ * We mustn't fiddle with the order of subpaths when the Append has
+ * pathkeys. The order they're listed in is critical to keeping the
+ * pathkeys valid.
+ */
+ Assert(pathkeys == NIL);
+
subpaths = list_qsort(subpaths, append_total_cost_compare);
partial_subpaths = list_qsort(partial_subpaths,
append_startup_cost_compare);
@@ -1262,6 +1271,15 @@ create_append_path(PlannerInfo *root,
pathnode->first_partial_path = list_length(subpaths);
pathnode->subpaths = list_concat(subpaths, partial_subpaths);
+ /*
+ * Apply query-wide LIMIT if known and path is for sole base relation.
+ * (Handling this at this low level is a bit klugy.)
+ */
+ if (root != NULL && bms_equal(rel->relids, root->all_baserels))
+ pathnode->limit_tuples = root->limit_tuples;
+ else
+ pathnode->limit_tuples = -1.0;
+
foreach(l, pathnode->subpaths)
{
Path *subpath = (Path *) lfirst(l);
@@ -1278,8 +1296,9 @@ create_append_path(PlannerInfo *root,
/*
* If there's exactly one child path, the Append is a no-op and will be
* discarded later (in setrefs.c); therefore, we can inherit the child's
- * size, cost, and pathkeys if any. Otherwise, it's unsorted, and we must
- * do the normal costsize calculation.
+ * size and cost, as well as its pathkeys if any (overriding whatever the
+ * caller might've said). Otherwise, we must do the normal costsize
+ * calculation.
*/
if (list_length(pathnode->subpaths) == 1)
{
@@ -1291,10 +1310,7 @@ create_append_path(PlannerInfo *root,
pathnode->path.pathkeys = child->pathkeys;
}
else
- {
- pathnode->path.pathkeys = NIL; /* unsorted if more than 1 subpath */
cost_append(pathnode);
- }
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)
@@ -3759,7 +3775,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
}
return (Path *)
create_append_path(root, rel, childpaths, partialpaths,
- required_outer,
+ apath->path.pathkeys, required_outer,
apath->path.parallel_workers,
apath->path.parallel_aware,
apath->partitioned_rels,
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index bdd0d238542..c8770ccfee0 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -862,6 +862,70 @@ partition_bounds_copy(PartitionBoundInfo src,
}
/*
+ * partitions_are_ordered
+ * Determine whether the partitions described by 'boundinfo' are ordered,
+ *		that is, partitions appearing earlier in the PartitionDesc sequence
+ * contain partition keys strictly less than those appearing later.
+ * Also, if NULL values are possible, they must come in the last
+ * partition defined in the PartitionDesc.
+ *
+ * If out of order, or there is insufficient info to know the order,
+ * then we return false.
+ */
+bool
+partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts)
+{
+ Assert(boundinfo != NULL);
+
+ switch (boundinfo->strategy)
+ {
+ case PARTITION_STRATEGY_RANGE:
+
+ /*
+ * RANGE-type partitioning guarantees that the partitions can be
+ * scanned in the order that they're defined in the PartitionDesc
+ * to provide sequential, non-overlapping ranges of tuples.
+ * However, if a DEFAULT partition exists then it doesn't work, as
+ * that could contain tuples from either below or above the
+ * defined range, or tuples belonging to gaps between partitions.
+ */
+ if (!partition_bound_has_default(boundinfo))
+ return true;
+ break;
+
+ case PARTITION_STRATEGY_LIST:
+
+ /*
+ * LIST partitioning can also guarantee ordering, but only if the
+ * partitions don't accept interleaved values. We could likely
+ * check for this by looping over the PartitionBound's indexes
+ * array to check that the indexes are in order. For now, let's
+ * keep it simple and just accept LIST partitioning when
+ * there's no DEFAULT partition, exactly one value per partition,
+ * and optionally a NULL partition that does not accept any other
+ * values. Such a NULL partition will come last in the
+ * PartitionDesc, and the other partitions will be properly
+ * ordered. This is a cheap test to make as it does not require
+ * any per-partition processing. Maybe we'd like to handle more
+ * complex cases in the future.
+ */
+ if (partition_bound_has_default(boundinfo))
+ return false;
+
+ if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo)
+ == nparts)
+ return true;
+ break;
+
+ default:
+ /* HASH, or some other strategy */
+ break;
+ }
+
+ return false;
+}
+
+/*
* check_new_partition_bound
*
* Checks if the new partition's bound overlaps any of the existing partitions
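For the LIST case of partitions_are_ordered above, the accepted shape is
cheap to test: with no DEFAULT partition, one datum per partition plus at
most one NULL partition guarantees the PartitionDesc order matches the
value order. A toy version of that check, with hypothetical bounds:

    #include <stdbool.h>

    /*
     * p1 IN (1), p2 IN (2), p3 IN (3): ndatums=3, nulls=false, nparts=3
     *   -> ordered
     * p1 IN (1, 4), p2 IN (2, 3):      ndatums=4, nulls=false, nparts=2
     *   -> cannot prove ordering (values may interleave)
     */
    static bool
    toy_list_parts_ordered(int ndatums, bool accepts_nulls, int nparts)
    {
        return ndatums + (accepts_nulls ? 1 : 0) == nparts;
    }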