Use Append rather than MergeAppend for scanning ordered partitions.

If we need ordered output from a scan of a partitioned table, but the ordering matches the partition ordering, then we don't need to use a MergeAppend to combine the pre-ordered per-partition scan results: a plain Append will produce the same results. This both saves useless comparison work inside the MergeAppend proper, and allows us to start returning tuples after istarting up just the first child node not all of them. However, all is not peaches and cream, because if some of the child nodes have high startup costs then there will be big discontinuities in the tuples-returned-versus-elapsed-time curve. The planner's cost model cannot handle that (yet, anyway). If we model the Append's startup cost as being just the first child's startup cost, we may drastically underestimate the cost of fetching slightly more tuples than are available from the first child. Since we've had bad experiences with over-optimistic choices of "fast start" plans for ORDER BY LIMIT queries, that seems scary. As a klugy workaround, set the startup cost estimate for an ordered Append to be the sum of its children's startup costs (as MergeAppend would). This doesn't really describe reality, but it's less likely to cause a bad plan choice than an underestimated startup cost would. In practice, the cases where we really care about this optimization will have child plans that are IndexScans with zero startup cost, so that the overly conservative estimate is still just zero. David Rowley, reviewed by Julien Rouhaud and Antonin Houska Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
author: Tom Lane <tgl@sss.pgh.pa.us> 2019-04-05 19:20:30 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2019-04-05 19:20:43 -0400
commit: 959d00e9dbe4cfcf4a63bb655ac2c29a5e579246 (patch)
tree: 4c260f752780d317d1f2ebab8aae553dc4dc8236 /src/backend/optimizer/util/pathnode.c
parent: 9f06d79ef831ffa333f908f6d3debdb654292414 (diff)
download: postgresql-959d00e9dbe4cfcf4a63bb655ac2c29a5e579246.tar.gz
postgresql-959d00e9dbe4cfcf4a63bb655ac2c29a5e579246.zip
1 files changed, 23 insertions, 7 deletions
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 1ea89ff54c9..36aee35d462 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1203,12 +1203,13 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
  *	  pathnode.
  *
  * Note that we must handle subpaths = NIL, representing a dummy access path.
+ * Also, there are callers that pass root = NULL.
  */
 AppendPath *
 create_append_path(PlannerInfo *root,
 				   RelOptInfo *rel,
 				   List *subpaths, List *partial_subpaths,
-				   Relids required_outer,
+				   List *pathkeys, Relids required_outer,
 				   int parallel_workers, bool parallel_aware,
 				   List *partitioned_rels, double rows)
 {
@@ -1242,6 +1243,7 @@ create_append_path(PlannerInfo *root,
 	pathnode->path.parallel_aware = parallel_aware;
 	pathnode->path.parallel_safe = rel->consider_parallel;
 	pathnode->path.parallel_workers = parallel_workers;
+	pathnode->path.pathkeys = pathkeys;
 	pathnode->partitioned_rels = list_copy(partitioned_rels);
 
 	/*
@@ -1255,6 +1257,13 @@ create_append_path(PlannerInfo *root,
 	 */
 	if (pathnode->path.parallel_aware)
 	{
+		/*
+		 * We mustn't fiddle with the order of subpaths when the Append has
+		 * pathkeys.  The order they're listed in is critical to keeping the
+		 * pathkeys valid.
+		 */
+		Assert(pathkeys == NIL);
+
 		subpaths = list_qsort(subpaths, append_total_cost_compare);
 		partial_subpaths = list_qsort(partial_subpaths,
 									  append_startup_cost_compare);
@@ -1262,6 +1271,15 @@ create_append_path(PlannerInfo *root,
 	pathnode->first_partial_path = list_length(subpaths);
 	pathnode->subpaths = list_concat(subpaths, partial_subpaths);
 
+	/*
+	 * Apply query-wide LIMIT if known and path is for sole base relation.
+	 * (Handling this at this low level is a bit klugy.)
+	 */
+	if (root != NULL && bms_equal(rel->relids, root->all_baserels))
+		pathnode->limit_tuples = root->limit_tuples;
+	else
+		pathnode->limit_tuples = -1.0;
+
 	foreach(l, pathnode->subpaths)
 	{
 		Path	   *subpath = (Path *) lfirst(l);
@@ -1278,8 +1296,9 @@ create_append_path(PlannerInfo *root,
 	/*
 	 * If there's exactly one child path, the Append is a no-op and will be
 	 * discarded later (in setrefs.c); therefore, we can inherit the child's
-	 * size, cost, and pathkeys if any.  Otherwise, it's unsorted, and we must
-	 * do the normal costsize calculation.
+	 * size and cost, as well as its pathkeys if any (overriding whatever the
+	 * caller might've said).  Otherwise, we must do the normal costsize
+	 * calculation.
 	 */
 	if (list_length(pathnode->subpaths) == 1)
 	{
@@ -1291,10 +1310,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-	{
-		pathnode->path.pathkeys = NIL;	/* unsorted if more than 1 subpath */
 		cost_append(pathnode);
-	}
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
@@ -3759,7 +3775,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
 				}
 				return (Path *)
 					create_append_path(root, rel, childpaths, partialpaths,
-									   required_outer,
+									   apath->path.pathkeys, required_outer,
 									   apath->path.parallel_workers,
 									   apath->path.parallel_aware,
 									   apath->partitioned_rels,
author	Tom Lane <tgl@sss.pgh.pa.us>	2019-04-05 19:20:30 -0400
committer	Tom Lane <tgl@sss.pgh.pa.us>	2019-04-05 19:20:43 -0400
commit	959d00e9dbe4cfcf4a63bb655ac2c29a5e579246 (patch)
tree	4c260f752780d317d1f2ebab8aae553dc4dc8236 /src/backend/optimizer/util/pathnode.c
parent	9f06d79ef831ffa333f908f6d3debdb654292414 (diff)
download	postgresql-959d00e9dbe4cfcf4a63bb655ac2c29a5e579246.tar.gz postgresql-959d00e9dbe4cfcf4a63bb655ac2c29a5e579246.zip