aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/postgres_fdw/expected/postgres_fdw.out7
-rw-r--r--contrib/postgres_fdw/sql/postgres_fdw.sql9
-rw-r--r--src/backend/optimizer/path/allpaths.c5
-rw-r--r--src/backend/optimizer/path/equivclass.c61
-rw-r--r--src/backend/optimizer/path/pathkeys.c19
-rw-r--r--src/backend/optimizer/plan/planner.c116
-rw-r--r--src/backend/optimizer/plan/subselect.c15
-rw-r--r--src/backend/optimizer/prep/prepunion.c720
-rw-r--r--src/backend/parser/analyze.c3
-rw-r--r--src/include/nodes/pathnodes.h2
-rw-r--r--src/include/optimizer/paths.h4
-rw-r--r--src/include/optimizer/planner.h3
-rw-r--r--src/include/optimizer/prep.h2
-rw-r--r--src/test/regress/expected/collate.icu.utf8.out2
-rw-r--r--src/test/regress/expected/incremental_sort.out13
-rw-r--r--src/test/regress/expected/union.out46
-rw-r--r--src/test/regress/sql/collate.icu.utf8.sql2
-rw-r--r--src/test/regress/sql/union.sql19
18 files changed, 287 insertions, 761 deletions
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 078b8a966f8..9ae36d3059d 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -11511,10 +11511,6 @@ DROP INDEX base_tbl1_idx;
DROP INDEX base_tbl2_idx;
DROP INDEX async_p3_idx;
-- UNION queries
-SET enable_sort TO off;
-SET enable_incremental_sort TO off;
--- Adjust fdw_startup_cost so that we get an unordered path in the Append.
-ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@@ -11596,9 +11592,6 @@ SELECT * FROM result_tbl ORDER BY a;
(12 rows)
DELETE FROM result_tbl;
-RESET enable_incremental_sort;
-RESET enable_sort;
-ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
-- Disable async execution if we use gating Result nodes for pseudoconstant
-- quals
EXPLAIN (VERBOSE, COSTS OFF)
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 09ba234e43d..1f31ac14df0 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -3885,11 +3885,6 @@ DROP INDEX base_tbl2_idx;
DROP INDEX async_p3_idx;
-- UNION queries
-SET enable_sort TO off;
-SET enable_incremental_sort TO off;
--- Adjust fdw_startup_cost so that we get an unordered path in the Append.
-ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
-
EXPLAIN (VERBOSE, COSTS OFF)
INSERT INTO result_tbl
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@@ -3916,10 +3911,6 @@ UNION ALL
SELECT * FROM result_tbl ORDER BY a;
DELETE FROM result_tbl;
-RESET enable_incremental_sort;
-RESET enable_sort;
-ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
-
-- Disable async execution if we use gating Result nodes for pseudoconstant
-- quals
EXPLAIN (VERBOSE, COSTS OFF)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 4895cee9944..10aeebc2c1d 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2633,8 +2633,9 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Assert(root->plan_params == NIL);
/* Generate a subroot and Paths for the subquery */
- rel->subroot = subquery_planner(root->glob, subquery, root, false,
- tuple_fraction, NULL);
+ rel->subroot = subquery_planner(root->glob, subquery,
+ root,
+ false, tuple_fraction);
/* Isolate the params needed by this specific subplan */
rel->subplan_params = root->plan_params;
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 21ce1ae2e13..13400af3ef5 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -2882,67 +2882,6 @@ add_child_join_rel_equivalences(PlannerInfo *root,
MemoryContextSwitchTo(oldcontext);
}
-/*
- * add_setop_child_rel_equivalences
- * Add equivalence members for each non-resjunk target in 'child_tlist'
- * to the EquivalenceClass in the corresponding setop_pathkey's pk_eclass.
- *
- * 'root' is the PlannerInfo belonging to the top-level set operation.
- * 'child_rel' is the RelOptInfo of the child relation we're adding
- * EquivalenceMembers for.
- * 'child_tlist' is the target list for the setop child relation. The target
- * list expressions are what we add as EquivalenceMembers.
- * 'setop_pathkeys' is a list of PathKeys which must contain an entry for each
- * non-resjunk target in 'child_tlist'.
- */
-void
-add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel,
- List *child_tlist, List *setop_pathkeys)
-{
- ListCell *lc;
- ListCell *lc2 = list_head(setop_pathkeys);
-
- foreach(lc, child_tlist)
- {
- TargetEntry *tle = lfirst_node(TargetEntry, lc);
- EquivalenceMember *parent_em;
- PathKey *pk;
-
- if (tle->resjunk)
- continue;
-
- if (lc2 == NULL)
- elog(ERROR, "too few pathkeys for set operation");
-
- pk = lfirst_node(PathKey, lc2);
- parent_em = linitial(pk->pk_eclass->ec_members);
-
- /*
- * We can safely pass the parent member as the first member in the
- * ec_members list as this is added first in generate_union_paths,
- * likewise, the JoinDomain can be that of the initial member of the
- * Pathkey's EquivalenceClass.
- */
- add_eq_member(pk->pk_eclass,
- tle->expr,
- child_rel->relids,
- parent_em->em_jdomain,
- parent_em,
- exprType((Node *) tle->expr));
-
- lc2 = lnext(setop_pathkeys, lc2);
- }
-
- /*
- * transformSetOperationStmt() ensures that the targetlist never contains
- * any resjunk columns, so all eclasses that exist in 'root' must have
- * received a new member in the loop above. Add them to the child_rel's
- * eclass_indexes.
- */
- child_rel->eclass_indexes = bms_add_range(child_rel->eclass_indexes, 0,
- list_length(root->eq_classes) - 1);
-}
-
/*
* generate_implied_equalities_for_column
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 8b258cbef92..157bc6a36d4 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -2192,22 +2192,6 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
}
/*
- * pathkeys_useful_for_setop
- * Count the number of leading common pathkeys root's 'setop_pathkeys' in
- * 'pathkeys'.
- */
-static int
-pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
-{
- int n_common_pathkeys;
-
- (void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
- &n_common_pathkeys);
-
- return n_common_pathkeys;
-}
-
-/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
*/
@@ -2226,9 +2210,6 @@ truncate_useless_pathkeys(PlannerInfo *root,
nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
- nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
- if (nuseful2 > nuseful)
- nuseful = nuseful2;
/*
* Note: not safe to modify input list destructively, but we can avoid
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 032818423f6..ddd23878409 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -54,7 +54,6 @@
#include "optimizer/tlist.h"
#include "parser/analyze.h"
#include "parser/parse_agg.h"
-#include "parser/parse_clause.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "partitioning/partdesc.h"
@@ -120,15 +119,12 @@ typedef struct
{
List *activeWindows; /* active windows, if any */
grouping_sets_data *gset_data; /* grouping sets data, if any */
- SetOperationStmt *setop; /* parent set operation or NULL if not a
- * subquery belonging to a set operation */
} standard_qp_extra;
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
-static void grouping_planner(PlannerInfo *root, double tuple_fraction,
- SetOperationStmt *setops);
+static void grouping_planner(PlannerInfo *root, double tuple_fraction);
static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
int *tleref_to_colnum_map);
@@ -253,8 +249,6 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
List *targetList,
List *groupClause);
static int common_prefix_cmp(const void *a, const void *b);
-static List *generate_setop_child_grouplist(SetOperationStmt *op,
- List *targetlist);
/*****************************************************************************
@@ -412,7 +406,8 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
}
/* primary planning entry point (may recurse for subqueries) */
- root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
+ root = subquery_planner(glob, parse, NULL,
+ false, tuple_fraction);
/* Select best Path and turn it into a Plan */
final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
@@ -603,10 +598,6 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
* hasRecursion is true if this is a recursive WITH query.
* tuple_fraction is the fraction of tuples we expect will be retrieved.
* tuple_fraction is interpreted as explained for grouping_planner, below.
- * setops is used for set operation subqueries to provide the subquery with
- * the context in which it's being used so that Paths correctly sorted for the
- * set operation can be generated. NULL when not planning a set operation
- * child.
*
* Basically, this routine does the stuff that should only be done once
* per Query object. It then calls grouping_planner. At one time,
@@ -625,9 +616,9 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
*--------------------
*/
PlannerInfo *
-subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
- bool hasRecursion, double tuple_fraction,
- SetOperationStmt *setops)
+subquery_planner(PlannerGlobal *glob, Query *parse,
+ PlannerInfo *parent_root,
+ bool hasRecursion, double tuple_fraction)
{
PlannerInfo *root;
List *newWithCheckOptions;
@@ -1086,7 +1077,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
/*
* Do the main planning.
*/
- grouping_planner(root, tuple_fraction, setops);
+ grouping_planner(root, tuple_fraction);
/*
* Capture the set of outer-level param IDs we have access to, for use in
@@ -1284,11 +1275,7 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr)
* 0 < tuple_fraction < 1: expect the given fraction of tuples available
* from the plan to be retrieved
* tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
- * expected to be retrieved (ie, a LIMIT specification).
- * setops is used for set operation subqueries to provide the subquery with
- * the context in which it's being used so that Paths correctly sorted for the
- * set operation can be generated. NULL when not planning a set operation
- * child.
+ * expected to be retrieved (ie, a LIMIT specification)
*
* Returns nothing; the useful output is in the Paths we attach to the
* (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
@@ -1299,8 +1286,7 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr)
*--------------------
*/
static void
-grouping_planner(PlannerInfo *root, double tuple_fraction,
- SetOperationStmt *setops)
+grouping_planner(PlannerInfo *root, double tuple_fraction)
{
Query *parse = root->parse;
int64 offset_est = 0;
@@ -1336,6 +1322,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
if (parse->setOperations)
{
/*
+ * If there's a top-level ORDER BY, assume we have to fetch all the
+ * tuples. This might be too simplistic given all the hackery below
+ * to possibly avoid the sort; but the odds of accurate estimates here
+ * are pretty low anyway. XXX try to get rid of this in favor of
+ * letting plan_set_operations generate both fast-start and
+ * cheapest-total paths.
+ */
+ if (parse->sortClause)
+ root->tuple_fraction = 0.0;
+
+ /*
* Construct Paths for set operations. The results will not need any
* work except perhaps a top-level sort and/or LIMIT. Note that any
* special work for recursive unions is the responsibility of
@@ -1505,12 +1502,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
qp_extra.gset_data = gset_data;
/*
- * If we're a subquery for a set operation, store the SetOperationStmt
- * in qp_extra.
- */
- qp_extra.setop = setops;
-
- /*
* Generate the best unsorted and presorted paths for the scan/join
* portion of this Query, ie the processing represented by the
* FROM/WHERE clauses. (Note there may not be any presorted paths.)
@@ -3462,27 +3453,6 @@ standard_qp_callback(PlannerInfo *root, void *extra)
parse->sortClause,
tlist);
- /* setting setop_pathkeys might be useful to the union planner */
- if (qp_extra->setop != NULL &&
- set_operation_ordered_results_useful(qp_extra->setop))
- {
- List *groupClauses;
- bool sortable;
-
- groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
-
- root->setop_pathkeys =
- make_pathkeys_for_sortclauses_extended(root,
- &groupClauses,
- tlist,
- false,
- &sortable);
- if (!sortable)
- root->setop_pathkeys = NIL;
- }
- else
- root->setop_pathkeys = NIL;
-
/*
* Figure out whether we want a sorted result from query_planner.
*
@@ -3492,9 +3462,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
* we try to produce output that's sufficiently well sorted for the
* DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
- * by the ORDER BY clause. Otherwise, if we're a subquery being planned
- * for a set operation which can benefit from presorted results and have a
- * sortable targetlist, we want to sort by the target list.
+ * by the ORDER BY clause.
*
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
* of GROUP BY, it would be tempting to request sort by ORDER BY --- but
@@ -3512,8 +3480,6 @@ standard_qp_callback(PlannerInfo *root, void *extra)
root->query_pathkeys = root->distinct_pathkeys;
else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
- else if (root->setop_pathkeys != NIL)
- root->query_pathkeys = root->setop_pathkeys;
else
root->query_pathkeys = NIL;
}
@@ -7957,43 +7923,3 @@ group_by_has_partkey(RelOptInfo *input_rel,
return true;
}
-
-/*
- * generate_setop_child_grouplist
- * Build a SortGroupClause list defining the sort/grouping properties
- * of the child of a set operation.
- *
- * This is similar to generate_setop_grouplist() but differs as the setop
- * child query's targetlist entries may already have a tleSortGroupRef
- * assigned for other purposes, such as GROUP BYs. Here we keep the
- * SortGroupClause list in the same order as 'op' groupClauses and just adjust
- * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
- */
-static List *
-generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
-{
- List *grouplist = copyObject(op->groupClauses);
- ListCell *lg;
- ListCell *lt;
-
- lg = list_head(grouplist);
- foreach(lt, targetlist)
- {
- TargetEntry *tle = (TargetEntry *) lfirst(lt);
- SortGroupClause *sgc;
-
- /* resjunk columns could have sortgrouprefs. Leave these alone */
- if (tle->resjunk)
- continue;
-
- /* we expect every non-resjunk target to have a SortGroupClause */
- Assert(lg != NULL);
- sgc = (SortGroupClause *) lfirst(lg);
- lg = lnext(grouplist, lg);
-
- /* assign a tleSortGroupRef, or reuse the existing one */
- sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
- }
- Assert(lg == NULL);
- return grouplist;
-}
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index e35ebea8b43..d5fa281b102 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -218,8 +218,9 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
Assert(root->plan_params == NIL);
/* Generate Paths for the subquery */
- subroot = subquery_planner(root->glob, subquery, root, false,
- tuple_fraction, NULL);
+ subroot = subquery_planner(root->glob, subquery,
+ root,
+ false, tuple_fraction);
/* Isolate the params needed by this specific subplan */
plan_params = root->plan_params;
@@ -265,8 +266,9 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
if (subquery)
{
/* Generate Paths for the ANY subquery; we'll need all rows */
- subroot = subquery_planner(root->glob, subquery, root, false, 0.0,
- NULL);
+ subroot = subquery_planner(root->glob, subquery,
+ root,
+ false, 0.0);
/* Isolate the params needed by this specific subplan */
plan_params = root->plan_params;
@@ -965,8 +967,9 @@ SS_process_ctes(PlannerInfo *root)
* Generate Paths for the CTE query. Always plan for full retrieval
* --- we don't have enough info to predict otherwise.
*/
- subroot = subquery_planner(root->glob, subquery, root,
- cte->cterecursive, 0.0, NULL);
+ subroot = subquery_planner(root->glob, subquery,
+ root,
+ cte->cterecursive, 0.0);
/*
* Since the current query level doesn't yet contain any RTEs, it
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 30068c27a13..296f866677a 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -43,15 +43,11 @@ static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
bool junkOK,
int flag, List *refnames_tlist,
List **pTargetList,
- bool *istrivial_tlist);
+ double *pNumGroups);
static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
PlannerInfo *root,
List *refnames_tlist,
List **pTargetList);
-static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
- bool trivial_tlist, List *child_tlist,
- List *interesting_pathkeys,
- double *pNumGroups);
static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
List *refnames_tlist,
List **pTargetList);
@@ -61,8 +57,9 @@ static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *ro
static List *plan_union_children(PlannerInfo *root,
SetOperationStmt *top_union,
List *refnames_tlist,
- List **tlist_list,
- List **istrivial_tlist);
+ List **tlist_list);
+static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
+ PlannerInfo *root);
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
Path *input_path,
@@ -117,10 +114,10 @@ plan_set_operations(PlannerInfo *root)
Assert(parse->distinctClause == NIL);
/*
- * In the outer query level, equivalence classes are limited to classes
- * which define that the top-level target entry is equivalent to the
- * corresponding child target entry. There won't be any equivalence class
- * merging. Mark that merging is complete to allow us to make pathkeys.
+ * In the outer query level, we won't have any true equivalences to deal
+ * with; but we do want to be able to make pathkeys, which will require
+ * single-member EquivalenceClasses. Indicate that EC merging is complete
+ * so that pathkeys.c won't complain.
*/
Assert(root->eq_classes == NIL);
root->ec_merging_done = true;
@@ -155,8 +152,6 @@ plan_set_operations(PlannerInfo *root)
}
else
{
- bool trivial_tlist;
-
/*
* Recurse on setOperations tree to generate paths for set ops. The
* final output paths should have just the column types shown as the
@@ -168,7 +163,7 @@ plan_set_operations(PlannerInfo *root)
true, -1,
leftmostQuery->targetList,
&top_tlist,
- &trivial_tlist);
+ NULL);
}
/* Must return the built tlist into root->processed_tlist. */
@@ -178,31 +173,6 @@ plan_set_operations(PlannerInfo *root)
}
/*
- * set_operation_ordered_results_useful
- * Return true if the given SetOperationStmt can be executed by utilizing
- * paths that provide sorted input according to the setop's targetlist.
- * Returns false when sorted paths are not any more useful then unsorted
- * ones.
- */
-bool
-set_operation_ordered_results_useful(SetOperationStmt *setop)
-{
- /*
- * Paths sorted by the targetlist are useful for UNION as we can opt to
- * MergeAppend the sorted paths then Unique them. Ordered paths are no
- * more useful than unordered ones for UNION ALL.
- */
- if (!setop->all && setop->op == SETOP_UNION)
- return true;
-
- /*
- * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
- * correctly sorted input paths.
- */
- return false;
-}
-
-/*
* recurse_set_operations
* Recursively handle one step in a tree of set operations
*
@@ -214,8 +184,8 @@ set_operation_ordered_results_useful(SetOperationStmt *setop)
*
* Returns a RelOptInfo for the subtree, as well as these output parameters:
* *pTargetList: receives the fully-fledged tlist for the subtree's top plan
- * *istrivial_tlist: true if, and only if, datatypes between parent and child
- * match.
+ * *pNumGroups: if not NULL, we estimate the number of distinct groups
+ * in the result, and store it there
*
* The pTargetList output parameter is mostly redundant with the pathtarget
* of the returned RelOptInfo, but for the moment we need it because much of
@@ -232,11 +202,9 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
bool junkOK,
int flag, List *refnames_tlist,
List **pTargetList,
- bool *istrivial_tlist)
+ double *pNumGroups)
{
- RelOptInfo *rel;
-
- *istrivial_tlist = true; /* for now */
+ RelOptInfo *rel = NULL; /* keep compiler quiet */
/* Guard against stack overflow due to overly complex setop nests */
check_stack_depth();
@@ -245,9 +213,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
{
RangeTblRef *rtr = (RangeTblRef *) setOp;
RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
- SetOperationStmt *setops;
Query *subquery = rte->subquery;
PlannerInfo *subroot;
+ RelOptInfo *final_rel;
+ Path *subpath;
+ Path *path;
List *tlist;
bool trivial_tlist;
@@ -259,16 +229,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
- /*
- * Pass the set operation details to the subquery_planner to have it
- * consider generating Paths correctly ordered for the set operation.
- */
- setops = castNode(SetOperationStmt, root->parse->setOperations);
-
/* Generate a subroot and Paths for the subquery */
- subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
- false, root->tuple_fraction,
- setops);
+ subroot = rel->subroot = subquery_planner(root->glob, subquery,
+ root,
+ false,
+ root->tuple_fraction);
/*
* It should not be possible for the primitive query to contain any
@@ -289,7 +254,90 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
/* Return the fully-fledged tlist to caller, too */
*pTargetList = tlist;
- *istrivial_tlist = trivial_tlist;
+
+ /*
+ * Mark rel with estimated output rows, width, etc. Note that we have
+ * to do this before generating outer-query paths, else
+ * cost_subqueryscan is not happy.
+ */
+ set_subquery_size_estimates(root, rel);
+
+ /*
+ * Since we may want to add a partial path to this relation, we must
+ * set its consider_parallel flag correctly.
+ */
+ final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
+ rel->consider_parallel = final_rel->consider_parallel;
+
+ /*
+ * For the moment, we consider only a single Path for the subquery.
+ * This should change soon (make it look more like
+ * set_subquery_pathlist).
+ */
+ subpath = get_cheapest_fractional_path(final_rel,
+ root->tuple_fraction);
+
+ /*
+ * Stick a SubqueryScanPath atop that.
+ *
+ * We don't bother to determine the subquery's output ordering since
+ * it won't be reflected in the set-op result anyhow; so just label
+ * the SubqueryScanPath with nil pathkeys. (XXX that should change
+ * soon too, likely.)
+ */
+ path = (Path *) create_subqueryscan_path(root, rel, subpath,
+ trivial_tlist,
+ NIL, NULL);
+
+ add_path(rel, path);
+
+ /*
+ * If we have a partial path for the child relation, we can use that
+ * to build a partial path for this relation. But there's no point in
+ * considering any path but the cheapest.
+ */
+ if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
+ final_rel->partial_pathlist != NIL)
+ {
+ Path *partial_subpath;
+ Path *partial_path;
+
+ partial_subpath = linitial(final_rel->partial_pathlist);
+ partial_path = (Path *)
+ create_subqueryscan_path(root, rel, partial_subpath,
+ trivial_tlist,
+ NIL, NULL);
+ add_partial_path(rel, partial_path);
+ }
+
+ /*
+ * Estimate number of groups if caller wants it. If the subquery used
+ * grouping or aggregation, its output is probably mostly unique
+ * anyway; otherwise do statistical estimation.
+ *
+ * XXX you don't really want to know about this: we do the estimation
+ * using the subquery's original targetlist expressions, not the
+ * subroot->processed_tlist which might seem more appropriate. The
+ * reason is that if the subquery is itself a setop, it may return a
+ * processed_tlist containing "varno 0" Vars generated by
+ * generate_append_tlist, and those would confuse estimate_num_groups
+ * mightily. We ought to get rid of the "varno 0" hack, but that
+ * requires a redesign of the parsetree representation of setops, so
+ * that there can be an RTE corresponding to each setop's output.
+ */
+ if (pNumGroups)
+ {
+ if (subquery->groupClause || subquery->groupingSets ||
+ subquery->distinctClause ||
+ subroot->hasHavingQual || subquery->hasAggs)
+ *pNumGroups = subpath->rows;
+ else
+ *pNumGroups = estimate_num_groups(subroot,
+ get_tlist_exprs(subquery->targetList, false),
+ subpath->rows,
+ NULL,
+ NULL);
+ }
}
else if (IsA(setOp, SetOperationStmt))
{
@@ -304,6 +352,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
rel = generate_nonunion_paths(op, root,
refnames_tlist,
pTargetList);
+ if (pNumGroups)
+ *pNumGroups = rel->rows;
/*
* If necessary, add a Result node to project the caller-requested
@@ -333,7 +383,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
*pTargetList,
refnames_tlist,
&trivial_tlist);
- *istrivial_tlist = trivial_tlist;
target = create_pathtarget(root, *pTargetList);
/* Apply projection to each path */
@@ -364,16 +413,16 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
lfirst(lc) = path;
}
}
- postprocess_setop_rel(root, rel);
}
else
{
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(setOp));
*pTargetList = NIL;
- rel = NULL; /* keep compiler quiet */
}
+ postprocess_setop_rel(root, rel);
+
return rel;
}
@@ -392,9 +441,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
Path *lpath;
Path *rpath;
List *lpath_tlist;
- bool lpath_trivial_tlist;
List *rpath_tlist;
- bool rpath_trivial_tlist;
List *tlist;
List *groupList;
double dNumGroups;
@@ -414,10 +461,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
false, -1,
refnames_tlist,
&lpath_tlist,
- &lpath_trivial_tlist);
- if (lrel->rtekind == RTE_SUBQUERY)
- build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
- NIL, NULL);
+ NULL);
lpath = lrel->cheapest_total_path;
/* The right path will want to look at the left one ... */
root->non_recursive_path = lpath;
@@ -426,10 +470,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
false, -1,
refnames_tlist,
&rpath_tlist,
- &rpath_trivial_tlist);
- if (rrel->rtekind == RTE_SUBQUERY)
- build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
- NIL, NULL);
+ NULL);
rpath = rrel->cheapest_total_path;
root->non_recursive_path = NULL;
@@ -492,204 +533,6 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
}
/*
- * build_setop_child_paths
- * Build paths for the set op child relation denoted by 'rel'.
- *
- * interesting_pathkeys: if not NIL, also include paths that suit these
- * pathkeys, sorting any unsorted paths as required.
- * *pNumGroups: if not NULL, we estimate the number of distinct groups
- * in the result, and store it there
- */
-static void
-build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
- bool trivial_tlist, List *child_tlist,
- List *interesting_pathkeys, double *pNumGroups)
-{
- RelOptInfo *final_rel;
- List *setop_pathkeys = rel->subroot->setop_pathkeys;
- ListCell *lc;
-
- /* it can't be a set op child rel if it's not a subquery */
- Assert(rel->rtekind == RTE_SUBQUERY);
-
- /* when sorting is needed, add child rel equivalences */
- if (interesting_pathkeys != NIL)
- add_setop_child_rel_equivalences(root,
- rel,
- child_tlist,
- interesting_pathkeys);
-
- /*
- * Mark rel with estimated output rows, width, etc. Note that we have to
- * do this before generating outer-query paths, else cost_subqueryscan is
- * not happy.
- */
- set_subquery_size_estimates(root, rel);
-
- /*
- * Since we may want to add a partial path to this relation, we must set
- * its consider_parallel flag correctly.
- */
- final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
- rel->consider_parallel = final_rel->consider_parallel;
-
- /* Generate subquery scan paths for any interesting path in final_rel */
- foreach(lc, final_rel->pathlist)
- {
- Path *subpath = (Path *) lfirst(lc);
- List *pathkeys;
- Path *cheapest_input_path = final_rel->cheapest_total_path;
- bool is_sorted;
- int presorted_keys;
-
- /*
- * Include the cheapest path as-is so that the set operation can be
- * cheaply implemented using a method which does not require the input
- * to be sorted.
- */
- if (subpath == cheapest_input_path)
- {
- /* Convert subpath's pathkeys to outer representation */
- pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
- make_tlist_from_pathtarget(subpath->pathtarget));
-
- /* Generate outer path using this subpath */
- add_path(rel, (Path *) create_subqueryscan_path(root,
- rel,
- subpath,
- trivial_tlist,
- pathkeys,
- NULL));
- }
-
- /* skip dealing with sorted paths if the setop doesn't need them */
- if (interesting_pathkeys == NIL)
- continue;
-
- /*
- * Create paths to suit final sort order required for setop_pathkeys.
- * Here we'll sort the cheapest input path (if not sorted already) and
- * incremental sort any paths which are partially sorted.
- */
- is_sorted = pathkeys_count_contained_in(setop_pathkeys,
- subpath->pathkeys,
- &presorted_keys);
-
- if (!is_sorted)
- {
- double limittuples = rel->subroot->limit_tuples;
-
- /*
- * Try at least sorting the cheapest path and also try
- * incrementally sorting any path which is partially sorted
- * already (no need to deal with paths which have presorted keys
- * when incremental sort is disabled unless it's the cheapest
- * input path).
- */
- if (subpath != cheapest_input_path &&
- (presorted_keys == 0 || !enable_incremental_sort))
- continue;
-
- /*
- * We've no need to consider both a sort and incremental sort.
- * We'll just do a sort if there are no presorted keys and an
- * incremental sort when there are presorted keys.
- */
- if (presorted_keys == 0 || !enable_incremental_sort)
- subpath = (Path *) create_sort_path(rel->subroot,
- final_rel,
- subpath,
- setop_pathkeys,
- limittuples);
- else
- subpath = (Path *) create_incremental_sort_path(rel->subroot,
- final_rel,
- subpath,
- setop_pathkeys,
- presorted_keys,
- limittuples);
- }
-
- /*
- * subpath is now sorted, so add it to the pathlist. We already added
- * the cheapest_input_path above, so don't add it again unless we just
- * sorted it.
- */
- if (subpath != cheapest_input_path)
- {
- /* Convert subpath's pathkeys to outer representation */
- pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
- make_tlist_from_pathtarget(subpath->pathtarget));
-
- /* Generate outer path using this subpath */
- add_path(rel, (Path *) create_subqueryscan_path(root,
- rel,
- subpath,
- trivial_tlist,
- pathkeys,
- NULL));
- }
- }
-
- /* if consider_parallel is false, there should be no partial paths */
- Assert(final_rel->consider_parallel ||
- final_rel->partial_pathlist == NIL);
-
- /*
- * If we have a partial path for the child relation, we can use that to
- * build a partial path for this relation. But there's no point in
- * considering any path but the cheapest.
- */
- if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
- final_rel->partial_pathlist != NIL)
- {
- Path *partial_subpath;
- Path *partial_path;
-
- partial_subpath = linitial(final_rel->partial_pathlist);
- partial_path = (Path *)
- create_subqueryscan_path(root, rel, partial_subpath,
- trivial_tlist,
- NIL, NULL);
- add_partial_path(rel, partial_path);
- }
-
- postprocess_setop_rel(root, rel);
-
- /*
- * Estimate number of groups if caller wants it. If the subquery used
- * grouping or aggregation, its output is probably mostly unique anyway;
- * otherwise do statistical estimation.
- *
- * XXX you don't really want to know about this: we do the estimation
- * using the subquery's original targetlist expressions, not the
- * subroot->processed_tlist which might seem more appropriate. The reason
- * is that if the subquery is itself a setop, it may return a
- * processed_tlist containing "varno 0" Vars generated by
- * generate_append_tlist, and those would confuse estimate_num_groups
- * mightily. We ought to get rid of the "varno 0" hack, but that requires
- * a redesign of the parsetree representation of setops, so that there can
- * be an RTE corresponding to each setop's output.
- */
- if (pNumGroups)
- {
- PlannerInfo *subroot = rel->subroot;
- Query *subquery = subroot->parse;
-
- if (subquery->groupClause || subquery->groupingSets ||
- subquery->distinctClause || subroot->hasHavingQual ||
- subquery->hasAggs)
- *pNumGroups = rel->cheapest_total_path->rows;
- else
- *pNumGroups = estimate_num_groups(subroot,
- get_tlist_exprs(subquery->targetList, false),
- rel->cheapest_total_path->rows,
- NULL,
- NULL);
- }
-}
-
-/*
* Generate paths for a UNION or UNION ALL node
*/
static RelOptInfo *
@@ -699,38 +542,41 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
{
Relids relids = NULL;
RelOptInfo *result_rel;
+ double save_fraction = root->tuple_fraction;
ListCell *lc;
- ListCell *lc2;
- ListCell *lc3;
- List *cheapest_pathlist = NIL;
- List *ordered_pathlist = NIL;
+ List *pathlist = NIL;
List *partial_pathlist = NIL;
bool partial_paths_valid = true;
bool consider_parallel = true;
List *rellist;
List *tlist_list;
- List *trivial_tlist_list;
List *tlist;
- List *groupList = NIL;
- Path *apath;
- Path *gpath = NULL;
- bool try_sorted;
- List *union_pathkeys = NIL;
+ Path *path;
+
+ /*
+ * If plain UNION, tell children to fetch all tuples.
+ *
+ * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
+ * each arm of the UNION ALL. One could make a case for reducing the
+ * tuple fraction for later arms (discounting by the expected size of the
+ * earlier arms' results) but it seems not worth the trouble. The normal
+ * case where tuple_fraction isn't already zero is a LIMIT at top level,
+ * and passing it down as-is is usually enough to get the desired result
+ * of preferring fast-start plans.
+ */
+ if (!op->all)
+ root->tuple_fraction = 0.0;
/*
* If any of my children are identical UNION nodes (same op, all-flag, and
* colTypes) then they can be merged into this node so that we generate
- * only one Append/MergeAppend and unique-ification for the lot. Recurse
- * to find such nodes.
+ * only one Append and unique-ification for the lot. Recurse to find such
+ * nodes and compute their children's paths.
*/
- rellist = plan_union_children(root,
- op,
- refnames_tlist,
- &tlist_list,
- &trivial_tlist_list);
+ rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
/*
- * Generate tlist for Append/MergeAppend plan node.
+ * Generate tlist for Append plan node.
*
* The tlist for an Append plan isn't important as far as the Append is
* concerned, but we must make it look real anyway for the benefit of the
@@ -738,68 +584,15 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
*/
tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
tlist_list, refnames_tlist);
- *pTargetList = tlist;
-
- /* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
- try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
-
- if (try_sorted)
- {
- /* Identify the grouping semantics */
- groupList = generate_setop_grouplist(op, tlist);
-
- /* Determine the pathkeys for sorting by the whole target list */
- union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
-
- root->query_pathkeys = union_pathkeys;
- }
-
- /*
- * Now that we've got the append target list, we can build the union child
- * paths.
- */
- forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
- {
- RelOptInfo *rel = lfirst(lc);
- bool trivial_tlist = lfirst_int(lc2);
- List *child_tlist = lfirst_node(List, lc3);
- /* only build paths for the union children */
- if (rel->rtekind == RTE_SUBQUERY)
- build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
- union_pathkeys, NULL);
- }
+ *pTargetList = tlist;
/* Build path lists and relid set. */
foreach(lc, rellist)
{
RelOptInfo *rel = lfirst(lc);
- Path *ordered_path;
- cheapest_pathlist = lappend(cheapest_pathlist,
- rel->cheapest_total_path);
-
- if (try_sorted)
- {
- ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
- union_pathkeys,
- NULL,
- TOTAL_COST,
- false);
-
- if (ordered_path != NULL)
- ordered_pathlist = lappend(ordered_pathlist, ordered_path);
- else
- {
- /*
- * If we can't find a sorted path, just give up trying to
- * generate a list of correctly sorted child paths. This can
- * happen when type coercion was added to the targetlist due
- * to mismatching types from the union children.
- */
- try_sorted = false;
- }
- }
+ pathlist = lappend(pathlist, rel->cheapest_total_path);
if (consider_parallel)
{
@@ -822,21 +615,28 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
result_rel->reltarget = create_pathtarget(root, tlist);
result_rel->consider_parallel = consider_parallel;
- result_rel->consider_startup = (root->tuple_fraction > 0);
/*
- * Append the child results together using the cheapest paths from each
- * union child.
+ * Append the child results together.
+ */
+ path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
+ NIL, NULL, 0, false, -1);
+
+ /*
+ * For UNION ALL, we just need the Append path. For UNION, need to add
+ * node(s) to remove duplicates.
*/
- apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
- NIL, NIL, NULL, 0, false, -1);
+ if (!op->all)
+ path = make_union_unique(op, path, tlist, root);
+
+ add_path(result_rel, path);
/*
* Estimate number of groups. For now we just assume the output is unique
* --- this is certainly true for the UNION case, and we want worst-case
* estimates anyway.
*/
- result_rel->rows = apath->rows;
+ result_rel->rows = path->rows;
/*
* Now consider doing the same thing using the partial paths plus Append
@@ -844,7 +644,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
*/
if (partial_paths_valid)
{
- Path *papath;
+ Path *ppath;
int parallel_workers = 0;
/* Find the highest number of workers requested for any subpath. */
@@ -873,137 +673,21 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
}
Assert(parallel_workers > 0);
- papath = (Path *)
+ ppath = (Path *)
create_append_path(root, result_rel, NIL, partial_pathlist,
- NIL, NULL, parallel_workers,
- enable_parallel_append, -1);
- gpath = (Path *)
- create_gather_path(root, result_rel, papath,
+ NIL, NULL,
+ parallel_workers, enable_parallel_append,
+ -1);
+ ppath = (Path *)
+ create_gather_path(root, result_rel, ppath,
result_rel->reltarget, NULL, NULL);
+ if (!op->all)
+ ppath = make_union_unique(op, ppath, tlist, root);
+ add_path(result_rel, ppath);
}
- if (!op->all)
- {
- double dNumGroups;
- bool can_sort = grouping_is_sortable(groupList);
- bool can_hash = grouping_is_hashable(groupList);
-
- /*
- * XXX for the moment, take the number of distinct groups as equal to
- * the total input size, i.e., the worst case. This is too
- * conservative, but it's not clear how to get a decent estimate of
- * the true size. One should note as well the propensity of novices
- * to write UNION rather than UNION ALL even when they don't expect
- * any duplicates...
- */
- dNumGroups = apath->rows;
-
- if (can_hash)
- {
- Path *path;
-
- /*
- * Try a hash aggregate plan on 'apath'. This is the cheapest
- * available path containing each append child.
- */
- path = (Path *) create_agg_path(root,
- result_rel,
- apath,
- create_pathtarget(root, tlist),
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- groupList,
- NIL,
- NULL,
- dNumGroups);
- add_path(result_rel, path);
-
- /* Try hash aggregate on the Gather path, if valid */
- if (gpath != NULL)
- {
- /* Hashed aggregate plan --- no sort needed */
- path = (Path *) create_agg_path(root,
- result_rel,
- gpath,
- create_pathtarget(root, tlist),
- AGG_HASHED,
- AGGSPLIT_SIMPLE,
- groupList,
- NIL,
- NULL,
- dNumGroups);
- add_path(result_rel, path);
- }
- }
-
- if (can_sort)
- {
- Path *path = apath;
-
- /* Try Sort -> Unique on the Append path */
- if (groupList != NIL)
- path = (Path *) create_sort_path(root, result_rel, path,
- make_pathkeys_for_sortclauses(root, groupList, tlist),
- -1.0);
-
- path = (Path *) create_upper_unique_path(root,
- result_rel,
- path,
- list_length(path->pathkeys),
- dNumGroups);
-
- add_path(result_rel, path);
-
- /* Try Sort -> Unique on the Gather path, if set */
- if (gpath != NULL)
- {
- path = gpath;
-
- path = (Path *) create_sort_path(root, result_rel, path,
- make_pathkeys_for_sortclauses(root, groupList, tlist),
- -1.0);
-
- path = (Path *) create_upper_unique_path(root,
- result_rel,
- path,
- list_length(path->pathkeys),
- dNumGroups);
- add_path(result_rel, path);
- }
- }
-
- /*
- * Try making a MergeAppend path if we managed to find a path with the
- * correct pathkeys in each union child query.
- */
- if (try_sorted && groupList != NIL)
- {
- Path *path;
-
- path = (Path *) create_merge_append_path(root,
- result_rel,
- ordered_pathlist,
- union_pathkeys,
- NULL);
-
- /* and make the MergeAppend unique */
- path = (Path *) create_upper_unique_path(root,
- result_rel,
- path,
- list_length(tlist),
- dNumGroups);
-
- add_path(result_rel, path);
- }
- }
- else
- {
- /* UNION ALL */
- add_path(result_rel, apath);
-
- if (gpath != NULL)
- add_path(result_rel, gpath);
- }
+ /* Undo effects of possibly forcing tuple_fraction to 0 */
+ root->tuple_fraction = save_fraction;
return result_rel;
}
@@ -1029,8 +713,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
*tlist,
*groupList,
*pathlist;
- bool lpath_trivial_tlist,
- rpath_trivial_tlist;
double dLeftGroups,
dRightGroups,
dNumGroups,
@@ -1050,26 +732,14 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
false, 0,
refnames_tlist,
&lpath_tlist,
- &lpath_trivial_tlist);
- if (lrel->rtekind == RTE_SUBQUERY)
- build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
- NIL, &dLeftGroups);
- else
- dLeftGroups = lrel->rows;
-
+ &dLeftGroups);
lpath = lrel->cheapest_total_path;
rrel = recurse_set_operations(op->rarg, root,
op->colTypes, op->colCollations,
false, 1,
refnames_tlist,
&rpath_tlist,
- &rpath_trivial_tlist);
- if (rrel->rtekind == RTE_SUBQUERY)
- build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
- NIL, &dRightGroups);
- else
- dRightGroups = rrel->rows;
-
+ &dRightGroups);
rpath = rrel->cheapest_total_path;
/* Undo effects of forcing tuple_fraction to 0 */
@@ -1206,16 +876,13 @@ static List *
plan_union_children(PlannerInfo *root,
SetOperationStmt *top_union,
List *refnames_tlist,
- List **tlist_list,
- List **istrivial_tlist)
+ List **tlist_list)
{
List *pending_rels = list_make1(top_union);
List *result = NIL;
List *child_tlist;
- bool trivial_tlist;
*tlist_list = NIL;
- *istrivial_tlist = NIL;
while (pending_rels != NIL)
{
@@ -1254,15 +921,76 @@ plan_union_children(PlannerInfo *root,
false, -1,
refnames_tlist,
&child_tlist,
- &trivial_tlist));
+ NULL));
*tlist_list = lappend(*tlist_list, child_tlist);
- *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
}
return result;
}
/*
+ * Add nodes to the given path tree to unique-ify the result of a UNION.
+ */
+static Path *
+make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
+ PlannerInfo *root)
+{
+ RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
+ List *groupList;
+ double dNumGroups;
+
+ /* Identify the grouping semantics */
+ groupList = generate_setop_grouplist(op, tlist);
+
+ /*
+ * XXX for the moment, take the number of distinct groups as equal to the
+ * total input size, ie, the worst case. This is too conservative, but
+ * it's not clear how to get a decent estimate of the true size. One
+ * should note as well the propensity of novices to write UNION rather
+ * than UNION ALL even when they don't expect any duplicates...
+ */
+ dNumGroups = path->rows;
+
+ /* Decide whether to hash or sort */
+ if (choose_hashed_setop(root, groupList, path,
+ dNumGroups, dNumGroups,
+ "UNION"))
+ {
+ /* Hashed aggregate plan --- no sort needed */
+ path = (Path *) create_agg_path(root,
+ result_rel,
+ path,
+ create_pathtarget(root, tlist),
+ AGG_HASHED,
+ AGGSPLIT_SIMPLE,
+ groupList,
+ NIL,
+ NULL,
+ dNumGroups);
+ }
+ else
+ {
+ /* Sort and Unique */
+ if (groupList)
+ path = (Path *)
+ create_sort_path(root,
+ result_rel,
+ path,
+ make_pathkeys_for_sortclauses(root,
+ groupList,
+ tlist),
+ -1.0);
+ path = (Path *) create_upper_unique_path(root,
+ result_rel,
+ path,
+ list_length(path->pathkeys),
+ dNumGroups);
+ }
+
+ return path;
+}
+
+/*
* postprocess_setop_rel - perform steps required after adding paths
*/
static void
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 28fed9d87f6..40ea19e6f10 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1890,8 +1890,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
* For now, we don't support resjunk sort clauses on the output of a
* setOperation tree --- you can only use the SQL92-spec options of
* selecting an output column by name or number. Enforce by checking that
- * transformSortClause doesn't add any items to tlist. Note, if changing
- * this, add_setop_child_rel_equivalences() will need to be updated.
+ * transformSortClause doesn't add any items to tlist.
*/
tllen = list_length(qry->targetList);
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 14ef296ab72..6ec81637c1d 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -400,8 +400,6 @@ struct PlannerInfo
List *distinct_pathkeys;
/* sortClause pathkeys, if any */
List *sort_pathkeys;
- /* set operator pathkeys, if any */
- List *setop_pathkeys;
/* Canonicalised partition schemes used in the query. */
List *part_schemes pg_node_attr(read_write_ignore);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 914d9bdef58..5f500a1c69f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -173,10 +173,6 @@ extern void add_child_join_rel_equivalences(PlannerInfo *root,
AppendRelInfo **appinfos,
RelOptInfo *parent_joinrel,
RelOptInfo *child_joinrel);
-extern void add_setop_child_rel_equivalences(PlannerInfo *root,
- RelOptInfo *child_rel,
- List *child_tlist,
- List *setop_pathkeys);
extern List *generate_implied_equalities_for_column(PlannerInfo *root,
RelOptInfo *rel,
ec_matches_callback_type callback,
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index 5aeff21b967..e1d79ffdf3c 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -44,8 +44,7 @@ extern PlannedStmt *standard_planner(Query *parse, const char *query_string,
extern PlannerInfo *subquery_planner(PlannerGlobal *glob, Query *parse,
PlannerInfo *parent_root,
- bool hasRecursion, double tuple_fraction,
- SetOperationStmt *setops);
+ bool hasRecursion, double tuple_fraction);
extern RowMarkType select_rowmark_type(RangeTblEntry *rte,
LockClauseStrength strength);
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h
index a52dec285d5..8e00716dc82 100644
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -53,6 +53,6 @@ extern void preprocess_aggrefs(PlannerInfo *root, Node *clause);
* prototypes for prepunion.c
*/
extern RelOptInfo *plan_set_operations(PlannerInfo *root);
-extern bool set_operation_ordered_results_useful(SetOperationStmt *setop);
+
#endif /* PREP_H */
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 7d59fb44316..2de8924b524 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1396,7 +1396,6 @@ SELECT x FROM test3cs WHERE x ~ 'a';
abc
(1 row)
-SET enable_hashagg TO off;
SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
x
-----
@@ -1449,7 +1448,6 @@ SELECT DISTINCT x FROM test3cs ORDER BY x;
ghi
(4 rows)
-RESET enable_hashagg;
SELECT count(DISTINCT x) FROM test3cs;
count
-------
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 5fd54a10b1a..7fdb685313d 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1472,19 +1472,14 @@ explain (costs off) select * from t union select * from t order by 1,3;
Sort Key: t.a, t.c
Presorted Key: t.a
-> Unique
- -> Merge Append
+ -> Sort
Sort Key: t.a, t.b, t.c
- -> Gather Merge
+ -> Gather
Workers Planned: 2
- -> Sort
- Sort Key: t.a, t.b, t.c
+ -> Parallel Append
-> Parallel Seq Scan on t
- -> Gather Merge
- Workers Planned: 2
- -> Sort
- Sort Key: t_1.a, t_1.b, t_1.c
-> Parallel Seq Scan on t t_1
-(16 rows)
+(11 rows)
-- Full sort, not just incremental sort can be pushed below a gather merge path
-- by generate_useful_gather_paths.
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 26b718e9033..882017afc9a 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -412,17 +412,16 @@ set enable_hashagg to off;
explain (costs off)
select count(*) from
( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
- QUERY PLAN
-----------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------
Aggregate
-> Unique
- -> Merge Append
+ -> Sort
Sort Key: tenk1.unique1
- -> Index Only Scan using tenk1_unique1 on tenk1
- -> Sort
- Sort Key: tenk1_1.fivethous
+ -> Append
+ -> Index Only Scan using tenk1_unique1 on tenk1
-> Seq Scan on tenk1 tenk1_1
-(8 rows)
+(7 rows)
select count(*) from
( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
@@ -951,9 +950,16 @@ select except select;
-- check hashed implementation
set enable_hashagg = true;
set enable_sort = false;
--- We've no way to check hashed UNION as the empty pathkeys in the Append are
--- fine to make use of Unique, which is cheaper than HashAggregate and we've
--- no means to disable Unique.
+explain (costs off)
+select from generate_series(1,5) union select from generate_series(1,3);
+ QUERY PLAN
+----------------------------------------------------------------
+ HashAggregate
+ -> Append
+ -> Function Scan on generate_series
+ -> Function Scan on generate_series generate_series_1
+(4 rows)
+
explain (costs off)
select from generate_series(1,5) intersect select from generate_series(1,3);
QUERY PLAN
@@ -966,6 +972,10 @@ select from generate_series(1,5) intersect select from generate_series(1,3);
-> Function Scan on generate_series generate_series_1
(6 rows)
+select from generate_series(1,5) union select from generate_series(1,3);
+--
+(1 row)
+
select from generate_series(1,5) union all select from generate_series(1,3);
--
(8 rows)
@@ -1035,20 +1045,6 @@ select from generate_series(1,5) except all select from generate_series(1,3);
--
(2 rows)
--- Try a variation of the above but with a CTE which contains a column, again
--- with an empty final select list.
--- Ensure we get the expected 1 row with 0 columns
-with cte as materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
---
-(1 row)
-
--- Ensure we get the same result as the above.
-with cte as not materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
---
-(1 row)
-
reset enable_hashagg;
reset enable_sort;
--
@@ -1085,7 +1081,6 @@ INSERT INTO t2 VALUES ('ab'), ('xy');
set enable_seqscan = off;
set enable_indexscan = on;
set enable_bitmapscan = off;
-set enable_sort = off;
explain (costs off)
SELECT * FROM
(SELECT a || b AS ab FROM t1
@@ -1167,7 +1162,6 @@ explain (costs off)
reset enable_seqscan;
reset enable_indexscan;
reset enable_bitmapscan;
-reset enable_sort;
-- This simpler variant of the above test has been observed to fail differently
create table events (event_id int primary key);
create table other_events (event_id int primary key);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97d78..03837de846b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -555,7 +555,6 @@ SELECT x FROM test3cs WHERE x LIKE 'a%';
SELECT x FROM test3cs WHERE x ILIKE 'a%';
SELECT x FROM test3cs WHERE x SIMILAR TO 'a%';
SELECT x FROM test3cs WHERE x ~ 'a';
-SET enable_hashagg TO off;
SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x;
SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs;
@@ -563,7 +562,6 @@ SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs;
SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs;
SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs;
SELECT DISTINCT x FROM test3cs ORDER BY x;
-RESET enable_hashagg;
SELECT count(DISTINCT x) FROM test3cs;
SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x;
SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x;
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index 8afc580c632..d160db54588 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -302,12 +302,12 @@ select except select;
set enable_hashagg = true;
set enable_sort = false;
--- We've no way to check hashed UNION as the empty pathkeys in the Append are
--- fine to make use of Unique, which is cheaper than HashAggregate and we've
--- no means to disable Unique.
+explain (costs off)
+select from generate_series(1,5) union select from generate_series(1,3);
explain (costs off)
select from generate_series(1,5) intersect select from generate_series(1,3);
+select from generate_series(1,5) union select from generate_series(1,3);
select from generate_series(1,5) union all select from generate_series(1,3);
select from generate_series(1,5) intersect select from generate_series(1,3);
select from generate_series(1,5) intersect all select from generate_series(1,3);
@@ -330,17 +330,6 @@ select from generate_series(1,5) intersect all select from generate_series(1,3);
select from generate_series(1,5) except select from generate_series(1,3);
select from generate_series(1,5) except all select from generate_series(1,3);
--- Try a variation of the above but with a CTE which contains a column, again
--- with an empty final select list.
-
--- Ensure we get the expected 1 row with 0 columns
-with cte as materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
-
--- Ensure we get the same result as the above.
-with cte as not materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
-
reset enable_hashagg;
reset enable_sort;
@@ -372,7 +361,6 @@ INSERT INTO t2 VALUES ('ab'), ('xy');
set enable_seqscan = off;
set enable_indexscan = on;
set enable_bitmapscan = off;
-set enable_sort = off;
explain (costs off)
SELECT * FROM
@@ -419,7 +407,6 @@ explain (costs off)
reset enable_seqscan;
reset enable_indexscan;
reset enable_bitmapscan;
-reset enable_sort;
-- This simpler variant of the above test has been observed to fail differently