aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/nodes/outfuncs.c3
-rw-r--r--src/backend/optimizer/plan/planmain.c20
-rw-r--r--src/backend/optimizer/plan/planner.c394
-rw-r--r--src/backend/parser/parse_clause.c11
-rw-r--r--src/include/nodes/relation.h7
-rw-r--r--src/test/regress/expected/numerology.out2
-rw-r--r--src/test/regress/expected/opr_sanity.out27
-rw-r--r--src/test/regress/expected/select_distinct.out4
-rw-r--r--src/test/regress/input/misc.source3
-rw-r--r--src/test/regress/output/misc.source3
-rw-r--r--src/test/regress/sql/numerology.sql2
-rw-r--r--src/test/regress/sql/opr_sanity.sql27
-rw-r--r--src/test/regress/sql/select_distinct.sql4
13 files changed, 396 insertions, 111 deletions
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 66630ae6612..408b9b2a757 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.329 2008/08/02 21:31:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.330 2008/08/05 02:43:17 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@@ -1334,6 +1334,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
WRITE_NODE_FIELD(append_rel_list);
WRITE_NODE_FIELD(query_pathkeys);
WRITE_NODE_FIELD(group_pathkeys);
+ WRITE_NODE_FIELD(distinct_pathkeys);
WRITE_NODE_FIELD(sort_pathkeys);
WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 5e5da6cda4f..081a7c9cebd 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.109 2008/08/05 02:43:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -66,9 +66,9 @@
* PlannerInfo field and not a passed parameter is that the low-level routines
* in indxpath.c need to see it.)
*
- * Note: the PlannerInfo node also includes group_pathkeys and sort_pathkeys,
- * which like query_pathkeys need to be canonicalized once the info is
- * available.
+ * Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys,
+ * and sort_pathkeys, which like query_pathkeys need to be canonicalized once
+ * the info is available.
*
* tuple_fraction is interpreted as follows:
* 0: expect all tuples to be retrieved (normal case)
@@ -120,6 +120,8 @@ query_planner(PlannerInfo *root, List *tlist,
root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root,
root->group_pathkeys);
+ root->distinct_pathkeys = canonicalize_pathkeys(root,
+ root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root,
root->sort_pathkeys);
return;
@@ -237,10 +239,12 @@ query_planner(PlannerInfo *root, List *tlist,
/*
* We have completed merging equivalence sets, so it's now possible to
* convert the requested query_pathkeys to canonical form. Also
- * canonicalize the groupClause and sortClause pathkeys for use later.
+ * canonicalize the groupClause, distinctClause and sortClause pathkeys
+ * for use later.
*/
root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
+ root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
/*
@@ -286,9 +290,11 @@ query_planner(PlannerInfo *root, List *tlist,
/*
* If both GROUP BY and ORDER BY are specified, we will need two
* levels of sort --- and, therefore, certainly need to read all the
- * tuples --- unless ORDER BY is a subset of GROUP BY.
+ * tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if
+ * we have both DISTINCT and GROUP BY.
*/
- if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
+ if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
+ !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys))
tuple_fraction = 0.0;
}
else if (parse->hasAggs || root->hasHavingQual)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 40a9bffaf37..d8c6942e250 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.238 2008/08/05 02:43:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -69,12 +69,17 @@ static double preprocess_limit(PlannerInfo *root,
int64 *offset_est, int64 *count_est);
static void preprocess_groupclause(PlannerInfo *root);
static Oid *extract_grouping_ops(List *groupClause);
+static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
static bool grouping_is_sortable(List *groupClause);
static bool grouping_is_hashable(List *groupClause);
static bool choose_hashed_grouping(PlannerInfo *root,
double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path,
double dNumGroups, AggClauseCounts *agg_counts);
+static bool choose_hashed_distinct(PlannerInfo *root,
+ Plan *input_plan, List *input_pathkeys,
+ double tuple_fraction, double limit_tuples,
+ double dNumDistinctRows);
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
AttrNumber **groupColIdx, bool *need_tlist_eval);
static void locate_grouping_columns(PlannerInfo *root,
@@ -757,7 +762,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
double limit_tuples = -1.0;
Plan *result_plan;
List *current_pathkeys;
- List *sort_pathkeys;
double dNumGroups = 0;
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
@@ -829,16 +833,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* Calculate pathkeys that represent result ordering requirements
*/
Assert(parse->distinctClause == NIL);
- sort_pathkeys = make_pathkeys_for_sortclauses(root,
- parse->sortClause,
- tlist,
- true);
+ root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
+ parse->sortClause,
+ tlist,
+ true);
}
else
{
/* No set operations, do regular planning */
List *sub_tlist;
- List *group_pathkeys;
AttrNumber *groupColIdx = NULL;
bool need_tlist_eval = true;
QualCost tlist_cost;
@@ -870,14 +873,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/*
* Calculate pathkeys that represent grouping/ordering requirements.
* Stash them in PlannerInfo so that query_planner can canonicalize
- * them after EquivalenceClasses have been formed.
- *
- * Note: for the moment, DISTINCT is always implemented via sort/uniq,
- * and we set the sort_pathkeys to be the more rigorous of the
- * DISTINCT and ORDER BY requirements. This should be changed
- * someday, but DISTINCT ON is a bit of a problem ...
+ * them after EquivalenceClasses have been formed. The sortClause
+ * is certainly sort-able, but GROUP BY and DISTINCT might not be,
+ * in which case we just leave their pathkeys empty.
*/
- if (parse->groupClause && grouping_is_sortable(parse->groupClause))
+ if (parse->groupClause &&
+ grouping_is_sortable(parse->groupClause))
root->group_pathkeys =
make_pathkeys_for_sortclauses(root,
parse->groupClause,
@@ -886,18 +887,21 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
else
root->group_pathkeys = NIL;
- if (list_length(parse->distinctClause) > list_length(parse->sortClause))
- root->sort_pathkeys =
+ if (parse->distinctClause &&
+ grouping_is_sortable(parse->distinctClause))
+ root->distinct_pathkeys =
make_pathkeys_for_sortclauses(root,
parse->distinctClause,
tlist,
false);
else
- root->sort_pathkeys =
- make_pathkeys_for_sortclauses(root,
- parse->sortClause,
- tlist,
- false);
+ root->distinct_pathkeys = NIL;
+
+ root->sort_pathkeys =
+ make_pathkeys_for_sortclauses(root,
+ parse->sortClause,
+ tlist,
+ false);
/*
* Will need actual number of aggregates for estimating costs.
@@ -917,17 +921,27 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
}
/*
- * Figure out whether we need a sorted result from query_planner.
+ * Figure out whether we want a sorted result from query_planner.
*
* If we have a sortable GROUP BY clause, then we want a result sorted
- * properly for grouping. Otherwise, if there is an ORDER BY clause,
- * we want to sort by the ORDER BY clause. (Note: if we have both, and
- * ORDER BY is a superset of GROUP BY, it would be tempting to request
- * sort by ORDER BY --- but that might just leave us failing to
- * exploit an available sort order at all. Needs more thought...)
+ * properly for grouping. Otherwise, if there's a sortable DISTINCT
+ * clause that's more rigorous than the ORDER BY clause, we try to
+ * produce output that's sufficiently well sorted for the DISTINCT.
+ * Otherwise, if there is an ORDER BY clause, we want to sort by the
+ * ORDER BY clause.
+ *
+ * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
+ * superset of GROUP BY, it would be tempting to request sort by ORDER
+ * BY --- but that might just leave us failing to exploit an available
+ * sort order at all. Needs more thought. The choice for DISTINCT
+ * versus ORDER BY is much easier, since we know that the parser
+ * ensured that one is a superset of the other.
*/
if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys;
+ else if (list_length(root->distinct_pathkeys) >
+ list_length(root->sort_pathkeys))
+ root->query_pathkeys = root->distinct_pathkeys;
else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
else
@@ -942,9 +956,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
query_planner(root, sub_tlist, tuple_fraction, limit_tuples,
&cheapest_path, &sorted_path, &dNumGroups);
- group_pathkeys = root->group_pathkeys;
- sort_pathkeys = root->sort_pathkeys;
-
/*
* If grouping, decide whether to use sorted or hashed grouping.
*/
@@ -1024,7 +1035,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* Detect if we'll need an explicit sort for grouping */
if (parse->groupClause && !use_hashed_grouping &&
- !pathkeys_contained_in(group_pathkeys, current_pathkeys))
+ !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
{
need_sort_for_grouping = true;
/*
@@ -1135,7 +1146,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
parse->groupClause,
groupColIdx,
result_plan);
- current_pathkeys = group_pathkeys;
+ current_pathkeys = root->group_pathkeys;
}
aggstrategy = AGG_SORTED;
@@ -1178,7 +1189,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
parse->groupClause,
groupColIdx,
result_plan);
- current_pathkeys = group_pathkeys;
+ current_pathkeys = root->group_pathkeys;
}
result_plan = (Plan *) make_group(root,
@@ -1214,35 +1225,129 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
} /* end of if (setOperations) */
/*
- * If we were not able to make the plan come out in the right order, add
- * an explicit sort step.
+ * If there is a DISTINCT clause, add the necessary node(s).
*/
- if (sort_pathkeys)
+ if (parse->distinctClause)
{
- if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
+ double dNumDistinctRows;
+ long numDistinctRows;
+ bool use_hashed_distinct;
+ bool can_sort;
+ bool can_hash;
+
+ /*
+ * If there was grouping or aggregation, use the current number of
+ * rows as the estimated number of DISTINCT rows (ie, assume the
+ * result was already mostly unique). If not, use the number of
+ * distinct-groups calculated by query_planner.
+ */
+ if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
+ dNumDistinctRows = result_plan->plan_rows;
+ else
+ dNumDistinctRows = dNumGroups;
+
+ /* Also convert to long int --- but 'ware overflow! */
+ numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
+
+ /*
+ * If we have a sortable DISTINCT ON clause, we always use sorting.
+ * This enforces the expected behavior of DISTINCT ON.
+ */
+ can_sort = grouping_is_sortable(parse->distinctClause);
+ if (can_sort && parse->hasDistinctOn)
+ use_hashed_distinct = false;
+ else
{
- result_plan = (Plan *) make_sort_from_pathkeys(root,
- result_plan,
- sort_pathkeys,
- limit_tuples);
- current_pathkeys = sort_pathkeys;
+ can_hash = grouping_is_hashable(parse->distinctClause);
+ if (can_hash && can_sort)
+ {
+ /* we have a meaningful choice to make ... */
+ use_hashed_distinct =
+ choose_hashed_distinct(root,
+ result_plan, current_pathkeys,
+ tuple_fraction, limit_tuples,
+ dNumDistinctRows);
+ }
+ else if (can_hash)
+ use_hashed_distinct = true;
+ else if (can_sort)
+ use_hashed_distinct = false;
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not implement DISTINCT"),
+ errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+ use_hashed_distinct = false; /* keep compiler quiet */
+ }
+ }
+
+ if (use_hashed_distinct)
+ {
+ /* Hashed aggregate plan --- no sort needed */
+ result_plan = (Plan *) make_agg(root,
+ result_plan->targetlist,
+ NIL,
+ AGG_HASHED,
+ list_length(parse->distinctClause),
+ extract_grouping_cols(parse->distinctClause,
+ result_plan->targetlist),
+ extract_grouping_ops(parse->distinctClause),
+ numDistinctRows,
+ 0,
+ result_plan);
+ /* Hashed aggregation produces randomly-ordered results */
+ current_pathkeys = NIL;
+ }
+ else
+ {
+ /*
+ * Use a Unique node to implement DISTINCT. Add an explicit sort
+ * if we couldn't make the path come out the way the Unique node
+ * needs it. If we do have to sort, sort by the more rigorous
+ * of DISTINCT and ORDER BY, to avoid a second sort below.
+ */
+ if (!pathkeys_contained_in(root->distinct_pathkeys,
+ current_pathkeys))
+ {
+ if (list_length(root->distinct_pathkeys) >=
+ list_length(root->sort_pathkeys))
+ current_pathkeys = root->distinct_pathkeys;
+ else
+ {
+ current_pathkeys = root->sort_pathkeys;
+ /* Assert checks that parser didn't mess up... */
+ Assert(pathkeys_contained_in(root->distinct_pathkeys,
+ current_pathkeys));
+ }
+
+ result_plan = (Plan *) make_sort_from_pathkeys(root,
+ result_plan,
+ current_pathkeys,
+ -1.0);
+ }
+
+ result_plan = (Plan *) make_unique(result_plan,
+ parse->distinctClause);
+ result_plan->plan_rows = dNumDistinctRows;
+ /* The Unique node won't change sort ordering */
}
}
/*
- * If there is a DISTINCT clause, add the UNIQUE node.
+ * If ORDER BY was given and we were not able to make the plan come out in
+ * the right order, add an explicit sort step.
*/
- if (parse->distinctClause)
+ if (parse->sortClause)
{
- result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);
-
- /*
- * If there was grouping or aggregation, leave plan_rows as-is (ie,
- * assume the result was already mostly unique). If not, use the
- * number of distinct-groups calculated by query_planner.
- */
- if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs)
- result_plan->plan_rows = dNumGroups;
+ if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
+ {
+ result_plan = (Plan *) make_sort_from_pathkeys(root,
+ result_plan,
+ root->sort_pathkeys,
+ limit_tuples);
+ current_pathkeys = root->sort_pathkeys;
+ }
}
/*
@@ -1623,6 +1728,31 @@ extract_grouping_ops(List *groupClause)
}
/*
+ * extract_grouping_cols - make an array of the grouping column resnos
+ * for a SortGroupClause list
+ */
+static AttrNumber *
+extract_grouping_cols(List *groupClause, List *tlist)
+{
+ AttrNumber *grpColIdx;
+ int numCols = list_length(groupClause);
+ int colno = 0;
+ ListCell *glitem;
+
+ grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+
+ foreach(glitem, groupClause)
+ {
+ SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+ TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
+
+ grpColIdx[colno++] = tle->resno;
+ }
+
+ return grpColIdx;
+}
+
+/*
* grouping_is_sortable - is it possible to implement grouping list by sorting?
*
* This is easy since the parser will have included a sortop if one exists.
@@ -1680,6 +1810,7 @@ choose_hashed_grouping(PlannerInfo *root,
double cheapest_path_rows;
int cheapest_path_width;
Size hashentrysize;
+ List *target_pathkeys;
List *current_pathkeys;
Path hashed_p;
Path sorted_p;
@@ -1717,6 +1848,20 @@ choose_hashed_grouping(PlannerInfo *root,
return false;
/*
+ * When we have both GROUP BY and DISTINCT, use the more-rigorous of
+ * DISTINCT and ORDER BY as the assumed required output sort order.
+ * This is an oversimplification because the DISTINCT might get
+ * implemented via hashing, but it's not clear that the case is common
+ * enough (or that our estimates are good enough) to justify trying to
+ * solve it exactly.
+ */
+ if (list_length(root->distinct_pathkeys) >
+ list_length(root->sort_pathkeys))
+ target_pathkeys = root->distinct_pathkeys;
+ else
+ target_pathkeys = root->sort_pathkeys;
+
+ /*
* See if the estimated cost is no more than doing it the other way. While
* avoiding the need for sorted input is usually a win, the fact that the
* output won't be sorted may be a loss; so we need to do an actual cost
@@ -1737,8 +1882,8 @@ choose_hashed_grouping(PlannerInfo *root,
cheapest_path->startup_cost, cheapest_path->total_cost,
cheapest_path_rows);
/* Result of hashed agg is always unsorted */
- if (root->sort_pathkeys)
- cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
+ if (target_pathkeys)
+ cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost,
dNumGroups, cheapest_path_width, limit_tuples);
if (sorted_path)
@@ -1770,9 +1915,9 @@ choose_hashed_grouping(PlannerInfo *root,
sorted_p.startup_cost, sorted_p.total_cost,
cheapest_path_rows);
/* The Agg or Group node will preserve ordering */
- if (root->sort_pathkeys &&
- !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
- cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
+ if (target_pathkeys &&
+ !pathkeys_contained_in(target_pathkeys, current_pathkeys))
+ cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost,
dNumGroups, cheapest_path_width, limit_tuples);
/*
@@ -1791,6 +1936,111 @@ choose_hashed_grouping(PlannerInfo *root,
return false;
}
+/*
+ * choose_hashed_distinct - should we use hashing for DISTINCT?
+ *
+ * This is fairly similar to choose_hashed_grouping, but there are enough
+ * differences that it doesn't seem worth trying to unify the two functions.
+ *
+ * But note that making the two choices independently is a bit bogus in
+ * itself. If the two could be combined into a single choice operation
+ * it'd probably be better, but that seems far too unwieldy to be practical,
+ * especially considering that the combination of GROUP BY and DISTINCT
+ * isn't very common in real queries. By separating them, we are giving
+ * extra preference to using a sorting implementation when a common sort key
+ * is available ... and that's not necessarily wrong anyway.
+ *
+ * Note: this is only applied when both alternatives are actually feasible.
+ */
+static bool
+choose_hashed_distinct(PlannerInfo *root,
+ Plan *input_plan, List *input_pathkeys,
+ double tuple_fraction, double limit_tuples,
+ double dNumDistinctRows)
+{
+ int numDistinctCols = list_length(root->parse->distinctClause);
+ Size hashentrysize;
+ List *current_pathkeys;
+ Path hashed_p;
+ Path sorted_p;
+
+ /* Prefer sorting when enable_hashagg is off */
+ if (!enable_hashagg)
+ return false;
+
+ /*
+ * Don't do it if it doesn't look like the hashtable will fit into
+ * work_mem.
+ */
+ hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));
+
+ if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
+ return false;
+
+ /*
+ * See if the estimated cost is no more than doing it the other way. While
+ * avoiding the need for sorted input is usually a win, the fact that the
+ * output won't be sorted may be a loss; so we need to do an actual cost
+ * comparison.
+ *
+ * We need to consider input_plan + hashagg [+ final sort] versus
+ * input_plan [+ sort] + group [+ final sort] where brackets indicate
+ * a step that may not be needed.
+ *
+ * These path variables are dummies that just hold cost fields; we don't
+ * make actual Paths for these steps.
+ */
+ cost_agg(&hashed_p, root, AGG_HASHED, 0,
+ numDistinctCols, dNumDistinctRows,
+ input_plan->startup_cost, input_plan->total_cost,
+ input_plan->plan_rows);
+ /*
+ * Result of hashed agg is always unsorted, so if ORDER BY is present
+ * we need to charge for the final sort.
+ */
+ if (root->parse->sortClause)
+ cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
+ dNumDistinctRows, input_plan->plan_width, limit_tuples);
+
+ /* Now for the GROUP case ... */
+ sorted_p.startup_cost = input_plan->startup_cost;
+ sorted_p.total_cost = input_plan->total_cost;
+ current_pathkeys = input_pathkeys;
+ if (!pathkeys_contained_in(root->distinct_pathkeys, current_pathkeys))
+ {
+ /* We don't want to sort twice */
+ if (list_length(root->distinct_pathkeys) >=
+ list_length(root->sort_pathkeys))
+ current_pathkeys = root->distinct_pathkeys;
+ else
+ current_pathkeys = root->sort_pathkeys;
+ cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost,
+ input_plan->plan_rows, input_plan->plan_width, -1.0);
+ }
+ cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
+ sorted_p.startup_cost, sorted_p.total_cost,
+ input_plan->plan_rows);
+ if (root->parse->sortClause &&
+ !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
+ cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
+ dNumDistinctRows, input_plan->plan_width, limit_tuples);
+
+ /*
+ * Now make the decision using the top-level tuple fraction. First we
+ * have to convert an absolute count (LIMIT) into fractional form.
+ */
+ if (tuple_fraction >= 1.0)
+ tuple_fraction /= dNumDistinctRows;
+
+ if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+ tuple_fraction) < 0)
+ {
+ /* Hashed is cheaper, so use it */
+ return true;
+ }
+ return false;
+}
+
/*---------------
* make_subplanTargetList
* Generate appropriate target list when grouping is required.
@@ -1857,7 +2107,7 @@ make_subplanTargetList(PlannerInfo *root,
/*
* Otherwise, start with a "flattened" tlist (having just the vars
- * mentioned in the targetlist and HAVING qual --- but not upper- level
+ * mentioned in the targetlist and HAVING qual --- but not upper-level
* Vars; they will be replaced by Params later on).
*/
sub_tlist = flatten_tlist(tlist);
@@ -1886,16 +2136,28 @@ make_subplanTargetList(PlannerInfo *root,
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
TargetEntry *te = NULL;
- ListCell *sl;
- /* Find or make a matching sub_tlist entry */
- foreach(sl, sub_tlist)
+ /*
+ * Find or make a matching sub_tlist entry. If the groupexpr
+ * isn't a Var, no point in searching. (Note that the parser
+ * won't make multiple groupClause entries for the same TLE.)
+ */
+ if (groupexpr && IsA(groupexpr, Var))
{
- te = (TargetEntry *) lfirst(sl);
- if (equal(groupexpr, te->expr))
- break;
+ ListCell *sl;
+
+ foreach(sl, sub_tlist)
+ {
+ TargetEntry *lte = (TargetEntry *) lfirst(sl);
+
+ if (equal(groupexpr, lte->expr))
+ {
+ te = lte;
+ break;
+ }
+ }
}
- if (!sl)
+ if (!te)
{
te = makeTargetEntry((Expr *) groupexpr,
list_length(sub_tlist) + 1,
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 76e59c82d6e..2b04ee5e337 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.174 2008/08/05 02:43:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1447,9 +1447,6 @@ transformDistinctClause(ParseState *pstate,
/*
* Now add any remaining non-resjunk tlist items, using default
* sort/group semantics for their data types.
- *
- * XXX for now, the planner requires distinctClause to be sortable,
- * so we have to insist on that here.
*/
foreach(tlitem, *targetlist)
{
@@ -1459,8 +1456,7 @@ transformDistinctClause(ParseState *pstate,
continue; /* ignore junk */
result = addTargetToGroupList(pstate, tle,
result, *targetlist,
- true, /* XXX for now */
- true);
+ false, true);
}
return result;
@@ -1555,8 +1551,7 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
result = addTargetToGroupList(pstate, tle,
result, *targetlist,
- true, /* someday allow hash-only? */
- true);
+ false, true);
}
return result;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 8476d7e85c1..f8c23071661 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.156 2008/04/21 20:54:15 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.157 2008/08/05 02:43:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -162,8 +162,9 @@ typedef struct PlannerInfo
List *query_pathkeys; /* desired pathkeys for query_planner(), and
* actual pathkeys afterwards */
- List *group_pathkeys; /* groupClause pathkeys, if any */
- List *sort_pathkeys; /* sortClause pathkeys, if any */
+ List *group_pathkeys; /* groupClause pathkeys, if any */
+ List *distinct_pathkeys; /* distinctClause pathkeys, if any */
+ List *sort_pathkeys; /* sortClause pathkeys, if any */
List *initial_rels; /* RelOptInfos we are now trying to join */
diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out
index c5ad36fdd32..d404d9db681 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -79,7 +79,7 @@ INSERT INTO TEMP_GROUP
INSERT INTO TEMP_GROUP
SELECT 2, i.f1, f.f1
FROM INT4_TBL i, FLOAT8_TBL f;
-SELECT DISTINCT f1 AS two FROM TEMP_GROUP;
+SELECT DISTINCT f1 AS two FROM TEMP_GROUP ORDER BY 1;
two
-----
1
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 69f0efb8d42..533bac3ab67 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -129,7 +129,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.prorettype < p2.prorettype);
+ (p1.prorettype < p2.prorettype)
+ORDER BY 1, 2;
prorettype | prorettype
------------+------------
25 | 1043
@@ -142,7 +143,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[0] < p2.proargtypes[0]);
+ (p1.proargtypes[0] < p2.proargtypes[0])
+ORDER BY 1, 2;
proargtypes | proargtypes
-------------+-------------
25 | 1042
@@ -158,7 +160,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[1] < p2.proargtypes[1]);
+ (p1.proargtypes[1] < p2.proargtypes[1])
+ORDER BY 1, 2;
proargtypes | proargtypes
-------------+-------------
23 | 28
@@ -173,7 +176,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[2] < p2.proargtypes[2]);
+ (p1.proargtypes[2] < p2.proargtypes[2])
+ORDER BY 1, 2;
proargtypes | proargtypes
-------------+-------------
1114 | 1184
@@ -185,7 +189,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[3] < p2.proargtypes[3]);
+ (p1.proargtypes[3] < p2.proargtypes[3])
+ORDER BY 1, 2;
proargtypes | proargtypes
-------------+-------------
1114 | 1184
@@ -197,7 +202,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[4] < p2.proargtypes[4]);
+ (p1.proargtypes[4] < p2.proargtypes[4])
+ORDER BY 1, 2;
proargtypes | proargtypes
-------------+-------------
(0 rows)
@@ -208,7 +214,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[5] < p2.proargtypes[5]);
+ (p1.proargtypes[5] < p2.proargtypes[5])
+ORDER BY 1, 2;
proargtypes | proargtypes
-------------+-------------
(0 rows)
@@ -219,7 +226,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[6] < p2.proargtypes[6]);
+ (p1.proargtypes[6] < p2.proargtypes[6])
+ORDER BY 1, 2;
proargtypes | proargtypes
-------------+-------------
(0 rows)
@@ -230,7 +238,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[7] < p2.proargtypes[7]);
+ (p1.proargtypes[7] < p2.proargtypes[7])
+ORDER BY 1, 2;
proargtypes | proargtypes
-------------+-------------
(0 rows)
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index abe34ae7ae5..fe64fe0d9c1 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -14,7 +14,7 @@ SELECT DISTINCT two FROM tmp;
--
-- awk '{print $5;}' onek.data | sort -n | uniq
--
-SELECT DISTINCT ten FROM tmp;
+SELECT DISTINCT ten FROM tmp ORDER BY 1;
ten
-----
0
@@ -32,7 +32,7 @@ SELECT DISTINCT ten FROM tmp;
--
-- awk '{print $16;}' onek.data | sort -d | uniq
--
-SELECT DISTINCT string4 FROM tmp;
+SELECT DISTINCT string4 FROM tmp ORDER BY 1;
string4
---------
AAAAxx
diff --git a/src/test/regress/input/misc.source b/src/test/regress/input/misc.source
index c82b1405978..c5813630625 100644
--- a/src/test/regress/input/misc.source
+++ b/src/test/regress/input/misc.source
@@ -183,7 +183,8 @@ SELECT p.name, name(p.hobbies) FROM person* p;
-- the next two queries demonstrate how functions generate bogus duplicates.
-- this is a "feature" ..
--
-SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r;
+SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r
+ ORDER BY 1,2;
SELECT hobbies_r.name, (hobbies_r.equipment).name FROM hobbies_r;
diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source
index e409c0a1001..91e0d2b04d7 100644
--- a/src/test/regress/output/misc.source
+++ b/src/test/regress/output/misc.source
@@ -469,7 +469,8 @@ SELECT p.name, name(p.hobbies) FROM person* p;
-- the next two queries demonstrate how functions generate bogus duplicates.
-- this is a "feature" ..
--
-SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r;
+SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r
+ ORDER BY 1,2;
name | name
-------------+---------------
basketball | hightops
diff --git a/src/test/regress/sql/numerology.sql b/src/test/regress/sql/numerology.sql
index 2220fdba385..6626cf20ebc 100644
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -63,7 +63,7 @@ INSERT INTO TEMP_GROUP
SELECT 2, i.f1, f.f1
FROM INT4_TBL i, FLOAT8_TBL f;
-SELECT DISTINCT f1 AS two FROM TEMP_GROUP;
+SELECT DISTINCT f1 AS two FROM TEMP_GROUP ORDER BY 1;
SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
FROM TEMP_GROUP
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index e2ab6e57d4a..5017849830a 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -121,7 +121,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.prorettype < p2.prorettype);
+ (p1.prorettype < p2.prorettype)
+ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[0], p2.proargtypes[0]
FROM pg_proc AS p1, pg_proc AS p2
@@ -129,7 +130,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[0] < p2.proargtypes[0]);
+ (p1.proargtypes[0] < p2.proargtypes[0])
+ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[1], p2.proargtypes[1]
FROM pg_proc AS p1, pg_proc AS p2
@@ -137,7 +139,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[1] < p2.proargtypes[1]);
+ (p1.proargtypes[1] < p2.proargtypes[1])
+ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[2], p2.proargtypes[2]
FROM pg_proc AS p1, pg_proc AS p2
@@ -145,7 +148,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[2] < p2.proargtypes[2]);
+ (p1.proargtypes[2] < p2.proargtypes[2])
+ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[3], p2.proargtypes[3]
FROM pg_proc AS p1, pg_proc AS p2
@@ -153,7 +157,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[3] < p2.proargtypes[3]);
+ (p1.proargtypes[3] < p2.proargtypes[3])
+ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[4], p2.proargtypes[4]
FROM pg_proc AS p1, pg_proc AS p2
@@ -161,7 +166,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[4] < p2.proargtypes[4]);
+ (p1.proargtypes[4] < p2.proargtypes[4])
+ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[5], p2.proargtypes[5]
FROM pg_proc AS p1, pg_proc AS p2
@@ -169,7 +175,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[5] < p2.proargtypes[5]);
+ (p1.proargtypes[5] < p2.proargtypes[5])
+ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[6], p2.proargtypes[6]
FROM pg_proc AS p1, pg_proc AS p2
@@ -177,7 +184,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[6] < p2.proargtypes[6]);
+ (p1.proargtypes[6] < p2.proargtypes[6])
+ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[7], p2.proargtypes[7]
FROM pg_proc AS p1, pg_proc AS p2
@@ -185,7 +193,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND
- (p1.proargtypes[7] < p2.proargtypes[7]);
+ (p1.proargtypes[7] < p2.proargtypes[7])
+ORDER BY 1, 2;
-- Look for functions that return type "internal" and do not have any
-- "internal" argument. Such a function would be a security hole since
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index c4a63aaf16f..7416e0194e1 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -10,12 +10,12 @@ SELECT DISTINCT two FROM tmp;
--
-- awk '{print $5;}' onek.data | sort -n | uniq
--
-SELECT DISTINCT ten FROM tmp;
+SELECT DISTINCT ten FROM tmp ORDER BY 1;
--
-- awk '{print $16;}' onek.data | sort -d | uniq
--
-SELECT DISTINCT string4 FROM tmp;
+SELECT DISTINCT string4 FROM tmp ORDER BY 1;
--
-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |