aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r--src/backend/optimizer/path/costsize.c77
-rw-r--r--src/backend/optimizer/plan/createplan.c31
-rw-r--r--src/backend/optimizer/plan/planmain.c13
-rw-r--r--src/backend/optimizer/plan/planner.c30
-rw-r--r--src/backend/optimizer/util/pathnode.c5
5 files changed, 114 insertions, 42 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 9cd3a51d897..82dfc77f065 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.181 2007/04/21 21:01:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.182 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -922,6 +922,10 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
* disk traffic = 2 * relsize * ceil(logM(p / (2*work_mem)))
* cpu = comparison_cost * t * log2(t)
*
+ * If the sort is bounded (i.e., only the first k result tuples are needed)
+ * and k tuples can fit into work_mem, we use a heap method that keeps only
+ * k tuples in the heap; this will require about t*log2(k) tuple comparisons.
+ *
* The disk traffic is assumed to be 3/4ths sequential and 1/4th random
* accesses (XXX can't we refine that guess?)
*
@@ -932,6 +936,7 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
* 'input_cost' is the total cost for reading the input data
* 'tuples' is the number of tuples in the relation
* 'width' is the average tuple width in bytes
+ * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
*
* NOTE: some callers currently pass NIL for pathkeys because they
* can't conveniently supply the sort keys. Since this routine doesn't
@@ -942,11 +947,14 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
*/
void
cost_sort(Path *path, PlannerInfo *root,
- List *pathkeys, Cost input_cost, double tuples, int width)
+ List *pathkeys, Cost input_cost, double tuples, int width,
+ double limit_tuples)
{
Cost startup_cost = input_cost;
Cost run_cost = 0;
- double nbytes = relation_byte_size(tuples, width);
+ double input_bytes = relation_byte_size(tuples, width);
+ double output_bytes;
+ double output_tuples;
long work_mem_bytes = work_mem * 1024L;
if (!enable_sort)
@@ -959,23 +967,39 @@ cost_sort(Path *path, PlannerInfo *root,
if (tuples < 2.0)
tuples = 2.0;
- /*
- * CPU costs
- *
- * Assume about two operator evals per tuple comparison and N log2 N
- * comparisons
- */
- startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
+ /* Do we have a useful LIMIT? */
+ if (limit_tuples > 0 && limit_tuples < tuples)
+ {
+ output_tuples = limit_tuples;
+ output_bytes = relation_byte_size(output_tuples, width);
+ }
+ else
+ {
+ output_tuples = tuples;
+ output_bytes = input_bytes;
+ }
- /* disk costs */
- if (nbytes > work_mem_bytes)
+ if (output_bytes > work_mem_bytes)
{
- double npages = ceil(nbytes / BLCKSZ);
- double nruns = (nbytes / work_mem_bytes) * 0.5;
+ /*
+ * We'll have to use a disk-based sort of all the tuples
+ */
+ double npages = ceil(input_bytes / BLCKSZ);
+ double nruns = (input_bytes / work_mem_bytes) * 0.5;
double mergeorder = tuplesort_merge_order(work_mem_bytes);
double log_runs;
double npageaccesses;
+ /*
+ * CPU costs
+ *
+ * Assume about two operator evals per tuple comparison and N log2 N
+ * comparisons
+ */
+ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
+
+ /* Disk costs */
+
/* Compute logM(r) as log(r) / log(M) */
if (nruns > mergeorder)
log_runs = ceil(log(nruns) / log(mergeorder));
@@ -986,10 +1010,27 @@ cost_sort(Path *path, PlannerInfo *root,
startup_cost += npageaccesses *
(seq_page_cost * 0.75 + random_page_cost * 0.25);
}
+ else if (tuples > 2 * output_tuples || input_bytes > work_mem_bytes)
+ {
+ /*
+ * We'll use a bounded heap-sort keeping just K tuples in memory,
+ * for a total number of tuple comparisons of N log2 K; but the
+ * constant factor is a bit higher than for quicksort. Tweak it
+ * so that the cost curve is continuous at the crossover point.
+ */
+ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(2.0 * output_tuples);
+ }
+ else
+ {
+ /* We'll use plain quicksort on all the input tuples */
+ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
+ }
/*
* Also charge a small amount (arbitrarily set equal to operator cost) per
- * extracted tuple.
+ * extracted tuple. Note it's correct to use tuples not output_tuples
+ * here --- the upper LIMIT will pro-rate the run cost so we'd be double
+ * counting the LIMIT otherwise.
*/
run_cost += cpu_operator_cost * tuples;
@@ -1431,7 +1472,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
outersortkeys,
outer_path->total_cost,
outer_path_rows,
- outer_path->parent->width);
+ outer_path->parent->width,
+ -1.0);
startup_cost += sort_path.startup_cost;
run_cost += (sort_path.total_cost - sort_path.startup_cost)
* outerscansel;
@@ -1450,7 +1492,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
innersortkeys,
inner_path->total_cost,
inner_path_rows,
- inner_path->parent->width);
+ inner_path->parent->width,
+ -1.0);
startup_cost += sort_path.startup_cost;
run_cost += (sort_path.total_cost - sort_path.startup_cost)
* innerscansel * rescanratio;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 74b89125665..be0162406bd 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.229 2007/04/21 21:01:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.230 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -122,7 +122,8 @@ static MergeJoin *make_mergejoin(List *tlist,
Plan *lefttree, Plan *righttree,
JoinType jointype);
static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
- AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst);
+ AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst,
+ double limit_tuples);
/*
@@ -1579,7 +1580,8 @@ create_mergejoin_plan(PlannerInfo *root,
outer_plan = (Plan *)
make_sort_from_pathkeys(root,
outer_plan,
- best_path->outersortkeys);
+ best_path->outersortkeys,
+ -1.0);
outerpathkeys = best_path->outersortkeys;
}
else
@@ -1591,7 +1593,8 @@ create_mergejoin_plan(PlannerInfo *root,
inner_plan = (Plan *)
make_sort_from_pathkeys(root,
inner_plan,
- best_path->innersortkeys);
+ best_path->innersortkeys,
+ -1.0);
innerpathkeys = best_path->innersortkeys;
}
else
@@ -2589,11 +2592,13 @@ make_mergejoin(List *tlist,
* make_sort --- basic routine to build a Sort plan node
*
* Caller must have built the sortColIdx, sortOperators, and nullsFirst
- * arrays already.
+ * arrays already. limit_tuples is as for cost_sort (in particular, pass
+ * -1 if no limit)
*/
static Sort *
make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
- AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst)
+ AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst,
+ double limit_tuples)
{
Sort *node = makeNode(Sort);
Plan *plan = &node->plan;
@@ -2603,7 +2608,8 @@ make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
cost_sort(&sort_path, root, NIL,
lefttree->total_cost,
lefttree->plan_rows,
- lefttree->plan_width);
+ lefttree->plan_width,
+ limit_tuples);
plan->startup_cost = sort_path.startup_cost;
plan->total_cost = sort_path.total_cost;
plan->targetlist = lefttree->targetlist;
@@ -2664,6 +2670,8 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first,
*
* 'lefttree' is the node which yields input tuples
* 'pathkeys' is the list of pathkeys by which the result is to be sorted
+ * 'limit_tuples' is the bound on the number of output tuples;
+ * -1 if no bound
*
* We must convert the pathkey information into arrays of sort key column
* numbers and sort operator OIDs.
@@ -2675,7 +2683,8 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first,
* adding a Result node just to do the projection.
*/
Sort *
-make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys)
+make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys,
+ double limit_tuples)
{
List *tlist = lefttree->targetlist;
ListCell *i;
@@ -2810,7 +2819,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys)
Assert(numsortkeys > 0);
return make_sort(root, lefttree, numsortkeys,
- sortColIdx, sortOperators, nullsFirst);
+ sortColIdx, sortOperators, nullsFirst, limit_tuples);
}
/*
@@ -2859,7 +2868,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree)
Assert(numsortkeys > 0);
return make_sort(root, lefttree, numsortkeys,
- sortColIdx, sortOperators, nullsFirst);
+ sortColIdx, sortOperators, nullsFirst, -1.0);
}
/*
@@ -2919,7 +2928,7 @@ make_sort_from_groupcols(PlannerInfo *root,
Assert(numsortkeys > 0);
return make_sort(root, lefttree, numsortkeys,
- sortColIdx, sortOperators, nullsFirst);
+ sortColIdx, sortOperators, nullsFirst, -1.0);
}
Material *
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index f12e388d723..c31f7d5aa65 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.100 2007/04/21 21:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.101 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -47,6 +47,8 @@
* tlist is the target list the query should produce
* (this is NOT necessarily root->parse->targetList!)
* tuple_fraction is the fraction of tuples we expect will be retrieved
+ * limit_tuples is a hard limit on number of tuples to retrieve,
+ * or -1 if no limit
*
* Output parameters:
* *cheapest_path receives the overall-cheapest path for the query
@@ -74,9 +76,13 @@
* from the plan to be retrieved
* tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
* expected to be retrieved (ie, a LIMIT specification)
+ * Note that a nonzero tuple_fraction could come from outer context; it is
+ * therefore not redundant with limit_tuples. We use limit_tuples to determine
+ * whether a bounded sort can be used at runtime.
*/
void
-query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
+query_planner(PlannerInfo *root, List *tlist,
+ double tuple_fraction, double limit_tuples,
Path **cheapest_path, Path **sorted_path,
double *num_groups)
{
@@ -354,7 +360,8 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
/* Figure cost for sorting */
cost_sort(&sort_path, root, root->query_pathkeys,
cheapestpath->total_cost,
- final_rel->rows, final_rel->width);
+ final_rel->rows, final_rel->width,
+ limit_tuples);
}
if (compare_fractional_path_costs(sortedpath, &sort_path,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index d5dc6f7c7e6..366c50f2730 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.218 2007/04/27 22:05:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.219 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -61,7 +61,8 @@ static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
int64 *offset_est, int64 *count_est);
static Oid *extract_grouping_ops(List *groupClause);
-static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
+static bool choose_hashed_grouping(PlannerInfo *root,
+ double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path,
Oid *groupOperators, double dNumGroups,
AggClauseCounts *agg_counts);
@@ -696,6 +697,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
List *tlist = parse->targetList;
int64 offset_est = 0;
int64 count_est = 0;
+ double limit_tuples = -1.0;
Plan *result_plan;
List *current_pathkeys;
List *sort_pathkeys;
@@ -703,8 +705,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
if (parse->limitCount || parse->limitOffset)
+ {
tuple_fraction = preprocess_limit(root, tuple_fraction,
&offset_est, &count_est);
+ /*
+ * If we have a known LIMIT, and don't have an unknown OFFSET,
+ * we can estimate the effects of using a bounded sort.
+ */
+ if (count_est > 0 && offset_est >= 0)
+ limit_tuples = (double) count_est + (double) offset_est;
+ }
if (parse->setOperations)
{
@@ -850,7 +860,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* estimate the number of groups in the query, and canonicalize all
* the pathkeys.
*/
- query_planner(root, sub_tlist, tuple_fraction,
+ query_planner(root, sub_tlist, tuple_fraction, limit_tuples,
&cheapest_path, &sorted_path, &dNumGroups);
group_pathkeys = root->group_pathkeys;
@@ -864,7 +874,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
{
groupOperators = extract_grouping_ops(parse->groupClause);
use_hashed_grouping =
- choose_hashed_grouping(root, tuple_fraction,
+ choose_hashed_grouping(root, tuple_fraction, limit_tuples,
cheapest_path, sorted_path,
groupOperators, dNumGroups,
&agg_counts);
@@ -1099,7 +1109,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
{
result_plan = (Plan *) make_sort_from_pathkeys(root,
result_plan,
- sort_pathkeys);
+ sort_pathkeys,
+ limit_tuples);
current_pathkeys = sort_pathkeys;
}
}
@@ -1414,7 +1425,8 @@ extract_grouping_ops(List *groupClause)
* choose_hashed_grouping - should we use hashed grouping?
*/
static bool
-choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
+choose_hashed_grouping(PlannerInfo *root,
+ double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path,
Oid *groupOperators, double dNumGroups,
AggClauseCounts *agg_counts)
@@ -1499,7 +1511,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
/* Result of hashed agg is always unsorted */
if (root->sort_pathkeys)
cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
- dNumGroups, cheapest_path_width);
+ dNumGroups, cheapest_path_width, limit_tuples);
if (sorted_path)
{
@@ -1516,7 +1528,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
{
cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
- cheapest_path_rows, cheapest_path_width);
+ cheapest_path_rows, cheapest_path_width, -1.0);
current_pathkeys = root->group_pathkeys;
}
@@ -1533,7 +1545,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
if (root->sort_pathkeys &&
!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
- dNumGroups, cheapest_path_width);
+ dNumGroups, cheapest_path_width, limit_tuples);
/*
* Now make the decision using the top-level tuple fraction. First we
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 6d440001d6b..bd95a0e0e23 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.139 2007/04/21 21:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.140 2007/05/04 01:13:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -842,7 +842,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
cost_sort(&sort_path, root, NIL,
subpath->total_cost,
rel->rows,
- rel->width);
+ rel->width,
+ -1.0);
/*
* Charge one cpu_operator_cost per comparison per input tuple. We assume