diff options
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 77 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 31 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planmain.c | 13 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 30 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 5 |
5 files changed, 114 insertions, 42 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 9cd3a51d897..82dfc77f065 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.181 2007/04/21 21:01:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.182 2007/05/04 01:13:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -922,6 +922,10 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel) * disk traffic = 2 * relsize * ceil(logM(p / (2*work_mem))) * cpu = comparison_cost * t * log2(t) * + * If the sort is bounded (i.e., only the first k result tuples are needed) + * and k tuples can fit into work_mem, we use a heap method that keeps only + * k tuples in the heap; this will require about t*log2(k) tuple comparisons. + * * The disk traffic is assumed to be 3/4ths sequential and 1/4th random * accesses (XXX can't we refine that guess?) * @@ -932,6 +936,7 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel) * 'input_cost' is the total cost for reading the input data * 'tuples' is the number of tuples in the relation * 'width' is the average tuple width in bytes + * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound * * NOTE: some callers currently pass NIL for pathkeys because they * can't conveniently supply the sort keys. Since this routine doesn't @@ -942,11 +947,14 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel) */ void cost_sort(Path *path, PlannerInfo *root, - List *pathkeys, Cost input_cost, double tuples, int width) + List *pathkeys, Cost input_cost, double tuples, int width, + double limit_tuples) { Cost startup_cost = input_cost; Cost run_cost = 0; - double nbytes = relation_byte_size(tuples, width); + double input_bytes = relation_byte_size(tuples, width); + double output_bytes; + double output_tuples; long work_mem_bytes = work_mem * 1024L; if (!enable_sort) @@ -959,23 +967,39 @@ cost_sort(Path *path, PlannerInfo *root, if (tuples < 2.0) tuples = 2.0; - /* - * CPU costs - * - * Assume about two operator evals per tuple comparison and N log2 N - * comparisons - */ - startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples); + /* Do we have a useful LIMIT? */ + if (limit_tuples > 0 && limit_tuples < tuples) + { + output_tuples = limit_tuples; + output_bytes = relation_byte_size(output_tuples, width); + } + else + { + output_tuples = tuples; + output_bytes = input_bytes; + } - /* disk costs */ - if (nbytes > work_mem_bytes) + if (output_bytes > work_mem_bytes) { - double npages = ceil(nbytes / BLCKSZ); - double nruns = (nbytes / work_mem_bytes) * 0.5; + /* + * We'll have to use a disk-based sort of all the tuples + */ + double npages = ceil(input_bytes / BLCKSZ); + double nruns = (input_bytes / work_mem_bytes) * 0.5; double mergeorder = tuplesort_merge_order(work_mem_bytes); double log_runs; double npageaccesses; + /* + * CPU costs + * + * Assume about two operator evals per tuple comparison and N log2 N + * comparisons + */ + startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples); + + /* Disk costs */ + /* Compute logM(r) as log(r) / log(M) */ if (nruns > mergeorder) log_runs = ceil(log(nruns) / log(mergeorder)); @@ -986,10 +1010,27 @@ cost_sort(Path *path, PlannerInfo *root, startup_cost += npageaccesses * (seq_page_cost * 0.75 + random_page_cost * 0.25); } + else if (tuples > 2 * output_tuples || input_bytes > work_mem_bytes) + { + /* + * We'll use a bounded heap-sort keeping just K tuples in memory, + * for a total number of tuple comparisons of N log2 K; but the + * constant factor is a bit higher than for quicksort. Tweak it + * so that the cost curve is continuous at the crossover point. + */ + startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(2.0 * output_tuples); + } + else + { + /* We'll use plain quicksort on all the input tuples */ + startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples); + } /* * Also charge a small amount (arbitrarily set equal to operator cost) per - * extracted tuple. + * extracted tuple. Note it's correct to use tuples not output_tuples + * here --- the upper LIMIT will pro-rate the run cost so we'd be double + * counting the LIMIT otherwise. */ run_cost += cpu_operator_cost * tuples; @@ -1431,7 +1472,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) outersortkeys, outer_path->total_cost, outer_path_rows, - outer_path->parent->width); + outer_path->parent->width, + -1.0); startup_cost += sort_path.startup_cost; run_cost += (sort_path.total_cost - sort_path.startup_cost) * outerscansel; @@ -1450,7 +1492,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) innersortkeys, inner_path->total_cost, inner_path_rows, - inner_path->parent->width); + inner_path->parent->width, + -1.0); startup_cost += sort_path.startup_cost; run_cost += (sort_path.total_cost - sort_path.startup_cost) * innerscansel * rescanratio; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 74b89125665..be0162406bd 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.229 2007/04/21 21:01:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.230 2007/05/04 01:13:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -122,7 +122,8 @@ static MergeJoin *make_mergejoin(List *tlist, Plan *lefttree, Plan *righttree, JoinType jointype); static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols, - AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst); + AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst, + double limit_tuples); /* @@ -1579,7 +1580,8 @@ create_mergejoin_plan(PlannerInfo *root, outer_plan = (Plan *) make_sort_from_pathkeys(root, outer_plan, - best_path->outersortkeys); + best_path->outersortkeys, + -1.0); outerpathkeys = best_path->outersortkeys; } else @@ -1591,7 +1593,8 @@ create_mergejoin_plan(PlannerInfo *root, inner_plan = (Plan *) make_sort_from_pathkeys(root, inner_plan, - best_path->innersortkeys); + best_path->innersortkeys, + -1.0); innerpathkeys = best_path->innersortkeys; } else @@ -2589,11 +2592,13 @@ make_mergejoin(List *tlist, * make_sort --- basic routine to build a Sort plan node * * Caller must have built the sortColIdx, sortOperators, and nullsFirst - * arrays already. + * arrays already. limit_tuples is as for cost_sort (in particular, pass + * -1 if no limit) */ static Sort * make_sort(PlannerInfo *root, Plan *lefttree, int numCols, - AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst) + AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst, + double limit_tuples) { Sort *node = makeNode(Sort); Plan *plan = &node->plan; @@ -2603,7 +2608,8 @@ make_sort(PlannerInfo *root, Plan *lefttree, int numCols, cost_sort(&sort_path, root, NIL, lefttree->total_cost, lefttree->plan_rows, - lefttree->plan_width); + lefttree->plan_width, + limit_tuples); plan->startup_cost = sort_path.startup_cost; plan->total_cost = sort_path.total_cost; plan->targetlist = lefttree->targetlist; @@ -2664,6 +2670,8 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first, * * 'lefttree' is the node which yields input tuples * 'pathkeys' is the list of pathkeys by which the result is to be sorted + * 'limit_tuples' is the bound on the number of output tuples; + * -1 if no bound * * We must convert the pathkey information into arrays of sort key column * numbers and sort operator OIDs. @@ -2675,7 +2683,8 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first, * adding a Result node just to do the projection. */ Sort * -make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys) +make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, + double limit_tuples) { List *tlist = lefttree->targetlist; ListCell *i; @@ -2810,7 +2819,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys) Assert(numsortkeys > 0); return make_sort(root, lefttree, numsortkeys, - sortColIdx, sortOperators, nullsFirst); + sortColIdx, sortOperators, nullsFirst, limit_tuples); } /* @@ -2859,7 +2868,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree) Assert(numsortkeys > 0); return make_sort(root, lefttree, numsortkeys, - sortColIdx, sortOperators, nullsFirst); + sortColIdx, sortOperators, nullsFirst, -1.0); } /* @@ -2919,7 +2928,7 @@ make_sort_from_groupcols(PlannerInfo *root, Assert(numsortkeys > 0); return make_sort(root, lefttree, numsortkeys, - sortColIdx, sortOperators, nullsFirst); + sortColIdx, sortOperators, nullsFirst, -1.0); } Material * diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index f12e388d723..c31f7d5aa65 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.100 2007/04/21 21:01:45 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.101 2007/05/04 01:13:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -47,6 +47,8 @@ * tlist is the target list the query should produce * (this is NOT necessarily root->parse->targetList!) * tuple_fraction is the fraction of tuples we expect will be retrieved + * limit_tuples is a hard limit on number of tuples to retrieve, + * or -1 if no limit * * Output parameters: * *cheapest_path receives the overall-cheapest path for the query @@ -74,9 +76,13 @@ * from the plan to be retrieved * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples * expected to be retrieved (ie, a LIMIT specification) + * Note that a nonzero tuple_fraction could come from outer context; it is + * therefore not redundant with limit_tuples. We use limit_tuples to determine + * whether a bounded sort can be used at runtime. */ void -query_planner(PlannerInfo *root, List *tlist, double tuple_fraction, +query_planner(PlannerInfo *root, List *tlist, + double tuple_fraction, double limit_tuples, Path **cheapest_path, Path **sorted_path, double *num_groups) { @@ -354,7 +360,8 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction, /* Figure cost for sorting */ cost_sort(&sort_path, root, root->query_pathkeys, cheapestpath->total_cost, - final_rel->rows, final_rel->width); + final_rel->rows, final_rel->width, + limit_tuples); } if (compare_fractional_path_costs(sortedpath, &sort_path, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d5dc6f7c7e6..366c50f2730 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.218 2007/04/27 22:05:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.219 2007/05/04 01:13:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -61,7 +61,8 @@ static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); static Oid *extract_grouping_ops(List *groupClause); -static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, +static bool choose_hashed_grouping(PlannerInfo *root, + double tuple_fraction, double limit_tuples, Path *cheapest_path, Path *sorted_path, Oid *groupOperators, double dNumGroups, AggClauseCounts *agg_counts); @@ -696,6 +697,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) List *tlist = parse->targetList; int64 offset_est = 0; int64 count_est = 0; + double limit_tuples = -1.0; Plan *result_plan; List *current_pathkeys; List *sort_pathkeys; @@ -703,8 +705,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ if (parse->limitCount || parse->limitOffset) + { tuple_fraction = preprocess_limit(root, tuple_fraction, &offset_est, &count_est); + /* + * If we have a known LIMIT, and don't have an unknown OFFSET, + * we can estimate the effects of using a bounded sort. + */ + if (count_est > 0 && offset_est >= 0) + limit_tuples = (double) count_est + (double) offset_est; + } if (parse->setOperations) { @@ -850,7 +860,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * estimate the number of groups in the query, and canonicalize all * the pathkeys. */ - query_planner(root, sub_tlist, tuple_fraction, + query_planner(root, sub_tlist, tuple_fraction, limit_tuples, &cheapest_path, &sorted_path, &dNumGroups); group_pathkeys = root->group_pathkeys; @@ -864,7 +874,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) { groupOperators = extract_grouping_ops(parse->groupClause); use_hashed_grouping = - choose_hashed_grouping(root, tuple_fraction, + choose_hashed_grouping(root, tuple_fraction, limit_tuples, cheapest_path, sorted_path, groupOperators, dNumGroups, &agg_counts); @@ -1099,7 +1109,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) { result_plan = (Plan *) make_sort_from_pathkeys(root, result_plan, - sort_pathkeys); + sort_pathkeys, + limit_tuples); current_pathkeys = sort_pathkeys; } } @@ -1414,7 +1425,8 @@ extract_grouping_ops(List *groupClause) * choose_hashed_grouping - should we use hashed grouping? */ static bool -choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, +choose_hashed_grouping(PlannerInfo *root, + double tuple_fraction, double limit_tuples, Path *cheapest_path, Path *sorted_path, Oid *groupOperators, double dNumGroups, AggClauseCounts *agg_counts) @@ -1499,7 +1511,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, /* Result of hashed agg is always unsorted */ if (root->sort_pathkeys) cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, - dNumGroups, cheapest_path_width); + dNumGroups, cheapest_path_width, limit_tuples); if (sorted_path) { @@ -1516,7 +1528,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost, - cheapest_path_rows, cheapest_path_width); + cheapest_path_rows, cheapest_path_width, -1.0); current_pathkeys = root->group_pathkeys; } @@ -1533,7 +1545,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, if (root->sort_pathkeys && !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, - dNumGroups, cheapest_path_width); + dNumGroups, cheapest_path_width, limit_tuples); /* * Now make the decision using the top-level tuple fraction. First we diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 6d440001d6b..bd95a0e0e23 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.139 2007/04/21 21:01:45 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.140 2007/05/04 01:13:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -842,7 +842,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) cost_sort(&sort_path, root, NIL, subpath->total_cost, rel->rows, - rel->width); + rel->width, + -1.0); /* * Charge one cpu_operator_cost per comparison per input tuple. We assume |