Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r--    src/backend/optimizer/path/costsize.c    458
1 file changed, 223 insertions, 235 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 99ee42cf8c7..8261e17c02e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -18,15 +18,14 @@
  * Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.46 1999/11/23 20:06:54 momjian Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.47 2000/01/09 00:26:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
-#include <math.h>
-
 #include "postgres.h"
 
+#include <math.h>
 #ifdef HAVE_LIMITS_H
 #include <limits.h>
 #ifndef MAXINT
@@ -45,13 +44,17 @@
 #include "utils/lsyscache.h"
 
 
-static int  compute_targetlist_width(List *targetlist);
+static void set_rel_width(Query *root, RelOptInfo *rel);
 static int  compute_attribute_width(TargetEntry *tlistentry);
-static double relation_byte_size(int tuples, int width);
+static double relation_byte_size(double tuples, int width);
+static double page_size(double tuples, int width);
 static double base_log(double x, double b);
 
-int         _disable_cost_ = 30000000;
+Cost        _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
+Cost        _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
+
+Cost        _disable_cost_ = 100000000.0;
 
 bool        _enable_seqscan_ = true;
 bool        _enable_indexscan_ = true;
@@ -61,9 +64,6 @@ bool        _enable_mergejoin_ = true;
 bool        _enable_hashjoin_ = true;
 bool        _enable_tidscan_ = true;
 
-Cost        _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
-Cost        _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
-
 /*
  * cost_seqscan
  *    Determines and returns the cost of scanning a relation sequentially.
@@ -74,27 +74,21 @@ Cost        _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
  *    be).
  *
  *    disk = p
- *    cpu = *CPU-PAGE-WEIGHT* * t
- *
- * 'relid' is the relid of the relation to be scanned
- * 'relpages' is the number of pages in the relation to be scanned
- *    (as determined from the system catalogs)
- * 'reltuples' is the number of tuples in the relation to be scanned
- *
- * Returns a flonum.
- *
+ *    cpu = CPU-PAGE-WEIGHT * t
  */
 Cost
-cost_seqscan(int relid, int relpages, int reltuples)
+cost_seqscan(RelOptInfo *baserel)
 {
     Cost        temp = 0;
 
+    /* Should only be applied to base relations */
+    Assert(length(baserel->relids) == 1);
+
     if (!_enable_seqscan_)
         temp += _disable_cost_;
 
-    if (relid < 0)
+    if (lfirsti(baserel->relids) < 0)
     {
-
         /*
         * cost of sequentially scanning a materialized temporary relation
         */
@@ -102,9 +96,10 @@ cost_seqscan(int relid, int relpages, int reltuples)
     }
     else
     {
-        temp += relpages;
-        temp += _cpu_page_weight_ * reltuples;
+        temp += baserel->pages;
+        temp += _cpu_page_weight_ * baserel->tuples;
     }
+
     Assert(temp >= 0);
     return temp;
 }
@@ -115,31 +110,38 @@ * cost_index
  *    Determines and returns the cost of scanning a relation using an index.
  *
  *    disk = expected-index-pages + expected-data-pages
- *    cpu = *CPU-PAGE-WEIGHT* *
- *        (expected-index-tuples + expected-data-tuples)
- *
- * 'indexid' is the index OID
- * 'expected-indexpages' is the number of index pages examined in the scan
- * 'selec' is the selectivity of the index
- * 'relpages' is the number of pages in the main relation
- * 'reltuples' is the number of tuples in the main relation
- * 'indexpages' is the number of pages in the index relation
- * 'indextuples' is the number of tuples in the index relation
- *
- * Returns a flonum.
- *
+ *    cpu = CPU-INDEX-PAGE-WEIGHT * expected-index-tuples +
+ *          CPU-PAGE-WEIGHT * expected-data-tuples
+ *
+ * 'baserel' is the base relation the index is for
+ * 'index' is the index to be used
+ * 'expected_indexpages' is the estimated number of index pages that will
+ *        be touched in the scan (this is computed by index-type-specific code)
+ * 'selec' is the selectivity of the index, ie, the fraction of base-relation
+ *        tuples that we will have to fetch and examine
+ * 'is_injoin' is T if we are considering using the index scan as the inside
+ *        of a nestloop join.
+ *
+ * NOTE: 'selec' should be calculated on the basis of indexqual conditions
+ * only.  Any additional quals evaluated as qpquals may reduce the number
+ * of returned tuples, but they won't reduce the number of tuples we have
+ * to fetch from the table, so they don't reduce the scan cost.
  */
 Cost
-cost_index(Oid indexid,
-           int expected_indexpages,
-           Cost selec,
-           int relpages,
-           int reltuples,
-           int indexpages,
-           int indextuples,
+cost_index(RelOptInfo *baserel,
+           IndexOptInfo *index,
+           long expected_indexpages,
+           Selectivity selec,
            bool is_injoin)
 {
     Cost        temp = 0;
+    double      reltuples = selec * baserel->tuples;
+    double      indextuples = selec * index->tuples;
+    double      relpages;
+
+    /* Should only be applied to base relations */
+    Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo));
+    Assert(length(baserel->relids) == 1);
 
     if (!_enable_indexscan_ && !is_injoin)
         temp += _disable_cost_;
@@ -151,25 +153,49 @@ cost_index(Oid indexid,
      */
     if (expected_indexpages <= 0)
         expected_indexpages = 1;
-    if (indextuples <= 0)
-        indextuples = 1;
+    if (indextuples <= 0.0)
+        indextuples = 1.0;
 
     /* expected index relation pages */
     temp += expected_indexpages;
 
-    /*
-     * expected base relation pages XXX this isn't really right, since we
-     * will access the table nonsequentially and might have to fetch the
-     * same page more than once.  This calculation assumes the buffer
-     * cache will prevent that from happening...
+    /*--------------------
+     * expected base relation pages
+     *
+     * Worst case is that each tuple the index tells us to fetch comes
+     * from a different base-rel page, in which case the I/O cost would be
+     * 'reltuples' pages.  In practice we can expect the number of page
+     * fetches to be reduced by the buffer cache, because more than one
+     * tuple can be retrieved per page fetched.  Currently, we estimate
+     * the number of pages to be retrieved as
+     *        MIN(reltuples, relpages)
+     * This amounts to assuming that the buffer cache is perfectly efficient
+     * and never ends up reading the same page twice within one scan, which
+     * of course is too optimistic.  On the other hand, we are assuming that
+     * the target tuples are perfectly uniformly distributed across the
+     * relation's pages, which is too pessimistic --- any nonuniformity of
+     * distribution will reduce the number of pages we have to fetch.
+     * So, we guess-and-hope that these sources of error will more or less
+     * balance out.
+     *
+     * XXX if the relation has recently been "clustered" using this index,
+     * then in fact the target tuples will be highly nonuniformly distributed,
+     * and we will be seriously overestimating the scan cost!  Currently we
+     * have no way to know whether the relation has been clustered, nor how
+     * much it's been modified since the last clustering, so we ignore this
+     * effect.  Would be nice to do better someday.
+     *--------------------
     */
-    temp += ceil(((double) selec) * ((double) relpages));
+    relpages = reltuples;
+    if (baserel->pages > 0 && baserel->pages < relpages)
+        relpages = baserel->pages;
+    temp += relpages;
 
     /* per index tuples */
-    temp += _cpu_index_page_weight_ * selec * indextuples;
+    temp += _cpu_index_page_weight_ * indextuples;
 
     /* per heap tuples */
-    temp += _cpu_page_weight_ * selec * reltuples;
+    temp += _cpu_page_weight_ * reltuples;
 
     Assert(temp >= 0);
     return temp;
@@ -180,13 +206,10 @@ cost_index(Oid indexid,
  *    Determines and returns the cost of scanning a relation using tid-s.
  *
  *    disk = number of tids
- *    cpu = *CPU-PAGE-WEIGHT* * number_of_tids
- *
- * Returns a flonum.
- *
+ *    cpu = CPU-PAGE-WEIGHT * number_of_tids
 */
 Cost
-cost_tidscan(List *tideval)
+cost_tidscan(RelOptInfo *baserel, List *tideval)
 {
     Cost        temp = 0;
 
@@ -200,28 +223,39 @@ cost_tidscan(List *tideval)
 
 /*
  * cost_sort
- *    Determines and returns the cost of sorting a relation by considering
- *    the cost of doing an external sort: XXX this is probably too low
- *    disk = (p lg p)
- *    cpu = *CPU-PAGE-WEIGHT* * (t lg t)
+ *    Determines and returns the cost of sorting a relation.
+ *
+ *    If the total volume of data to sort is less than SortMem, we will do
+ *    an in-memory sort, which requires no I/O and about t*log2(t) tuple
+ *    comparisons for t tuples.  We use _cpu_index_page_weight as the cost
+ *    of a tuple comparison (is this reasonable, or do we need another
+ *    basic parameter?).
+ *
+ *    If the total volume exceeds SortMem, we switch to a tape-style merge
+ *    algorithm.  There will still be about t*log2(t) tuple comparisons in
+ *    total, but we will also need to write and read each tuple once per
+ *    merge pass.  We expect about ceil(log6(r)) merge passes where r is the
+ *    number of initial runs formed (log6 because tuplesort.c uses six-tape
+ *    merging).  Since the average initial run should be about twice SortMem,
+ *    we have
+ *        disk = 2 * p * ceil(log6(p / (2*SortMem)))
+ *        cpu = CPU-INDEX-PAGE-WEIGHT * t * log2(t)
  *
 * 'pathkeys' is a list of sort keys
 * 'tuples' is the number of tuples in the relation
 * 'width' is the average tuple width in bytes
 *
- * NOTE: some callers currently pass NULL for pathkeys because they
+ * NOTE: some callers currently pass NIL for pathkeys because they
 * can't conveniently supply the sort keys.  Since this routine doesn't
 * currently do anything with pathkeys anyway, that doesn't matter...
 * but if it ever does, it should react gracefully to lack of key data.
- *
- * Returns a flonum.
 */
 Cost
-cost_sort(List *pathkeys, int tuples, int width)
+cost_sort(List *pathkeys, double tuples, int width)
 {
     Cost        temp = 0;
-    int         npages = page_size(tuples, width);
-    double      log_npages;
+    double      nbytes = relation_byte_size(tuples, width);
+    long        sortmembytes = SortMem * 1024L;
 
     if (!_enable_sort_)
         temp += _disable_cost_;
@@ -231,25 +265,23 @@
     * even if passed-in tuple count is zero.  Besides, mustn't do
     * log(0)...
     */
-    if (tuples <= 0)
-        tuples = 1;
-    if (npages <= 0)
-        npages = 1;
+    if (tuples < 2.0)
+        tuples = 2.0;
 
-    log_npages = ceil(base_log((double) npages, 2.0));
-    if (log_npages <= 0.0)
-        log_npages = 1.0;
+    temp += _cpu_index_page_weight_ * tuples * base_log(tuples, 2.0);
 
-    temp += npages * log_npages;
+    if (nbytes > sortmembytes)
+    {
+        double      npages = ceil(nbytes / BLCKSZ);
+        double      nruns = nbytes / (sortmembytes * 2);
+        double      log_runs = ceil(base_log(nruns, 6.0));
 
-    /*
-     * could be base_log(tuples, NBuffers), but we are only doing 2-way
-     * merges
-     */
-    temp += _cpu_page_weight_ * tuples * base_log((double) tuples, 2.0);
+        if (log_runs < 1.0)
+            log_runs = 1.0;
+        temp += 2 * npages * log_runs;
+    }
 
     Assert(temp > 0);
-
     return temp;
 }
 
@@ -258,18 +290,15 @@
  * cost_result
  *    Determines and returns the cost of writing a relation of 'tuples'
  *    tuples of 'width' bytes out to a result relation.
- *
- * Returns a flonum.
- *
 */
 #ifdef NOT_USED
 Cost
-cost_result(int tuples, int width)
+cost_result(double tuples, int width)
 {
     Cost        temp = 0;
 
-    temp = temp + page_size(tuples, width);
-    temp = temp + _cpu_page_weight_ * tuples;
+    temp += page_size(tuples, width);
+    temp += _cpu_page_weight_ * tuples;
     Assert(temp >= 0);
     return temp;
 }
@@ -281,111 +310,106 @@
  *    Determines and returns the cost of joining two relations using the
  *    nested loop algorithm.
  *
- * 'outercost' is the (disk+cpu) cost of scanning the outer relation
- * 'innercost' is the (disk+cpu) cost of scanning the inner relation
- * 'outertuples' is the number of tuples in the outer relation
- *
- * Returns a flonum.
- *
+ * 'outer_path' is the path for the outer relation
+ * 'inner_path' is the path for the inner relation
+ * 'is_indexjoin' is true if we are using an indexscan for the inner relation
 */
 Cost
-cost_nestloop(Cost outercost,
-              Cost innercost,
-              int outertuples,
-              int innertuples,
-              int outerpages,
+cost_nestloop(Path *outer_path,
+              Path *inner_path,
              bool is_indexjoin)
 {
     Cost        temp = 0;
 
     if (!_enable_nestloop_)
         temp += _disable_cost_;
-    temp += outercost;
-    temp += outertuples * innercost;
-    Assert(temp >= 0);
 
+    temp += outer_path->path_cost;
+    temp += outer_path->parent->rows * inner_path->path_cost;
+
+    Assert(temp >= 0);
     return temp;
 }
 
 /*
 * cost_mergejoin
- * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
- *        outer and inner relations
- * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
- *        to sort the outer and inner relations (or NIL if no explicit
- *        sort is needed because the source path is already ordered)
- * 'outertuples' and 'innertuples' are the number of tuples in the outer
- *        and inner relations
- * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
- *        of the tuples of the outer and inner relations
- *
- * Returns a flonum.
+ *    Determines and returns the cost of joining two relations using the
+ *    merge join algorithm.
  *
+ * 'outer_path' is the path for the outer relation
+ * 'inner_path' is the path for the inner relation
+ * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
+ *        to sort the outer and inner relations, or NIL if no explicit
+ *        sort is needed because the source path is already ordered
 */
 Cost
-cost_mergejoin(Cost outercost,
-               Cost innercost,
+cost_mergejoin(Path *outer_path,
+               Path *inner_path,
               List *outersortkeys,
-               List *innersortkeys,
-               int outersize,
-               int innersize,
-               int outerwidth,
-               int innerwidth)
+               List *innersortkeys)
 {
     Cost        temp = 0;
 
     if (!_enable_mergejoin_)
         temp += _disable_cost_;
 
-    temp += outercost;
-    temp += innercost;
+    /* cost of source data */
+    temp += outer_path->path_cost + inner_path->path_cost;
+
     if (outersortkeys)          /* do we need to sort? */
-        temp += cost_sort(outersortkeys, outersize, outerwidth);
+        temp += cost_sort(outersortkeys,
+                          outer_path->parent->rows,
+                          outer_path->parent->width);
+
     if (innersortkeys)          /* do we need to sort? */
-        temp += cost_sort(innersortkeys, innersize, innerwidth);
-    temp += _cpu_page_weight_ * (outersize + innersize);
+        temp += cost_sort(innersortkeys,
+                          inner_path->parent->rows,
+                          inner_path->parent->width);
 
-    Assert(temp >= 0);
+    /*
+     * Estimate the number of tuples to be processed in the mergejoin itself
+     * as one per tuple in the two source relations.  This could be a drastic
+     * underestimate if there are many equal-keyed tuples in either relation,
+     * but we have no good way of estimating that...
+     */
+    temp += _cpu_page_weight_ * (outer_path->parent->rows +
+                                 inner_path->parent->rows);
 
+    Assert(temp >= 0);
     return temp;
 }
 
 /*
 * cost_hashjoin
+ *    Determines and returns the cost of joining two relations using the
+ *    hash join algorithm.
 *
- * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
- *        outer and inner relations
- * 'outersize' and 'innersize' are the number of tuples in the outer
- *        and inner relations
- * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
- *        of the tuples of the outer and inner relations
- * 'innerdisbursion' is an estimate of the disbursion statistic
+ * 'outer_path' is the path for the outer relation
+ * 'inner_path' is the path for the inner relation
+ * 'innerdisbursion' is an estimate of the disbursion statistic
 *        for the inner hash key.
- *
- * Returns a flonum.
 */
 Cost
-cost_hashjoin(Cost outercost,
-              Cost innercost,
-              int outersize,
-              int innersize,
-              int outerwidth,
-              int innerwidth,
-              Cost innerdisbursion)
+cost_hashjoin(Path *outer_path,
+              Path *inner_path,
+              Selectivity innerdisbursion)
 {
     Cost        temp = 0;
-    double      outerbytes = relation_byte_size(outersize, outerwidth);
-    double      innerbytes = relation_byte_size(innersize, innerwidth);
+    double      outerbytes = relation_byte_size(outer_path->parent->rows,
+                                                outer_path->parent->width);
+    double      innerbytes = relation_byte_size(inner_path->parent->rows,
+                                                inner_path->parent->width);
     long        hashtablebytes = SortMem * 1024L;
 
     if (!_enable_hashjoin_)
         temp += _disable_cost_;
 
     /* cost of source data */
-    temp += outercost + innercost;
+    temp += outer_path->path_cost + inner_path->path_cost;
 
     /* cost of computing hash function: must do it once per tuple */
-    temp += _cpu_page_weight_ * (outersize + innersize);
+    temp += _cpu_page_weight_ * (outer_path->parent->rows +
+                                 inner_path->parent->rows);
 
     /* the number of tuple comparisons needed is the number of outer
     * tuples times the typical hash bucket size, which we estimate
@@ -393,8 +417,8 @@ cost_hashjoin(Cost outercost,
     * count.  The cost per comparison is set at _cpu_index_page_weight_;
     * is that reasonable, or do we need another basic parameter?
     */
-    temp += _cpu_index_page_weight_ * outersize *
-        (innersize * innerdisbursion);
+    temp += _cpu_index_page_weight_ * outer_path->parent->rows *
+        (inner_path->parent->rows * innerdisbursion);
 
     /*
     * if inner relation is too big then we will need to "batch" the join,
@@ -402,8 +426,10 @@ cost_hashjoin(Cost outercost,
     * extra time.  Charge one cost unit per page of I/O.
     */
     if (innerbytes > hashtablebytes)
-        temp += 2 * (page_size(outersize, outerwidth) +
-                     page_size(innersize, innerwidth));
+        temp += 2 * (page_size(outer_path->parent->rows,
+                               outer_path->parent->width) +
+                     page_size(inner_path->parent->rows,
+                               inner_path->parent->width));
 
     /*
     * Bias against putting larger relation on inside.  We don't want
@@ -415,76 +441,74 @@
        temp *= 1.1;            /* is this an OK fudge factor? */
 
     Assert(temp >= 0);
-
     return temp;
 }
 
 /*
- * compute_rel_size
- *    Computes the size of each relation in 'rel_list' (after applying
- *    restrictions), by multiplying the selectivity of each restriction
- *    by the original size of the relation.
+ * set_rel_rows_width
+ *    Set the 'rows' and 'width' estimates for the given base relation.
 *
- *    Sets the 'size' field for each relation entry with this computed size.
- *
- * Returns the size.
+ *    'rows' is the estimated number of output tuples (after applying
+ *    restriction clauses).
+ *    'width' is the estimated average output tuple width in bytes.
 */
-int
-compute_rel_size(RelOptInfo *rel)
+void
+set_rel_rows_width(Query *root, RelOptInfo *rel)
 {
-    Cost        temp;
-    int         temp1;
+    /* Should only be applied to base relations */
+    Assert(length(rel->relids) == 1);
 
-    temp = rel->tuples * product_selec(rel->restrictinfo);
-    Assert(temp >= 0);
-    if (temp >= (MAXINT - 1))
-        temp1 = MAXINT;
-    else
-        temp1 = ceil((double) temp);
-    Assert(temp1 >= 0);
-    Assert(temp1 <= MAXINT);
-    return temp1;
+    rel->rows = rel->tuples * restrictlist_selec(root, rel->restrictinfo);
+    Assert(rel->rows >= 0);
+
+    set_rel_width(root, rel);
 }
 
 /*
- * compute_rel_width
- *    Computes the width in bytes of a tuple from 'rel'.
- *
- * Returns the width of the tuple as a fixnum.
+ * set_joinrel_rows_width
+ *    Set the 'rows' and 'width' estimates for the given join relation.
 */
-int
-compute_rel_width(RelOptInfo *rel)
+void
+set_joinrel_rows_width(Query *root, RelOptInfo *rel,
+                       JoinPath *joinpath)
 {
-    return compute_targetlist_width(rel->targetlist);
+    double      temp;
+
+    /* cartesian product */
+    temp = joinpath->outerjoinpath->parent->rows *
+        joinpath->innerjoinpath->parent->rows;
+
+    /* apply restrictivity */
+    temp *= restrictlist_selec(root, joinpath->path.parent->restrictinfo);
+
+    Assert(temp >= 0);
+    rel->rows = temp;
+
+    set_rel_width(root, rel);
 }
 
 /*
- * compute_targetlist_width
- *    Computes the width in bytes of a tuple made from 'targetlist'.
- *
- * Returns the width of the tuple as a fixnum.
+ * set_rel_width
+ *    Set the estimated output width of the relation.
 */
-static int
-compute_targetlist_width(List *targetlist)
+static void
+set_rel_width(Query *root, RelOptInfo *rel)
 {
-    List       *temp_tl;
     int         tuple_width = 0;
+    List       *tle;
 
-    foreach(temp_tl, targetlist)
-    {
-        tuple_width += compute_attribute_width(lfirst(temp_tl));
-    }
-    return tuple_width;
+    foreach(tle, rel->targetlist)
+        tuple_width += compute_attribute_width((TargetEntry *) lfirst(tle));
+    Assert(tuple_width >= 0);
+    rel->width = tuple_width;
 }
 
 /*
 * compute_attribute_width
 *    Given a target list entry, find the size in bytes of the attribute.
 *
- *    If a field is variable-length, it is assumed to be at least the size
- *    of a TID field.
- *
- * Returns the width of the attribute as a fixnum.
+ *    If a field is variable-length, we make a default assumption.  Would be
+ *    better if VACUUM recorded some stats about the average field width...
 */
 static int
 compute_attribute_width(TargetEntry *tlistentry)
@@ -498,46 +522,14 @@
 }
 
 /*
- * compute_joinrel_size
- *    Computes the size of the join relation 'joinrel'.
- *
- * Returns a fixnum.
- */
-int
-compute_joinrel_size(JoinPath *joinpath)
-{
-    Cost        temp = 1.0;
-    int         temp1 = 0;
-
-    /* cartesian product */
-    temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
-    temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
-
-    temp = temp * product_selec(joinpath->pathinfo);
-    if (temp >= (MAXINT - 1) / 2)
-    {
-        /* if we exceed (MAXINT-1)/2, we switch to log scale */
-        /* +1 prevents log(0) */
-        temp1 = ceil(log(temp + 1 - (MAXINT - 1) / 2) + (MAXINT - 1) / 2);
-    }
-    else
-        temp1 = ceil((double) temp);
-    Assert(temp1 >= 0);
-
-    return temp1;
-}
-
-/*
 * relation_byte_size
 *    Estimate the storage space in bytes for a given number of tuples
 *    of a given width (size in bytes).
- *    To avoid overflow with big relations, result is a double.
 */
-
 static double
-relation_byte_size(int tuples, int width)
+relation_byte_size(double tuples, int width)
 {
-    return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
+    return tuples * ((double) (width + sizeof(HeapTupleData)));
 }
 
 /*
@@ -545,14 +537,10 @@ relation_byte_size(int tuples, int width)
 *    Returns an estimate of the number of pages covered by a given
 *    number of tuples of a given width (size in bytes).
 */
-int
-page_size(int tuples, int width)
+static double
+page_size(double tuples, int width)
 {
-    int         temp;
-
-    temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
-    Assert(temp >= 0);
-    return temp;
+    return ceil(relation_byte_size(tuples, width) / BLCKSZ);
 }
 
 static double
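The most consequential formula change in this commit is the index-scan page estimate. As a sanity check, the MIN(reltuples, relpages) heuristic and the per-tuple CPU charges can be reproduced in a minimal standalone C sketch; the function name and the two weight constants below are illustrative stand-ins, not the planner's actual globals (which this patch renames and re-types):

#include <stdio.h>

/* Illustrative stand-ins for _cpu_page_weight_ / _cpu_index_page_weight_ */
static const double CPU_PAGE_WEIGHT = 0.033;
static const double CPU_INDEX_PAGE_WEIGHT = 0.017;

/*
 * Sketch of the patched cost_index() arithmetic: 'selec' scales both
 * tuple counts, and heap page fetches are capped at the relation's
 * physical page count -- the MIN(reltuples, relpages) heuristic, which
 * assumes a perfectly effective buffer cache.
 */
static double
index_scan_cost(double rel_tuples, double rel_pages,
                double index_tuples, double expected_index_pages,
                double selec)
{
    double heap_tuples = selec * rel_tuples;
    double idx_tuples = selec * index_tuples;
    double heap_pages = heap_tuples;    /* worst case: one page per tuple */

    if (rel_pages > 0 && rel_pages < heap_pages)
        heap_pages = rel_pages;         /* cap at physical relation size */

    return expected_index_pages + heap_pages
        + CPU_INDEX_PAGE_WEIGHT * idx_tuples
        + CPU_PAGE_WEIGHT * heap_tuples;
}

int
main(void)
{
    /* 1% selectivity against a 100000-tuple, 1000-page relation */
    printf("index scan cost = %g\n",
           index_scan_cost(100000.0, 1000.0, 100000.0, 12.0, 0.01));
    return 0;
}

At 1% selectivity the cap is just reached (1000 fetched tuples against 1000 pages); push selec higher and the page term stops growing while the CPU terms keep climbing, which is exactly the behavior the long comment in the patch argues for.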
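The new cost_sort() model is likewise pure arithmetic. A hedged sketch, assuming an 8K block size, a SortMem-style limit given in kilobytes, and the same tuple-comparison weight as above (all names here are stand-ins):

#include <math.h>
#include <stdio.h>

static const double CPU_INDEX_PAGE_WEIGHT = 0.017;  /* comparison cost */
static const double BLOCK_SIZE = 8192.0;            /* assumed BLCKSZ */

static double
log_base(double x, double b)
{
    return log(x) / log(b);     /* same role as the file's base_log() */
}

/*
 * Sketch of the patched cost_sort(): t*log2(t) comparisons always;
 * tape-merge I/O only once the data volume exceeds the memory limit.
 */
static double
sort_cost(double tuples, int width, long sort_mem_kb)
{
    double nbytes = tuples * width;     /* ignores per-tuple overhead */
    double limit = sort_mem_kb * 1024.0;
    double cost;

    if (tuples < 2.0)
        tuples = 2.0;                   /* never zero cost, never log(0) */

    cost = CPU_INDEX_PAGE_WEIGHT * tuples * log_base(tuples, 2.0);

    if (nbytes > limit)
    {
        double npages = ceil(nbytes / BLOCK_SIZE);
        double nruns = nbytes / (limit * 2.0);         /* runs ~ 2*SortMem */
        double log_runs = ceil(log_base(nruns, 6.0));  /* six-tape merge */

        if (log_runs < 1.0)
            log_runs = 1.0;
        cost += 2.0 * npages * log_runs;   /* one write + one read per pass */
    }
    return cost;
}

int
main(void)
{
    printf("in-memory: %g\n", sort_cost(1000.0, 100, 512));
    printf("external:  %g\n", sort_cost(1e7, 100, 512));
    return 0;
}

(Compile with -lm.) A thousand 100-byte tuples fit in 512KB, so only the comparison term appears; ten million tuples produce roughly a gigabyte, and ceil(log6(~954)) gives about four merge passes of page I/O on top.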
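The replaced compute_joinrel_size()/cost_nestloop() pair reduces to two one-line formulas: join output rows are the cartesian product scaled by the restriction clauses' combined selectivity, and a nestloop pays for one outer scan plus one inner scan per outer row. Sketched together below, with restrict_selec standing in for whatever the planner's restrictlist_selec() would compute:

#include <stdio.h>

/* rows out of a join: cartesian product scaled by restriction selectivity */
static double
join_rows(double outer_rows, double inner_rows, double restrict_selec)
{
    return outer_rows * inner_rows * restrict_selec;
}

/* nestloop: scan the outer once, rescan the inner once per outer row */
static double
nestloop_cost(double outer_cost, double outer_rows, double inner_cost)
{
    return outer_cost + outer_rows * inner_cost;
}

int
main(void)
{
    printf("join rows = %g\n", join_rows(1000.0, 5000.0, 0.001));
    printf("nestloop cost = %g\n", nestloop_cost(100.0, 1000.0, 5.0));
    return 0;
}

Note what the patch buys here: with 'rows' kept as a double, there is no need for the MAXINT clamping and log-scale overflow dodge that the old compute_joinrel_size() carried.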
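Finally, the hash-join charges: one hash computation per tuple on both sides, outer_rows * (inner_rows * disbursion) tuple comparisons, and a write-plus-reread of both inputs when the inner side overflows the memory budget. A standalone sketch under the same assumed constants (TUPLE_OVERHEAD approximates the sizeof(HeapTupleData) term in relation_byte_size()):

#include <math.h>
#include <stdio.h>

static const double CPU_PAGE_WEIGHT = 0.033;
static const double CPU_INDEX_PAGE_WEIGHT = 0.017;
static const double BLOCK_SIZE = 8192.0;    /* assumed BLCKSZ */
static const double TUPLE_OVERHEAD = 48.0;  /* assumed per-tuple header */

static double
page_count(double tuples, int width)
{
    return ceil(tuples * (width + TUPLE_OVERHEAD) / BLOCK_SIZE);
}

/* Sketch of the patched cost_hashjoin(), minus the source-path costs. */
static double
hash_join_cost(double outer_rows, int outer_width,
               double inner_rows, int inner_width,
               double inner_disbursion, long sort_mem_kb)
{
    double outerbytes = outer_rows * (outer_width + TUPLE_OVERHEAD);
    double innerbytes = inner_rows * (inner_width + TUPLE_OVERHEAD);
    double cost = 0.0;

    /* hash function once per tuple on both sides */
    cost += CPU_PAGE_WEIGHT * (outer_rows + inner_rows);

    /* comparisons: outer rows times estimated bucket size */
    cost += CPU_INDEX_PAGE_WEIGHT * outer_rows
        * (inner_rows * inner_disbursion);

    /* batching: write and re-read both inputs, one unit per page of I/O */
    if (innerbytes > sort_mem_kb * 1024.0)
        cost += 2.0 * (page_count(outer_rows, outer_width)
                       + page_count(inner_rows, inner_width));

    /* bias against putting the larger relation on the inside */
    if (innerbytes > outerbytes)
        cost *= 1.1;
    return cost;
}

int
main(void)
{
    printf("hash join cost = %g\n",
           hash_join_cost(100000.0, 60, 1000000.0, 40, 0.01, 512));
    return 0;
}

Here the million-row inner side (about 84MB with overhead) far exceeds the 512KB budget, so both the batching term and the 1.1 bias apply -- the two penalties this revision keeps from the previous code.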