Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/optimizer/path/costsize.c   | 223
-rw-r--r-- | src/backend/optimizer/path/indxpath.c   |  86
-rw-r--r-- | src/backend/optimizer/path/joinpath.c   |  16
-rw-r--r-- | src/backend/optimizer/path/orindxpath.c |   6
-rw-r--r-- | src/backend/optimizer/plan/planagg.c    |   4
-rw-r--r-- | src/backend/optimizer/util/pathnode.c   |  21
-rw-r--r-- | src/backend/utils/adt/selfuncs.c        | 108
7 files changed, 287 insertions, 177 deletions
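For reference while reading the costsize.c hunks below: the central addition in this commit is index_pages_fetched(), a cache-aware estimate of heap pages fetched. The following is a minimal standalone sketch of that Mackert-Lohman computation, not the committed function itself; the name ml_pages_fetched is illustrative, and effective_cache_size is passed as an explicit parameter here rather than read from the planner GUC, purely to keep the example self-contained.

    #include <math.h>

    /*
     * Sketch of the Mackert-Lohman estimate factored out as
     * index_pages_fetched() in this commit.  T = pages in the table,
     * b = buffer pages available to this table, tuples_fetched = Ns.
     */
    double
    ml_pages_fetched(double tuples_fetched, double pages, double other_pages,
                     double effective_cache_size)
    {
        double  T = (pages > 1) ? pages : 1.0;
        double  b;
        double  pages_fetched;

        /* pro-rate the cache between the table and its index */
        b = effective_cache_size * T / (T + other_pages);
        if (b <= 1.0)
            b = 1.0;
        else
            b = ceil(b);

        if (T <= b)
        {
            pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
            if (pages_fetched >= T)
                pages_fetched = T;
            else
                pages_fetched = ceil(pages_fetched);
        }
        else
        {
            double  lim = (2.0 * T * b) / (2.0 * T - b);

            if (tuples_fetched <= lim)
                pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
            else
                pages_fetched = b + (tuples_fetched - lim) * (T - b) / T;
            pages_fetched = ceil(pages_fetched);
        }
        return pages_fetched;
    }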
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 965574c579d..fa8d06707b8 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.157 2006/06/05 20:56:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.158 2006/06/06 17:59:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -133,7 +133,7 @@ clamp_row_est(double nrows) * better and to avoid possible divide-by-zero when interpolating costs. * Make it an integer, too. */ - if (nrows < 1.0) + if (nrows <= 1.0) nrows = 1.0; else nrows = rint(nrows); @@ -181,8 +181,10 @@ cost_seqscan(Path *path, PlannerInfo *root, * * 'index' is the index to be used * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics) - * 'is_injoin' is T if we are considering using the index scan as the inside - * of a nestloop join (hence, some of the indexQuals are join clauses) + * 'outer_rel' is the outer relation when we are considering using the index + * scan as the inside of a nestloop join (hence, some of the indexQuals + * are join clauses, and we should expect repeated scans of the index); + * NULL for a plain index scan * * cost_index() takes an IndexPath not just a Path, because it sets a few * additional fields of the IndexPath besides startup_cost and total_cost. @@ -200,7 +202,7 @@ void cost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index, List *indexQuals, - bool is_injoin) + RelOptInfo *outer_rel) { RelOptInfo *baserel = index->rel; Cost startup_cost = 0; @@ -215,8 +217,6 @@ cost_index(IndexPath *path, PlannerInfo *root, Cost cpu_per_tuple; double tuples_fetched; double pages_fetched; - double T, - b; /* Should only be applied to base relations */ Assert(IsA(baserel, RelOptInfo) && @@ -233,10 +233,11 @@ cost_index(IndexPath *path, PlannerInfo *root, * the fraction of main-table tuples we will have to retrieve) and its * correlation to the main-table tuple order. */ - OidFunctionCall7(index->amcostestimate, + OidFunctionCall8(index->amcostestimate, PointerGetDatum(root), PointerGetDatum(index), PointerGetDatum(indexQuals), + PointerGetDatum(outer_rel), PointerGetDatum(&indexStartupCost), PointerGetDatum(&indexTotalCost), PointerGetDatum(&indexSelectivity), @@ -254,86 +255,75 @@ cost_index(IndexPath *path, PlannerInfo *root, startup_cost += indexStartupCost; run_cost += indexTotalCost - indexStartupCost; + /* estimate number of main-table tuples fetched */ + tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); + /*---------- - * Estimate number of main-table tuples and pages fetched. + * Estimate number of main-table pages fetched, and compute I/O cost. * * When the index ordering is uncorrelated with the table ordering, - * we use an approximation proposed by Mackert and Lohman, "Index Scans - * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions - * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424. 
- * The Mackert and Lohman approximation is that the number of pages - * fetched is - * PF = - * min(2TNs/(2T+Ns), T) when T <= b - * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b) - * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b) - * where - * T = # pages in table - * N = # tuples in table - * s = selectivity = fraction of table to be scanned - * b = # buffer pages available (we include kernel space here) + * we use an approximation proposed by Mackert and Lohman (see + * index_pages_fetched() for details) to compute the number of pages + * fetched, and then charge random_page_cost per page fetched. * * When the index ordering is exactly correlated with the table ordering * (just after a CLUSTER, for example), the number of pages fetched should - * be just sT. What's more, these will be sequential fetches, not the - * random fetches that occur in the uncorrelated case. So, depending on - * the extent of correlation, we should estimate the actual I/O cost - * somewhere between s * T * seq_page_cost and PF * random_page_cost. - * We currently interpolate linearly between these two endpoints based on - * the correlation squared (XXX is that appropriate?). - * - * In any case the number of tuples fetched is Ns. + * be exactly selectivity * table_size. What's more, all but the first + * will be sequential fetches, not the random fetches that occur in the + * uncorrelated case. So if the number of pages is more than 1, we + * ought to charge + * random_page_cost + (pages_fetched - 1) * seq_page_cost + * For partially-correlated indexes, we ought to charge somewhere between + * these two estimates. We currently interpolate linearly between the + * estimates based on the correlation squared (XXX is that appropriate?). *---------- */ + if (outer_rel != NULL && outer_rel->rows > 1) + { + /* + * For repeated indexscans, scale up the number of tuples fetched + * in the Mackert and Lohman formula by the number of scans, so + * that we estimate the number of pages fetched by all the scans. + * Then pro-rate the costs for one scan. In this case we assume + * all the fetches are random accesses. XXX it'd be good to + * include correlation in this model, but it's not clear how to do + * that without double-counting cache effects. + */ + double num_scans = outer_rel->rows; - tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); - - /* This part is the Mackert and Lohman formula */ + pages_fetched = index_pages_fetched(tuples_fetched * num_scans, + baserel->pages, + index->pages); - T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; - b = (effective_cache_size > 1) ? effective_cache_size : 1.0; - - if (T <= b) - { - pages_fetched = - (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); - if (pages_fetched >= T) - pages_fetched = T; - else - pages_fetched = ceil(pages_fetched); + run_cost += (pages_fetched * random_page_cost) / num_scans; } else { - double lim; + /* + * Normal case: apply the Mackert and Lohman formula, and then + * interpolate between that and the correlation-derived result. 
+ */ + pages_fetched = index_pages_fetched(tuples_fetched, + baserel->pages, + index->pages); - lim = (2.0 * T * b) / (2.0 * T - b); - if (tuples_fetched <= lim) - { - pages_fetched = - (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); - } - else - { - pages_fetched = - b + (tuples_fetched - lim) * (T - b) / T; - } - pages_fetched = ceil(pages_fetched); - } + /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ + max_IO_cost = pages_fetched * random_page_cost; - /* - * min_IO_cost corresponds to the perfectly correlated case (csquared=1), - * max_IO_cost to the perfectly uncorrelated case (csquared=0). - */ - min_IO_cost = ceil(indexSelectivity * T) * seq_page_cost; - max_IO_cost = pages_fetched * random_page_cost; + /* min_IO_cost is for the perfectly correlated case (csquared=1) */ + pages_fetched = ceil(indexSelectivity * (double) baserel->pages); + min_IO_cost = random_page_cost; + if (pages_fetched > 1) + min_IO_cost += (pages_fetched - 1) * seq_page_cost; - /* - * Now interpolate based on estimated index order correlation to get total - * disk I/O cost for main table accesses. - */ - csquared = indexCorrelation * indexCorrelation; + /* + * Now interpolate based on estimated index order correlation to get + * total disk I/O cost for main table accesses. + */ + csquared = indexCorrelation * indexCorrelation; - run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost); + run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost); + } /* * Estimate CPU costs per tuple. @@ -348,7 +338,7 @@ cost_index(IndexPath *path, PlannerInfo *root, startup_cost += baserel->baserestrictcost.startup; cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; - if (!is_injoin) + if (outer_rel == NULL) { QualCost index_qual_cost; @@ -364,18 +354,101 @@ cost_index(IndexPath *path, PlannerInfo *root, } /* + * index_pages_fetched + * Estimate the number of pages actually fetched after accounting for + * cache effects. + * + * We use an approximation proposed by Mackert and Lohman, "Index Scans + * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions + * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424. + * The Mackert and Lohman approximation is that the number of pages + * fetched is + * PF = + * min(2TNs/(2T+Ns), T) when T <= b + * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b) + * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b) + * where + * T = # pages in table + * N = # tuples in table + * s = selectivity = fraction of table to be scanned + * b = # buffer pages available (we include kernel space here) + * + * We assume that effective_cache_size is the total number of buffer pages + * available for both table and index, and pro-rate that space between the + * table and index. (Ideally other_pages should include all the other + * tables and indexes used by the query too; but we don't have a good way + * to get that number here.) + * + * The product Ns is the number of tuples fetched; we pass in that + * product rather than calculating it here. + * + * Caller is expected to have ensured that tuples_fetched is greater than zero + * and rounded to integer (see clamp_row_est). The result will likewise be + * greater than zero and integral. + */ +double +index_pages_fetched(double tuples_fetched, BlockNumber pages, + BlockNumber other_pages) +{ + double pages_fetched; + double T, + b; + + /* T is # pages in table, but don't allow it to be zero */ + T = (pages > 1) ? 
(double) pages : 1.0; + + /* b is pro-rated share of effective_cache_size */ + b = effective_cache_size * T / (T + (double) other_pages); + /* force it positive and integral */ + if (b <= 1.0) + b = 1.0; + else + b = ceil(b); + + /* This part is the Mackert and Lohman formula */ + if (T <= b) + { + pages_fetched = + (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + if (pages_fetched >= T) + pages_fetched = T; + else + pages_fetched = ceil(pages_fetched); + } + else + { + double lim; + + lim = (2.0 * T * b) / (2.0 * T - b); + if (tuples_fetched <= lim) + { + pages_fetched = + (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + } + else + { + pages_fetched = + b + (tuples_fetched - lim) * (T - b) / T; + } + pages_fetched = ceil(pages_fetched); + } + return pages_fetched; +} + +/* * cost_bitmap_heap_scan * Determines and returns the cost of scanning a relation using a bitmap * index-then-heap plan. * * 'baserel' is the relation to be scanned * 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths - * 'is_injoin' is T if we are considering using the scan as the inside - * of a nestloop join (hence, some of the quals are join clauses) + * + * Note: we take no explicit notice here of whether this is a join inner path. + * If it is, the component IndexPaths should have been costed accordingly. */ void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, - Path *bitmapqual, bool is_injoin) + Path *bitmapqual) { Cost startup_cost = 0; Cost run_cost = 0; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index ac6de3d82a5..eaf4e1cb337 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.206 2006/05/18 19:56:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.207 2006/06/06 17:59:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -47,13 +47,11 @@ static List *find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *outer_clauses, - bool istoplevel, bool isjoininner, - Relids outer_relids, + bool istoplevel, RelOptInfo *outer_rel, SaOpControl saop_control); static List *find_saop_paths(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *outer_clauses, - bool istoplevel, bool isjoininner, - Relids outer_relids); + bool istoplevel, RelOptInfo *outer_rel); static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths); static int bitmap_path_comparator(const void *a, const void *b); static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths); @@ -164,7 +162,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) */ indexpaths = find_usable_indexes(root, rel, rel->baserestrictinfo, NIL, - true, false, NULL, SAOP_FORBID); + true, NULL, SAOP_FORBID); /* * We can submit them all to add_path. 
(This generates access paths for @@ -190,7 +188,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) */ indexpaths = generate_bitmap_or_paths(root, rel, rel->baserestrictinfo, NIL, - false, NULL); + NULL); bitindexpaths = list_concat(bitindexpaths, indexpaths); /* @@ -199,7 +197,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) */ indexpaths = find_saop_paths(root, rel, rel->baserestrictinfo, NIL, - true, false, NULL); + true, NULL); bitindexpaths = list_concat(bitindexpaths, indexpaths); /* @@ -247,10 +245,8 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) * 'outer_clauses' is the list of additional upper-level clauses * 'istoplevel' is true if clauses are the rel's top-level restriction list * (outer_clauses must be NIL when this is true) - * 'isjoininner' is true if forming an inner indexscan (so some of the - * given clauses are join clauses) - * 'outer_relids' identifies the outer side of the join (pass NULL - * if not isjoininner) + * 'outer_rel' is the outer side of the join if forming an inner indexscan + * (so some of the given clauses are join clauses); NULL if not * 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used * * Note: check_partial_indexes() must have been run previously. @@ -259,10 +255,10 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) static List * find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *outer_clauses, - bool istoplevel, bool isjoininner, - Relids outer_relids, + bool istoplevel, RelOptInfo *outer_rel, SaOpControl saop_control) { + Relids outer_relids = outer_rel ? outer_rel->relids : NULL; List *result = NIL; List *all_clauses = NIL; /* not computed till needed */ ListCell *ilist; @@ -349,7 +345,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, * relevant unless we are at top level. */ index_is_ordered = OidIsValid(index->ordering[0]); - if (istoplevel && index_is_ordered && !isjoininner) + if (index_is_ordered && istoplevel && outer_rel == NULL) { index_pathkeys = build_index_pathkeys(root, index, ForwardScanDirection, @@ -374,7 +370,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, index_is_ordered ? ForwardScanDirection : NoMovementScanDirection, - isjoininner); + outer_rel); result = lappend(result, ipath); } @@ -383,7 +379,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, * that we failed to match, consider variant ways of achieving the * ordering. Again, this is only interesting at top level. 
*/ - if (istoplevel && index_is_ordered && !isjoininner && + if (index_is_ordered && istoplevel && outer_rel == NULL && root->query_pathkeys != NIL && pathkeys_useful_for_ordering(root, useful_pathkeys) == 0) { @@ -396,7 +392,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, restrictclauses, root->query_pathkeys, scandir, - false); + outer_rel); result = lappend(result, ipath); } } @@ -417,8 +413,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, static List * find_saop_paths(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *outer_clauses, - bool istoplevel, bool isjoininner, - Relids outer_relids) + bool istoplevel, RelOptInfo *outer_rel) { bool have_saop = false; ListCell *l; @@ -443,8 +438,7 @@ find_saop_paths(PlannerInfo *root, RelOptInfo *rel, return find_usable_indexes(root, rel, clauses, outer_clauses, - istoplevel, isjoininner, - outer_relids, + istoplevel, outer_rel, SAOP_REQUIRE); } @@ -457,13 +451,13 @@ find_saop_paths(PlannerInfo *root, RelOptInfo *rel, * * outer_clauses is a list of additional clauses that can be assumed true * for the purpose of generating indexquals, but are not to be searched for - * ORs. (See find_usable_indexes() for motivation.) + * ORs. (See find_usable_indexes() for motivation.) outer_rel is the outer + * side when we are considering a nestloop inner indexpath. */ List * generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *outer_clauses, - bool isjoininner, - Relids outer_relids) + RelOptInfo *outer_rel) { List *result = NIL; List *all_clauses; @@ -506,16 +500,14 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, andargs, all_clauses, false, - isjoininner, - outer_relids, + outer_rel, SAOP_ALLOW); /* Recurse in case there are sub-ORs */ indlist = list_concat(indlist, generate_bitmap_or_paths(root, rel, andargs, all_clauses, - isjoininner, - outer_relids)); + outer_rel)); } else { @@ -525,8 +517,7 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, list_make1(orarg), all_clauses, false, - isjoininner, - outer_relids, + outer_rel, SAOP_ALLOW); } @@ -724,7 +715,7 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths) cost_bitmap_and_node(&apath, root); /* Now we can do cost_bitmap_heap_scan */ - cost_bitmap_heap_scan(&bpath, root, rel, (Path *) &apath, false); + cost_bitmap_heap_scan(&bpath, root, rel, (Path *) &apath); return bpath.total_cost; } @@ -1338,7 +1329,7 @@ matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids) /* * best_inner_indexscan * Finds the best available inner indexscan for a nestloop join - * with the given rel on the inside and the given outer_relids outside. + * with the given rel on the inside and the given outer_rel outside. * May return NULL if there are no possible inner indexscans. 
* * We ignore ordering considerations (since a nestloop's inner scan's order @@ -1353,8 +1344,9 @@ matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids) */ Path * best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, - Relids outer_relids, JoinType jointype) + RelOptInfo *outer_rel, JoinType jointype) { + Relids outer_relids; Path *cheapest; bool isouterjoin; List *clause_list; @@ -1396,11 +1388,11 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); /* - * Intersect the given outer_relids with index_outer_relids to find the + * Intersect the given outer relids with index_outer_relids to find the * set of outer relids actually relevant for this rel. If there are none, * again we can fail immediately. */ - outer_relids = bms_intersect(rel->index_outer_relids, outer_relids); + outer_relids = bms_intersect(rel->index_outer_relids, outer_rel->relids); if (bms_is_empty(outer_relids)) { bms_free(outer_relids); @@ -1413,6 +1405,13 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, * outerrels. (We include the isouterjoin status in the cache lookup key * for safety. In practice I suspect this is not necessary because it * should always be the same for a given innerrel.) + * + * NOTE: because we cache on outer_relids rather than outer_rel->relids, + * we will report the same path and hence path cost for joins with + * different sets of irrelevant rels on the outside. Now that cost_index + * is sensitive to outer_rel->rows, this is not really right. However + * the error is probably not large. Is it worth establishing a separate + * cache entry for each distinct outer_rel->relids set to get this right? */ foreach(l, rel->index_inner_paths) { @@ -1433,9 +1432,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, * that could be plain indexscans, ie, they don't require the join context * at all. This may seem redundant, but we need to include those scans in * the input given to choose_bitmap_and() to be sure we find optimal AND - * combinations of join and non-join scans. The worst case is that we - * might return a "best inner indexscan" that's really just a plain - * indexscan, causing some redundant effort in joinpath.c. + * combinations of join and non-join scans. Also, even if the "best + * inner indexscan" is just a plain indexscan, it will have a different + * cost estimate because of cache effects. */ clause_list = find_clauses_for_join(root, rel, outer_relids, isouterjoin); @@ -1445,8 +1444,7 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, */ indexpaths = find_usable_indexes(root, rel, clause_list, NIL, - false, true, - outer_relids, + false, outer_rel, SAOP_FORBID); /* @@ -1455,8 +1453,7 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, */ bitindexpaths = generate_bitmap_or_paths(root, rel, clause_list, NIL, - true, - outer_relids); + outer_rel); /* * Likewise, generate paths using ScalarArrayOpExpr clauses; these can't @@ -1465,8 +1462,7 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, bitindexpaths = list_concat(bitindexpaths, find_saop_paths(root, rel, clause_list, NIL, - false, true, - outer_relids)); + false, outer_rel)); /* * Include the regular index paths in bitindexpaths. 
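To summarize the cost_index() rework above in one place, here is a hedged sketch of the new heap-I/O logic. It is illustrative only: heap_io_cost and its parameter names are made up, ml_pages_fetched is the helper sketched after the diffstat, and the cost GUCs are passed as plain arguments. The repeated-scan branch scales the tuple count by the number of outer rows, estimates total pages with Mackert-Lohman, and pro-rates the random-access cost per scan; the plain-scan branch interpolates between the uncorrelated and perfectly correlated estimates by the squared index correlation.

    #include <math.h>

    extern double ml_pages_fetched(double tuples_fetched, double pages,
                                   double other_pages,
                                   double effective_cache_size);

    /* Hedged sketch of the heap-I/O cost model now used by cost_index(). */
    double
    heap_io_cost(double indexSelectivity, double baserel_tuples,
                 double baserel_pages, double index_pages,
                 double indexCorrelation, double outer_rows,
                 double random_page_cost, double seq_page_cost,
                 double effective_cache_size)
    {
        double  tuples_fetched = indexSelectivity * baserel_tuples;
        double  pages_fetched;

        /* mirrors the patched clamp_row_est(): at least 1, else rounded */
        tuples_fetched = (tuples_fetched <= 1.0) ? 1.0 : rint(tuples_fetched);

        if (outer_rows > 1)
        {
            /* repeated nestloop inner scans: amortize over all scans */
            pages_fetched = ml_pages_fetched(tuples_fetched * outer_rows,
                                             baserel_pages, index_pages,
                                             effective_cache_size);
            return (pages_fetched * random_page_cost) / outer_rows;
        }
        else
        {
            double  max_IO_cost;
            double  min_IO_cost;
            double  csquared = indexCorrelation * indexCorrelation;

            /* perfectly uncorrelated: Mackert-Lohman pages, all random */
            pages_fetched = ml_pages_fetched(tuples_fetched,
                                             baserel_pages, index_pages,
                                             effective_cache_size);
            max_IO_cost = pages_fetched * random_page_cost;

            /* perfectly correlated: one random fetch, the rest sequential */
            pages_fetched = ceil(indexSelectivity * baserel_pages);
            min_IO_cost = random_page_cost;
            if (pages_fetched > 1)
                min_IO_cost += (pages_fetched - 1) * seq_page_cost;

            /* interpolate on the squared correlation */
            return max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
        }
    }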
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 9ae79cc19e4..30cc23b672d 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.103 2006/03/05 15:58:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.104 2006/06/06 17:59:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,7 +36,7 @@ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype); static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, - Relids outer_relids, JoinType jointype); + RelOptInfo *outer_rel, JoinType jointype); static List *select_mergejoin_clauses(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, @@ -401,12 +401,10 @@ match_unsorted_outer(PlannerInfo *root, { if (IsA(inner_cheapest_total, AppendPath)) bestinnerjoin = best_appendrel_indexscan(root, innerrel, - outerrel->relids, - jointype); + outerrel, jointype); else if (innerrel->rtekind == RTE_RELATION) bestinnerjoin = best_inner_indexscan(root, innerrel, - outerrel->relids, - jointype); + outerrel, jointype); } } @@ -791,13 +789,13 @@ hash_inner_and_outer(PlannerInfo *root, /* * best_appendrel_indexscan * Finds the best available set of inner indexscans for a nestloop join - * with the given append relation on the inside and the given outer_relids + * with the given append relation on the inside and the given outer_rel * outside. Returns an AppendPath comprising the best inner scans, or * NULL if there are no possible inner indexscans. */ static Path * best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, - Relids outer_relids, JoinType jointype) + RelOptInfo *outer_rel, JoinType jointype) { int parentRTindex = rel->relid; List *append_paths = NIL; @@ -832,7 +830,7 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, * Get the best innerjoin indexpath (if any) for this child rel. */ bestinnerjoin = best_inner_indexscan(root, childrel, - outer_relids, jointype); + outer_rel, jointype); /* * If no luck on an indexpath for this rel, we'll still consider diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c index 252835c5fa7..988145ca185 100644 --- a/src/backend/optimizer/path/orindxpath.c +++ b/src/backend/optimizer/path/orindxpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.78 2006/03/05 15:58:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.79 2006/06/06 17:59:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -107,7 +107,7 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel) * Use the generate_bitmap_or_paths() machinery to estimate the * value of each OR clause. We can use regular restriction * clauses along with the OR clause contents to generate - * indexquals. We pass outer_relids = NULL so that sub-clauses + * indexquals. We pass outer_rel = NULL so that sub-clauses * that are actually joins will be ignored. 
*/ List *orpaths; @@ -116,7 +116,7 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel) orpaths = generate_bitmap_or_paths(root, rel, list_make1(rinfo), rel->baserestrictinfo, - false, NULL); + NULL); /* Locate the cheapest OR path */ foreach(k, orpaths) diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index c2a1199d6c7..c567f8b6b74 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.14 2006/04/28 20:57:49 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.15 2006/06/06 17:59:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -373,7 +373,7 @@ build_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *info) restrictclauses, NIL, indexscandir, - false); + NULL); /* * Estimate actual cost of fetching just one row. diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 170f260bfbc..f8270074142 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.127 2006/03/05 15:58:31 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.128 2006/06/06 17:59:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -420,8 +420,8 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel) * 'indexscandir' is ForwardScanDirection or BackwardScanDirection * for an ordered index, or NoMovementScanDirection for * an unordered index. - * 'isjoininner' is TRUE if this is a join inner indexscan path. - * (pathkeys and indexscandir are ignored if so.) + * 'outer_rel' is the outer relation if this is a join inner indexscan path. + * (pathkeys and indexscandir are ignored if so.) NULL if not. * * Returns the new path node. */ @@ -431,7 +431,7 @@ create_index_path(PlannerInfo *root, List *clause_groups, List *pathkeys, ScanDirection indexscandir, - bool isjoininner) + RelOptInfo *outer_rel) { IndexPath *pathnode = makeNode(IndexPath); RelOptInfo *rel = index->rel; @@ -445,7 +445,7 @@ create_index_path(PlannerInfo *root, * don't really care what order it's scanned in. (We could expect the * caller to supply the correct values, but it's easier to force it here.) */ - if (isjoininner) + if (outer_rel != NULL) { pathkeys = NIL; indexscandir = NoMovementScanDirection; @@ -466,10 +466,10 @@ create_index_path(PlannerInfo *root, pathnode->indexclauses = allclauses; pathnode->indexquals = indexquals; - pathnode->isjoininner = isjoininner; + pathnode->isjoininner = (outer_rel != NULL); pathnode->indexscandir = indexscandir; - if (isjoininner) + if (outer_rel != NULL) { /* * We must compute the estimated number of output rows for the @@ -505,7 +505,7 @@ create_index_path(PlannerInfo *root, pathnode->rows = rel->rows; } - cost_index(pathnode, root, index, indexquals, isjoininner); + cost_index(pathnode, root, index, indexquals, outer_rel); return pathnode; } @@ -515,6 +515,9 @@ create_index_path(PlannerInfo *root, * Creates a path node for a bitmap scan. * * 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes. + * + * If this is a join inner indexscan path, the component IndexPaths should + * have been costed accordingly, and TRUE should be passed for isjoininner. 
*/ BitmapHeapPath * create_bitmap_heap_path(PlannerInfo *root, @@ -560,7 +563,7 @@ create_bitmap_heap_path(PlannerInfo *root, pathnode->rows = rel->rows; } - cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, false); + cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual); return pathnode; } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index c75ceef3732..8bf517e4208 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.206 2006/06/05 02:49:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.207 2006/06/06 17:59:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -4465,6 +4465,7 @@ string_to_bytea_const(const char *str, size_t str_len) static void genericcostestimate(PlannerInfo *root, IndexOptInfo *index, List *indexQuals, + RelOptInfo *outer_rel, double numIndexTuples, Cost *indexStartupCost, Cost *indexTotalCost, @@ -4538,26 +4539,61 @@ genericcostestimate(PlannerInfo *root, /* * Estimate the number of index pages that will be retrieved. * - * For all currently-supported index types, the first page of the index is - * a metadata page, and we should figure on fetching that plus a pro-rated - * fraction of the remaining pages. + * We use the simplistic method of taking a pro-rata fraction of the + * total number of index pages. In effect, this counts only leaf pages + * and not any overhead such as index metapage or upper tree levels. + * In practice this seems a better approximation than charging for + * access to the upper levels, perhaps because those tend to stay in + * cache under load. */ - if (index->pages > 1 && index->tuples > 0) - { - numIndexPages = (numIndexTuples / index->tuples) * (index->pages - 1); - numIndexPages += 1; /* count the metapage too */ - numIndexPages = ceil(numIndexPages); - } + if (index->pages > 1 && index->tuples > 1) + numIndexPages = ceil(numIndexTuples * index->pages / index->tuples); else numIndexPages = 1.0; /* - * Compute the index access cost. + * Now compute the disk access costs. * - * Disk cost: our generic assumption is that the index pages will be read - * sequentially, so they cost seq_page_cost each, not random_page_cost. + * The above calculations are all per-index-scan. However, if we are + * in a nestloop inner scan, we can expect the scan to be repeated (with + * different search keys) for each row of the outer relation. This + * creates the potential for cache effects to reduce the number of + * disk page fetches needed. We want to estimate the average per-scan + * I/O cost in the presence of caching. + * + * We use the Mackert-Lohman formula (see costsize.c for details) to + * estimate the total number of page fetches that occur. While this + * wasn't what it was designed for, it seems a reasonable model anyway. + * Note that we are counting pages not tuples anymore, so we take + * N = T = index size, as if there were one "tuple" per page. 
*/ - *indexTotalCost = seq_page_cost * numIndexPages; + if (outer_rel != NULL && outer_rel->rows > 1) + { + double num_scans = outer_rel->rows; + double pages_fetched; + + /* total page fetches ignoring cache effects */ + pages_fetched = numIndexPages * num_scans; + + /* use Mackert and Lohman formula to adjust for cache effects */ + pages_fetched = index_pages_fetched(pages_fetched, + index->pages, + index->rel->pages); + + /* + * Now compute the total disk access cost, and then report a + * pro-rated share for one index scan. + */ + *indexTotalCost = (pages_fetched * random_page_cost) / num_scans; + } + else + { + /* + * For a single index scan, we just charge random_page_cost per page + * touched. + */ + *indexTotalCost = numIndexPages * random_page_cost; + } /* * CPU cost: any complex expressions in the indexquals will need to be @@ -4594,10 +4630,11 @@ btcostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5); - double *indexCorrelation = (double *) PG_GETARG_POINTER(6); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); + double *indexCorrelation = (double *) PG_GETARG_POINTER(7); Oid relid; AttrNumber colnum; HeapTuple tuple; @@ -4728,7 +4765,7 @@ btcostestimate(PG_FUNCTION_ARGS) numIndexTuples = btreeSelectivity * index->rel->tuples; } - genericcostestimate(root, index, indexQuals, numIndexTuples, + genericcostestimate(root, index, indexQuals, outer_rel, numIndexTuples, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); @@ -4800,12 +4837,13 @@ hashcostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5); - double *indexCorrelation = (double *) PG_GETARG_POINTER(6); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); + double *indexCorrelation = (double *) PG_GETARG_POINTER(7); - genericcostestimate(root, index, indexQuals, 0.0, + genericcostestimate(root, index, indexQuals, outer_rel, 0.0, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); @@ -4818,12 +4856,13 @@ gistcostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5); - double *indexCorrelation = (double *) PG_GETARG_POINTER(6); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); + Cost *indexStartupCost = (Cost *) 
PG_GETARG_POINTER(4); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); + double *indexCorrelation = (double *) PG_GETARG_POINTER(7); - genericcostestimate(root, index, indexQuals, 0.0, + genericcostestimate(root, index, indexQuals, outer_rel, 0.0, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); @@ -4836,12 +4875,13 @@ gincostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5); - double *indexCorrelation = (double *) PG_GETARG_POINTER(6); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); + double *indexCorrelation = (double *) PG_GETARG_POINTER(7); - genericcostestimate(root, index, indexQuals, 0.0, + genericcostestimate(root, index, indexQuals, outer_rel, 0.0, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); |
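On the selfuncs.c side, the new disk-access charging in genericcostestimate() boils down to the following sketch, again with illustrative names only (index_io_cost is not a real function; ml_pages_fetched is the helper sketched earlier): leaf pages are estimated pro rata from the fraction of index tuples visited, and for a nestloop inner scan the page fetches of all scans are pushed through Mackert-Lohman with N = T = index size, then pro-rated back to a single scan.

    #include <math.h>

    extern double ml_pages_fetched(double tuples_fetched, double pages,
                                   double other_pages,
                                   double effective_cache_size);

    /* Hedged sketch of the index-page I/O charge in genericcostestimate(). */
    double
    index_io_cost(double numIndexTuples, double index_pages, double index_tuples,
                  double heap_pages, double outer_rows,
                  double random_page_cost, double effective_cache_size)
    {
        double  numIndexPages;

        /* pro-rata share of the index: effectively counts only leaf pages */
        if (index_pages > 1 && index_tuples > 1)
            numIndexPages = ceil(numIndexTuples * index_pages / index_tuples);
        else
            numIndexPages = 1.0;

        if (outer_rows > 1)
        {
            /* total fetches over all scans, cache-adjusted, then pro-rated */
            double  pages_fetched = ml_pages_fetched(numIndexPages * outer_rows,
                                                     index_pages, heap_pages,
                                                     effective_cache_size);

            return (pages_fetched * random_page_cost) / outer_rows;
        }

        /* single scan: charge random_page_cost per index page touched */
        return numIndexPages * random_page_cost;
    }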