diff options
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 410 |
1 files changed, 291 insertions, 119 deletions
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index d9e53a951ce..7ad89c16886 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.191.2.9 2006/06/07 17:08:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.191.2.10 2007/04/17 20:03:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -47,15 +47,29 @@ ((opclass) == BOOL_BTREE_OPS_OID || (opclass) == BOOL_HASH_OPS_OID) +/* Per-path data used within choose_bitmap_and() */ +typedef struct +{ + Path *path; /* IndexPath, BitmapAndPath, or BitmapOrPath */ + List *quals; /* the WHERE clauses it uses */ + List *preds; /* predicates of its partial index(es) */ + Bitmapset *clauseids; /* quals+preds represented as a bitmapset */ +} PathClauseUsage; + + static List *find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *outer_clauses, bool istoplevel, bool isjoininner, Relids outer_relids); static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths); -static int bitmap_path_comparator(const void *a, const void *b); +static int path_usage_comparator(const void *a, const void *b); +static Cost bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, + Path *ipath); static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths); -static List *pull_indexpath_quals(Path *bitmapqual); -static bool lists_intersect_ptr(List *list1, List *list2); +static PathClauseUsage *classify_index_clause_usage(Path *path, + List **clauselist); +static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds); +static int find_list_position(Node *node, List **nodelist); static bool match_clause_to_indexcol(IndexOptInfo *index, int indexcol, Oid opclass, RestrictInfo *rinfo, @@ -508,11 +522,12 @@ static Path * choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths) { int npaths = list_length(paths); - Path **patharray; - Cost costsofar; - List *qualsofar; - ListCell *lastcell; - int i; + PathClauseUsage **pathinfoarray; + PathClauseUsage *pathinfo; + List *clauselist; + List *bestpaths = NIL; + Cost bestcost = 0; + int i, j; ListCell *l; Assert(npaths > 0); /* else caller error */ @@ -523,117 +538,230 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths) * In theory we should consider every nonempty subset of the given paths. * In practice that seems like overkill, given the crude nature of the * estimates, not to mention the possible effects of higher-level AND and - * OR clauses. As a compromise, we sort the paths by selectivity. We - * always take the first, and sequentially add on paths that result in a - * lower estimated cost. + * OR clauses. Moreover, it's completely impractical if there are a large + * number of paths, since the work would grow as O(2^N). * - * We also make some effort to detect directly redundant input paths, as - * can happen if there are multiple possibly usable indexes. We - * consider an index redundant if any of its index conditions were already - * used by earlier indexes. (We could use predicate_implied_by to have a - * more intelligent, but much more expensive, check --- but in most cases - * simple pointer equality should suffice, since after all the index - * conditions are all coming from the same RestrictInfo lists.) + * As a heuristic, we first check for paths using exactly the same + * sets of WHERE clauses + index predicate conditions, and reject all + * but the cheapest-to-scan in any such group. This primarily gets rid + * of indexes that include the interesting columns but also irrelevant + * columns. (In situations where the DBA has gone overboard on creating + * variant indexes, this can make for a very large reduction in the number + * of paths considered further.) * - * You might think the condition for redundancy should be "all index - * conditions already used", not "any", but this turns out to be wrong. - * For example, if we use an index on A, and then come to an index with - * conditions on A and B, the only way that the second index can be later - * in the selectivity-order sort is if the condition on B is completely - * non-selective. In any case, we'd surely be drastically misestimating - * the selectivity if we count the same condition twice. + * We then sort the surviving paths with the cheapest-to-scan first, + * and for each path, consider using that path alone as the basis for + * a bitmap scan. Then we consider bitmap AND scans formed from that + * path plus each subsequent (higher-cost) path, adding on a subsequent + * path if it results in a reduction in the estimated total scan cost. + * This means we consider about O(N^2) rather than O(2^N) path + * combinations, which is quite tolerable, especially given than N is + * usually reasonably small because of the prefiltering step. The + * cheapest of these is returned. * - * We include index predicate conditions in the redundancy test. Because - * the test is just for pointer equality and not equal(), the effect is - * that use of the same partial index in two different AND elements is - * considered redundant. (XXX is this too strong?) + * We will only consider AND combinations in which no two indexes use + * the same WHERE clause. This is a bit of a kluge: it's needed because + * costsize.c and clausesel.c aren't very smart about redundant clauses. + * They will usually double-count the redundant clauses, producing a + * too-small selectivity that makes a redundant AND step look like it + * reduces the total cost. Perhaps someday that code will be smarter and + * we can remove this limitation. (But note that this also defends + * against flat-out duplicate input paths, which can happen because + * best_inner_indexscan will find the same OR join clauses that + * create_or_index_quals has pulled OR restriction clauses out of.) * - * Note: outputting the selected sub-paths in selectivity order is a good - * thing even if we weren't using that as part of the selection method, - * because it makes the short-circuit case in MultiExecBitmapAnd() more - * likely to apply. + * For the same reason, we reject AND combinations in which an index + * predicate clause duplicates another clause. Here we find it necessary + * to be even stricter: we'll reject a partial index if any of its + * predicate clauses are implied by the set of WHERE clauses and predicate + * clauses used so far. This covers cases such as a condition "x = 42" + * used with a plain index, followed by a clauseless scan of a partial + * index "WHERE x >= 40 AND x < 50". The partial index has been accepted + * only because "x = 42" was present, and so allowing it would partially + * double-count selectivity. (We could use predicate_implied_by on + * regular qual clauses too, to have a more intelligent, but much more + * expensive, check for redundancy --- but in most cases simple equality + * seems to suffice.) */ - /* Convert list to array so we can apply qsort */ - patharray = (Path **) palloc(npaths * sizeof(Path *)); - i = 0; + /* + * Extract clause usage info and detect any paths that use exactly + * the same set of clauses; keep only the cheapest-to-scan of any such + * groups. The surviving paths are put into an array for qsort'ing. + */ + pathinfoarray = (PathClauseUsage **) + palloc(npaths * sizeof(PathClauseUsage *)); + clauselist = NIL; + npaths = 0; foreach(l, paths) { - patharray[i++] = (Path *) lfirst(l); + Path *ipath = (Path *) lfirst(l); + + pathinfo = classify_index_clause_usage(ipath, &clauselist); + for (i = 0; i < npaths; i++) + { + if (bms_equal(pathinfo->clauseids, pathinfoarray[i]->clauseids)) + break; + } + if (i < npaths) + { + /* duplicate clauseids, keep the cheaper one */ + Cost ncost; + Cost ocost; + Selectivity nselec; + Selectivity oselec; + + cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec); + cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec); + if (ncost < ocost) + pathinfoarray[i] = pathinfo; + } + else + { + /* not duplicate clauseids, add to array */ + pathinfoarray[npaths++] = pathinfo; + } } - qsort(patharray, npaths, sizeof(Path *), bitmap_path_comparator); - paths = list_make1(patharray[0]); - costsofar = bitmap_and_cost_est(root, rel, paths); - qualsofar = pull_indexpath_quals(patharray[0]); - lastcell = list_head(paths); /* for quick deletions */ + /* If only one surviving path, we're done */ + if (npaths == 1) + return pathinfoarray[0]->path; + + /* Sort the surviving paths by index access cost */ + qsort(pathinfoarray, npaths, sizeof(PathClauseUsage *), + path_usage_comparator); - for (i = 1; i < npaths; i++) + /* + * For each surviving index, consider it as an "AND group leader", and + * see whether adding on any of the later indexes results in an AND path + * with cheaper total cost than before. Then take the cheapest AND group. + */ + for (i = 0; i < npaths; i++) { - Path *newpath = patharray[i]; - List *newqual; - Cost newcost; - - newqual = pull_indexpath_quals(newpath); - if (lists_intersect_ptr(newqual, qualsofar)) - continue; /* consider it redundant */ - /* tentatively add newpath to paths, so we can estimate cost */ - paths = lappend(paths, newpath); - newcost = bitmap_and_cost_est(root, rel, paths); - if (newcost < costsofar) + Cost costsofar; + List *qualsofar; + Bitmapset *clauseidsofar; + ListCell *lastcell; + + pathinfo = pathinfoarray[i]; + paths = list_make1(pathinfo->path); + costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path); + qualsofar = list_concat(list_copy(pathinfo->quals), + list_copy(pathinfo->preds)); + clauseidsofar = bms_copy(pathinfo->clauseids); + lastcell = list_head(paths); /* for quick deletions */ + + for (j = i+1; j < npaths; j++) { - /* keep newpath in paths, update subsidiary variables */ - costsofar = newcost; - qualsofar = list_concat(qualsofar, newqual); - lastcell = lnext(lastcell); + Cost newcost; + + pathinfo = pathinfoarray[j]; + /* Check for redundancy */ + if (bms_overlap(pathinfo->clauseids, clauseidsofar)) + continue; /* consider it redundant */ + if (pathinfo->preds) + { + bool redundant = false; + + /* we check each predicate clause separately */ + foreach(l, pathinfo->preds) + { + Node *np = (Node *) lfirst(l); + + if (predicate_implied_by(list_make1(np), qualsofar)) + { + redundant = true; + break; /* out of inner foreach loop */ + } + } + if (redundant) + continue; + } + /* tentatively add new path to paths, so we can estimate cost */ + paths = lappend(paths, pathinfo->path); + newcost = bitmap_and_cost_est(root, rel, paths); + if (newcost < costsofar) + { + /* keep new path in paths, update subsidiary variables */ + costsofar = newcost; + qualsofar = list_concat(qualsofar, + list_copy(pathinfo->quals)); + qualsofar = list_concat(qualsofar, + list_copy(pathinfo->preds)); + clauseidsofar = bms_add_members(clauseidsofar, + pathinfo->clauseids); + lastcell = lnext(lastcell); + } + else + { + /* reject new path, remove it from paths list */ + paths = list_delete_cell(paths, lnext(lastcell), lastcell); + } + Assert(lnext(lastcell) == NULL); } - else + + /* Keep the cheapest AND-group (or singleton) */ + if (i == 0 || costsofar < bestcost) { - /* reject newpath, remove it from paths list */ - paths = list_delete_cell(paths, lnext(lastcell), lastcell); + bestpaths = paths; + bestcost = costsofar; } - Assert(lnext(lastcell) == NULL); + + /* some easy cleanup (we don't try real hard though) */ + list_free(qualsofar); } - if (list_length(paths) == 1) - return (Path *) linitial(paths); /* no need for AND */ - return (Path *) create_bitmap_and_path(root, rel, paths); + if (list_length(bestpaths) == 1) + return (Path *) linitial(bestpaths); /* no need for AND */ + return (Path *) create_bitmap_and_path(root, rel, bestpaths); } -/* qsort comparator to sort in increasing selectivity order */ +/* qsort comparator to sort in increasing index access cost order */ static int -bitmap_path_comparator(const void *a, const void *b) +path_usage_comparator(const void *a, const void *b) { - Path *pa = *(Path *const *) a; - Path *pb = *(Path *const *) b; + PathClauseUsage *pa = *(PathClauseUsage *const *) a; + PathClauseUsage *pb = *(PathClauseUsage *const *) b; Cost acost; Cost bcost; Selectivity aselec; Selectivity bselec; - cost_bitmap_tree_node(pa, &acost, &aselec); - cost_bitmap_tree_node(pb, &bcost, &bselec); + cost_bitmap_tree_node(pa->path, &acost, &aselec); + cost_bitmap_tree_node(pb->path, &bcost, &bselec); /* - * If selectivities are the same, sort by cost. (Note: there used to be - * logic here to do "fuzzy comparison", but that's a bad idea because it - * fails to be transitive, which will confuse qsort terribly.) + * If costs are the same, sort by selectivity. */ - if (aselec < bselec) + if (acost < bcost) return -1; - if (aselec > bselec) + if (acost > bcost) return 1; - if (acost < bcost) + if (aselec < bselec) return -1; - if (acost > bcost) + if (aselec > bselec) return 1; return 0; } /* - * Estimate the cost of actually executing a BitmapAnd with the given + * Estimate the cost of actually executing a bitmap scan with a single + * index path (no BitmapAnd, at least not at this level). + */ +static Cost +bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, Path *ipath) +{ + Path bpath; + + cost_bitmap_heap_scan(&bpath, root, rel, ipath, false); + + return bpath.total_cost; +} + +/* + * Estimate the cost of actually executing a BitmapAnd scan with the given * inputs. */ static Cost @@ -654,90 +782,134 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths) return bpath.total_cost; } + /* - * pull_indexpath_quals + * classify_index_clause_usage + * Construct a PathClauseUsage struct describing the WHERE clauses and + * index predicate clauses used by the given indexscan path. + * We consider two clauses the same if they are equal(). + * + * At some point we might want to migrate this info into the Path data + * structure proper, but for the moment it's only needed within + * choose_bitmap_and(). * - * Given the Path structure for a plain or bitmap indexscan, extract a list + * *clauselist is used and expanded as needed to identify all the distinct + * clauses seen across successive calls. Caller must initialize it to NIL + * before first call of a set. + */ +static PathClauseUsage * +classify_index_clause_usage(Path *path, List **clauselist) +{ + PathClauseUsage *result; + Bitmapset *clauseids; + ListCell *lc; + + result = (PathClauseUsage *) palloc(sizeof(PathClauseUsage)); + result->path = path; + + /* Recursively find the quals and preds used by the path */ + result->quals = NIL; + result->preds = NIL; + find_indexpath_quals(path, &result->quals, &result->preds); + + /* Build up a bitmapset representing the quals and preds */ + clauseids = NULL; + foreach(lc, result->quals) + { + Node *node = (Node *) lfirst(lc); + + clauseids = bms_add_member(clauseids, + find_list_position(node, clauselist)); + } + foreach(lc, result->preds) + { + Node *node = (Node *) lfirst(lc); + + clauseids = bms_add_member(clauseids, + find_list_position(node, clauselist)); + } + result->clauseids = clauseids; + + return result; +} + + +/* + * find_indexpath_quals + * + * Given the Path structure for a plain or bitmap indexscan, extract lists * of all the indexquals and index predicate conditions used in the Path. + * These are appended to the initial contents of *quals and *preds (hence + * caller should initialize those to NIL). * * This is sort of a simplified version of make_restrictinfo_from_bitmapqual; * here, we are not trying to produce an accurate representation of the AND/OR * semantics of the Path, but just find out all the base conditions used. * - * The result list contains pointers to the expressions used in the Path, + * The result lists contain pointers to the expressions used in the Path, * but all the list cells are freshly built, so it's safe to destructively - * modify the list (eg, by concat'ing it with other lists). + * modify the lists (eg, by concat'ing with other lists). */ -static List * -pull_indexpath_quals(Path *bitmapqual) +static void +find_indexpath_quals(Path *bitmapqual, List **quals, List **preds) { - List *result = NIL; - ListCell *l; - if (IsA(bitmapqual, BitmapAndPath)) { BitmapAndPath *apath = (BitmapAndPath *) bitmapqual; + ListCell *l; foreach(l, apath->bitmapquals) { - List *sublist; - - sublist = pull_indexpath_quals((Path *) lfirst(l)); - result = list_concat(result, sublist); + find_indexpath_quals((Path *) lfirst(l), quals, preds); } } else if (IsA(bitmapqual, BitmapOrPath)) { BitmapOrPath *opath = (BitmapOrPath *) bitmapqual; + ListCell *l; foreach(l, opath->bitmapquals) { - List *sublist; - - sublist = pull_indexpath_quals((Path *) lfirst(l)); - result = list_concat(result, sublist); + find_indexpath_quals((Path *) lfirst(l), quals, preds); } } else if (IsA(bitmapqual, IndexPath)) { IndexPath *ipath = (IndexPath *) bitmapqual; - result = get_actual_clauses(ipath->indexclauses); - result = list_concat(result, list_copy(ipath->indexinfo->indpred)); + *quals = list_concat(*quals, get_actual_clauses(ipath->indexclauses)); + *preds = list_concat(*preds, list_copy(ipath->indexinfo->indpred)); } else elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual)); - - return result; } /* - * lists_intersect_ptr - * Detect whether two lists have a nonempty intersection, - * using pointer equality to compare members. - * - * This possibly should go into list.c, but it doesn't yet have any use - * except in choose_bitmap_and. + * find_list_position + * Return the given node's position (counting from 0) in the given + * list of nodes. If it's not equal() to any existing list member, + * add it at the end, and return that position. */ -static bool -lists_intersect_ptr(List *list1, List *list2) +static int +find_list_position(Node *node, List **nodelist) { - ListCell *cell1; + int i; + ListCell *lc; - foreach(cell1, list1) + i = 0; + foreach(lc, *nodelist) { - void *datum1 = lfirst(cell1); - ListCell *cell2; + Node *oldnode = (Node *) lfirst(lc); - foreach(cell2, list2) - { - if (lfirst(cell2) == datum1) - return true; - } + if (equal(node, oldnode)) + return i; + i++; } - return false; + *nodelist = lappend(*nodelist, node); + + return i; } |