diff options
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 6 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 176 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 182 | ||||
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 411 |
4 files changed, 490 insertions, 285 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 12fc576612f..4f7a0e570f5 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.67 2000/11/12 00:36:58 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.68 2000/12/14 22:30:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -162,9 +162,7 @@ set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte) create_tidscan_paths(root, rel); /* Consider index paths for both simple and OR index clauses */ - create_index_paths(root, rel, indices, - rel->baserestrictinfo, - rel->joininfo); + create_index_paths(root, rel, indices); /* * Note: create_or_index_paths depends on create_index_paths to diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 63e3a53af5a..28437e6c56d 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.99 2000/11/25 20:33:51 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.100 2000/12/14 22:30:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -87,11 +87,6 @@ static void indexable_joinclauses(RelOptInfo *rel, IndexOptInfo *index, List **clausegroups, List **outerrelids); static List *index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, List *clausegroup_list, List *outerrelids_list); -static bool useful_for_mergejoin(RelOptInfo *rel, IndexOptInfo *index, - List *joininfo_list); -static bool useful_for_ordering(Query *root, RelOptInfo *rel, - IndexOptInfo *index, - ScanDirection scandir); static bool match_index_to_operand(int indexkey, Var *operand, RelOptInfo *rel, IndexOptInfo *index); static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel, @@ -125,31 +120,31 @@ static Const *string_to_const(const char *str, Oid datatype); * attributes are available and fixed during any one scan of the indexpath. * * An IndexPath is generated and submitted to add_path() for each index - * this routine deems potentially interesting for the current query - * (at most one IndexPath per index on the given relation). An innerjoin - * path is also generated for each interesting combination of outer join - * relations. The innerjoin paths are *not* passed to add_path(), but are - * appended to the "innerjoin" list of the relation for later consideration - * in nested-loop joins. + * this routine deems potentially interesting for the current query. + * An innerjoin path is also generated for each interesting combination of + * outer join relations. The innerjoin paths are *not* passed to add_path(), + * but are appended to the "innerjoin" list of the relation for later + * consideration in nested-loop joins. * * 'rel' is the relation for which we want to generate index paths * 'indices' is a list of available indexes for 'rel' - * 'restrictinfo_list' is a list of restrictinfo nodes for 'rel' - * 'joininfo_list' is a list of joininfo nodes for 'rel' */ void create_index_paths(Query *root, RelOptInfo *rel, - List *indices, - List *restrictinfo_list, - List *joininfo_list) + List *indices) { + List *restrictinfo_list = rel->baserestrictinfo; + List *joininfo_list = rel->joininfo; List *ilist; foreach(ilist, indices) { IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); List *restrictclauses; + List *index_pathkeys; + List *useful_pathkeys; + bool index_is_ordered; List *joinclausegroups; List *joinouterrelids; @@ -179,9 +174,7 @@ create_index_paths(Query *root, match_index_orclauses(rel, index, restrictinfo_list); /* - * 2. If the keys of this index match any of the available - * non-'or' restriction clauses, then create a path using those - * clauses as indexquals. + * 2. Match the index against non-'or' restriction clauses. */ restrictclauses = group_clauses_by_indexkey(rel, index, @@ -189,43 +182,50 @@ create_index_paths(Query *root, index->classlist, restrictinfo_list); - if (restrictclauses != NIL) - add_path(rel, (Path *) create_index_path(root, rel, index, - restrictclauses, - NoMovementScanDirection)); - /* - * 3. If this index can be used for a mergejoin, then create an - * index path for it even if there were no restriction clauses. - * (If there were, there is no need to make another index path.) - * This will allow the index to be considered as a base for a - * mergejoin in later processing. Similarly, if the index matches - * the ordering that is needed for the overall query result, make - * an index path for it even if there is no other reason to do so. + * 3. Compute pathkeys describing index's ordering, if any, + * then see how many of them are actually useful for this query. */ - if (restrictclauses == NIL) - { - if (useful_for_mergejoin(rel, index, joininfo_list) || - useful_for_ordering(root, rel, index, ForwardScanDirection)) - add_path(rel, (Path *) - create_index_path(root, rel, index, - restrictclauses, - ForwardScanDirection)); - } + index_pathkeys = build_index_pathkeys(root, rel, index, + ForwardScanDirection); + index_is_ordered = (index_pathkeys != NIL); + useful_pathkeys = truncate_useless_pathkeys(root, rel, + index_pathkeys); /* - * Currently, backwards scan is never considered except for the - * case of matching a query result ordering. Possibly should - * consider it in other places? + * 4. Generate an indexscan path if there are relevant restriction + * clauses OR the index ordering is potentially useful for later + * merging or final output ordering. */ - if (useful_for_ordering(root, rel, index, BackwardScanDirection)) + if (restrictclauses != NIL || useful_pathkeys != NIL) add_path(rel, (Path *) create_index_path(root, rel, index, restrictclauses, - BackwardScanDirection)); + useful_pathkeys, + index_is_ordered ? + ForwardScanDirection : + NoMovementScanDirection)); + + /* + * 5. If the index is ordered, a backwards scan might be interesting. + * Currently this is only possible for a DESC query result ordering. + */ + if (index_is_ordered) + { + index_pathkeys = build_index_pathkeys(root, rel, index, + BackwardScanDirection); + useful_pathkeys = truncate_useless_pathkeys(root, rel, + index_pathkeys); + if (useful_pathkeys != NIL) + add_path(rel, (Path *) + create_index_path(root, rel, index, + restrictclauses, + useful_pathkeys, + BackwardScanDirection)); + } /* - * 4. Create an innerjoin index path for each combination of other + * 6. Create an innerjoin index path for each combination of other * rels used in available join clauses. These paths will be * considered as the inner side of nestloop joins against those * sets of other rels. indexable_joinclauses() finds sets of @@ -904,88 +904,6 @@ indexable_operator(Expr *clause, Oid opclass, Oid relam, return InvalidOid; } -/* - * useful_for_mergejoin - * Determine whether the given index can support a mergejoin based - * on any available join clause. - * - * We look to see whether the first indexkey of the index matches the - * left or right sides of any of the mergejoinable clauses and provides - * the ordering needed for that side. If so, the index is useful. - * Matching a second or later indexkey is not useful unless there is - * also a mergeclause for the first indexkey, so we need not consider - * secondary indexkeys at this stage. - * - * 'rel' is the relation for which 'index' is defined - * 'joininfo_list' is the list of JoinInfo nodes for 'rel' - */ -static bool -useful_for_mergejoin(RelOptInfo *rel, - IndexOptInfo *index, - List *joininfo_list) -{ - int *indexkeys = index->indexkeys; - Oid *ordering = index->ordering; - List *i; - - if (!indexkeys || indexkeys[0] == 0 || - !ordering || ordering[0] == InvalidOid) - return false; /* unordered index is not useful */ - - foreach(i, joininfo_list) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(i); - List *j; - - foreach(j, joininfo->jinfo_restrictinfo) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); - - if (restrictinfo->mergejoinoperator) - { - if (restrictinfo->left_sortop == ordering[0] && - match_index_to_operand(indexkeys[0], - get_leftop(restrictinfo->clause), - rel, index)) - return true; - if (restrictinfo->right_sortop == ordering[0] && - match_index_to_operand(indexkeys[0], - get_rightop(restrictinfo->clause), - rel, index)) - return true; - } - } - } - return false; -} - -/* - * useful_for_ordering - * Determine whether the given index can produce an ordering matching - * the order that is wanted for the query result. - * - * 'rel' is the relation for which 'index' is defined - * 'scandir' is the contemplated scan direction - */ -static bool -useful_for_ordering(Query *root, - RelOptInfo *rel, - IndexOptInfo *index, - ScanDirection scandir) -{ - List *index_pathkeys; - - if (root->query_pathkeys == NIL) - return false; /* no special ordering requested */ - - index_pathkeys = build_index_pathkeys(root, rel, index, scandir); - - if (index_pathkeys == NIL) - return false; /* unordered index */ - - return pathkeys_contained_in(root->query_pathkeys, index_pathkeys); -} - /**************************************************************************** * ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ---- ****************************************************************************/ diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 6096f8c3f26..81a5431db97 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.59 2000/11/23 03:57:31 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.60 2000/12/14 22:30:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -152,6 +152,7 @@ sort_inner_and_outer(Query *root, List *mergeclause_list, JoinType jointype) { + List *all_pathkeys; List *i; /* @@ -159,36 +160,57 @@ sort_inner_and_outer(Query *root, * generate a differently-sorted result path at essentially the same * cost. We have no basis for choosing one over another at this level * of joining, but some sort orders may be more useful than others for - * higher-level mergejoins. Generating a path here for *every* - * permutation of mergejoin clauses doesn't seem like a winning - * strategy, however; the cost in planning time is too high. + * higher-level mergejoins, so it's worth considering multiple orderings. * - * For now, we generate one path for each mergejoin clause, listing that - * clause first and the rest in random order. This should allow at + * Actually, it's not quite true that every mergeclause ordering will + * generate a different path order, because some of the clauses may be + * redundant. Therefore, what we do is convert the mergeclause list to + * a list of canonical pathkeys, and then consider different orderings + * of the pathkeys. + * + * Generating a path for *every* permutation of the pathkeys doesn't + * seem like a winning strategy; the cost in planning time is too high. + * For now, we generate one path for each pathkey, listing that pathkey + * first and the rest in random order. This should allow at * least a one-clause mergejoin without re-sorting against any other * possible mergejoin partner path. But if we've not guessed the - * right ordering of secondary clauses, we may end up evaluating + * right ordering of secondary keys, we may end up evaluating * clauses as qpquals when they could have been done as mergeclauses. * We need to figure out a better way. (Two possible approaches: look * at all the relevant index relations to suggest plausible sort * orders, or make just one output path and somehow mark it as having * a sort-order that can be rearranged freely.) */ - foreach(i, mergeclause_list) + all_pathkeys = make_pathkeys_for_mergeclauses(root, + mergeclause_list, + outerrel); + + foreach(i, all_pathkeys) { - RestrictInfo *restrictinfo = lfirst(i); - List *curclause_list; + List *front_pathkey = lfirst(i); + List *cur_pathkeys; + List *cur_mergeclauses; List *outerkeys; List *innerkeys; List *merge_pathkeys; - /* Make a mergeclause list with this guy first. */ - if (i != mergeclause_list) - curclause_list = lcons(restrictinfo, - lremove(restrictinfo, - listCopy(mergeclause_list))); + /* Make a pathkey list with this guy first. */ + if (i != all_pathkeys) + cur_pathkeys = lcons(front_pathkey, + lremove(front_pathkey, + listCopy(all_pathkeys))); else - curclause_list = mergeclause_list; /* no work at first one... */ + cur_pathkeys = all_pathkeys; /* no work at first one... */ + + /* + * Select mergeclause(s) that match this sort ordering. If we had + * redundant merge clauses then we will get a subset of the original + * clause list. There had better be some match, however... + */ + cur_mergeclauses = find_mergeclauses_for_pathkeys(root, + cur_pathkeys, + mergeclause_list); + Assert(cur_mergeclauses != NIL); /* * Build sort pathkeys for both sides. @@ -198,15 +220,13 @@ sort_inner_and_outer(Query *root, * suppress an explicit sort step, so we needn't do so here. */ outerkeys = make_pathkeys_for_mergeclauses(root, - curclause_list, + cur_mergeclauses, outerrel); innerkeys = make_pathkeys_for_mergeclauses(root, - curclause_list, + cur_mergeclauses, innerrel); /* Build pathkeys representing output sort order. */ - merge_pathkeys = build_join_pathkeys(outerkeys, - joinrel->targetlist, - root->equi_key_list); + merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys); /* * And now we can make the path. We only consider the cheapest- @@ -221,7 +241,7 @@ sort_inner_and_outer(Query *root, innerrel->cheapest_total_path, restrictlist, merge_pathkeys, - curclause_list, + cur_mergeclauses, outerkeys, innerkeys)); } @@ -301,17 +321,16 @@ match_unsorted_outer(Query *root, List *trialsortkeys; Path *cheapest_startup_inner; Path *cheapest_total_inner; - int num_mergeclauses; - int clausecnt; + int num_sortkeys; + int sortkeycnt; /* * The result will have this sort order (even if it is implemented * as a nestloop, and even if some of the mergeclauses are * implemented by qpquals rather than as true mergeclauses): */ - merge_pathkeys = build_join_pathkeys(outerpath->pathkeys, - joinrel->targetlist, - root->equi_key_list); + merge_pathkeys = build_join_pathkeys(root, joinrel, + outerpath->pathkeys); if (nestjoinOK) { @@ -347,7 +366,8 @@ match_unsorted_outer(Query *root, } /* Look for useful mergeclauses (if any) */ - mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys, + mergeclauses = find_mergeclauses_for_pathkeys(root, + outerpath->pathkeys, mergeclause_list); /* Done with this outer path if no chance for a mergejoin */ @@ -362,7 +382,8 @@ match_unsorted_outer(Query *root, /* * Generate a mergejoin on the basis of sorting the cheapest * inner. Since a sort will be needed, only cheapest total cost - * matters. + * matters. (But create_mergejoin_path will do the right thing + * if innerrel->cheapest_total_path is already correctly sorted.) */ add_path(joinrel, (Path *) create_mergejoin_path(joinrel, @@ -376,38 +397,49 @@ match_unsorted_outer(Query *root, innersortkeys)); /* - * Look for presorted inner paths that satisfy the mergeclause + * Look for presorted inner paths that satisfy the innersortkey * list or any truncation thereof. Here, we consider both cheap - * startup cost and cheap total cost. + * startup cost and cheap total cost. Ignore + * innerrel->cheapest_total_path, since we already made a path with it. */ - trialsortkeys = listCopy(innersortkeys); /* modifiable copy */ + num_sortkeys = length(innersortkeys); + if (num_sortkeys > 1) + trialsortkeys = listCopy(innersortkeys); /* need modifiable copy */ + else + trialsortkeys = innersortkeys; /* won't really truncate */ cheapest_startup_inner = NULL; cheapest_total_inner = NULL; - num_mergeclauses = length(mergeclauses); - for (clausecnt = num_mergeclauses; clausecnt > 0; clausecnt--) + for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) { Path *innerpath; List *newclauses = NIL; /* - * Look for an inner path ordered well enough to merge with - * the first 'clausecnt' mergeclauses. NB: trialsortkeys list + * Look for an inner path ordered well enough for the first + * 'sortkeycnt' innersortkeys. NB: trialsortkeys list * is modified destructively, which is why we made a copy... */ - trialsortkeys = ltruncate(clausecnt, trialsortkeys); + trialsortkeys = ltruncate(sortkeycnt, trialsortkeys); innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, TOTAL_COST); if (innerpath != NULL && + innerpath != innerrel->cheapest_total_path && (cheapest_total_inner == NULL || compare_path_costs(innerpath, cheapest_total_inner, TOTAL_COST) < 0)) { /* Found a cheap (or even-cheaper) sorted path */ - if (clausecnt < num_mergeclauses) - newclauses = ltruncate(clausecnt, - listCopy(mergeclauses)); + /* Select the right mergeclauses, if we didn't already */ + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + mergeclauses); + Assert(newclauses != NIL); + } else newclauses = mergeclauses; add_path(joinrel, (Path *) @@ -427,6 +459,7 @@ match_unsorted_outer(Query *root, trialsortkeys, STARTUP_COST); if (innerpath != NULL && + innerpath != innerrel->cheapest_total_path && (cheapest_startup_inner == NULL || compare_path_costs(innerpath, cheapest_startup_inner, STARTUP_COST) < 0)) @@ -441,9 +474,14 @@ match_unsorted_outer(Query *root, */ if (newclauses == NIL) { - if (clausecnt < num_mergeclauses) - newclauses = ltruncate(clausecnt, - listCopy(mergeclauses)); + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + mergeclauses); + Assert(newclauses != NIL); + } else newclauses = mergeclauses; } @@ -501,7 +539,8 @@ match_unsorted_inner(Query *root, Path *startupouterpath; /* Look for useful mergeclauses (if any) */ - mergeclauses = find_mergeclauses_for_pathkeys(innerpath->pathkeys, + mergeclauses = find_mergeclauses_for_pathkeys(root, + innerpath->pathkeys, mergeclause_list); if (mergeclauses == NIL) continue; @@ -516,9 +555,7 @@ match_unsorted_inner(Query *root, * outer. Since a sort will be needed, only cheapest total cost * matters. */ - merge_pathkeys = build_join_pathkeys(outersortkeys, - joinrel->targetlist, - root->equi_key_list); + merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys); add_path(joinrel, (Path *) create_mergejoin_path(joinrel, jointype, @@ -545,9 +582,8 @@ match_unsorted_inner(Query *root, continue; /* there won't be a startup-cost path * either */ - merge_pathkeys = build_join_pathkeys(totalouterpath->pathkeys, - joinrel->targetlist, - root->equi_key_list); + merge_pathkeys = build_join_pathkeys(root, joinrel, + totalouterpath->pathkeys); add_path(joinrel, (Path *) create_mergejoin_path(joinrel, jointype, @@ -564,9 +600,8 @@ match_unsorted_inner(Query *root, STARTUP_COST); if (startupouterpath != NULL && startupouterpath != totalouterpath) { - merge_pathkeys = build_join_pathkeys(startupouterpath->pathkeys, - joinrel->targetlist, - root->equi_key_list); + merge_pathkeys = build_join_pathkeys(root, joinrel, + startupouterpath->pathkeys); add_path(joinrel, (Path *) create_mergejoin_path(joinrel, jointype, @@ -637,10 +672,9 @@ hash_inner_and_outer(Query *root, RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); Expr *clause; Var *left, - *right, - *inner; - List *hashclauses; + *right; Selectivity innerdispersion; + List *hashclauses; if (restrictinfo->hashjoinoperator == InvalidOid) continue; /* not hashjoinable */ @@ -657,26 +691,48 @@ hash_inner_and_outer(Query *root, left = get_leftop(clause); right = get_rightop(clause); - /* check if clause is usable with these sub-rels, find inner var */ + /* + * Check if clause is usable with these sub-rels, find inner side, + * estimate dispersion of inner var for costing purposes. + * + * Since we tend to visit the same clauses over and over when + * planning a large query, we cache the dispersion estimates in the + * RestrictInfo node to avoid repeated lookups of statistics. + */ if (intMember(left->varno, outerrelids) && intMember(right->varno, innerrelids)) - inner = right; + { + /* righthand side is inner */ + innerdispersion = restrictinfo->right_dispersion; + if (innerdispersion < 0) + { + /* not cached yet */ + innerdispersion = estimate_dispersion(root, right); + restrictinfo->right_dispersion = innerdispersion; + } + } else if (intMember(left->varno, innerrelids) && intMember(right->varno, outerrelids)) - inner = left; + { + /* lefthand side is inner */ + innerdispersion = restrictinfo->left_dispersion; + if (innerdispersion < 0) + { + /* not cached yet */ + innerdispersion = estimate_dispersion(root, left); + restrictinfo->left_dispersion = innerdispersion; + } + } else continue; /* no good for these input relations */ /* always a one-element list of hash clauses */ hashclauses = makeList1(restrictinfo); - /* estimate dispersion of inner var for costing purposes */ - innerdispersion = estimate_dispersion(root, inner); - /* * We consider both the cheapest-total-cost and * cheapest-startup-cost outer paths. There's no need to consider - * any but the cheapest- total-cost inner path, however. + * any but the cheapest-total-cost inner path, however. */ add_path(joinrel, (Path *) create_hashjoin_path(joinrel, diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index f94d2e4037b..9c4e537d557 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.27 2000/11/12 00:36:58 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.28 2000/12/14 22:30:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -195,9 +195,8 @@ generate_implied_equalities(Query *root) * Given a PathKeyItem, find the equi_key_list subset it is a member of, * if any. If so, return a pointer to that sublist, which is the * canonical representation (for this query) of that PathKeyItem's - * equivalence set. If it is not found, return a single-element list - * containing the PathKeyItem (when the item has no equivalence peers, - * we just allow it to be a standalone list). + * equivalence set. If it is not found, add a singleton "equivalence set" + * to the equi_key_list and return that --- see compare_pathkeys. * * Note that this function must not be used until after we have completed * scanning the WHERE clause for equijoin operators. @@ -206,6 +205,7 @@ static List * make_canonical_pathkey(Query *root, PathKeyItem *item) { List *cursetlink; + List *newset; foreach(cursetlink, root->equi_key_list) { @@ -214,7 +214,9 @@ make_canonical_pathkey(Query *root, PathKeyItem *item) if (member(item, curset)) return curset; } - return lcons(item, NIL); + newset = makeList1(item); + root->equi_key_list = lcons(newset, root->equi_key_list); + return newset; } /* @@ -234,6 +236,7 @@ canonicalize_pathkeys(Query *root, List *pathkeys) { List *pathkey = (List *) lfirst(i); PathKeyItem *item; + List *cpathkey; /* * It's sufficient to look at the first entry in the sublist; if @@ -242,8 +245,15 @@ canonicalize_pathkeys(Query *root, List *pathkeys) */ Assert(pathkey != NIL); item = (PathKeyItem *) lfirst(pathkey); - new_pathkeys = lappend(new_pathkeys, - make_canonical_pathkey(root, item)); + cpathkey = make_canonical_pathkey(root, item); + /* + * Eliminate redundant ordering requests --- ORDER BY A,A + * is the same as ORDER BY A. We want to check this only + * after we have canonicalized the keys, so that equivalent-key + * knowledge is used when deciding if an item is redundant. + */ + if (!ptrMember(cpathkey, new_pathkeys)) + new_pathkeys = lappend(new_pathkeys, cpathkey); } return new_pathkeys; } @@ -257,19 +267,9 @@ canonicalize_pathkeys(Query *root, List *pathkeys) * Compare two pathkeys to see if they are equivalent, and if not whether * one is "better" than the other. * - * A pathkey can be considered better than another if it is a superset: - * it contains all the keys of the other plus more. For example, either - * ((A) (B)) or ((A B)) is better than ((A)). - * - * Because we actually only expect to see canonicalized pathkey sublists, - * we don't have to do the full two-way-subset-inclusion test on each - * pair of sublists that is implied by the above statement. Instead we - * just do an equal(). In the normal case where multi-element sublists - * are pointers into the root's equi_key_list, equal() will be very fast: - * it will recognize pointer equality when the sublists are the same, - * and will fail at the first sublist element when they are not. - * - * Yes, this gets called enough to be worth coding it this tensely. + * This function may only be applied to canonicalized pathkey lists. + * In the canonical representation, sublists can be checked for equality + * by simple pointer comparison. */ PathKeysComparison compare_pathkeys(List *keys1, List *keys2) @@ -285,10 +285,70 @@ compare_pathkeys(List *keys1, List *keys2) List *subkey2 = lfirst(key2); /* + * XXX would like to check that we've been given canonicalized input, + * but query root not accessible here... + */ +#ifdef NOT_USED + Assert(ptrMember(subkey1, root->equi_key_list)); + Assert(ptrMember(subkey2, root->equi_key_list)); +#endif + + /* * We will never have two subkeys where one is a subset of the - * other, because of the canonicalization explained above. Either - * they are equal or they ain't. + * other, because of the canonicalization process. Either they + * are equal or they ain't. Furthermore, we only need pointer + * comparison to detect equality. */ + if (subkey1 != subkey2) + return PATHKEYS_DIFFERENT; /* no need to keep looking */ + } + + /* + * If we reached the end of only one list, the other is longer and + * therefore not a subset. (We assume the additional sublist(s) of + * the other list are not NIL --- no pathkey list should ever have a + * NIL sublist.) + */ + if (key1 == NIL && key2 == NIL) + return PATHKEYS_EQUAL; + if (key1 != NIL) + return PATHKEYS_BETTER1;/* key1 is longer */ + return PATHKEYS_BETTER2; /* key2 is longer */ +} + +/* + * compare_noncanonical_pathkeys + * Compare two pathkeys to see if they are equivalent, and if not whether + * one is "better" than the other. This is used when we must compare + * non-canonicalized pathkeys. + * + * A pathkey can be considered better than another if it is a superset: + * it contains all the keys of the other plus more. For example, either + * ((A) (B)) or ((A B)) is better than ((A)). + * + * Currently, the only user of this routine is grouping_planner(), + * and it will only pass single-element sublists (from + * make_pathkeys_for_sortclauses). Therefore we don't have to do the + * full two-way-subset-inclusion test on each pair of sublists that is + * implied by the above statement. Instead we just verify they are + * singleton lists and then do an equal(). This could be improved if + * necessary. + */ +PathKeysComparison +compare_noncanonical_pathkeys(List *keys1, List *keys2) +{ + List *key1, + *key2; + + for (key1 = keys1, key2 = keys2; + key1 != NIL && key2 != NIL; + key1 = lnext(key1), key2 = lnext(key2)) + { + List *subkey1 = lfirst(key1); + List *subkey2 = lfirst(key2); + + Assert(length(subkey1) == 1); + Assert(length(subkey2) == 1); if (!equal(subkey1, subkey2)) return PATHKEYS_DIFFERENT; /* no need to keep looking */ } @@ -326,6 +386,24 @@ pathkeys_contained_in(List *keys1, List *keys2) } /* + * noncanonical_pathkeys_contained_in + * The same, when we don't have canonical pathkeys. + */ +bool +noncanonical_pathkeys_contained_in(List *keys1, List *keys2) +{ + switch (compare_noncanonical_pathkeys(keys1, keys2)) + { + case PATHKEYS_EQUAL: + case PATHKEYS_BETTER2: + return true; + default: + break; + } + return false; +} + +/* * get_cheapest_path_for_pathkeys * Find the cheapest path (according to the specified criterion) that * satisfies the given pathkeys. Return NULL if no such path. @@ -464,6 +542,7 @@ build_index_pathkeys(Query *root, while (*indexkeys != 0 && *ordering != InvalidOid) { Var *relvar = find_indexkey_var(root, rel, *indexkeys); + List *cpathkey; sortop = *ordering; if (ScanDirectionIsBackward(scandir)) @@ -475,8 +554,13 @@ build_index_pathkeys(Query *root, /* OK, make a sublist for this sort key */ item = makePathKeyItem((Node *) relvar, sortop); - retval = lappend(retval, make_canonical_pathkey(root, item)); - + cpathkey = make_canonical_pathkey(root, item); + /* + * Eliminate redundant ordering info; could happen if query + * is such that index keys are equijoined... + */ + if (!ptrMember(cpathkey, retval)) + retval = lappend(retval, cpathkey); indexkeys++; ordering++; } @@ -526,21 +610,20 @@ find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno) * outer path (since the join will retain the ordering of the outer path) * plus any vars of the inner path that are equijoined to the outer vars. * - * Per the discussion at the top of this file, equijoined inner vars + * Per the discussion in backend/optimizer/README, equijoined inner vars * can be considered path keys of the result, just the same as the outer * vars they were joined with; furthermore, it doesn't matter what kind * of join algorithm is actually used. * - * 'outer_pathkeys' is the list of the outer path's path keys - * 'join_rel_tlist' is the target list of the join relation - * 'equi_key_list' is the query's list of pathkeyitem equivalence sets + * 'joinrel' is the join relation that paths are being formed for + * 'outer_pathkeys' is the list of the current outer path's path keys * * Returns the list of new path keys. */ List * -build_join_pathkeys(List *outer_pathkeys, - List *join_rel_tlist, - List *equi_key_list) +build_join_pathkeys(Query *root, + RelOptInfo *joinrel, + List *outer_pathkeys) { /* @@ -549,9 +632,11 @@ build_join_pathkeys(List *outer_pathkeys, * a darn thing here! The inner-rel vars we used to need to add are * *already* part of the outer pathkey! * - * I'd remove the routine entirely, but maybe someday we'll need it... + * We do, however, need to truncate the pathkeys list, since it may + * contain pathkeys that were useful for forming this joinrel but are + * uninteresting to higher levels. */ - return outer_pathkeys; + return truncate_useless_pathkeys(root, joinrel, outer_pathkeys); } /**************************************************************************** @@ -603,6 +688,39 @@ make_pathkeys_for_sortclauses(List *sortclauses, ****************************************************************************/ /* + * cache_mergeclause_pathkeys + * Make the cached pathkeys valid in a mergeclause restrictinfo. + * + * RestrictInfo contains fields in which we may cache the result + * of looking up the canonical pathkeys for the left and right sides + * of the mergeclause. (Note that in normal cases they will be the + * same, but not if the mergeclause appears above an OUTER JOIN.) + * This is a worthwhile savings because these routines will be invoked + * many times when dealing with a many-relation query. + */ +static void +cache_mergeclause_pathkeys(Query *root, RestrictInfo *restrictinfo) +{ + Node *key; + PathKeyItem *item; + + Assert(restrictinfo->mergejoinoperator != InvalidOid); + + if (restrictinfo->left_pathkey == NIL) + { + key = (Node *) get_leftop(restrictinfo->clause); + item = makePathKeyItem(key, restrictinfo->left_sortop); + restrictinfo->left_pathkey = make_canonical_pathkey(root, item); + } + if (restrictinfo->right_pathkey == NIL) + { + key = (Node *) get_rightop(restrictinfo->clause); + item = makePathKeyItem(key, restrictinfo->right_sortop); + restrictinfo->right_pathkey = make_canonical_pathkey(root, item); + } +} + +/* * find_mergeclauses_for_pathkeys * This routine attempts to find a set of mergeclauses that can be * used with a specified ordering for one of the input relations. @@ -618,11 +736,13 @@ make_pathkeys_for_sortclauses(List *sortclauses, * * XXX Ideally we ought to be considering context, ie what path orderings * are available on the other side of the join, rather than just making - * an arbitrary choice among the mergeclause orders that will work for - * this side of the join. + * an arbitrary choice among the mergeclauses that will work for this side + * of the join. */ List * -find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) +find_mergeclauses_for_pathkeys(Query *root, + List *pathkeys, + List *restrictinfos) { List *mergeclauses = NIL; List *i; @@ -634,38 +754,28 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos) List *j; /* - * We can match any of the keys in this pathkey sublist, since - * they're all equivalent. And we can match against either left - * or right side of any mergejoin clause we haven't used yet. For - * the moment we use a dumb "greedy" algorithm with no - * backtracking. Is it worth being any smarter to make a longer - * list of usable mergeclauses? Probably not. + * We can match a pathkey against either left or right side of any + * mergejoin clause we haven't used yet. For the moment we use a + * dumb "greedy" algorithm with no backtracking. Is it worth being + * any smarter to make a longer list of usable mergeclauses? + * Probably not. */ - foreach(j, pathkey) + foreach(j, restrictinfos) { - PathKeyItem *keyitem = lfirst(j); - Node *key = keyitem->key; - Oid keyop = keyitem->sortop; - List *k; + RestrictInfo *restrictinfo = lfirst(j); - foreach(k, restrictinfos) + cache_mergeclause_pathkeys(root, restrictinfo); + /* + * We can compare canonical pathkey sublists by simple + * pointer equality; see compare_pathkeys. + */ + if ((pathkey == restrictinfo->left_pathkey || + pathkey == restrictinfo->right_pathkey) && + !ptrMember(restrictinfo, mergeclauses)) { - RestrictInfo *restrictinfo = lfirst(k); - - Assert(restrictinfo->mergejoinoperator != InvalidOid); - - if (((keyop == restrictinfo->left_sortop && - equal(key, get_leftop(restrictinfo->clause))) || - (keyop == restrictinfo->right_sortop && - equal(key, get_rightop(restrictinfo->clause)))) && - !member(restrictinfo, mergeclauses)) - { - matched_restrictinfo = restrictinfo; - break; - } - } - if (matched_restrictinfo) + matched_restrictinfo = restrictinfo; break; + } } /* @@ -715,47 +825,170 @@ make_pathkeys_for_mergeclauses(Query *root, { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); Node *key; - Oid sortop; - PathKeyItem *item; List *pathkey; - Assert(restrictinfo->mergejoinoperator != InvalidOid); + cache_mergeclause_pathkeys(root, restrictinfo); - /* - * Which key and sortop is needed for this relation? - */ key = (Node *) get_leftop(restrictinfo->clause); - sortop = restrictinfo->left_sortop; - if (!IsA(key, Var) || - !intMember(((Var *) key)->varno, rel->relids)) + if (IsA(key, Var) && intMember(((Var *) key)->varno, rel->relids)) + { + /* Rel is left side of mergeclause */ + pathkey = restrictinfo->left_pathkey; + } + else { key = (Node *) get_rightop(restrictinfo->clause); - sortop = restrictinfo->right_sortop; - if (!IsA(key, Var) || - !intMember(((Var *) key)->varno, rel->relids)) + if (IsA(key, Var) && intMember(((Var *) key)->varno, rel->relids)) + { + /* Rel is right side of mergeclause */ + pathkey = restrictinfo->right_pathkey; + } + else + { elog(ERROR, "make_pathkeys_for_mergeclauses: can't identify which side of mergeclause to use"); + pathkey = NIL; /* keep compiler quiet */ + } } /* - * Find or create canonical pathkey sublist for this sort item. + * When we are given multiple merge clauses, it's possible that some + * clauses refer to the same vars as earlier clauses. There's no + * reason for us to specify sort keys like (A,B,A) when (A,B) will + * do --- and adding redundant sort keys makes add_path think that + * this sort order is different from ones that are really the same, + * so don't do it. Since we now have a canonicalized pathkey, + * a simple ptrMember test is sufficient to detect redundant keys. */ - item = makePathKeyItem(key, sortop); - pathkey = make_canonical_pathkey(root, item); + if (!ptrMember(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); + } + + return pathkeys; +} + +/**************************************************************************** + * PATHKEY USEFULNESS CHECKS + * + * We only want to remember as many of the pathkeys of a path as have some + * potential use, either for subsequent mergejoins or for meeting the query's + * requested output ordering. This ensures that add_path() won't consider + * a path to have a usefully different ordering unless it really is useful. + * These routines check for usefulness of given pathkeys. + ****************************************************************************/ + +/* + * pathkeys_useful_for_merging + * Count the number of pathkeys that may be useful for mergejoins + * above the given relation (by looking at its joininfo lists). + * + * We consider a pathkey potentially useful if it corresponds to the merge + * ordering of either side of any joinclause for the rel. This might be + * overoptimistic, since joinclauses that appear in different join lists + * might never be usable at the same time, but trying to be exact is likely + * to be more trouble than it's worth. + */ +int +pathkeys_useful_for_merging(Query *root, RelOptInfo *rel, List *pathkeys) +{ + int useful = 0; + List *i; + + foreach(i, pathkeys) + { + List *pathkey = lfirst(i); + bool matched = false; + List *j; + + foreach(j, rel->joininfo) + { + JoinInfo *joininfo = (JoinInfo *) lfirst(j); + List *k; + + foreach(k, joininfo->jinfo_restrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(k); + + if (restrictinfo->mergejoinoperator == InvalidOid) + continue; + cache_mergeclause_pathkeys(root, restrictinfo); + /* + * We can compare canonical pathkey sublists by simple + * pointer equality; see compare_pathkeys. + */ + if (pathkey == restrictinfo->left_pathkey || + pathkey == restrictinfo->right_pathkey) + { + matched = true; + break; + } + } + + if (matched) + break; + } /* - * Most of the time we will get back a canonical pathkey set - * including both the mergeclause's left and right sides (the only - * case where we don't is if the mergeclause appeared in an OUTER - * JOIN, which causes us not to generate an equijoin set from it). - * Therefore, most of the time the item we just made is not part - * of the returned structure, and we can free it. This check - * saves a useful amount of storage in a big join tree. + * If we didn't find a mergeclause, we're done --- any additional + * sort-key positions in the pathkeys are useless. (But we can + * still mergejoin if we found at least one mergeclause.) */ - if (item != (PathKeyItem *) lfirst(pathkey)) - pfree(item); + if (matched) + useful++; + else + break; + } - pathkeys = lappend(pathkeys, pathkey); + return useful; +} + +/* + * pathkeys_useful_for_ordering + * Count the number of pathkeys that are useful for meeting the + * query's requested output ordering. + * + * Unlike merge pathkeys, this is an all-or-nothing affair: it does us + * no good to order by just the first key(s) of the requested ordering. + * So the result is always either 0 or length(root->query_pathkeys). + */ +int +pathkeys_useful_for_ordering(Query *root, List *pathkeys) +{ + if (root->query_pathkeys == NIL) + return 0; /* no special ordering requested */ + + if (pathkeys == NIL) + return 0; /* unordered path */ + + if (pathkeys_contained_in(root->query_pathkeys, pathkeys)) + { + /* It's useful ... or at least the first N keys are */ + return length(root->query_pathkeys); } - return pathkeys; + return 0; /* path ordering not useful */ +} + +/* + * truncate_useless_pathkeys + * Shorten the given pathkey list to just the useful pathkeys. + */ +List * +truncate_useless_pathkeys(Query *root, + RelOptInfo *rel, + List *pathkeys) +{ + int nuseful; + int nuseful2; + + nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); + nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; + /* Note: not safe to modify input list destructively, but we can avoid + * copying the list if we're not actually going to change it + */ + if (nuseful == length(pathkeys)) + return pathkeys; + else + return ltruncate(nuseful, listCopy(pathkeys)); } |