diff options
Diffstat (limited to 'src/backend/optimizer/path/pathkeys.c')
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 1523 |
1 files changed, 647 insertions, 876 deletions
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 4fc5a5f1250..15793b4ef99 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,687 +11,180 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.81 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.82 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "access/skey.h" #include "nodes/makefuncs.h" +#include "nodes/plannodes.h" #include "optimizer/clauses.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" -#include "optimizer/planmain.h" #include "optimizer/tlist.h" -#include "optimizer/var.h" #include "parser/parsetree.h" #include "parser/parse_expr.h" #include "utils/lsyscache.h" -#include "utils/memutils.h" - - -static PathKeyItem *makePathKeyItem(Node *key, Oid sortop, bool nulls_first, - bool checkType); -static void generate_outer_join_implications(PlannerInfo *root, - List *equi_key_set, - Relids *relids); -static void sub_generate_join_implications(PlannerInfo *root, - List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, - Relids item1_relids); -static void process_implied_const_eq(PlannerInfo *root, - List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, - Relids item1_relids, - bool delete_it); -static List *make_canonical_pathkey(PlannerInfo *root, PathKeyItem *item); -static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, - AttrNumber varattno); /* - * makePathKeyItem - * create a PathKeyItem node + * If an EC contains a const and isn't below-outer-join, any PathKey depending + * on it must be redundant, since there's only one possible value of the key. */ -static PathKeyItem * -makePathKeyItem(Node *key, Oid sortop, bool nulls_first, bool checkType) -{ - PathKeyItem *item = makeNode(PathKeyItem); - - /* - * Some callers pass expressions that are not necessarily of the same type - * as the sort operator expects as input (for example when dealing with an - * index that uses binary-compatible operators). We must relabel these - * with the correct type so that the key expressions will be seen as - * equal() to expressions that have been correctly labeled. - */ - if (checkType) - { - Oid lefttype, - righttype; +#define MUST_BE_REDUNDANT(eclass) \ + ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join) + +static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first); +static PathKey *make_canonical_pathkey(PlannerInfo *root, + EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first); +static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); +static PathKey *make_pathkey_from_sortinfo(PlannerInfo *root, + Expr *expr, Oid ordering_op, + bool nulls_first, + bool canonicalize); +static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, + AttrNumber varattno); - op_input_types(sortop, &lefttype, &righttype); - if (exprType(key) != lefttype) - key = (Node *) makeRelabelType((Expr *) key, - lefttype, -1, - COERCE_DONTCARE); - } - item->key = key; - item->sortop = sortop; - item->nulls_first = nulls_first; - return item; -} +/**************************************************************************** + * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING + ****************************************************************************/ /* - * add_equijoined_keys - * The given clause has a mergejoinable operator, so its two sides - * can be considered equal after restriction clause application; in - * particular, any pathkey mentioning one side (with the correct sortop) - * can be expanded to include the other as well. Record the exprs and - * associated sortops in the query's equi_key_list for future use. + * makePathKey + * create a PathKey node * - * The query's equi_key_list field points to a list of sublists of PathKeyItem - * nodes, where each sublist is a set of two or more exprs+sortops that have - * been identified as logically equivalent (and, therefore, we may consider - * any two in a set to be equal). As described above, we will subsequently - * use direct pointers to one of these sublists to represent any pathkey - * that involves an equijoined variable. + * This does not promise to create a canonical PathKey, it's merely a + * convenience routine to build the specified node. */ -void -add_equijoined_keys(PlannerInfo *root, RestrictInfo *restrictinfo) +static PathKey * +makePathKey(EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first) { - Expr *clause = restrictinfo->clause; - PathKeyItem *item1 = makePathKeyItem(get_leftop(clause), - restrictinfo->left_sortop, - false, /* XXX nulls_first? */ - false); - PathKeyItem *item2 = makePathKeyItem(get_rightop(clause), - restrictinfo->right_sortop, - false, /* XXX nulls_first? */ - false); - List *newset; - ListCell *cursetlink; - - /* We might see a clause X=X; don't make a single-element list from it */ - if (equal(item1, item2)) - return; - - /* - * Our plan is to make a two-element set, then sweep through the existing - * equijoin sets looking for matches to item1 or item2. When we find one, - * we remove that set from equi_key_list and union it into our new set. - * When done, we add the new set to the front of equi_key_list. - * - * It may well be that the two items we're given are already known to be - * equijoin-equivalent, in which case we don't need to change our data - * structure. If we find both of them in the same equivalence set to - * start with, we can quit immediately. - * - * This is a standard UNION-FIND problem, for which there exist better - * data structures than simple lists. If this code ever proves to be a - * bottleneck then it could be sped up --- but for now, simple is - * beautiful. - */ - newset = NIL; - - /* cannot use foreach here because of possible lremove */ - cursetlink = list_head(root->equi_key_list); - while (cursetlink) - { - List *curset = (List *) lfirst(cursetlink); - bool item1here = list_member(curset, item1); - bool item2here = list_member(curset, item2); - - /* must advance cursetlink before lremove possibly pfree's it */ - cursetlink = lnext(cursetlink); - - if (item1here || item2here) - { - /* - * If find both in same equivalence set, no need to do any more - */ - if (item1here && item2here) - { - /* Better not have seen only one in an earlier set... */ - Assert(newset == NIL); - return; - } - - /* Build the new set only when we know we must */ - if (newset == NIL) - newset = list_make2(item1, item2); - - /* Found a set to merge into our new set */ - newset = list_concat_unique(newset, curset); - - /* - * Remove old set from equi_key_list. - */ - root->equi_key_list = list_delete_ptr(root->equi_key_list, curset); - list_free(curset); /* might as well recycle old cons cells */ - } - } + PathKey *pk = makeNode(PathKey); - /* Build the new set only when we know we must */ - if (newset == NIL) - newset = list_make2(item1, item2); + pk->pk_eclass = eclass; + pk->pk_opfamily = opfamily; + pk->pk_strategy = strategy; + pk->pk_nulls_first = nulls_first; - root->equi_key_list = lcons(newset, root->equi_key_list); + return pk; } /* - * generate_implied_equalities - * Scan the completed equi_key_list for the query, and generate explicit - * qualifications (WHERE clauses) for all the pairwise equalities not - * already mentioned in the quals; or remove qualifications found to be - * redundant. - * - * Adding deduced equalities is useful because the additional clauses help - * the selectivity-estimation code and may allow better joins to be chosen; - * and in fact it's *necessary* to ensure that sort keys we think are - * equivalent really are (see src/backend/optimizer/README for more info). - * - * If an equi_key_list set includes any constants then we adopt a different - * strategy: we record all the "var = const" deductions we can make, and - * actively remove all the "var = var" clauses that are implied by the set - * (including the clauses that originally gave rise to the set!). The reason - * is that given input like "a = b AND b = 42", once we have deduced "a = 42" - * there is no longer any need to apply the clause "a = b"; not only is - * it a waste of time to check it, but we will misestimate selectivity if the - * clause is left in. So we must remove it. For this purpose, any pathkey - * item that mentions no Vars of the current level can be taken as a constant. - * (The only case where this would be risky is if the item contains volatile - * functions; but we will never consider such an expression to be a pathkey - * at all, because check_mergejoinable() will reject it.) - * - * Also, when we have constants in an equi_key_list we can try to propagate - * the constants into outer joins; see generate_outer_join_implications - * for discussion. + * make_canonical_pathkey + * Given the parameters for a PathKey, find any pre-existing matching + * pathkey in the query's list of "canonical" pathkeys. Make a new + * entry if there's not one already. * - * This routine just walks the equi_key_list to find all pairwise equalities. - * We call process_implied_equality (in plan/initsplan.c) to adjust the - * restrictinfo datastructures for each pair. + * Note that this function must not be used until after we have completed + * merging EquivalenceClasses. */ -void -generate_implied_equalities(PlannerInfo *root) +static PathKey * +make_canonical_pathkey(PlannerInfo *root, + EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first) { - ListCell *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); - int nitems = list_length(curset); - Relids *relids; - bool have_consts; - ListCell *ptr1; - int i1; + PathKey *pk; + ListCell *lc; + MemoryContext oldcontext; - /* - * A set containing only two items cannot imply any equalities beyond - * the one that created the set, so we can skip it --- unless outer - * joins appear in the query. - */ - if (nitems < 3 && !root->hasOuterJoins) - continue; + /* The passed eclass might be non-canonical, so chase up to the top */ + while (eclass->ec_merged) + eclass = eclass->ec_merged; - /* - * Collect info about relids mentioned in each item. For this routine - * we only really care whether there are any at all in each item, but - * process_implied_equality() needs the exact sets, so we may as well - * pull them here. - */ - relids = (Relids *) palloc(nitems * sizeof(Relids)); - have_consts = false; - i1 = 0; - foreach(ptr1, curset) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1); + foreach(lc, root->canon_pathkeys) + { + pk = (PathKey *) lfirst(lc); + if (eclass == pk->pk_eclass && + opfamily == pk->pk_opfamily && + strategy == pk->pk_strategy && + nulls_first == pk->pk_nulls_first) + return pk; + } - relids[i1] = pull_varnos(item1->key); - if (bms_is_empty(relids[i1])) - have_consts = true; - i1++; - } + /* + * Be sure canonical pathkeys are allocated in the main planning context. + * Not an issue in normal planning, but it is for GEQO. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); - /* - * Match each item in the set with all that appear after it (it's - * sufficient to generate A=B, need not process B=A too). - * - * A set containing only two items cannot imply any equalities beyond - * the one that created the set, so we can skip this processing in - * that case. - */ - if (nitems >= 3) - { - i1 = 0; - foreach(ptr1, curset) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1); - bool i1_is_variable = !bms_is_empty(relids[i1]); - ListCell *ptr2; - int i2 = i1 + 1; + pk = makePathKey(eclass, opfamily, strategy, nulls_first); + root->canon_pathkeys = lappend(root->canon_pathkeys, pk); - for_each_cell(ptr2, lnext(ptr1)) - { - PathKeyItem *item2 = (PathKeyItem *) lfirst(ptr2); - bool i2_is_variable = !bms_is_empty(relids[i2]); - - /* - * If it's "const = const" then just ignore it altogether. - * There is no place in the restrictinfo structure to - * store it. (If the two consts are in fact unequal, then - * propagating the comparison to Vars will cause us to - * produce zero rows out, as expected.) - */ - if (i1_is_variable || i2_is_variable) - { - /* - * Tell process_implied_equality to delete the clause, - * not add it, if it's "var = var" and we have - * constants present in the list. - */ - bool delete_it = (have_consts && - i1_is_variable && - i2_is_variable); - - process_implied_equality(root, - item1->key, item2->key, - item1->sortop, item2->sortop, - relids[i1], relids[i2], - delete_it); - } - i2++; - } - i1++; - } - } + MemoryContextSwitchTo(oldcontext); - /* - * If we have constant(s) and outer joins, try to propagate the - * constants through outer-join quals. - */ - if (have_consts && root->hasOuterJoins) - generate_outer_join_implications(root, curset, relids); - } + return pk; } /* - * generate_outer_join_implications - * Generate clauses that can be deduced in outer-join situations. + * pathkey_is_redundant + * Is a pathkey redundant with one already in the given list? * - * When we have mergejoinable clauses A = B that are outer-join clauses, - * we can't blindly combine them with other clauses A = C to deduce B = C, - * since in fact the "equality" A = B won't necessarily hold above the - * outer join (one of the variables might be NULL instead). Nonetheless - * there are cases where we can add qual clauses using transitivity. + * Both the given pathkey and the list members must be canonical for this + * to work properly. We detect two cases: * - * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR - * combined with a pushed-down (valid everywhere) clause OUTERVAR = CONSTANT. - * It is safe and useful to push a clause INNERVAR = CONSTANT into the - * evaluation of the inner (nullable) relation, because any inner rows not - * meeting this condition will not contribute to the outer-join result anyway. - * (Any outer rows they could join to will be eliminated by the pushed-down - * clause.) + * 1. If the new pathkey's equivalence class contains a constant, and isn't + * below an outer join, then we can disregard it as a sort key. An example: + * SELECT ... WHERE x = 42 ORDER BY x, y; + * We may as well just sort by y. Note that because of opfamily matching, + * this is semantically correct: we know that the equality constraint is one + * that actually binds the variable to a single value in the terms of any + * ordering operator that might go with the eclass. This rule not only lets + * us simplify (or even skip) explicit sorts, but also allows matching index + * sort orders to a query when there are don't-care index columns. * - * Note that the above rule does not work for full outer joins, nor for - * pushed-down restrictions on an inner-side variable; nor is it very - * interesting to consider cases where the pushed-down clause involves - * relations entirely outside the outer join, since such clauses couldn't - * be pushed into the inner side's scan anyway. So the restriction to - * outervar = pseudoconstant is not really giving up anything. + * 2. If the new pathkey's equivalence class is the same as that of any + * existing member of the pathkey list, then it is redundant. Some examples: + * SELECT ... ORDER BY x, x; + * SELECT ... ORDER BY x, x DESC; + * SELECT ... WHERE x = y ORDER BY x, y; + * In all these cases the second sort key cannot distinguish values that are + * considered equal by the first, and so there's no point in using it. + * Note in particular that we need not compare opfamily (all the opfamilies + * of the EC have the same notion of equality) nor sort direction. * - * For full-join cases, we can only do something useful if it's a FULL JOIN - * USING and a merged column has a restriction MERGEDVAR = CONSTANT. By - * the time it gets here, the restriction will look like - * COALESCE(LEFTVAR, RIGHTVAR) = CONSTANT - * and we will have a join clause LEFTVAR = RIGHTVAR that we can match the - * COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT - * and RIGHTVAR = CONSTANT into the input relations, since any rows not - * meeting these conditions cannot contribute to the join result. - * - * Again, there isn't any traction to be gained by trying to deal with - * clauses comparing a mergedvar to a non-pseudoconstant. So we can make - * use of the equi_key_lists to quickly find the interesting pushed-down - * clauses. The interesting outer-join clauses were accumulated for us by - * distribute_qual_to_rels. - * - * equi_key_set: a list of PathKeyItems that are known globally equivalent, - * at least one of which is a pseudoconstant. - * relids: an array of Relids sets showing the relation membership of each - * PathKeyItem in equi_key_set. + * Because the equivclass.c machinery forms only one copy of any EC per query, + * pointer comparison is enough to decide whether canonical ECs are the same. */ -static void -generate_outer_join_implications(PlannerInfo *root, - List *equi_key_set, - Relids *relids) +static bool +pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) { - ListCell *l; - int i = 0; + EquivalenceClass *new_ec = new_pathkey->pk_eclass; + ListCell *lc; - /* Process each non-constant element of equi_key_set */ - foreach(l, equi_key_set) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(l); + /* Assert we've been given canonical pathkeys */ + Assert(!new_ec->ec_merged); - if (!bms_is_empty(relids[i])) - { - sub_generate_join_implications(root, equi_key_set, relids, - item1->key, - item1->sortop, - relids[i]); - } - i++; - } -} - -/* - * sub_generate_join_implications - * Propagate a constant equality through outer join clauses. - * - * The item described by item1/sortop1/item1_relids has been determined - * to be equal to the constant(s) listed in equi_key_set. Recursively - * trace out the implications of this. - * - * equi_key_set and relids are as for generate_outer_join_implications. - */ -static void -sub_generate_join_implications(PlannerInfo *root, - List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, Relids item1_relids) - -{ - ListCell *l; + /* Check for EC containing a constant --- unconditionally redundant */ + if (MUST_BE_REDUNDANT(new_ec)) + return true; - /* - * Examine each mergejoinable outer-join clause with OUTERVAR on left, - * looking for an OUTERVAR identical to item1 - */ - foreach(l, root->left_join_clauses) + /* If same EC already used in list, then redundant */ + foreach(lc, pathkeys) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *leftop = get_leftop(rinfo->clause); - - if (equal(leftop, item1) && rinfo->left_sortop == sortop1) - { - /* - * Match, so find constant member(s) of set and generate implied - * INNERVAR = CONSTANT - */ - Node *rightop = get_rightop(rinfo->clause); + PathKey *old_pathkey = (PathKey *) lfirst(lc); - process_implied_const_eq(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids, - false); + /* Assert we've been given canonical pathkeys */ + Assert(!old_pathkey->pk_eclass->ec_merged); - /* - * We can remove explicit tests of this outer-join qual, too, - * since we now have tests forcing each of its sides to the same - * value. - */ - process_implied_equality(root, - leftop, rightop, - rinfo->left_sortop, rinfo->right_sortop, - rinfo->left_relids, rinfo->right_relids, - true); - - /* - * And recurse to see if we can deduce anything from INNERVAR = - * CONSTANT - */ - sub_generate_join_implications(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids); - } - } - - /* The same, looking at clauses with OUTERVAR on right */ - foreach(l, root->right_join_clauses) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *rightop = get_rightop(rinfo->clause); - - if (equal(rightop, item1) && rinfo->right_sortop == sortop1) - { - /* - * Match, so find constant member(s) of set and generate implied - * INNERVAR = CONSTANT - */ - Node *leftop = get_leftop(rinfo->clause); - - process_implied_const_eq(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids, - false); - - /* - * We can remove explicit tests of this outer-join qual, too, - * since we now have tests forcing each of its sides to the same - * value. - */ - process_implied_equality(root, - leftop, rightop, - rinfo->left_sortop, rinfo->right_sortop, - rinfo->left_relids, rinfo->right_relids, - true); - - /* - * And recurse to see if we can deduce anything from INNERVAR = - * CONSTANT - */ - sub_generate_join_implications(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids); - } - } - - /* - * Only COALESCE(x,y) items can possibly match full joins - */ - if (IsA(item1, CoalesceExpr)) - { - CoalesceExpr *cexpr = (CoalesceExpr *) item1; - Node *cfirst; - Node *csecond; - - if (list_length(cexpr->args) != 2) - return; - cfirst = (Node *) linitial(cexpr->args); - csecond = (Node *) lsecond(cexpr->args); - - /* - * Examine each mergejoinable full-join clause, looking for a clause - * of the form "x = y" matching the COALESCE(x,y) expression - */ - foreach(l, root->full_join_clauses) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *leftop = get_leftop(rinfo->clause); - Node *rightop = get_rightop(rinfo->clause); - - /* - * We can assume the COALESCE() inputs are in the same order as - * the join clause, since both were automatically generated in the - * cases we care about. - * - * XXX currently this may fail to match in cross-type cases - * because the COALESCE will contain typecast operations while the - * join clause may not (if there is a cross-type mergejoin - * operator available for the two column types). Is it OK to strip - * implicit coercions from the COALESCE arguments? What of the - * sortops in such cases? - */ - if (equal(leftop, cfirst) && - equal(rightop, csecond) && - rinfo->left_sortop == sortop1 && - rinfo->right_sortop == sortop1) - { - /* - * Match, so find constant member(s) of set and generate - * implied LEFTVAR = CONSTANT - */ - process_implied_const_eq(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids, - false); - /* ... and RIGHTVAR = CONSTANT */ - process_implied_const_eq(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids, - false); - /* ... and remove COALESCE() = CONSTANT */ - process_implied_const_eq(root, equi_key_set, relids, - item1, - sortop1, - item1_relids, - true); - - /* - * We can remove explicit tests of this outer-join qual, too, - * since we now have tests forcing each of its sides to the - * same value. - */ - process_implied_equality(root, - leftop, rightop, - rinfo->left_sortop, - rinfo->right_sortop, - rinfo->left_relids, - rinfo->right_relids, - true); - - /* - * And recurse to see if we can deduce anything from LEFTVAR = - * CONSTANT - */ - sub_generate_join_implications(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids); - /* ... and RIGHTVAR = CONSTANT */ - sub_generate_join_implications(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids); - - } - } - } -} - -/* - * process_implied_const_eq - * Apply process_implied_equality with the given item and each - * pseudoconstant member of equi_key_set. - * - * equi_key_set and relids are as for generate_outer_join_implications, - * the other parameters as for process_implied_equality. - */ -static void -process_implied_const_eq(PlannerInfo *root, List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, Relids item1_relids, - bool delete_it) -{ - ListCell *l; - bool found = false; - int i = 0; - - foreach(l, equi_key_set) - { - PathKeyItem *item2 = (PathKeyItem *) lfirst(l); - - if (bms_is_empty(relids[i])) - { - process_implied_equality(root, - item1, item2->key, - sortop1, item2->sortop, - item1_relids, NULL, - delete_it); - found = true; - } - i++; + if (new_ec == old_pathkey->pk_eclass) + return true; } - /* Caller screwed up if no constants in list */ - Assert(found); -} -/* - * exprs_known_equal - * Detect whether two expressions are known equal due to equijoin clauses. - * - * Note: does not bother to check for "equal(item1, item2)"; caller must - * check that case if it's possible to pass identical items. - */ -bool -exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) -{ - ListCell *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); - bool item1member = false; - bool item2member = false; - ListCell *ptr; - - foreach(ptr, curset) - { - PathKeyItem *pitem = (PathKeyItem *) lfirst(ptr); - - if (equal(item1, pitem->key)) - item1member = true; - else if (equal(item2, pitem->key)) - item2member = true; - /* Exit as soon as equality is proven */ - if (item1member && item2member) - return true; - } - } return false; } - -/* - * make_canonical_pathkey - * Given a PathKeyItem, find the equi_key_list subset it is a member of, - * if any. If so, return a pointer to that sublist, which is the - * canonical representation (for this query) of that PathKeyItem's - * equivalence set. If it is not found, add a singleton "equivalence set" - * to the equi_key_list and return that --- see compare_pathkeys. - * - * Note that this function must not be used until after we have completed - * scanning the WHERE clause for equijoin operators. - */ -static List * -make_canonical_pathkey(PlannerInfo *root, PathKeyItem *item) -{ - List *newset; - ListCell *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); - - if (list_member(curset, item)) - return curset; - } - newset = list_make1(item); - root->equi_key_list = lcons(newset, root->equi_key_list); - return newset; -} - /* * canonicalize_pathkeys * Convert a not-necessarily-canonical pathkeys list to canonical form. * * Note that this function must not be used until after we have completed - * scanning the WHERE clause for equijoin operators. + * merging EquivalenceClasses. */ List * canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) @@ -701,55 +194,116 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) foreach(l, pathkeys) { - List *pathkey = (List *) lfirst(l); - PathKeyItem *item; - List *cpathkey; + PathKey *pathkey = (PathKey *) lfirst(l); + EquivalenceClass *eclass; + PathKey *cpathkey; - /* - * It's sufficient to look at the first entry in the sublist; if there - * are more entries, they're already part of an equivalence set by - * definition. - */ - Assert(pathkey != NIL); - item = (PathKeyItem *) linitial(pathkey); - cpathkey = make_canonical_pathkey(root, item); + /* Find the canonical (merged) EquivalenceClass */ + eclass = pathkey->pk_eclass; + while (eclass->ec_merged) + eclass = eclass->ec_merged; /* - * Eliminate redundant ordering requests --- ORDER BY A,A is the same - * as ORDER BY A. We want to check this only after we have - * canonicalized the keys, so that equivalent-key knowledge is used - * when deciding if an item is redundant. + * If we can tell it's redundant just from the EC, skip. + * pathkey_is_redundant would notice that, but we needn't even bother + * constructing the node... */ - new_pathkeys = list_append_unique_ptr(new_pathkeys, cpathkey); + if (MUST_BE_REDUNDANT(eclass)) + continue; + + /* OK, build a canonicalized PathKey struct */ + cpathkey = make_canonical_pathkey(root, + eclass, + pathkey->pk_opfamily, + pathkey->pk_strategy, + pathkey->pk_nulls_first); + + /* Add to list unless redundant */ + if (!pathkey_is_redundant(cpathkey, new_pathkeys)) + new_pathkeys = lappend(new_pathkeys, cpathkey); } return new_pathkeys; } - /* - * count_canonical_peers - * Given a PathKeyItem, find the equi_key_list subset it is a member of, - * if any. If so, return the number of other members of the set. - * If not, return 0 (without actually adding it to our equi_key_list). + * make_pathkey_from_sortinfo + * Given an expression, a sortop, and a nulls-first flag, create + * a PathKey. If canonicalize = true, the result is a "canonical" + * PathKey, otherwise not. (But note it might be redundant anyway.) * - * This is a hack to support the rather bogus heuristics in - * convert_subquery_pathkeys. + * canonicalize should always be TRUE after EquivalenceClass merging has + * been performed, but FALSE if we haven't done EquivalenceClass merging yet. */ -static int -count_canonical_peers(PlannerInfo *root, PathKeyItem *item) +static PathKey * +make_pathkey_from_sortinfo(PlannerInfo *root, + Expr *expr, Oid ordering_op, + bool nulls_first, + bool canonicalize) { - ListCell *cursetlink; + Oid equality_op; + List *opfamilies; + Oid opfamily, + lefttype, + righttype; + int strategy; + ListCell *lc; + EquivalenceClass *eclass; - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); + /* + * An ordering operator fully determines the behavior of its opfamily, + * so could only meaningfully appear in one family --- or perhaps two + * if one builds a reverse-sort opfamily, but there's not much point in + * that anymore. But EquivalenceClasses need to contain opfamily lists + * based on the family membership of equality operators, which could + * easily be bigger. So, look up the equality operator that goes with + * the ordering operator (this should be unique) and get its membership. + */ + equality_op = get_equality_op_for_ordering_op(ordering_op); + if (!OidIsValid(equality_op)) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + ordering_op); + opfamilies = get_mergejoin_opfamilies(equality_op); + if (!opfamilies) /* certainly should find some */ + elog(ERROR, "could not find opfamilies for ordering operator %u", + ordering_op); - if (list_member(curset, item)) - return list_length(curset) - 1; + /* + * Next we have to determine the strategy number to put into the pathkey. + * In the presence of reverse-sort opclasses there might be two answers. + * We prefer the one associated with the first opfamilies member that + * this ordering_op appears in (this will be consistently defined in + * normal system operation; see comments for get_mergejoin_opfamilies()). + */ + opfamily = InvalidOid; + strategy = 0; + foreach(lc, opfamilies) + { + opfamily = lfirst_oid(lc); + strategy = get_op_opfamily_strategy(ordering_op, opfamily); + if (strategy) + break; } - return 0; + if (!(strategy == BTLessStrategyNumber || + strategy == BTGreaterStrategyNumber)) + elog(ERROR, "ordering operator %u is has wrong strategy number %d", + ordering_op, strategy); + + /* Need the declared input type of the operator, too */ + op_input_types(ordering_op, &lefttype, &righttype); + Assert(lefttype == righttype); + + /* Now find or create a matching EquivalenceClass */ + eclass = get_eclass_for_sort_expr(root, expr, lefttype, opfamilies); + + /* And finally we can find or create a PathKey node */ + if (canonicalize) + return make_canonical_pathkey(root, eclass, opfamily, + strategy, nulls_first); + else + return makePathKey(eclass, opfamily, strategy, nulls_first); } + /**************************************************************************** * PATHKEY COMPARISONS ****************************************************************************/ @@ -760,7 +314,7 @@ count_canonical_peers(PlannerInfo *root, PathKeyItem *item) * one is "better" than the other. * * This function may only be applied to canonicalized pathkey lists. - * In the canonical representation, sublists can be checked for equality + * In the canonical representation, pathkeys can be checked for equality * by simple pointer comparison. */ PathKeysComparison @@ -771,33 +325,25 @@ compare_pathkeys(List *keys1, List *keys2) forboth(key1, keys1, key2, keys2) { - List *subkey1 = (List *) lfirst(key1); - List *subkey2 = (List *) lfirst(key2); + PathKey *pathkey1 = (PathKey *) lfirst(key1); + PathKey *pathkey2 = (PathKey *) lfirst(key2); /* * XXX would like to check that we've been given canonicalized input, * but PlannerInfo not accessible here... */ #ifdef NOT_USED - Assert(list_member_ptr(root->equi_key_list, subkey1)); - Assert(list_member_ptr(root->equi_key_list, subkey2)); + Assert(list_member_ptr(root->canon_pathkeys, pathkey1)); + Assert(list_member_ptr(root->canon_pathkeys, pathkey2)); #endif - /* - * We will never have two subkeys where one is a subset of the other, - * because of the canonicalization process. Either they are equal or - * they ain't. Furthermore, we only need pointer comparison to detect - * equality. - */ - if (subkey1 != subkey2) + if (pathkey1 != pathkey2) return PATHKEYS_DIFFERENT; /* no need to keep looking */ } /* * If we reached the end of only one list, the other is longer and - * therefore not a subset. (We assume the additional sublist(s) of the - * other list are not NIL --- no pathkey list should ever have a NIL - * sublist.) + * therefore not a subset. */ if (key1 == NULL && key2 == NULL) return PATHKEYS_EQUAL; @@ -912,11 +458,8 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, * If 'scandir' is BackwardScanDirection, attempt to build pathkeys * representing a backwards scan of the index. Return NIL if can't do it. * - * If 'canonical' is TRUE, we remove duplicate pathkeys (which can occur - * if two index columns are equijoined, eg WHERE x = 1 AND y = 1). This - * is required if the result is to be compared directly to a canonical query - * pathkeys list. However, some callers want a list with exactly one entry - * per index column, and they must pass FALSE. + * The result is canonical, meaning that redundant pathkeys are removed; + * it may therefore have fewer entries than there are index columns. * * We generate the full pathkeys list whether or not all are useful for the * current query. Caller should do truncate_useless_pathkeys(). @@ -924,8 +467,7 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, List * build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, - ScanDirection scandir, - bool canonical) + ScanDirection scandir) { List *retval = NIL; ListCell *indexprs_item = list_head(index->indexprs); @@ -936,9 +478,8 @@ build_index_pathkeys(PlannerInfo *root, Oid sortop; bool nulls_first; int ikey; - Node *indexkey; - PathKeyItem *item; - List *cpathkey; + Expr *indexkey; + PathKey *cpathkey; if (ScanDirectionIsBackward(scandir)) { @@ -958,25 +499,26 @@ build_index_pathkeys(PlannerInfo *root, if (ikey != 0) { /* simple index column */ - indexkey = (Node *) find_indexkey_var(root, index->rel, ikey); + indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey); } else { /* expression --- assume we need not copy it */ if (indexprs_item == NULL) elog(ERROR, "wrong number of index expressions"); - indexkey = (Node *) lfirst(indexprs_item); + indexkey = (Expr *) lfirst(indexprs_item); indexprs_item = lnext(indexprs_item); } - /* OK, make a sublist for this sort key */ - item = makePathKeyItem(indexkey, sortop, nulls_first, true); - cpathkey = make_canonical_pathkey(root, item); + /* OK, make a canonical pathkey for this sort key */ + cpathkey = make_pathkey_from_sortinfo(root, + indexkey, + sortop, + nulls_first, + true); - /* Eliminate redundant ordering info if requested */ - if (canonical) - retval = list_append_unique_ptr(retval, cpathkey); - else + /* Add to list unless redundant */ + if (!pathkey_is_redundant(cpathkey, retval)) retval = lappend(retval, cpathkey); } @@ -1042,31 +584,31 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, foreach(i, subquery_pathkeys) { - List *sub_pathkey = (List *) lfirst(i); + PathKey *sub_pathkey = (PathKey *) lfirst(i); + EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass; + PathKey *best_pathkey = NULL; + int best_score = -1; ListCell *j; - PathKeyItem *best_item = NULL; - int best_score = 0; - List *cpathkey; /* - * The sub_pathkey could contain multiple elements (representing - * knowledge that multiple items are effectively equal). Each element - * might match none, one, or more of the output columns that are - * visible to the outer query. This means we may have multiple - * possible representations of the sub_pathkey in the context of the - * outer query. Ideally we would generate them all and put them all - * into a pathkey list of the outer query, thereby propagating - * equality knowledge up to the outer query. Right now we cannot do - * so, because the outer query's canonical pathkey sets are already - * frozen when this is called. Instead we prefer the one that has the - * highest "score" (number of canonical pathkey peers, plus one if it - * matches the outer query_pathkeys). This is the most likely to be - * useful in the outer query. + * The sub_pathkey's EquivalenceClass could contain multiple elements + * (representing knowledge that multiple items are effectively equal). + * Each element might match none, one, or more of the output columns + * that are visible to the outer query. This means we may have + * multiple possible representations of the sub_pathkey in the context + * of the outer query. Ideally we would generate them all and put + * them all into an EC of the outer query, thereby propagating + * equality knowledge up to the outer query. Right now we cannot do + * so, because the outer query's EquivalenceClasses are already frozen + * when this is called. Instead we prefer the one that has the highest + * "score" (number of EC peers, plus one if it matches the outer + * query_pathkeys). This is the most likely to be useful in the outer + * query. */ - foreach(j, sub_pathkey) + foreach(j, sub_eclass->ec_members) { - PathKeyItem *sub_item = (PathKeyItem *) lfirst(j); - Node *sub_key = sub_item->key; + EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j); + Expr *sub_expr = sub_member->em_expr; Expr *rtarg; ListCell *k; @@ -1076,26 +618,27 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * entry. (The latter case is worth extra code because it arises * frequently in connection with varchar fields.) */ - if (IsA(sub_key, RelabelType)) - rtarg = ((RelabelType *) sub_key)->arg; + if (IsA(sub_expr, RelabelType)) + rtarg = ((RelabelType *) sub_expr)->arg; else rtarg = NULL; foreach(k, sub_tlist) { TargetEntry *tle = (TargetEntry *) lfirst(k); - Node *outer_expr; - PathKeyItem *outer_item; + Expr *outer_expr; + EquivalenceClass *outer_ec; + PathKey *outer_pk; int score; /* resjunk items aren't visible to outer query */ if (tle->resjunk) continue; - if (equal(tle->expr, sub_key)) + if (equal(tle->expr, sub_expr)) { /* Exact match */ - outer_expr = (Node *) + outer_expr = (Expr *) makeVar(rel->relid, tle->resno, exprType((Node *) tle->expr), @@ -1105,35 +648,40 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, else if (rtarg && equal(tle->expr, rtarg)) { /* Match after discarding RelabelType */ - outer_expr = (Node *) + outer_expr = (Expr *) makeVar(rel->relid, tle->resno, exprType((Node *) tle->expr), exprTypmod((Node *) tle->expr), 0); - outer_expr = (Node *) + outer_expr = (Expr *) makeRelabelType((Expr *) outer_expr, - ((RelabelType *) sub_key)->resulttype, - ((RelabelType *) sub_key)->resulttypmod, - ((RelabelType *) sub_key)->relabelformat); + ((RelabelType *) sub_expr)->resulttype, + ((RelabelType *) sub_expr)->resulttypmod, + ((RelabelType *) sub_expr)->relabelformat); } else continue; - /* Found a representation for this sub_key */ - outer_item = makePathKeyItem(outer_expr, - sub_item->sortop, - sub_item->nulls_first, - true); - /* score = # of mergejoin peers */ - score = count_canonical_peers(root, outer_item); + /* Found a representation for this sub_pathkey */ + outer_ec = get_eclass_for_sort_expr(root, + outer_expr, + sub_member->em_datatype, + sub_eclass->ec_opfamilies); + outer_pk = make_canonical_pathkey(root, + outer_ec, + sub_pathkey->pk_opfamily, + sub_pathkey->pk_strategy, + sub_pathkey->pk_nulls_first); + /* score = # of equivalence peers */ + score = list_length(outer_ec->ec_members) - 1; /* +1 if it matches the proper query_pathkeys item */ if (retvallen < outer_query_keys && - list_member(list_nth(root->query_pathkeys, retvallen), outer_item)) + list_nth(root->query_pathkeys, retvallen) == outer_pk) score++; if (score > best_score) { - best_item = outer_item; + best_pathkey = outer_pk; best_score = score; } } @@ -1143,19 +691,16 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * If we couldn't find a representation of this sub_pathkey, we're * done (we can't use the ones to its right, either). */ - if (!best_item) + if (!best_pathkey) break; - /* Canonicalize the chosen item (we did not before) */ - cpathkey = make_canonical_pathkey(root, best_item); - /* * Eliminate redundant ordering info; could happen if outer query - * equijoins subquery keys... + * equivalences subquery keys... */ - if (!list_member_ptr(retval, cpathkey)) + if (!pathkey_is_redundant(best_pathkey, retval)) { - retval = lappend(retval, cpathkey); + retval = lappend(retval, best_pathkey); retvallen++; } } @@ -1166,19 +711,14 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, /* * build_join_pathkeys * Build the path keys for a join relation constructed by mergejoin or - * nestloop join. These keys should include all the path key vars of the - * outer path (since the join will retain the ordering of the outer path) - * plus any vars of the inner path that are equijoined to the outer vars. - * - * Per the discussion in backend/optimizer/README, equijoined inner vars - * can be considered path keys of the result, just the same as the outer - * vars they were joined with; furthermore, it doesn't matter what kind - * of join algorithm is actually used. + * nestloop join. This is normally the same as the outer path's keys. * * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as * having the outer path's path keys, because null lefthand rows may be * inserted at random points. It must be treated as unsorted. * + * We truncate away any pathkeys that are uninteresting for higher joins. + * * 'joinrel' is the join relation that paths are being formed for * 'jointype' is the join type (inner, left, full, etc) * 'outer_pathkeys' is the list of the current outer path's path keys @@ -1197,8 +737,7 @@ build_join_pathkeys(PlannerInfo *root, /* * This used to be quite a complex bit of code, but now that all pathkey * sublists start out life canonicalized, we don't have to do a darn thing - * here! The inner-rel vars we used to need to add are *already* part of - * the outer pathkey! + * here! * * We do, however, need to truncate the pathkeys list, since it may * contain pathkeys that were useful for forming this joinrel but are @@ -1216,18 +755,21 @@ build_join_pathkeys(PlannerInfo *root, * Generate a pathkeys list that represents the sort order specified * by a list of SortClauses (GroupClauses will work too!) * - * NB: the result is NOT in canonical form, but must be passed through - * canonicalize_pathkeys() before it can be used for comparisons or - * labeling relation sort orders. (We do things this way because - * grouping_planner needs to be able to construct requested pathkeys - * before the pathkey equivalence sets have been created for the query.) + * If canonicalize is TRUE, the resulting PathKeys are all in canonical form; + * otherwise not. canonicalize should always be TRUE after EquivalenceClass + * merging has been performed, but FALSE if we haven't done EquivalenceClass + * merging yet. (We provide this option because grouping_planner() needs to + * be able to represent requested pathkeys before the equivalence classes have + * been created for the query.) * * 'sortclauses' is a list of SortClause or GroupClause nodes * 'tlist' is the targetlist to find the referenced tlist entries in */ List * -make_pathkeys_for_sortclauses(List *sortclauses, - List *tlist) +make_pathkeys_for_sortclauses(PlannerInfo *root, + List *sortclauses, + List *tlist, + bool canonicalize) { List *pathkeys = NIL; ListCell *l; @@ -1235,19 +777,24 @@ make_pathkeys_for_sortclauses(List *sortclauses, foreach(l, sortclauses) { SortClause *sortcl = (SortClause *) lfirst(l); - Node *sortkey; - PathKeyItem *pathkey; - - sortkey = get_sortgroupclause_expr(sortcl, tlist); - pathkey = makePathKeyItem(sortkey, sortcl->sortop, sortcl->nulls_first, - true); - - /* - * The pathkey becomes a one-element sublist, for now; - * canonicalize_pathkeys() might replace it with a longer sublist - * later. - */ - pathkeys = lappend(pathkeys, list_make1(pathkey)); + Expr *sortkey; + PathKey *pathkey; + + sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); + pathkey = make_pathkey_from_sortinfo(root, + sortkey, + sortcl->sortop, + sortcl->nulls_first, + canonicalize); + + /* Canonical form eliminates redundant ordering keys */ + if (canonicalize) + { + if (!pathkey_is_redundant(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); + } + else + pathkeys = lappend(pathkeys, pathkey); } return pathkeys; } @@ -1257,49 +804,44 @@ make_pathkeys_for_sortclauses(List *sortclauses, ****************************************************************************/ /* - * cache_mergeclause_pathkeys - * Make the cached pathkeys valid in a mergeclause restrictinfo. + * cache_mergeclause_eclasses + * Make the cached EquivalenceClass links valid in a mergeclause + * restrictinfo. * - * RestrictInfo contains fields in which we may cache the result - * of looking up the canonical pathkeys for the left and right sides - * of the mergeclause. (Note that in normal cases they will be the - * same, but not if the mergeclause appears above an OUTER JOIN.) - * This is a worthwhile savings because these routines will be invoked - * many times when dealing with a many-relation query. - * - * We have to be careful that the cached values are palloc'd in the same - * context the RestrictInfo node itself is in. This is not currently a - * problem for normal planning, but it is an issue for GEQO planning. + * RestrictInfo contains fields in which we may cache pointers to + * EquivalenceClasses for the left and right inputs of the mergeclause. + * (If the mergeclause is a true equivalence clause these will be the + * same EquivalenceClass, otherwise not.) */ void -cache_mergeclause_pathkeys(PlannerInfo *root, RestrictInfo *restrictinfo) +cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) { - Node *key; - PathKeyItem *item; - MemoryContext oldcontext; - - Assert(restrictinfo->mergejoinoperator != InvalidOid); + Assert(restrictinfo->mergeopfamilies != NIL); - if (restrictinfo->left_pathkey == NIL) - { - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(restrictinfo)); - key = get_leftop(restrictinfo->clause); - item = makePathKeyItem(key, restrictinfo->left_sortop, - false, /* XXX nulls_first? */ - false); - restrictinfo->left_pathkey = make_canonical_pathkey(root, item); - MemoryContextSwitchTo(oldcontext); - } - if (restrictinfo->right_pathkey == NIL) + /* the cached values should be either both set or both not */ + if (restrictinfo->left_ec == NULL) { - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(restrictinfo)); - key = get_rightop(restrictinfo->clause); - item = makePathKeyItem(key, restrictinfo->right_sortop, - false, /* XXX nulls_first? */ - false); - restrictinfo->right_pathkey = make_canonical_pathkey(root, item); - MemoryContextSwitchTo(oldcontext); + Expr *clause = restrictinfo->clause; + Oid lefttype, + righttype; + + /* Need the declared input types of the operator */ + op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype); + + /* Find or create a matching EquivalenceClass for each side */ + restrictinfo->left_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_leftop(clause), + lefttype, + restrictinfo->mergeopfamilies); + restrictinfo->right_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_rightop(clause), + righttype, + restrictinfo->mergeopfamilies); } + else + Assert(restrictinfo->right_ec != NULL); } /* @@ -1309,72 +851,82 @@ cache_mergeclause_pathkeys(PlannerInfo *root, RestrictInfo *restrictinfo) * If successful, it returns a list of mergeclauses. * * 'pathkeys' is a pathkeys list showing the ordering of an input path. - * It doesn't matter whether it is for the inner or outer path. + * 'outer_keys' is TRUE if these keys are for the outer input path, + * FALSE if for inner. * 'restrictinfos' is a list of mergejoinable restriction clauses for the * join relation being formed. * + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * * The result is NIL if no merge can be done, else a maximal list of * usable mergeclauses (represented as a list of their restrictinfo nodes). - * - * XXX Ideally we ought to be considering context, ie what path orderings - * are available on the other side of the join, rather than just making - * an arbitrary choice among the mergeclauses that will work for this side - * of the join. */ List * find_mergeclauses_for_pathkeys(PlannerInfo *root, List *pathkeys, + bool outer_keys, List *restrictinfos) { List *mergeclauses = NIL; ListCell *i; - /* make sure we have pathkeys cached in the clauses */ + /* make sure we have eclasses cached in the clauses */ foreach(i, restrictinfos) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); - cache_mergeclause_pathkeys(root, restrictinfo); + cache_mergeclause_eclasses(root, rinfo); } foreach(i, pathkeys) { - List *pathkey = (List *) lfirst(i); + PathKey *pathkey = (PathKey *) lfirst(i); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; List *matched_restrictinfos = NIL; ListCell *j; - /* - * We can match a pathkey against either left or right side of any - * mergejoin clause. (We examine both sides since we aren't told if - * the given pathkeys are for inner or outer input path; no confusion - * is possible.) Furthermore, if there are multiple matching clauses, - * take them all. In plain inner-join scenarios we expect only one - * match, because redundant-mergeclause elimination will have removed - * any redundant mergeclauses from the input list. However, in - * outer-join scenarios there might be multiple matches. An example is + /*---------- + * A mergejoin clause matches a pathkey if it has the same EC. + * If there are multiple matching clauses, take them all. In plain + * inner-join scenarios we expect only one match, because + * equivalence-class processing will have removed any redundant + * mergeclauses. However, in outer-join scenarios there might be + * multiple matches. An example is * - * select * from a full join b on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 - * = b.v2; + * select * from a full join b + * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2; * - * Given the pathkeys ((a.v1), (a.v2)) it is okay to return all three + * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed * we *must* do so or we will be unable to form a valid plan. + * + * We expect that the given pathkeys list is canonical, which means + * no two members have the same EC, so it's not possible for this + * code to enter the same mergeclause into the result list twice. + * + * XXX it's possible that multiple matching clauses might have + * different ECs on the other side, in which case the order we put + * them into our result makes a difference in the pathkeys required + * for the other input path. However this routine hasn't got any info + * about which order would be best, so for now we disregard that case + * (which is probably a corner case anyway). + *---------- */ foreach(j, restrictinfos) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(j); + EquivalenceClass *clause_ec; - /* - * We can compare canonical pathkey sublists by simple pointer - * equality; see compare_pathkeys. - */ - if ((pathkey == restrictinfo->left_pathkey || - pathkey == restrictinfo->right_pathkey) && - !list_member_ptr(mergeclauses, restrictinfo)) - { - matched_restrictinfos = lappend(matched_restrictinfos, - restrictinfo); - } + if (outer_keys) + clause_ec = rinfo->outer_is_left ? + rinfo->left_ec : rinfo->right_ec; + else + clause_ec = rinfo->outer_is_left ? + rinfo->right_ec : rinfo->left_ec; + if (clause_ec == pathkey_ec) + matched_restrictinfos = lappend(matched_restrictinfos, rinfo); } /* @@ -1396,63 +948,270 @@ find_mergeclauses_for_pathkeys(PlannerInfo *root, } /* - * make_pathkeys_for_mergeclauses + * select_outer_pathkeys_for_merge + * Builds a pathkey list representing a possible sort ordering + * that can be used with the given mergeclauses. + * + * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses + * that will be used in a merge join. + * 'joinrel' is the join relation we are trying to construct. + * + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * + * Returns a pathkeys list that can be applied to the outer relation. + * + * Since we assume here that a sort is required, there is no particular use + * in matching any available ordering of the outerrel. (joinpath.c has an + * entirely separate code path for considering sort-free mergejoins.) Rather, + * it's interesting to try to match the requested query_pathkeys so that a + * second output sort may be avoided; and failing that, we try to list "more + * popular" keys (those with the most unmatched EquivalenceClass peers) + * earlier, in hopes of making the resulting ordering useful for as many + * higher-level mergejoins as possible. + */ +List * +select_outer_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + RelOptInfo *joinrel) +{ + List *pathkeys = NIL; + int nClauses = list_length(mergeclauses); + EquivalenceClass **ecs; + int *scores; + int necs; + ListCell *lc; + int j; + + /* Might have no mergeclauses */ + if (nClauses == 0) + return NIL; + + /* + * Make arrays of the ECs used by the mergeclauses (dropping any + * duplicates) and their "popularity" scores. + */ + ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *)); + scores = (int *) palloc(nClauses * sizeof(int)); + necs = 0; + + foreach(lc, mergeclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + int score; + ListCell *lc2; + + /* get the outer eclass */ + cache_mergeclause_eclasses(root, rinfo); + + if (rinfo->outer_is_left) + oeclass = rinfo->left_ec; + else + oeclass = rinfo->right_ec; + + /* reject duplicates */ + for (j = 0; j < necs; j++) + { + if (ecs[j] == oeclass) + break; + } + if (j < necs) + continue; + + /* compute score */ + score = 0; + foreach(lc2, oeclass->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + + /* Potential future join partner? */ + if (!em->em_is_const && !em->em_is_child && + !bms_overlap(em->em_relids, joinrel->relids)) + score++; + } + + ecs[necs] = oeclass; + scores[necs] = score; + necs++; + } + + /* + * Find out if we have all the ECs mentioned in query_pathkeys; if so + * we can generate a sort order that's also useful for final output. + * There is no percentage in a partial match, though, so we have to + * have 'em all. + */ + if (root->query_pathkeys) + { + foreach(lc, root->query_pathkeys) + { + PathKey *query_pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *query_ec = query_pathkey->pk_eclass; + + for (j = 0; j < necs; j++) + { + if (ecs[j] == query_ec) + break; /* found match */ + } + if (j >= necs) + break; /* didn't find match */ + } + /* if we got to the end of the list, we have them all */ + if (lc == NULL) + { + /* copy query_pathkeys as starting point for our output */ + pathkeys = list_copy(root->query_pathkeys); + /* mark their ECs as already-emitted */ + foreach(lc, root->query_pathkeys) + { + PathKey *query_pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *query_ec = query_pathkey->pk_eclass; + + for (j = 0; j < necs; j++) + { + if (ecs[j] == query_ec) + { + scores[j] = -1; + break; + } + } + } + } + } + + /* + * Add remaining ECs to the list in popularity order, using a default + * sort ordering. (We could use qsort() here, but the list length is + * usually so small it's not worth it.) + */ + for (;;) + { + int best_j; + int best_score; + EquivalenceClass *ec; + PathKey *pathkey; + + best_j = 0; + best_score = scores[0]; + for (j = 1; j < necs; j++) + { + if (scores[j] > best_score) + { + best_j = j; + best_score = scores[j]; + } + } + if (best_score < 0) + break; /* all done */ + ec = ecs[best_j]; + scores[best_j] = -1; + pathkey = make_canonical_pathkey(root, + ec, + linitial_oid(ec->ec_opfamilies), + BTLessStrategyNumber, + false); + /* can't be redundant because no duplicate ECs */ + Assert(!pathkey_is_redundant(pathkey, pathkeys)); + pathkeys = lappend(pathkeys, pathkey); + } + + pfree(ecs); + pfree(scores); + + return pathkeys; +} + +/* + * make_inner_pathkeys_for_merge * Builds a pathkey list representing the explicit sort order that - * must be applied to a path in order to make it usable for the + * must be applied to an inner path to make it usable with the * given mergeclauses. * * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses * that will be used in a merge join. - * 'rel' is the relation the pathkeys will apply to (ie, either the inner - * or outer side of the proposed join rel). + * 'outer_pathkeys' are the already-known canonical pathkeys for the outer + * side of the join. * - * Returns a pathkeys list that can be applied to the indicated relation. + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * + * Returns a pathkeys list that can be applied to the inner relation. * * Note that it is not this routine's job to decide whether sorting is * actually needed for a particular input path. Assume a sort is necessary; * just make the keys, eh? */ List * -make_pathkeys_for_mergeclauses(PlannerInfo *root, - List *mergeclauses, - RelOptInfo *rel) +make_inner_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + List *outer_pathkeys) { List *pathkeys = NIL; - ListCell *l; + EquivalenceClass *lastoeclass; + PathKey *opathkey; + ListCell *lc; + ListCell *lop; - foreach(l, mergeclauses) + lastoeclass = NULL; + opathkey = NULL; + lop = list_head(outer_pathkeys); + + foreach(lc, mergeclauses) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - List *pathkey; + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + EquivalenceClass *ieclass; + PathKey *pathkey; - cache_mergeclause_pathkeys(root, restrictinfo); + cache_mergeclause_eclasses(root, rinfo); - if (bms_is_subset(restrictinfo->left_relids, rel->relids)) + if (rinfo->outer_is_left) { - /* Rel is left side of mergeclause */ - pathkey = restrictinfo->left_pathkey; + oeclass = rinfo->left_ec; + ieclass = rinfo->right_ec; } - else if (bms_is_subset(restrictinfo->right_relids, rel->relids)) + else { - /* Rel is right side of mergeclause */ - pathkey = restrictinfo->right_pathkey; + oeclass = rinfo->right_ec; + ieclass = rinfo->left_ec; } - else + + /* outer eclass should match current or next pathkeys */ + /* we check this carefully for debugging reasons */ + if (oeclass != lastoeclass) { - elog(ERROR, "could not identify which side of mergeclause to use"); - pathkey = NIL; /* keep compiler quiet */ + if (!lop) + elog(ERROR, "too few pathkeys for mergeclauses"); + opathkey = (PathKey *) lfirst(lop); + lop = lnext(lop); + lastoeclass = opathkey->pk_eclass; + if (oeclass != lastoeclass) + elog(ERROR, "outer pathkeys do not match mergeclause"); } /* - * When we are given multiple merge clauses, it's possible that some - * clauses refer to the same vars as earlier clauses. There's no - * reason for us to specify sort keys like (A,B,A) when (A,B) will do - * --- and adding redundant sort keys makes add_path think that this - * sort order is different from ones that are really the same, so - * don't do it. Since we now have a canonicalized pathkey, a simple - * ptrMember test is sufficient to detect redundant keys. + * Often, we'll have same EC on both sides, in which case the outer + * pathkey is also canonical for the inner side, and we can skip a + * useless search. + */ + if (ieclass == oeclass) + pathkey = opathkey; + else + pathkey = make_canonical_pathkey(root, + ieclass, + opathkey->pk_opfamily, + opathkey->pk_strategy, + opathkey->pk_nulls_first); + + /* + * Don't generate redundant pathkeys (can happen if multiple + * mergeclauses refer to same EC). */ - pathkeys = list_append_unique_ptr(pathkeys, pathkey); + if (!pathkey_is_redundant(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); } return pathkeys; @@ -1471,7 +1230,7 @@ make_pathkeys_for_mergeclauses(PlannerInfo *root, /* * pathkeys_useful_for_merging * Count the number of pathkeys that may be useful for mergejoins - * above the given relation (by looking at its joininfo list). + * above the given relation. * * We consider a pathkey potentially useful if it corresponds to the merge * ordering of either side of any joinclause for the rel. This might be @@ -1487,27 +1246,39 @@ pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys) foreach(i, pathkeys) { - List *pathkey = (List *) lfirst(i); + PathKey *pathkey = (PathKey *) lfirst(i); bool matched = false; ListCell *j; - foreach(j, rel->joininfo) + /* + * First look into the EquivalenceClass of the pathkey, to see if + * there are any members not yet joined to the rel. If so, it's + * surely possible to generate a mergejoin clause using them. + */ + if (rel->has_eclass_joins && + eclass_useful_for_merging(pathkey->pk_eclass, rel)) + matched = true; + else { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); - - if (restrictinfo->mergejoinoperator == InvalidOid) - continue; - cache_mergeclause_pathkeys(root, restrictinfo); - /* - * We can compare canonical pathkey sublists by simple pointer - * equality; see compare_pathkeys. + * Otherwise search the rel's joininfo list, which contains + * non-EquivalenceClass-derivable join clauses that might + * nonetheless be mergejoinable. */ - if (pathkey == restrictinfo->left_pathkey || - pathkey == restrictinfo->right_pathkey) + foreach(j, rel->joininfo) { - matched = true; - break; + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); + + if (restrictinfo->mergeopfamilies == NIL) + continue; + cache_mergeclause_eclasses(root, restrictinfo); + + if (pathkey->pk_eclass == restrictinfo->left_ec || + pathkey->pk_eclass == restrictinfo->right_ec) + { + matched = true; + break; + } } } |