/*------------------------------------------------------------------------- * * initsplan.c * Target list, qualification, joininfo initialization routines * * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.50 2000/09/12 21:06:54 tgl Exp $ * *------------------------------------------------------------------------- */ #include #include "postgres.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" #include "optimizer/var.h" #include "parser/parsetree.h" #include "parser/parse_expr.h" #include "parser/parse_oper.h" #include "parser/parse_type.h" #include "utils/lsyscache.h" static void mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels); static void add_restrict_and_join_to_rel(Query *root, Node *clause, bool isjoinqual, Relids outerjoinrelids); static void add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo, Relids join_relids); static void add_vars_to_targetlist(Query *root, List *vars); static void check_mergejoinable(RestrictInfo *restrictinfo); static void check_hashjoinable(RestrictInfo *restrictinfo); /***************************************************************************** * * TARGET LISTS * *****************************************************************************/ /* * build_base_rel_tlists * Creates rel nodes for every relation mentioned in the target list * 'tlist' (if a node hasn't already been created) and adds them to * root->base_rel_list. Creates targetlist entries for each var seen * in 'tlist' and adds them to the tlist of the appropriate rel node. */ void build_base_rel_tlists(Query *root, List *tlist) { List *tlist_vars = pull_var_clause((Node *) tlist, false); add_vars_to_targetlist(root, tlist_vars); freeList(tlist_vars); } /* * add_vars_to_targetlist * For each variable appearing in the list, add it to the relation's * targetlist if not already present. Rel nodes will also be created * if not already present. */ static void add_vars_to_targetlist(Query *root, List *vars) { List *temp; foreach(temp, vars) { Var *var = (Var *) lfirst(temp); RelOptInfo *rel = get_base_rel(root, var->varno); add_var_to_tlist(rel, var); } } /*---------- * add_missing_rels_to_query * * If we have a relation listed in the join tree that does not appear * in the target list nor qualifications, we must add it to the base * relation list so that it can be processed. For instance, * select f.x from foo f, foo f2 * is a join of f and f2. Note that if we have * select foo.x from foo f * this also gets turned into a join (between foo as foo and foo as f). * * To avoid putting useless entries into the per-relation targetlists, * this should only be called after all the variables in the targetlist * and quals have been processed by the routines above. * * Returns a list of all the base relations (RelOptInfo nodes) that appear * in the join tree. This list can be used for cross-checking in the * reverse direction, ie, that we have a join tree entry for every * relation used in the query. *---------- */ List * add_missing_rels_to_query(Query *root, Node *jtnode) { List *result = NIL; if (jtnode == NULL) return NIL; if (IsA(jtnode, List)) { List *l; foreach(l, (List *) jtnode) { result = nconc(result, add_missing_rels_to_query(root, lfirst(l))); } } else if (IsA(jtnode, RangeTblRef)) { int varno = ((RangeTblRef *) jtnode)->rtindex; RelOptInfo *rel = get_base_rel(root, varno); /* * If the rel isn't otherwise referenced, give it a dummy * targetlist consisting of its own OID. */ if (rel->targetlist == NIL) { Var *var = makeVar(varno, ObjectIdAttributeNumber, OIDOID, -1, 0); add_var_to_tlist(rel, var); } result = lcons(rel, NIL); } else if (IsA(jtnode, JoinExpr)) { JoinExpr *j = (JoinExpr *) jtnode; result = add_missing_rels_to_query(root, j->larg); result = nconc(result, add_missing_rels_to_query(root, j->rarg)); } else elog(ERROR, "add_missing_rels_to_query: unexpected node type %d", nodeTag(jtnode)); return result; } /***************************************************************************** * * QUALIFICATIONS * *****************************************************************************/ /* * add_join_quals_to_rels * Recursively scan the join tree for JOIN/ON (and JOIN/USING) qual * clauses, and add these to the appropriate JoinInfo lists. Also, * mark base RelOptInfos with outerjoinset information, which will * be needed for proper placement of WHERE clauses during * add_restrict_and_join_to_rels(). * * NOTE: when dealing with inner joins, it is appropriate to let a qual clause * be evaluated at the lowest level where all the variables it mentions are * available. However, we cannot do this within an outer join since the qual * might eliminate matching rows and cause a NULL row to be added improperly. * Therefore, rels appearing within (the nullable side of) an outer join * are marked with outerjoinset = list of Relids used at the outer join node. * This list will be added to the list of rels referenced by quals using * such a rel, thereby forcing them up the join tree to the right level. * * To ease the calculation of these values, add_join_quals_to_rels() returns * the list of Relids involved in its own level of join. This is just an * internal convenience; no outside callers pay attention to the result. */ Relids add_join_quals_to_rels(Query *root, Node *jtnode) { Relids result = NIL; if (jtnode == NULL) return result; if (IsA(jtnode, List)) { List *l; /* * Note: we assume it's impossible to see same RT index from more * than one subtree, so nconc() is OK rather than LispUnioni(). */ foreach(l, (List *) jtnode) result = nconc(result, add_join_quals_to_rels(root, lfirst(l))); } else if (IsA(jtnode, RangeTblRef)) { int varno = ((RangeTblRef *) jtnode)->rtindex; /* No quals to deal with, just return correct result */ result = lconsi(varno, NIL); } else if (IsA(jtnode, JoinExpr)) { JoinExpr *j = (JoinExpr *) jtnode; Relids leftids, rightids, outerjoinids; List *qual; /* * Order of operations here is subtle and critical. First we recurse * to handle sub-JOINs. Their join quals will be placed without * regard for whether this level is an outer join, which is correct. * Then, if we are an outer join, we mark baserels contained within * the nullable side(s) with our own rel list; this will restrict * placement of subsequent quals using those rels, including our own * quals, quals above us in the join tree, and WHERE quals. * Finally we place our own join quals. */ leftids = add_join_quals_to_rels(root, j->larg); rightids = add_join_quals_to_rels(root, j->rarg); result = nconc(listCopy(leftids), rightids); outerjoinids = NIL; switch (j->jointype) { case JOIN_INNER: /* Inner join adds no restrictions for quals */ break; case JOIN_LEFT: mark_baserels_for_outer_join(root, rightids, result); outerjoinids = result; break; case JOIN_FULL: mark_baserels_for_outer_join(root, result, result); outerjoinids = result; break; case JOIN_RIGHT: mark_baserels_for_outer_join(root, leftids, result); outerjoinids = result; break; case JOIN_UNION: /* * This is where we fail if upper levels of planner haven't * rewritten UNION JOIN as an Append ... */ elog(ERROR, "UNION JOIN is not implemented yet"); break; default: elog(ERROR, "add_join_quals_to_rels: unsupported join type %d", (int) j->jointype); break; } foreach(qual, (List *) j->quals) add_restrict_and_join_to_rel(root, (Node *) lfirst(qual), true, outerjoinids); } else elog(ERROR, "add_join_quals_to_rels: unexpected node type %d", nodeTag(jtnode)); return result; } /* * mark_baserels_for_outer_join * Mark all base rels listed in 'rels' as having the given outerjoinset. */ static void mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels) { List *relid; foreach(relid, rels) { RelOptInfo *rel = get_base_rel(root, lfirsti(relid)); /* * Since we do this bottom-up, any outer-rels previously marked * should be within the new outer join set. */ Assert(is_subseti(rel->outerjoinset, outerrels)); rel->outerjoinset = outerrels; } } /* * add_restrict_and_join_to_rels * Fill RestrictInfo and JoinInfo lists of relation entries for all * relations appearing within clauses. Creates new relation entries if * necessary, adding them to root->base_rel_list. * * 'clauses': the list of clauses in the cnfify'd query qualification. */ void add_restrict_and_join_to_rels(Query *root, List *clauses) { List *clause; foreach(clause, clauses) add_restrict_and_join_to_rel(root, (Node *) lfirst(clause), false, NIL); } /* * add_restrict_and_join_to_rel * Add clause information to either the 'RestrictInfo' or 'JoinInfo' field * (depending on whether the clause is a join) of each base relation * mentioned in the clause. A RestrictInfo node is created and added to * the appropriate list for each rel. Also, if the clause uses a * mergejoinable operator and is not an outer-join qual, enter the left- * and right-side expressions into the query's lists of equijoined vars. * * isjoinqual is true if the clause came from JOIN/ON or JOIN/USING; * we have to mark the created RestrictInfo accordingly. If the JOIN * is an OUTER join, the caller must set outerjoinrelids = all relids of join, * which will override the joinrel identifiers extracted from the clause * itself. For inner join quals and WHERE clauses, set outerjoinrelids = NIL. * (Passing the whole list, and not just an "isouterjoin" boolean, is simply * a speed optimization: we could extract the same list from the base rels' * outerjoinsets, but since add_join_quals_to_rels() already knows what we * should use, might as well pass it in instead of recalculating it.) */ static void add_restrict_and_join_to_rel(Query *root, Node *clause, bool isjoinqual, Relids outerjoinrelids) { RestrictInfo *restrictinfo = makeNode(RestrictInfo); Relids relids; List *vars; bool can_be_equijoin; restrictinfo->clause = (Expr *) clause; restrictinfo->isjoinqual = isjoinqual; restrictinfo->subclauseindices = NIL; restrictinfo->mergejoinoperator = InvalidOid; restrictinfo->left_sortop = InvalidOid; restrictinfo->right_sortop = InvalidOid; restrictinfo->hashjoinoperator = InvalidOid; /* * Retrieve all relids and vars contained within the clause. */ clause_get_relids_vars(clause, &relids, &vars); /* * If caller has given us a join relid list, use it; otherwise, we must * scan the referenced base rels and add in any outer-join rel lists. * This prevents the clause from being applied at a lower level of joining * than any OUTER JOIN that should be evaluated before it. */ if (outerjoinrelids) { /* Safety check: parser should have enforced this to start with */ if (! is_subseti(relids, outerjoinrelids)) elog(ERROR, "JOIN qualification may not refer to other relations"); relids = outerjoinrelids; can_be_equijoin = false; } else { Relids newrelids = relids; List *relid; /* We rely on LispUnioni to be nondestructive of its input lists... */ can_be_equijoin = true; foreach(relid, relids) { RelOptInfo *rel = get_base_rel(root, lfirsti(relid)); if (rel->outerjoinset) { newrelids = LispUnioni(newrelids, rel->outerjoinset); /* * Because application of the qual will be delayed by outer * join, we mustn't assume its vars are equal everywhere. */ can_be_equijoin = false; } } relids = newrelids; } if (length(relids) == 1) { /* * There is only one relation participating in 'clause', so * 'clause' is a restriction clause for that relation. */ RelOptInfo *rel = get_base_rel(root, lfirsti(relids)); rel->baserestrictinfo = lcons(restrictinfo, rel->baserestrictinfo); /* * Check for a "mergejoinable" clause even though it's not a join * clause. This is so that we can recognize that "a.x = a.y" * makes x and y eligible to be considered equal, even when they * belong to the same rel. Without this, we would not recognize * that "a.x = a.y AND a.x = b.z AND a.y = c.q" allows us to * consider z and q equal after their rels are joined. */ if (can_be_equijoin) check_mergejoinable(restrictinfo); } else if (relids != NIL) { /* * 'clause' is a join clause, since there is more than one rel in * the relid list. Set additional RestrictInfo fields for * joining. * * We don't bother setting the merge/hashjoin info if we're not * going to need it. */ if (enable_mergejoin || can_be_equijoin) check_mergejoinable(restrictinfo); if (enable_hashjoin) check_hashjoinable(restrictinfo); /* * Add clause to the join lists of all the relevant relations. */ add_join_info_to_rels(root, restrictinfo, relids); /* * Add vars used in the join clause to targetlists of their * relations, so that they will be emitted by the plan nodes that * scan those relations (else they won't be available at the join * node!). */ add_vars_to_targetlist(root, vars); } else { /* * 'clause' references no rels, and therefore we have no place to * attach it. This means query_planner() screwed up --- it should * treat variable-less clauses separately. */ elog(ERROR, "add_restrict_and_join_to_rel: can't cope with variable-free clause"); } /* * If the clause has a mergejoinable operator, and is not an outer-join * qualification nor bubbled up due to an outer join, then the two sides * represent equivalent PathKeyItems for path keys: any path that is * sorted by one side will also be sorted by the other (as soon as the * two rels are joined, that is). Record the key equivalence for future * use. */ if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid) add_equijoined_keys(root, restrictinfo); } /* * add_join_info_to_rels * For every relation participating in a join clause, add 'restrictinfo' to * the appropriate joininfo list (creating a new list and adding it to the * appropriate rel node if necessary). * * 'restrictinfo' describes the join clause * 'join_relids' is the list of relations participating in the join clause */ static void add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo, Relids join_relids) { List *join_relid; /* For every relid, find the joininfo, and add the proper join entries */ foreach(join_relid, join_relids) { int cur_relid = lfirsti(join_relid); Relids unjoined_relids = NIL; JoinInfo *joininfo; List *otherrel; /* Get the relids not equal to the current relid */ foreach(otherrel, join_relids) { if (lfirsti(otherrel) != cur_relid) unjoined_relids = lappendi(unjoined_relids, lfirsti(otherrel)); } /* * Find or make the joininfo node for this combination of rels, * and add the restrictinfo node to it. */ joininfo = find_joininfo_node(get_base_rel(root, cur_relid), unjoined_relids); joininfo->jinfo_restrictinfo = lcons(restrictinfo, joininfo->jinfo_restrictinfo); } } /* * process_implied_equality * Check to see whether we already have a restrictinfo item that says * item1 = item2, and create one if not. This is a consequence of * transitivity of mergejoin equality: if we have mergejoinable * clauses A = B and B = C, we can deduce A = C (where = is an * appropriate mergejoinable operator). */ void process_implied_equality(Query *root, Node *item1, Node *item2, Oid sortop1, Oid sortop2) { Index irel1; Index irel2; RelOptInfo *rel1; List *restrictlist; List *itm; Oid ltype, rtype; Operator eq_operator; Form_pg_operator pgopform; Expr *clause; /* * Currently, since check_mergejoinable only accepts Var = Var clauses, * we should only see Var nodes here. Would have to work a little * harder to locate the right rel(s) if more-general mergejoin clauses * were accepted. */ Assert(IsA(item1, Var)); irel1 = ((Var *) item1)->varno; Assert(IsA(item2, Var)); irel2 = ((Var *) item2)->varno; /* * If both vars belong to same rel, we need to look at that rel's * baserestrictinfo list. If different rels, each will have a * joininfo node for the other, and we can scan either list. */ rel1 = get_base_rel(root, irel1); if (irel1 == irel2) restrictlist = rel1->baserestrictinfo; else { JoinInfo *joininfo = find_joininfo_node(rel1, lconsi(irel2, NIL)); restrictlist = joininfo->jinfo_restrictinfo; } /* * Scan to see if equality is already known. */ foreach(itm, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(itm); Node *left, *right; if (restrictinfo->mergejoinoperator == InvalidOid) continue; /* ignore non-mergejoinable clauses */ /* We now know the restrictinfo clause is a binary opclause */ left = (Node *) get_leftop(restrictinfo->clause); right = (Node *) get_rightop(restrictinfo->clause); if ((equal(item1, left) && equal(item2, right)) || (equal(item2, left) && equal(item1, right))) return; /* found a matching clause */ } /* * This equality is new information, so construct a clause * representing it to add to the query data structures. */ ltype = exprType(item1); rtype = exprType(item2); eq_operator = oper("=", ltype, rtype, true); if (!HeapTupleIsValid(eq_operator)) { /* * Would it be safe to just not add the equality to the query if * we have no suitable equality operator for the combination of * datatypes? NO, because sortkey selection may screw up anyway. */ elog(ERROR, "Unable to identify an equality operator for types '%s' and '%s'", typeidTypeName(ltype), typeidTypeName(rtype)); } pgopform = (Form_pg_operator) GETSTRUCT(eq_operator); /* * Let's just make sure this appears to be a compatible operator. */ if (pgopform->oprlsortop != sortop1 || pgopform->oprrsortop != sortop2 || pgopform->oprresult != BOOLOID) elog(ERROR, "Equality operator for types '%s' and '%s' should be mergejoinable, but isn't", typeidTypeName(ltype), typeidTypeName(rtype)); clause = makeNode(Expr); clause->typeOid = BOOLOID; clause->opType = OP_EXPR; clause->oper = (Node *) makeOper(oprid(eq_operator), /* opno */ InvalidOid, /* opid */ BOOLOID); /* operator result type */ clause->args = lcons(item1, lcons(item2, NIL)); add_restrict_and_join_to_rel(root, (Node *) clause, false, NIL); } /***************************************************************************** * * CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES * *****************************************************************************/ /* * check_mergejoinable * If the restrictinfo's clause is mergejoinable, set the mergejoin * info fields in the restrictinfo. * * Currently, we support mergejoin for binary opclauses where * both operands are simple Vars and the operator is a mergejoinable * operator. */ static void check_mergejoinable(RestrictInfo *restrictinfo) { Expr *clause = restrictinfo->clause; Var *left, *right; Oid opno, leftOp, rightOp; if (!is_opclause((Node *) clause)) return; left = get_leftop(clause); right = get_rightop(clause); /* caution: is_opclause accepts more than I do, so check it */ if (!right) return; /* unary opclauses need not apply */ if (!IsA(left, Var) ||!IsA(right, Var)) return; opno = ((Oper *) clause->oper)->opno; if (op_mergejoinable(opno, left->vartype, right->vartype, &leftOp, &rightOp)) { restrictinfo->mergejoinoperator = opno; restrictinfo->left_sortop = leftOp; restrictinfo->right_sortop = rightOp; } } /* * check_hashjoinable * If the restrictinfo's clause is hashjoinable, set the hashjoin * info fields in the restrictinfo. * * Currently, we support hashjoin for binary opclauses where * both operands are simple Vars and the operator is a hashjoinable * operator. */ static void check_hashjoinable(RestrictInfo *restrictinfo) { Expr *clause = restrictinfo->clause; Var *left, *right; Oid opno; if (!is_opclause((Node *) clause)) return; left = get_leftop(clause); right = get_rightop(clause); /* caution: is_opclause accepts more than I do, so check it */ if (!right) return; /* unary opclauses need not apply */ if (!IsA(left, Var) ||!IsA(right, Var)) return; opno = ((Oper *) clause->oper)->opno; if (op_hashjoinable(opno, left->vartype, right->vartype)) restrictinfo->hashjoinoperator = opno; }