diff options
Diffstat (limited to 'src/backend/optimizer/util')
-rw-r--r-- | src/backend/optimizer/util/appendinfo.c | 59 | ||||
-rw-r--r-- | src/backend/optimizer/util/clauses.c | 6 | ||||
-rw-r--r-- | src/backend/optimizer/util/joininfo.c | 19 | ||||
-rw-r--r-- | src/backend/optimizer/util/orclauses.c | 4 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 15 | ||||
-rw-r--r-- | src/backend/optimizer/util/placeholder.c | 123 | ||||
-rw-r--r-- | src/backend/optimizer/util/relnode.c | 389 | ||||
-rw-r--r-- | src/backend/optimizer/util/restrictinfo.c | 93 | ||||
-rw-r--r-- | src/backend/optimizer/util/var.c | 252 |
9 files changed, 871 insertions, 89 deletions
diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index cd45ab48993..d449b5c2746 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -228,6 +228,28 @@ adjust_appendrel_attrs_mutator(Node *node, if (var->varlevelsup != 0) return (Node *) var; /* no changes needed */ + /* + * You might think we need to adjust var->varnullingrels, but that + * shouldn't need any changes. It will contain outer-join relids, + * while the transformation we are making affects only baserels. + * Below, we just propagate var->varnullingrels into the translated + * Var. + * + * If var->varnullingrels isn't empty, and the translation wouldn't be + * a Var, we have to fail. One could imagine wrapping the translated + * expression in a PlaceHolderVar, but that won't work because this is + * typically used after freezing placeholders. Fortunately, the case + * appears unreachable at the moment. We can see nonempty + * var->varnullingrels here, but only in cases involving partitionwise + * joining, and in such cases the translations will always be Vars. + * (Non-Var translations occur only for appendrels made by flattening + * UNION ALL subqueries.) Should we need to make this work in future, + * a possible fix is to mandate that prepjointree.c create PHVs for + * all non-Var outputs of such subqueries, and then we could look up + * the pre-existing PHV here. Or perhaps just wrap the translations + * that way to begin with? + */ + for (cnt = 0; cnt < nappinfos; cnt++) { if (var->varno == appinfos[cnt]->parent_relid) @@ -255,6 +277,10 @@ adjust_appendrel_attrs_mutator(Node *node, if (newnode == NULL) elog(ERROR, "attribute %d of relation \"%s\" does not exist", var->varattno, get_rel_name(appinfo->parent_reloid)); + if (IsA(newnode, Var)) + ((Var *) newnode)->varnullingrels = var->varnullingrels; + else if (var->varnullingrels != NULL) + elog(ERROR, "failed to apply nullingrels to a non-Var"); return newnode; } else if (var->varattno == 0) @@ -308,6 +334,9 @@ adjust_appendrel_attrs_mutator(Node *node, rowexpr->colnames = copyObject(rte->eref->colnames); rowexpr->location = -1; + if (var->varnullingrels != NULL) + elog(ERROR, "failed to apply nullingrels to a non-Var"); + return (Node *) rowexpr; } } @@ -348,6 +377,8 @@ adjust_appendrel_attrs_mutator(Node *node, var = copyObject(ridinfo->rowidvar); /* ... but use the correct relid */ var->varno = leaf_relid; + /* identity vars shouldn't have nulling rels */ + Assert(var->varnullingrels == NULL); /* varnosyn in the RowIdentityVarInfo is probably wrong */ var->varnosyn = 0; var->varattnosyn = 0; @@ -392,8 +423,11 @@ adjust_appendrel_attrs_mutator(Node *node, (void *) context); /* now fix PlaceHolderVar's relid sets */ if (phv->phlevelsup == 0) - phv->phrels = adjust_child_relids(phv->phrels, context->nappinfos, - context->appinfos); + { + phv->phrels = adjust_child_relids(phv->phrels, + nappinfos, appinfos); + /* as above, we needn't touch phnullingrels */ + } return (Node *) phv; } /* Shouldn't need to handle planner auxiliary nodes here */ @@ -412,7 +446,7 @@ adjust_appendrel_attrs_mutator(Node *node, RestrictInfo *oldinfo = (RestrictInfo *) node; RestrictInfo *newinfo = makeNode(RestrictInfo); - /* Copy all flat-copiable fields */ + /* Copy all flat-copiable fields, notably including rinfo_serial */ memcpy(newinfo, oldinfo, sizeof(RestrictInfo)); /* Recursively fix the clause itself */ @@ -688,7 +722,11 @@ get_translated_update_targetlist(PlannerInfo *root, Index relid, /* * find_appinfos_by_relids - * Find AppendRelInfo structures for all relations specified by relids. + * Find AppendRelInfo structures for base relations listed in relids. + * + * The relids argument is typically a join relation's relids, which can + * include outer-join RT indexes in addition to baserels. We silently + * ignore the outer joins. * * The AppendRelInfos are returned in an array, which can be pfree'd by the * caller. *nappinfos is set to the number of entries in the array. @@ -700,8 +738,9 @@ find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos) int cnt = 0; int i; - *nappinfos = bms_num_members(relids); - appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * *nappinfos); + /* Allocate an array that's certainly big enough */ + appinfos = (AppendRelInfo **) + palloc(sizeof(AppendRelInfo *) * bms_num_members(relids)); i = -1; while ((i = bms_next_member(relids, i)) >= 0) @@ -709,10 +748,17 @@ find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos) AppendRelInfo *appinfo = root->append_rel_array[i]; if (!appinfo) + { + /* Probably i is an OJ index, but let's check */ + if (find_base_rel_ignore_join(root, i) == NULL) + continue; + /* It's a base rel, but we lack an append_rel_array entry */ elog(ERROR, "child rel %d not found in append_rel_array", i); + } appinfos[cnt++] = appinfo; } + *nappinfos = cnt; return appinfos; } @@ -754,6 +800,7 @@ add_row_identity_var(PlannerInfo *root, Var *orig_var, Assert(IsA(orig_var, Var)); Assert(orig_var->varno == rtindex); Assert(orig_var->varlevelsup == 0); + Assert(orig_var->varnullingrels == NULL); /* * If we're doing non-inherited UPDATE/DELETE/MERGE, there's little need diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index aa584848cf9..76e25118f94 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -2004,14 +2004,16 @@ is_pseudo_constant_clause_relids(Node *clause, Relids relids) * NumRelids * (formerly clause_relids) * - * Returns the number of different relations referenced in 'clause'. + * Returns the number of different base relations referenced in 'clause'. */ int NumRelids(PlannerInfo *root, Node *clause) { + int result; Relids varnos = pull_varnos(root, clause); - int result = bms_num_members(varnos); + varnos = bms_del_members(varnos, root->outer_join_rels); + result = bms_num_members(varnos); bms_free(varnos); return result; } diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c index 197f20faec7..968a5a488ef 100644 --- a/src/backend/optimizer/util/joininfo.c +++ b/src/backend/optimizer/util/joininfo.c @@ -88,8 +88,8 @@ have_relevant_joinclause(PlannerInfo *root, * not depend on context). * * 'restrictinfo' describes the join clause - * 'join_relids' is the list of relations participating in the join clause - * (there must be more than one) + * 'join_relids' is the set of relations participating in the join clause + * (some of these could be outer joins) */ void add_join_clause_to_rels(PlannerInfo *root, @@ -101,8 +101,11 @@ add_join_clause_to_rels(PlannerInfo *root, cur_relid = -1; while ((cur_relid = bms_next_member(join_relids, cur_relid)) >= 0) { - RelOptInfo *rel = find_base_rel(root, cur_relid); + RelOptInfo *rel = find_base_rel_ignore_join(root, cur_relid); + /* We only need to add the clause to baserels */ + if (rel == NULL) + continue; rel->joininfo = lappend(rel->joininfo, restrictinfo); } } @@ -115,8 +118,8 @@ add_join_clause_to_rels(PlannerInfo *root, * discover that a relation need not be joined at all. * * 'restrictinfo' describes the join clause - * 'join_relids' is the list of relations participating in the join clause - * (there must be more than one) + * 'join_relids' is the set of relations participating in the join clause + * (some of these could be outer joins) */ void remove_join_clause_from_rels(PlannerInfo *root, @@ -128,7 +131,11 @@ remove_join_clause_from_rels(PlannerInfo *root, cur_relid = -1; while ((cur_relid = bms_next_member(join_relids, cur_relid)) >= 0) { - RelOptInfo *rel = find_base_rel(root, cur_relid); + RelOptInfo *rel = find_base_rel_ignore_join(root, cur_relid); + + /* We would only have added the clause to baserels */ + if (rel == NULL) + continue; /* * Remove the restrictinfo from the list. Pointer comparison is diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c index 493aaa7aedc..abc994dbf2e 100644 --- a/src/backend/optimizer/util/orclauses.c +++ b/src/backend/optimizer/util/orclauses.c @@ -338,6 +338,10 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, sjinfo.syn_lefthand = sjinfo.min_lefthand; sjinfo.syn_righthand = sjinfo.min_righthand; sjinfo.jointype = JOIN_INNER; + sjinfo.ojrelid = 0; + sjinfo.commute_above_l = NULL; + sjinfo.commute_above_r = NULL; + sjinfo.commute_below = NULL; /* we don't bother trying to make the remaining fields valid */ sjinfo.lhs_strict = false; sjinfo.delay_upper_joins = false; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 4478036bb6a..f2bf68d33be 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1307,7 +1307,7 @@ create_append_path(PlannerInfo *root, * Apply query-wide LIMIT if known and path is for sole base relation. * (Handling this at this low level is a bit klugy.) */ - if (root != NULL && bms_equal(rel->relids, root->all_baserels)) + if (root != NULL && bms_equal(rel->relids, root->all_query_rels)) pathnode->limit_tuples = root->limit_tuples; else pathnode->limit_tuples = -1.0; @@ -1436,7 +1436,7 @@ create_merge_append_path(PlannerInfo *root, * Apply query-wide LIMIT if known and path is for sole base relation. * (Handling this at this low level is a bit klugy.) */ - if (bms_equal(rel->relids, root->all_baserels)) + if (bms_equal(rel->relids, root->all_query_rels)) pathnode->limit_tuples = root->limit_tuples; else pathnode->limit_tuples = -1.0; @@ -2442,12 +2442,12 @@ create_nestloop_path(PlannerInfo *root, * restrict_clauses that are due to be moved into the inner path. We have * to do this now, rather than postpone the work till createplan time, * because the restrict_clauses list can affect the size and cost - * estimates for this path. + * estimates for this path. We detect such clauses by checking for serial + * number match to clauses already enforced in the inner path. */ if (bms_overlap(inner_req_outer, outer_path->parent->relids)) { - Relids inner_and_outer = bms_union(inner_path->parent->relids, - inner_req_outer); + Bitmapset *enforced_serials = get_param_path_clause_serials(inner_path); List *jclauses = NIL; ListCell *lc; @@ -2455,9 +2455,7 @@ create_nestloop_path(PlannerInfo *root, { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); - if (!join_clause_is_movable_into(rinfo, - inner_path->parent->relids, - inner_and_outer)) + if (!bms_is_member(rinfo->rinfo_serial, enforced_serials)) jclauses = lappend(jclauses, rinfo); } restrict_clauses = jclauses; @@ -4298,6 +4296,7 @@ do { \ new_ppi->ppi_rows = old_ppi->ppi_rows; new_ppi->ppi_clauses = old_ppi->ppi_clauses; ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses); + new_ppi->ppi_serials = bms_copy(old_ppi->ppi_serials); rel->ppilist = lappend(rel->ppilist, new_ppi); MemoryContextSwitchTo(oldcontext); diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index 72b9977022b..af10dbd124f 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -23,17 +23,32 @@ #include "optimizer/planmain.h" #include "utils/lsyscache.h" + +typedef struct contain_placeholder_references_context +{ + int relid; + int sublevels_up; +} contain_placeholder_references_context; + /* Local functions */ static void find_placeholders_recurse(PlannerInfo *root, Node *jtnode); static void find_placeholders_in_expr(PlannerInfo *root, Node *expr); +static bool contain_placeholder_references_walker(Node *node, + contain_placeholder_references_context *context); /* * make_placeholder_expr * Make a PlaceHolderVar for the given expression. * - * phrels is the syntactic location (as a set of baserels) to attribute + * phrels is the syntactic location (as a set of relids) to attribute * to the expression. + * + * The caller is responsible for adjusting phlevelsup and phnullingrels + * as needed. Because we do not know here which query level the PHV + * will be associated with, it's important that this function touches + * only root->glob; messing with other parts of PlannerInfo would be + * likely to do the wrong thing. */ PlaceHolderVar * make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels) @@ -42,8 +57,9 @@ make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels) phv->phexpr = expr; phv->phrels = phrels; + phv->phnullingrels = NULL; /* caller may change this later */ phv->phid = ++(root->glob->lastPHId); - phv->phlevelsup = 0; + phv->phlevelsup = 0; /* caller may change this later */ return phv; } @@ -93,6 +109,15 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv) phinfo->ph_var = copyObject(phv); /* + * By convention, phinfo->ph_var->phnullingrels is always empty, since the + * PlaceHolderInfo represents the initially-calculated state of the + * PlaceHolderVar. PlaceHolderVars appearing in the query tree might have + * varying values of phnullingrels, reflecting outer joins applied above + * the calculation level. + */ + phinfo->ph_var->phnullingrels = NULL; + + /* * Any referenced rels that are outside the PHV's syntactic scope are * LATERAL references, which should be included in ph_lateral but not in * ph_eval_at. If no referenced rels are within the syntactic scope, @@ -339,6 +364,8 @@ update_placeholder_eval_levels(PlannerInfo *root, SpecialJoinInfo *new_sjinfo) sjinfo->min_lefthand); eval_at = bms_add_members(eval_at, sjinfo->min_righthand); + if (sjinfo->ojrelid) + eval_at = bms_add_member(eval_at, sjinfo->ojrelid); /* we'll need another iteration */ found_some = true; } @@ -413,6 +440,14 @@ add_placeholders_to_base_rels(PlannerInfo *root) { RelOptInfo *rel = find_base_rel(root, varno); + /* + * As in add_vars_to_targetlist(), a value computed at scan level + * has not yet been nulled by any outer join, so its phnullingrels + * should be empty. + */ + Assert(phinfo->ph_var->phnullingrels == NULL); + + /* Copying the PHV might be unnecessary here, but be safe */ rel->reltarget->exprs = lappend(rel->reltarget->exprs, copyObject(phinfo->ph_var)); /* reltarget's cost and width fields will be updated later */ @@ -435,7 +470,8 @@ add_placeholders_to_base_rels(PlannerInfo *root) */ void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, - RelOptInfo *outer_rel, RelOptInfo *inner_rel) + RelOptInfo *outer_rel, RelOptInfo *inner_rel, + SpecialJoinInfo *sjinfo) { Relids relids = joinrel->relids; ListCell *lc; @@ -466,9 +502,17 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, if (!bms_is_subset(phinfo->ph_eval_at, outer_rel->relids) && !bms_is_subset(phinfo->ph_eval_at, inner_rel->relids)) { - PlaceHolderVar *phv = phinfo->ph_var; + /* Copying might be unnecessary here, but be safe */ + PlaceHolderVar *phv = copyObject(phinfo->ph_var); QualCost cost; + /* + * It'll start out not nulled by anything. Joins above + * this one might add to its phnullingrels later, in much + * the same way as for Vars. + */ + Assert(phv->phnullingrels == NULL); + joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, phv); cost_qual_eval_node(&cost, (Node *) phv->phexpr, root); @@ -499,3 +543,74 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, } } } + +/* + * contain_placeholder_references_to + * Detect whether any PlaceHolderVars in the given clause contain + * references to the given relid (typically an OJ relid). + * + * "Contain" means that there's a use of the relid inside the PHV's + * contained expression, so that changing the nullability status of + * the rel might change what the PHV computes. + * + * The code here to cope with upper-level PHVs is likely dead, but keep it + * anyway just in case. + */ +bool +contain_placeholder_references_to(PlannerInfo *root, Node *clause, + int relid) +{ + contain_placeholder_references_context context; + + /* We can answer quickly in the common case that there's no PHVs at all */ + if (root->glob->lastPHId == 0) + return false; + /* Else run the recursive search */ + context.relid = relid; + context.sublevels_up = 0; + return contain_placeholder_references_walker(clause, &context); +} + +static bool +contain_placeholder_references_walker(Node *node, + contain_placeholder_references_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + /* We should just look through PHVs of other query levels */ + if (phv->phlevelsup == context->sublevels_up) + { + /* If phrels matches, we found what we came for */ + if (bms_is_member(context->relid, phv->phrels)) + return true; + + /* + * We should not examine phnullingrels: what we are looking for is + * references in the contained expression, not OJs that might null + * the result afterwards. Also, we don't need to recurse into the + * contained expression, because phrels should adequately + * summarize what's in there. So we're done here. + */ + return false; + } + } + else if (IsA(node, Query)) + { + /* Recurse into RTE subquery or not-yet-planned sublink subquery */ + bool result; + + context->sublevels_up++; + result = query_tree_walker((Query *) node, + contain_placeholder_references_walker, + context, + 0); + context->sublevels_up--; + return result; + } + return expression_tree_walker(node, contain_placeholder_references_walker, + context); +} diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 0a5632699d6..ebfb4ddd121 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -28,6 +28,7 @@ #include "optimizer/plancat.h" #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" +#include "rewrite/rewriteManip.h" #include "parser/parse_relation.h" #include "utils/hsearch.h" #include "utils/lsyscache.h" @@ -40,7 +41,9 @@ typedef struct JoinHashEntry } JoinHashEntry; static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, - RelOptInfo *input_rel); + RelOptInfo *input_rel, + SpecialJoinInfo *sjinfo, + bool can_null); static List *build_joinrel_restrictlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, @@ -48,8 +51,10 @@ static List *build_joinrel_restrictlist(PlannerInfo *root, static void build_joinrel_joinlist(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); -static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel, - List *joininfo_list, +static List *subbuild_joinrel_restrictlist(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *input_rel, + Relids both_input_relids, List *new_restrictlist); static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel, List *joininfo_list, @@ -57,10 +62,12 @@ static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel, static void set_foreign_rel_properties(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel); -static void build_joinrel_partition_info(RelOptInfo *joinrel, +static void build_joinrel_partition_info(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, - List *restrictlist, JoinType jointype); -static bool have_partkey_equi_join(RelOptInfo *joinrel, + SpecialJoinInfo *sjinfo, + List *restrictlist); +static bool have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, List *restrictlist); static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, @@ -373,7 +380,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) /* * find_base_rel - * Find a base or other relation entry, which must already exist. + * Find a base or otherrel relation entry, which must already exist. */ RelOptInfo * find_base_rel(PlannerInfo *root, int relid) @@ -395,6 +402,44 @@ find_base_rel(PlannerInfo *root, int relid) } /* + * find_base_rel_ignore_join + * Find a base or otherrel relation entry, which must already exist. + * + * Unlike find_base_rel, if relid references an outer join then this + * will return NULL rather than raising an error. This is convenient + * for callers that must deal with relid sets including both base and + * outer joins. + */ +RelOptInfo * +find_base_rel_ignore_join(PlannerInfo *root, int relid) +{ + Assert(relid > 0); + + if (relid < root->simple_rel_array_size) + { + RelOptInfo *rel; + RangeTblEntry *rte; + + rel = root->simple_rel_array[relid]; + if (rel) + return rel; + + /* + * We could just return NULL here, but for debugging purposes it seems + * best to actually verify that the relid is an outer join and not + * something weird. + */ + rte = root->simple_rte_array[relid]; + if (rte && rte->rtekind == RTE_JOIN && rte->jointype != JOIN_INNER) + return NULL; + } + + elog(ERROR, "no relation entry for relid %d", relid); + + return NULL; /* keep compiler quiet */ +} + +/* * build_join_rel_hash * Construct the auxiliary hash table for join relations. */ @@ -692,9 +737,11 @@ build_join_rel(PlannerInfo *root, * and inner rels we first try to build it from. But the contents should * be the same regardless. */ - build_joinrel_tlist(root, joinrel, outer_rel); - build_joinrel_tlist(root, joinrel, inner_rel); - add_placeholders_to_joinrel(root, joinrel, outer_rel, inner_rel); + build_joinrel_tlist(root, joinrel, outer_rel, sjinfo, + (sjinfo->jointype == JOIN_FULL)); + build_joinrel_tlist(root, joinrel, inner_rel, sjinfo, + (sjinfo->jointype != JOIN_INNER)); + add_placeholders_to_joinrel(root, joinrel, outer_rel, inner_rel, sjinfo); /* * add_placeholders_to_joinrel also took care of adding the ph_lateral @@ -726,8 +773,8 @@ build_join_rel(PlannerInfo *root, joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel); /* Store the partition information. */ - build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist, - sjinfo->jointype); + build_joinrel_partition_info(root, joinrel, outer_rel, inner_rel, sjinfo, + restrictlist); /* * Set estimates of the joinrel's size. @@ -783,16 +830,14 @@ build_join_rel(PlannerInfo *root, * 'parent_joinrel' is the RelOptInfo representing the join between parent * relations. Some of the members of new RelOptInfo are produced by * translating corresponding members of this RelOptInfo - * 'sjinfo': child-join context info * 'restrictlist': list of RestrictInfo nodes that apply to this particular * pair of joinable relations - * 'jointype' is the join type (inner, left, full, etc) + * 'sjinfo': child join's join-type details */ RelOptInfo * build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, RelOptInfo *parent_joinrel, - List *restrictlist, SpecialJoinInfo *sjinfo, - JoinType jointype) + List *restrictlist, SpecialJoinInfo *sjinfo) { RelOptInfo *joinrel = makeNode(RelOptInfo); AppendRelInfo **appinfos; @@ -806,6 +851,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->reloptkind = RELOPT_OTHER_JOINREL; joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids); + if (sjinfo->ojrelid != 0) + joinrel->relids = bms_add_member(joinrel->relids, sjinfo->ojrelid); joinrel->rows = 0; /* cheap startup cost is interesting iff not all tuples to be retrieved */ joinrel->consider_startup = (root->tuple_fraction > 0); @@ -892,8 +939,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->has_eclass_joins = parent_joinrel->has_eclass_joins; /* Is the join between partitions itself partitioned? */ - build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist, - jointype); + build_joinrel_partition_info(root, joinrel, outer_rel, inner_rel, sjinfo, + restrictlist); /* Child joinrel is parallel safe if parent is parallel safe. */ joinrel->consider_parallel = parent_joinrel->consider_parallel; @@ -975,10 +1022,41 @@ min_join_parameterization(PlannerInfo *root, * * We also compute the expected width of the join's output, making use * of data that was cached at the baserel level by set_rel_width(). + * + * Pass can_null as true if the join is an outer join that can null Vars + * from this input relation. If so, we will (normally) add the join's relid + * to the nulling bitmaps of Vars and PHVs bubbled up from the input. + * + * When forming an outer join's target list, special handling is needed + * in case the outer join was commuted with another one per outer join + * identity 3 (see optimizer/README). We must take steps to ensure that + * the output Vars have the same nulling bitmaps that they would if the + * two joins had been done in syntactic order; else they won't match Vars + * appearing higher in the query tree. We need to do two things: + * + * First, sjinfo->commute_above_r is added to the nulling bitmaps of RHS Vars. + * This takes care of the case where we implement + * A leftjoin (B leftjoin C on (Pbc)) on (Pab) + * as + * (A leftjoin B on (Pab)) leftjoin C on (Pbc) + * The C columns emitted by the B/C join need to be shown as nulled by both + * the B/C and A/B joins, even though they've not traversed the A/B join. + * (If the joins haven't been commuted, we are adding the nullingrel bits + * prematurely; but that's okay because the C columns can't be referenced + * between here and the upper join.) + * + * Second, if a RHS Var has any of the relids in sjinfo->commute_above_l + * already set in its nulling bitmap, then we *don't* add sjinfo->ojrelid + * to its nulling bitmap (but we do still add commute_above_r). This takes + * care of the reverse transformation: if the original syntax was + * (A leftjoin B on (Pab)) leftjoin C on (Pbc) + * then the now-upper A/B join must not mark C columns as nulled by itself. */ static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, - RelOptInfo *input_rel) + RelOptInfo *input_rel, + SpecialJoinInfo *sjinfo, + bool can_null) { Relids relids = joinrel->relids; ListCell *vars; @@ -998,7 +1076,24 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, /* Is it still needed above this joinrel? */ if (bms_nonempty_difference(phinfo->ph_needed, relids)) { - /* Yup, add it to the output */ + /* + * Yup, add it to the output. If this join potentially nulls + * this input, we have to update the PHV's phnullingrels, + * which means making a copy. + */ + if (can_null) + { + phv = copyObject(phv); + /* See comments above to understand this logic */ + if (sjinfo->ojrelid != 0 && + !bms_overlap(phv->phnullingrels, sjinfo->commute_above_l)) + phv->phnullingrels = bms_add_member(phv->phnullingrels, + sjinfo->ojrelid); + if (sjinfo->commute_above_r) + phv->phnullingrels = bms_add_members(phv->phnullingrels, + sjinfo->commute_above_r); + } + joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, phv); /* Bubbling up the precomputed result has cost zero */ @@ -1022,9 +1117,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, RowIdentityVarInfo *ridinfo = (RowIdentityVarInfo *) list_nth(root->row_identity_vars, var->varattno - 1); - joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, - var); - /* Vars have cost zero, so no need to adjust reltarget->cost */ + /* Update reltarget width estimate from RowIdentityVarInfo */ joinrel->reltarget->width += ridinfo->rowidwidth; } else @@ -1037,15 +1130,35 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, /* Is it still needed above this joinrel? */ ndx = var->varattno - baserel->min_attr; - if (bms_nonempty_difference(baserel->attr_needed[ndx], relids)) - { - /* Yup, add it to the output */ - joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, - var); - /* Vars have cost zero, so no need to adjust reltarget->cost */ - joinrel->reltarget->width += baserel->attr_widths[ndx]; - } + if (!bms_nonempty_difference(baserel->attr_needed[ndx], relids)) + continue; /* nope, skip it */ + + /* Update reltarget width estimate from baserel's attr_widths */ + joinrel->reltarget->width += baserel->attr_widths[ndx]; } + + /* + * Add the Var to the output. If this join potentially nulls this + * input, we have to update the Var's varnullingrels, which means + * making a copy. + */ + if (can_null) + { + var = copyObject(var); + /* See comments above to understand this logic */ + if (sjinfo->ojrelid != 0 && + !bms_overlap(var->varnullingrels, sjinfo->commute_above_l)) + var->varnullingrels = bms_add_member(var->varnullingrels, + sjinfo->ojrelid); + if (sjinfo->commute_above_r) + var->varnullingrels = bms_add_members(var->varnullingrels, + sjinfo->commute_above_r); + } + + joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, + var); + + /* Vars have cost zero, so no need to adjust reltarget->cost */ } } @@ -1064,7 +1177,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, * is not handled in the sub-relations, so it depends on which * sub-relations are considered. * - * If a join clause from an input relation refers to base rels still not + * If a join clause from an input relation refers to base+OJ rels still not * present in the joinrel, then it is still a join clause for the joinrel; * we put it into the joininfo list for the joinrel. Otherwise, * the clause is now a restrict clause for the joined relation, and we @@ -1098,14 +1211,19 @@ build_joinrel_restrictlist(PlannerInfo *root, RelOptInfo *inner_rel) { List *result; + Relids both_input_relids; + + both_input_relids = bms_union(outer_rel->relids, inner_rel->relids); /* * Collect all the clauses that syntactically belong at this level, * eliminating any duplicates (important since we will see many of the * same clauses arriving from both input relations). */ - result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL); - result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result); + result = subbuild_joinrel_restrictlist(root, joinrel, outer_rel, + both_input_relids, NIL); + result = subbuild_joinrel_restrictlist(root, joinrel, inner_rel, + both_input_relids, result); /* * Add on any clauses derived from EquivalenceClasses. These cannot be @@ -1140,24 +1258,63 @@ build_joinrel_joinlist(RelOptInfo *joinrel, } static List * -subbuild_joinrel_restrictlist(RelOptInfo *joinrel, - List *joininfo_list, +subbuild_joinrel_restrictlist(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *input_rel, + Relids both_input_relids, List *new_restrictlist) { ListCell *l; - foreach(l, joininfo_list) + foreach(l, input_rel->joininfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); if (bms_is_subset(rinfo->required_relids, joinrel->relids)) { /* - * This clause becomes a restriction clause for the joinrel, since - * it refers to no outside rels. Add it to the list, being - * careful to eliminate duplicates. (Since RestrictInfo nodes in - * different joinlists will have been multiply-linked rather than - * copied, pointer equality should be a sufficient test.) + * This clause should become a restriction clause for the joinrel, + * since it refers to no outside rels. However, if it's a clone + * clause then it might be too late to evaluate it, so we have to + * check. (If it is too late, just ignore the clause, taking it + * on faith that another clone was or will be selected.) Clone + * clauses should always be outer-join clauses, so we compare + * against both_input_relids. + */ + if (rinfo->has_clone || rinfo->is_clone) + { + Assert(!RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)); + if (!bms_is_subset(rinfo->required_relids, both_input_relids)) + continue; + if (!clause_is_computable_at(root, rinfo->clause_relids, + both_input_relids)) + continue; + } + else + { + /* + * For non-clone clauses, we just Assert it's OK. These might + * be either join or filter clauses. + */ +#ifdef USE_ASSERT_CHECKING + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) + Assert(clause_is_computable_at(root, rinfo->clause_relids, + joinrel->relids)); + else + { + Assert(bms_is_subset(rinfo->required_relids, + both_input_relids)); + Assert(clause_is_computable_at(root, rinfo->clause_relids, + both_input_relids)); + } +#endif + } + + /* + * OK, so add it to the list, being careful to eliminate + * duplicates. (Since RestrictInfo nodes in different joinlists + * will have been multiply-linked rather than copied, pointer + * equality should be a sufficient test.) */ new_restrictlist = list_append_unique_ptr(new_restrictlist, rinfo); } @@ -1319,6 +1476,7 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *ppi; Relids joinrelids; List *pclauses; + Bitmapset *pserials; double rows; ListCell *lc; @@ -1361,6 +1519,15 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, required_outer, baserel)); + /* Compute set of serial numbers of the enforced clauses */ + pserials = NULL; + foreach(lc, pclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + pserials = bms_add_member(pserials, rinfo->rinfo_serial); + } + /* Estimate the number of rows returned by the parameterized scan */ rows = get_parameterized_baserel_size(root, baserel, pclauses); @@ -1369,6 +1536,7 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = pclauses; + ppi->ppi_serials = pserials; baserel->ppilist = lappend(baserel->ppilist, ppi); return ppi; @@ -1594,6 +1762,7 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = NIL; + ppi->ppi_serials = NULL; joinrel->ppilist = lappend(joinrel->ppilist, ppi); return ppi; @@ -1632,6 +1801,7 @@ get_appendrel_parampathinfo(RelOptInfo *appendrel, Relids required_outer) ppi->ppi_req_outer = required_outer; ppi->ppi_rows = 0; ppi->ppi_clauses = NIL; + ppi->ppi_serials = NULL; appendrel->ppilist = lappend(appendrel->ppilist, ppi); return ppi; @@ -1658,15 +1828,110 @@ find_param_path_info(RelOptInfo *rel, Relids required_outer) } /* + * get_param_path_clause_serials + * Given a parameterized Path, return the set of pushed-down clauses + * (identified by rinfo_serial numbers) enforced within the Path. + */ +Bitmapset * +get_param_path_clause_serials(Path *path) +{ + if (path->param_info == NULL) + return NULL; /* not parameterized */ + if (IsA(path, NestPath) || + IsA(path, MergePath) || + IsA(path, HashPath)) + { + /* + * For a join path, combine clauses enforced within either input path + * with those enforced as joinrestrictinfo in this path. Note that + * joinrestrictinfo may include some non-pushed-down clauses, but for + * current purposes it's okay if we include those in the result. (To + * be more careful, we could check for clause_relids overlapping the + * path parameterization, but it's not worth the cycles for now.) + */ + JoinPath *jpath = (JoinPath *) path; + Bitmapset *pserials; + ListCell *lc; + + pserials = NULL; + pserials = bms_add_members(pserials, + get_param_path_clause_serials(jpath->outerjoinpath)); + pserials = bms_add_members(pserials, + get_param_path_clause_serials(jpath->innerjoinpath)); + foreach(lc, jpath->joinrestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + pserials = bms_add_member(pserials, rinfo->rinfo_serial); + } + return pserials; + } + else if (IsA(path, AppendPath)) + { + /* + * For an appendrel, take the intersection of the sets of clauses + * enforced in each input path. + */ + AppendPath *apath = (AppendPath *) path; + Bitmapset *pserials; + ListCell *lc; + + pserials = NULL; + foreach(lc, apath->subpaths) + { + Path *subpath = (Path *) lfirst(lc); + Bitmapset *subserials; + + subserials = get_param_path_clause_serials(subpath); + if (lc == list_head(apath->subpaths)) + pserials = bms_copy(subserials); + else + pserials = bms_int_members(pserials, subserials); + } + return pserials; + } + else if (IsA(path, MergeAppendPath)) + { + /* Same as AppendPath case */ + MergeAppendPath *apath = (MergeAppendPath *) path; + Bitmapset *pserials; + ListCell *lc; + + pserials = NULL; + foreach(lc, apath->subpaths) + { + Path *subpath = (Path *) lfirst(lc); + Bitmapset *subserials; + + subserials = get_param_path_clause_serials(subpath); + if (lc == list_head(apath->subpaths)) + pserials = bms_copy(subserials); + else + pserials = bms_int_members(pserials, subserials); + } + return pserials; + } + else + { + /* + * Otherwise, it's a baserel path and we can use the + * previously-computed set of serial numbers. + */ + return path->param_info->ppi_serials; + } +} + +/* * build_joinrel_partition_info * Checks if the two relations being joined can use partitionwise join * and if yes, initialize partitioning information of the resulting * partitioned join relation. */ static void -build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, - RelOptInfo *inner_rel, List *restrictlist, - JoinType jointype) +build_joinrel_partition_info(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, + RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, + List *restrictlist) { PartitionScheme part_scheme; @@ -1692,8 +1957,8 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, !outer_rel->consider_partitionwise_join || !inner_rel->consider_partitionwise_join || outer_rel->part_scheme != inner_rel->part_scheme || - !have_partkey_equi_join(joinrel, outer_rel, inner_rel, - jointype, restrictlist)) + !have_partkey_equi_join(root, joinrel, outer_rel, inner_rel, + sjinfo->jointype, restrictlist)) { Assert(!IS_PARTITIONED_REL(joinrel)); return; @@ -1717,7 +1982,8 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, * child-join relations of the join relation in try_partitionwise_join(). */ joinrel->part_scheme = part_scheme; - set_joinrel_partition_key_exprs(joinrel, outer_rel, inner_rel, jointype); + set_joinrel_partition_key_exprs(joinrel, outer_rel, inner_rel, + sjinfo->jointype); /* * Set the consider_partitionwise_join flag. @@ -1735,7 +2001,7 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, * partition keys. */ static bool -have_partkey_equi_join(RelOptInfo *joinrel, +have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, List *restrictlist) { @@ -1801,6 +2067,24 @@ have_partkey_equi_join(RelOptInfo *joinrel, strict_op = op_strict(opexpr->opno); /* + * Vars appearing in the relation's partition keys will not have any + * varnullingrels, but those in expr1 and expr2 will if we're above + * outer joins that could null the respective rels. It's okay to + * match anyway, if the join operator is strict. + */ + if (strict_op) + { + if (bms_overlap(rel1->relids, root->outer_join_rels)) + expr1 = (Expr *) remove_nulling_relids((Node *) expr1, + root->outer_join_rels, + NULL); + if (bms_overlap(rel2->relids, root->outer_join_rels)) + expr2 = (Expr *) remove_nulling_relids((Node *) expr2, + root->outer_join_rels, + NULL); + } + + /* * Only clauses referencing the partition keys are useful for * partitionwise join. */ @@ -2012,7 +2296,12 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel, * partitionwise nesting of any outer join.) We assume no * type coercions are needed to make the coalesce expressions, * since columns of different types won't have gotten - * classified as the same PartitionScheme. + * classified as the same PartitionScheme. Note that we + * intentionally leave out the varnullingrels decoration that + * would ordinarily appear on the Vars inside these + * CoalesceExprs, because have_partkey_equi_join will strip + * varnullingrels from the expressions it will compare to the + * partexprs. */ foreach(lc, list_concat_copy(outer_expr, outer_null_expr)) { diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index 0c3878a805b..1350f011a62 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -53,6 +53,10 @@ static Expr *make_sub_restrictinfos(PlannerInfo *root, * required_relids can be NULL, in which case it defaults to the actual clause * contents (i.e., clause_relids). * + * Note that there aren't options to set the has_clone and is_clone flags: + * we always initialize those to false. There's just one place that wants + * something different, so making all callers pass them seems inconvenient. + * * We initialize fields that depend only on the given subexpression, leaving * others that depend on context (or may never be needed at all) to be filled * later. @@ -116,12 +120,15 @@ make_restrictinfo_internal(PlannerInfo *root, Relids nullable_relids) { RestrictInfo *restrictinfo = makeNode(RestrictInfo); + Relids baserels; restrictinfo->clause = clause; restrictinfo->orclause = orclause; restrictinfo->is_pushed_down = is_pushed_down; restrictinfo->outerjoin_delayed = outerjoin_delayed; restrictinfo->pseudoconstant = pseudoconstant; + restrictinfo->has_clone = false; /* may get set by caller */ + restrictinfo->is_clone = false; /* may get set by caller */ restrictinfo->can_join = false; /* may get set below */ restrictinfo->security_level = security_level; restrictinfo->outer_relids = outer_relids; @@ -188,6 +195,25 @@ make_restrictinfo_internal(PlannerInfo *root, restrictinfo->required_relids = restrictinfo->clause_relids; /* + * Count the number of base rels appearing in clause_relids. To do this, + * we just delete rels mentioned in root->outer_join_rels and count the + * survivors. Because we are called during deconstruct_jointree which is + * the same tree walk that populates outer_join_rels, this is a little bit + * unsafe-looking; but it should be fine because the recursion in + * deconstruct_jointree should already have visited any outer join that + * could be mentioned in this clause. + */ + baserels = bms_difference(restrictinfo->clause_relids, + root->outer_join_rels); + restrictinfo->num_base_rels = bms_num_members(baserels); + bms_free(baserels); + + /* + * Label this RestrictInfo with a fresh serial number. + */ + restrictinfo->rinfo_serial = ++(root->last_rinfo_serial); + + /* * Fill in all the cacheable fields with "not yet set" markers. None of * these will be computed until/unless needed. Note in particular that we * don't mark a binary opclause as mergejoinable or hashjoinable here; @@ -350,7 +376,7 @@ commute_restrictinfo(RestrictInfo *rinfo, Oid comm_op) * ... and adjust those we need to change. Note in particular that we can * preserve any cached selectivity or cost estimates, since those ought to * be the same for the new clause. Likewise we can keep the source's - * parent_ec. + * parent_ec. It's also important that we keep the same rinfo_serial. */ result->clause = (Expr *) newclause; result->left_relids = rinfo->right_relids; @@ -497,6 +523,58 @@ extract_actual_join_clauses(List *restrictinfo_list, } } +/* + * clause_is_computable_at + * Test whether a clause is computable at a given evaluation level. + * + * There are two conditions for whether an expression can actually be + * evaluated at a given join level: the evaluation context must include + * all the relids (both base and OJ) used by the expression, and we must + * not have already evaluated any outer joins that null Vars/PHVs of the + * expression and are not listed in their nullingrels. + * + * This function checks the second condition; we assume the caller already + * saw to the first one. + * + * For speed reasons, we don't individually examine each Var/PHV of the + * expression, but just look at the overall clause_relids (the union of the + * varnos and varnullingrels). This could give a misleading answer if the + * Vars of a given varno don't all have the same varnullingrels; but that + * really shouldn't happen within a single scalar expression or RestrictInfo + * clause. Despite that, this is still annoyingly expensive :-( + */ +bool +clause_is_computable_at(PlannerInfo *root, + Relids clause_relids, + Relids eval_relids) +{ + ListCell *lc; + + /* Nothing to do if no outer joins have been performed yet. */ + if (!bms_overlap(eval_relids, root->outer_join_rels)) + return true; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + + /* Ignore outer joins that are not yet performed. */ + if (!bms_is_member(sjinfo->ojrelid, eval_relids)) + continue; + + /* OK if clause lists it (we assume all Vars in it agree). */ + if (bms_is_member(sjinfo->ojrelid, clause_relids)) + continue; + + /* Else, trouble if clause mentions any nullable Vars. */ + if (bms_overlap(clause_relids, sjinfo->min_righthand) || + (sjinfo->jointype == JOIN_FULL && + bms_overlap(clause_relids, sjinfo->min_lefthand))) + return false; /* doesn't work */ + } + + return true; /* OK */ +} /* * join_clause_is_movable_to @@ -522,6 +600,12 @@ extract_actual_join_clauses(List *restrictinfo_list, * Also, the join clause must not use any relations that have LATERAL * references to the target relation, since we could not put such rels on * the outer side of a nestloop with the target relation. + * + * Also, we reject is_clone versions of outer-join clauses. This has the + * effect of preventing us from generating variant parameterized paths + * that differ only in which outer joins null the parameterization rel(s). + * Generating one path from the minimally-parameterized has_clone version + * is sufficient. */ bool join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel) @@ -542,6 +626,10 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel) if (bms_overlap(baserel->lateral_referencers, rinfo->clause_relids)) return false; + /* Ignore clones, too */ + if (rinfo->is_clone) + return false; + return true; } @@ -587,6 +675,9 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel) * moved for some valid set of outer rels, so we don't have the benefit of * relying on prior checks for lateral-reference validity. * + * Likewise, we don't check is_clone here: rejecting the inappropriate + * variants of a cloned clause must be handled upstream. + * * Note: if this returns true, it means that the clause could be moved to * this join relation, but that doesn't mean that this is the lowest join * it could be moved to. Caller may need to make additional calls to verify diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 09cc7d6f518..8c9824e54d6 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -62,6 +62,7 @@ typedef struct typedef struct { + PlannerInfo *root; /* could be NULL! */ Query *query; /* outer Query */ int sublevels_up; bool possible_sublink; /* could aliases include a SubLink? */ @@ -80,6 +81,10 @@ static bool pull_var_clause_walker(Node *node, pull_var_clause_context *context); static Node *flatten_join_alias_vars_mutator(Node *node, flatten_join_alias_vars_context *context); +static Node *add_nullingrels_if_needed(PlannerInfo *root, Node *newnode, + Var *oldvar); +static bool is_standard_join_alias_expression(Node *newnode, Var *oldvar); +static void adjust_standard_join_alias_expression(Node *newnode, Var *oldvar); static Relids alias_relid_set(Query *query, Relids relids); @@ -88,6 +93,9 @@ static Relids alias_relid_set(Query *query, Relids relids); * Create a set of all the distinct varnos present in a parsetree. * Only varnos that reference level-zero rtable entries are considered. * + * The result includes outer-join relids mentioned in Var.varnullingrels and + * PlaceHolderVar.phnullingrels fields in the parsetree. + * * "root" can be passed as NULL if it is not necessary to process * PlaceHolderVars. * @@ -153,7 +161,11 @@ pull_varnos_walker(Node *node, pull_varnos_context *context) Var *var = (Var *) node; if (var->varlevelsup == context->sublevels_up) + { context->varnos = bms_add_member(context->varnos, var->varno); + context->varnos = bms_add_members(context->varnos, + var->varnullingrels); + } return false; } if (IsA(node, CurrentOfExpr)) @@ -244,6 +256,14 @@ pull_varnos_walker(Node *node, pull_varnos_context *context) context->varnos = bms_join(context->varnos, newevalat); } + + /* + * In all three cases, include phnullingrels in the result. We + * don't worry about possibly needing to translate it, because + * appendrels only translate varnos of baserels, not outer joins. + */ + context->varnos = bms_add_members(context->varnos, + phv->phnullingrels); return false; /* don't recurse into expression */ } } @@ -707,26 +727,42 @@ pull_var_clause_walker(Node *node, pull_var_clause_context *context) * is the only way that the executor can directly handle whole-row Vars. * * This also adjusts relid sets found in some expression node types to - * substitute the contained base rels for any join relid. + * substitute the contained base+OJ rels for any join relid. * * If a JOIN contains sub-selects that have been flattened, its join alias * entries might now be arbitrary expressions, not just Vars. This affects - * this function in one important way: we might find ourselves inserting - * SubLink expressions into subqueries, and we must make sure that their - * Query.hasSubLinks fields get set to true if so. If there are any + * this function in two important ways. First, we might find ourselves + * inserting SubLink expressions into subqueries, and we must make sure that + * their Query.hasSubLinks fields get set to true if so. If there are any * SubLinks in the join alias lists, the outer Query should already have * hasSubLinks = true, so this is only relevant to un-flattened subqueries. + * Second, we have to preserve any varnullingrels info attached to the + * alias Vars we're replacing. If the replacement expression is a Var or + * PlaceHolderVar or constructed from those, we can just add the + * varnullingrels bits to the existing nullingrels field(s); otherwise + * we have to add a PlaceHolderVar wrapper. * - * NOTE: this is used on not-yet-planned expressions. We do not expect it - * to be applied directly to the whole Query, so if we see a Query to start - * with, we do want to increment sublevels_up (this occurs for LATERAL - * subqueries). + * NOTE: this is also used by the parser, to expand join alias Vars before + * checking GROUP BY validity. For that use-case, root will be NULL, which + * is why we have to pass the Query separately. We need the root itself only + * for making PlaceHolderVars. We can avoid making PlaceHolderVars in the + * parser's usage because it won't be dealing with arbitrary expressions: + * so long as adjust_standard_join_alias_expression can handle everything + * the parser would make as a join alias expression, we're OK. */ Node * -flatten_join_alias_vars(Query *query, Node *node) +flatten_join_alias_vars(PlannerInfo *root, Query *query, Node *node) { flatten_join_alias_vars_context context; + /* + * We do not expect this to be applied to the whole Query, only to + * expressions or LATERAL subqueries. Hence, if the top node is a Query, + * it's okay to immediately increment sublevels_up. + */ + Assert(node != (Node *) query); + + context.root = root; context.query = query; context.sublevels_up = 0; /* flag whether join aliases could possibly contain SubLinks */ @@ -797,7 +833,9 @@ flatten_join_alias_vars_mutator(Node *node, rowexpr->colnames = colnames; rowexpr->location = var->location; - return (Node *) rowexpr; + /* Lastly, add any varnullingrels to the replacement expression */ + return add_nullingrels_if_needed(context->root, (Node *) rowexpr, + var); } /* Expand join alias reference */ @@ -824,7 +862,8 @@ flatten_join_alias_vars_mutator(Node *node, if (context->possible_sublink && !context->inserted_sublink) context->inserted_sublink = checkExprHasSubLink(newvar); - return newvar; + /* Lastly, add any varnullingrels to the replacement expression */ + return add_nullingrels_if_needed(context->root, newvar, var); } if (IsA(node, PlaceHolderVar)) { @@ -839,6 +878,7 @@ flatten_join_alias_vars_mutator(Node *node, { phv->phrels = alias_relid_set(context->query, phv->phrels); + /* we *don't* change phnullingrels */ } return (Node *) phv; } @@ -873,8 +913,196 @@ flatten_join_alias_vars_mutator(Node *node, } /* + * Add oldvar's varnullingrels, if any, to a flattened join alias expression. + * The newnode has been copied, so we can modify it freely. + */ +static Node * +add_nullingrels_if_needed(PlannerInfo *root, Node *newnode, Var *oldvar) +{ + if (oldvar->varnullingrels == NULL) + return newnode; /* nothing to do */ + /* If possible, do it by adding to existing nullingrel fields */ + if (is_standard_join_alias_expression(newnode, oldvar)) + adjust_standard_join_alias_expression(newnode, oldvar); + else if (root) + { + /* + * We can insert a PlaceHolderVar to carry the nullingrels. However, + * deciding where to evaluate the PHV is slightly tricky. We first + * try to evaluate it at the natural semantic level of the new + * expression; but if that expression is variable-free, fall back to + * evaluating it at the join that the oldvar is an alias Var for. + */ + PlaceHolderVar *newphv; + Index levelsup = oldvar->varlevelsup; + Relids phrels = pull_varnos_of_level(root, newnode, levelsup); + + if (bms_is_empty(phrels)) /* variable-free? */ + { + if (levelsup != 0) /* this won't work otherwise */ + elog(ERROR, "unsupported join alias expression"); + phrels = get_relids_for_join(root->parse, oldvar->varno); + /* If it's an outer join, eval below not above the join */ + phrels = bms_del_member(phrels, oldvar->varno); + Assert(!bms_is_empty(phrels)); + } + newphv = make_placeholder_expr(root, (Expr *) newnode, phrels); + /* newphv has zero phlevelsup and NULL phnullingrels; fix it */ + newphv->phlevelsup = levelsup; + newphv->phnullingrels = bms_copy(oldvar->varnullingrels); + newnode = (Node *) newphv; + } + else + { + /* ooops, we're missing support for something the parser can make */ + elog(ERROR, "unsupported join alias expression"); + } + return newnode; +} + +/* + * Check to see if we can insert nullingrels into this join alias expression + * without use of a separate PlaceHolderVar. + * + * This will handle Vars, PlaceHolderVars, and implicit-coercion and COALESCE + * expressions built from those. This coverage needs to handle anything + * that the parser would put into joinaliasvars. + */ +static bool +is_standard_join_alias_expression(Node *newnode, Var *oldvar) +{ + if (newnode == NULL) + return false; + if (IsA(newnode, Var) && + ((Var *) newnode)->varlevelsup == oldvar->varlevelsup) + return true; + else if (IsA(newnode, PlaceHolderVar) && + ((PlaceHolderVar *) newnode)->phlevelsup == oldvar->varlevelsup) + return true; + else if (IsA(newnode, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) newnode; + + /* + * We need to assume that the function wouldn't produce non-NULL from + * NULL, which is reasonable for implicit coercions but otherwise not + * so much. (Looking at its strictness is likely overkill, and anyway + * it would cause us to fail if someone forgot to mark an implicit + * coercion as strict.) + */ + if (fexpr->funcformat != COERCE_IMPLICIT_CAST || + fexpr->args == NIL) + return false; + + /* + * Examine only the first argument --- coercions might have additional + * arguments that are constants. + */ + return is_standard_join_alias_expression(linitial(fexpr->args), oldvar); + } + else if (IsA(newnode, RelabelType)) + { + RelabelType *relabel = (RelabelType *) newnode; + + /* This definitely won't produce non-NULL from NULL */ + return is_standard_join_alias_expression((Node *) relabel->arg, oldvar); + } + else if (IsA(newnode, CoerceViaIO)) + { + CoerceViaIO *iocoerce = (CoerceViaIO *) newnode; + + /* This definitely won't produce non-NULL from NULL */ + return is_standard_join_alias_expression((Node *) iocoerce->arg, oldvar); + } + else if (IsA(newnode, ArrayCoerceExpr)) + { + ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) newnode; + + /* This definitely won't produce non-NULL from NULL (at array level) */ + return is_standard_join_alias_expression((Node *) acoerce->arg, oldvar); + } + else if (IsA(newnode, CoalesceExpr)) + { + CoalesceExpr *cexpr = (CoalesceExpr *) newnode; + ListCell *lc; + + Assert(cexpr->args != NIL); + foreach(lc, cexpr->args) + { + if (!is_standard_join_alias_expression(lfirst(lc), oldvar)) + return false; + } + return true; + } + else + return false; +} + +/* + * Insert nullingrels into an expression accepted by + * is_standard_join_alias_expression. + */ +static void +adjust_standard_join_alias_expression(Node *newnode, Var *oldvar) +{ + if (IsA(newnode, Var) && + ((Var *) newnode)->varlevelsup == oldvar->varlevelsup) + { + Var *newvar = (Var *) newnode; + + newvar->varnullingrels = bms_add_members(newvar->varnullingrels, + oldvar->varnullingrels); + } + else if (IsA(newnode, PlaceHolderVar) && + ((PlaceHolderVar *) newnode)->phlevelsup == oldvar->varlevelsup) + { + PlaceHolderVar *newphv = (PlaceHolderVar *) newnode; + + newphv->phnullingrels = bms_add_members(newphv->phnullingrels, + oldvar->varnullingrels); + } + else if (IsA(newnode, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) newnode; + + adjust_standard_join_alias_expression(linitial(fexpr->args), oldvar); + } + else if (IsA(newnode, RelabelType)) + { + RelabelType *relabel = (RelabelType *) newnode; + + adjust_standard_join_alias_expression((Node *) relabel->arg, oldvar); + } + else if (IsA(newnode, CoerceViaIO)) + { + CoerceViaIO *iocoerce = (CoerceViaIO *) newnode; + + adjust_standard_join_alias_expression((Node *) iocoerce->arg, oldvar); + } + else if (IsA(newnode, ArrayCoerceExpr)) + { + ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) newnode; + + adjust_standard_join_alias_expression((Node *) acoerce->arg, oldvar); + } + else if (IsA(newnode, CoalesceExpr)) + { + CoalesceExpr *cexpr = (CoalesceExpr *) newnode; + ListCell *lc; + + Assert(cexpr->args != NIL); + foreach(lc, cexpr->args) + { + adjust_standard_join_alias_expression(lfirst(lc), oldvar); + } + } + else + Assert(false); +} + +/* * alias_relid_set: in a set of RT indexes, replace joins by their - * underlying base relids + * underlying base+OJ relids */ static Relids alias_relid_set(Query *query, Relids relids) |