diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2024-02-01 12:34:21 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2024-02-01 12:34:21 -0500 |
commit | 7af96a66f43cdb0b07956a36412b3b0da260b56e (patch) | |
tree | 34eef57bd10f49f1ca5d631b4fa36454af77a654 /src/backend/optimizer/util/pathnode.c | |
parent | a0cffcfffd11b0ea7662f47cfd4d225150e55dbf (diff) | |
download | postgresql-7af96a66f43cdb0b07956a36412b3b0da260b56e.tar.gz postgresql-7af96a66f43cdb0b07956a36412b3b0da260b56e.zip |
Apply band-aid fix for an oversight in reparameterize_path_by_child.
The path we wish to reparameterize is not a standalone object:
in particular, it implicitly references baserestrictinfo clauses
in the associated RelOptInfo, and if it's a SampleScan path then
there is also the TableSampleClause in the RTE to worry about.
Both of those could contain lateral references to the join partner
relation, which would need to be modified to refer to its child.
Since we aren't doing that, affected queries can give wrong answers,
or odd failures such as "variable not found in subplan target list",
or executor crashes. But we can't just summarily modify those
expressions, because they are shared with other paths for the rel.
We'd break things if we modify them and then end up using some
non-partitioned-join path.
In HEAD, we plan to fix this by postponing reparameterization
until create_plan(), when we know that those other paths are
no longer of interest, and then adjusting those expressions along
with the ones in the path itself. That seems like too big a change
for stable branches however. In the back branches, let's just detect
whether any troublesome lateral references actually exist in those
expressions, and fail reparameterization if so. This will result in
not performing a partitioned join in such cases. Given the lack of
field complaints, nobody's likely to miss the optimization.
Report and patch by Richard Guo. Apply to 12-16 only, since
the intended fix for HEAD looks quite different. We're not quite
ready to push the HEAD fix, but with back-branch releases coming
up soon, it seems wise to get this stopgap fix in place there.
Discussion: https://postgr.es/m/CAMbWs496+N=UAjOc=rcD3P7B6oJe4rZw08e_TZRUsWbPxZW3Tw@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/util/pathnode.c')
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index a490b204e6a..f9cf083bc63 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -26,6 +26,7 @@ #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/placeholder.h" #include "optimizer/planmain.h" #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" @@ -56,6 +57,10 @@ static int append_startup_cost_compare(const ListCell *a, const ListCell *b); static List *reparameterize_pathlist_by_child(PlannerInfo *root, List *pathlist, RelOptInfo *child_rel); +static bool contain_references_to(PlannerInfo *root, Node *clause, + Relids relids); +static bool ris_contain_references_to(PlannerInfo *root, List *rinfos, + Relids relids); /***************************************************************************** @@ -3949,6 +3954,40 @@ do { \ switch (nodeTag(path)) { case T_Path: + + /* + * If the path's restriction clauses contain lateral references to + * the other relation, we can't reparameterize, because we must + * not change the RelOptInfo's contents here. (Doing so would + * break things if we end up using a non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + + /* + * If it's a SampleScan with tablesample parameters referencing + * the other relation, we can't reparameterize, because we must + * not change the RTE's contents here. (Doing so would break + * things if we end up using a non-partitionwise join.) + */ + if (path->pathtype == T_SampleScan) + { + Index scan_relid = path->parent->relid; + RangeTblEntry *rte; + + /* it should be a base rel with a tablesample clause... */ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RELATION); + Assert(rte->tablesample != NULL); + + if (contain_references_to(root, (Node *) rte->tablesample, + child_rel->top_parent_relids)) + return NULL; + } + FLAT_COPY_PATH(new_path, path, Path); break; @@ -3956,6 +3995,18 @@ do { \ { IndexPath *ipath; + /* + * If the path's restriction clauses contain lateral + * references to the other relation, we can't reparameterize, + * because we must not change the IndexOptInfo's contents + * here. (Doing so would break things if we end up using a + * non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + FLAT_COPY_PATH(ipath, path, IndexPath); ADJUST_CHILD_ATTRS(ipath->indexclauses); new_path = (Path *) ipath; @@ -3966,6 +4017,18 @@ do { \ { BitmapHeapPath *bhpath; + /* + * If the path's restriction clauses contain lateral + * references to the other relation, we can't reparameterize, + * because we must not change the RelOptInfo's contents here. + * (Doing so would break things if we end up using a + * non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + FLAT_COPY_PATH(bhpath, path, BitmapHeapPath); REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual); new_path = (Path *) bhpath; @@ -3997,6 +4060,18 @@ do { \ ForeignPath *fpath; ReparameterizeForeignPathByChild_function rfpc_func; + /* + * If the path's restriction clauses contain lateral + * references to the other relation, we can't reparameterize, + * because we must not change the RelOptInfo's contents here. + * (Doing so would break things if we end up using a + * non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + FLAT_COPY_PATH(fpath, path, ForeignPath); if (fpath->fdw_outerpath) REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath); @@ -4015,6 +4090,18 @@ do { \ { CustomPath *cpath; + /* + * If the path's restriction clauses contain lateral + * references to the other relation, we can't reparameterize, + * because we must not change the RelOptInfo's contents here. + * (Doing so would break things if we end up using a + * non-partitionwise join.) + */ + if (ris_contain_references_to(root, + path->parent->baserestrictinfo, + child_rel->top_parent_relids)) + return NULL; + FLAT_COPY_PATH(cpath, path, CustomPath); REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths); if (cpath->methods && @@ -4180,3 +4267,91 @@ reparameterize_pathlist_by_child(PlannerInfo *root, return result; } + +/* + * contain_references_to + * Detect whether any Vars or PlaceHolderVars in the given clause contain + * lateral references to the given 'relids'. + */ +static bool +contain_references_to(PlannerInfo *root, Node *clause, Relids relids) +{ + bool ret = false; + List *vars; + ListCell *lc; + + /* + * Examine all Vars and PlaceHolderVars used in the clause. + * + * By omitting the relevant flags, this also gives us a cheap sanity check + * that no aggregates or window functions appear in the clause. We don't + * expect any of those in scan-level restrictions or tablesamples. + */ + vars = pull_var_clause(clause, PVC_INCLUDE_PLACEHOLDERS); + foreach(lc, vars) + { + Node *node = (Node *) lfirst(lc); + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (bms_is_member(var->varno, relids)) + { + ret = true; + break; + } + } + else if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + PlaceHolderInfo *phinfo = find_placeholder_info(root, phv, false); + + /* + * We should check both ph_eval_at (in case the PHV is to be + * computed at the other relation and then laterally referenced + * here) and ph_lateral (in case the PHV is to be evaluated here + * but contains lateral references to the other relation). The + * former case should not occur in baserestrictinfo clauses, but + * it can occur in tablesample clauses. + */ + if (bms_overlap(phinfo->ph_eval_at, relids) || + bms_overlap(phinfo->ph_lateral, relids)) + { + ret = true; + break; + } + } + else + Assert(false); + } + + list_free(vars); + + return ret; +} + +/* + * ris_contain_references_to + * Apply contain_references_to() to a list of RestrictInfos. + * + * We need extra code for this because pull_var_clause() can't descend + * through RestrictInfos. + */ +static bool +ris_contain_references_to(PlannerInfo *root, List *rinfos, Relids relids) +{ + ListCell *lc; + + foreach(lc, rinfos) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + /* Pseudoconstant clauses can't contain any Vars or PHVs */ + if (rinfo->pseudoconstant) + continue; + if (contain_references_to(root, (Node *) rinfo->clause, relids)) + return true; + } + return false; +} |