aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorRichard Guo <rguo@postgresql.org>2024-07-05 09:26:48 +0900
committerRichard Guo <rguo@postgresql.org>2024-07-05 09:26:48 +0900
commitaa86129e19d704afb93cb84ab9638f33d266ee9d (patch)
tree0c1643a4ca9aaa321c08f4d896499aed3c961c2c /src/backend
parent5a519abeddfe34659a8c0478f04a0acfd0d80ec6 (diff)
downloadpostgresql-aa86129e19d704afb93cb84ab9638f33d266ee9d.tar.gz
postgresql-aa86129e19d704afb93cb84ab9638f33d266ee9d.zip
Support "Right Semi Join" plan shapes
Hash joins can support semijoin with the LHS input on the right, using the existing logic for inner join, combined with the assurance that only the first match for each inner tuple is considered, which can be achieved by leveraging the HEAP_TUPLE_HAS_MATCH flag. This can be very useful in some cases since we may now have the option to hash the smaller table instead of the larger. Merge join could likely support "Right Semi Join" too. However, the benefit of swapping inputs tends to be small here, so we do not address that in this patch. Note that this patch also modifies a test query in join.sql to ensure it continues testing as intended. With this patch the original query would result in a right-semi-join rather than semi-join, compromising its original purpose of testing the fix for neqjoinsel's behavior for semi-joins. Author: Richard Guo Reviewed-by: wenhui qiu, Alena Rybakina, Japin Li Discussion: https://postgr.es/m/CAMbWs4_X1mN=ic+SxcyymUqFx9bB8pqSLTGJ-F=MHy4PW3eRXw@mail.gmail.com
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/commands/explain.c3
-rw-r--r--src/backend/executor/nodeHashjoin.c15
-rw-r--r--src/backend/optimizer/path/joinpath.c42
-rw-r--r--src/backend/optimizer/path/joinrels.c3
-rw-r--r--src/backend/optimizer/path/pathkeys.c3
-rw-r--r--src/backend/optimizer/prep/prepjointree.c6
6 files changed, 54 insertions, 18 deletions
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 94511a5a024..30de9de9d4f 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1749,6 +1749,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
case JOIN_ANTI:
jointype = "Anti";
break;
+ case JOIN_RIGHT_SEMI:
+ jointype = "Right Semi";
+ break;
case JOIN_RIGHT_ANTI:
jointype = "Right Anti";
break;
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index dbf114cd5eb..c46764023df 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -534,6 +534,14 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
}
/*
+ * In a right-semijoin, we only need the first match for each
+ * inner tuple.
+ */
+ if (node->js.jointype == JOIN_RIGHT_SEMI &&
+ HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple)))
+ continue;
+
+ /*
* We've got a match, but still need to test non-hashed quals.
* ExecScanHashBucket already set up all the state needed to
* call ExecQual.
@@ -549,10 +557,10 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
{
node->hj_MatchedOuter = true;
-
/*
- * This is really only needed if HJ_FILL_INNER(node), but
- * we'll avoid the branch and just set it always.
+ * This is really only needed if HJ_FILL_INNER(node) or if
+ * we are in a right-semijoin, but we'll avoid the branch
+ * and just set it always.
*/
if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple)))
HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
@@ -779,6 +787,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
{
case JOIN_INNER:
case JOIN_SEMI:
+ case JOIN_RIGHT_SEMI:
break;
case JOIN_LEFT:
case JOIN_ANTI:
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 5be8da9e095..40eb58341c1 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -288,8 +288,8 @@ add_paths_to_joinrel(PlannerInfo *root,
* sorted. This includes both nestloops and mergejoins where the outer
* path is already ordered. Again, skip this if we can't mergejoin.
* (That's okay because we know that nestloop can't handle
- * right/right-anti/full joins at all, so it wouldn't work in the
- * prohibited cases either.)
+ * right/right-anti/right-semi/full joins at all, so it wouldn't work in
+ * the prohibited cases either.)
*/
if (mergejoin_allowed)
match_unsorted_outer(root, joinrel, outerrel, innerrel,
@@ -1729,6 +1729,13 @@ match_unsorted_outer(PlannerInfo *root,
ListCell *lc1;
/*
+ * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
+ * join.
+ */
+ if (jointype == JOIN_RIGHT_SEMI)
+ return;
+
+ /*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
* are doing a right, right-anti or full mergejoin, we must use *all* the
* mergeclauses as join clauses, else we will not have a valid plan.
@@ -2297,12 +2304,13 @@ hash_inner_and_outer(PlannerInfo *root,
* total inner path will also be parallel-safe, but if not, we'll
* have to search for the cheapest safe, unparameterized inner
* path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
- * inner path. If full, right, or right-anti join, we can't use
- * parallelism (building the hash table in each backend) because
- * no one process has all the match bits.
+ * inner path. If full, right, right-semi or right-anti join, we
+ * can't use parallelism (building the hash table in each backend)
+ * because no one process has all the match bits.
*/
if (save_jointype == JOIN_FULL ||
save_jointype == JOIN_RIGHT ||
+ save_jointype == JOIN_RIGHT_SEMI ||
save_jointype == JOIN_RIGHT_ANTI)
cheapest_safe_inner = NULL;
else if (cheapest_total_inner->parallel_safe)
@@ -2327,13 +2335,13 @@ hash_inner_and_outer(PlannerInfo *root,
* Returns a list of RestrictInfo nodes for those clauses.
*
* *mergejoin_allowed is normally set to true, but it is set to false if
- * this is a right/right-anti/full join and there are nonmergejoinable join
- * clauses. The executor's mergejoin machinery cannot handle such cases, so
- * we have to avoid generating a mergejoin plan. (Note that this flag does
- * NOT consider whether there are actually any mergejoinable clauses. This is
- * correct because in some cases we need to build a clauseless mergejoin.
- * Simply returning NIL is therefore not enough to distinguish safe from
- * unsafe cases.)
+ * this is a right-semi join, or this is a right/right-anti/full join and
+ * there are nonmergejoinable join clauses. The executor's mergejoin
+ * machinery cannot handle such cases, so we have to avoid generating a
+ * mergejoin plan. (Note that this flag does NOT consider whether there are
+ * actually any mergejoinable clauses. This is correct because in some
+ * cases we need to build a clauseless mergejoin. Simply returning NIL is
+ * therefore not enough to distinguish safe from unsafe cases.)
*
* We also mark each selected RestrictInfo to show which side is currently
* being considered as outer. These are transient markings that are only
@@ -2357,6 +2365,16 @@ select_mergejoin_clauses(PlannerInfo *root,
bool have_nonmergeable_joinclause = false;
ListCell *l;
+ /*
+ * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
+ * swapping inputs tends to be small here.
+ */
+ if (jointype == JOIN_RIGHT_SEMI)
+ {
+ *mergejoin_allowed = false;
+ return NIL;
+ }
+
foreach(l, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index db475e25b15..a3677f824fe 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -991,6 +991,9 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_SEMI, sjinfo,
restrictlist);
+ add_paths_to_joinrel(root, joinrel, rel2, rel1,
+ JOIN_RIGHT_SEMI, sjinfo,
+ restrictlist);
}
/*
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 416fc4e240b..e25798972f6 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1294,6 +1294,9 @@ build_join_pathkeys(PlannerInfo *root,
JoinType jointype,
List *outer_pathkeys)
{
+ /* RIGHT_SEMI should not come here */
+ Assert(jointype != JOIN_RIGHT_SEMI);
+
if (jointype == JOIN_FULL ||
jointype == JOIN_RIGHT ||
jointype == JOIN_RIGHT_ANTI)
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 5482ab85a76..969e257f70b 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -455,8 +455,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
* point of the available_rels machinations is to ensure that we only
* pull up quals for which that's okay.
*
- * We don't expect to see any pre-existing JOIN_SEMI, JOIN_ANTI, or
- * JOIN_RIGHT_ANTI jointypes here.
+ * We don't expect to see any pre-existing JOIN_SEMI, JOIN_ANTI,
+ * JOIN_RIGHT_SEMI, or JOIN_RIGHT_ANTI jointypes here.
*/
switch (j->jointype)
{
@@ -2950,7 +2950,7 @@ reduce_outer_joins_pass2(Node *jtnode,
* These could only have been introduced by pull_up_sublinks,
* so there's no way that upper quals could refer to their
* righthand sides, and no point in checking. We don't expect
- * to see JOIN_RIGHT_ANTI yet.
+ * to see JOIN_RIGHT_SEMI or JOIN_RIGHT_ANTI yet.
*/
break;
default: