aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
authorRichard Guo <rguo@postgresql.org>2024-07-05 09:26:48 +0900
committerRichard Guo <rguo@postgresql.org>2024-07-05 09:26:48 +0900
commitaa86129e19d704afb93cb84ab9638f33d266ee9d (patch)
tree0c1643a4ca9aaa321c08f4d896499aed3c961c2c /src/backend/optimizer/path
parent5a519abeddfe34659a8c0478f04a0acfd0d80ec6 (diff)
downloadpostgresql-aa86129e19d704afb93cb84ab9638f33d266ee9d.tar.gz
postgresql-aa86129e19d704afb93cb84ab9638f33d266ee9d.zip
Support "Right Semi Join" plan shapes
Hash joins can support semijoin with the LHS input on the right, using the existing logic for inner join, combined with the assurance that only the first match for each inner tuple is considered, which can be achieved by leveraging the HEAP_TUPLE_HAS_MATCH flag. This can be very useful in some cases since we may now have the option to hash the smaller table instead of the larger. Merge join could likely support "Right Semi Join" too. However, the benefit of swapping inputs tends to be small here, so we do not address that in this patch. Note that this patch also modifies a test query in join.sql to ensure it continues testing as intended. With this patch the original query would result in a right-semi-join rather than semi-join, compromising its original purpose of testing the fix for neqjoinsel's behavior for semi-joins. Author: Richard Guo Reviewed-by: wenhui qiu, Alena Rybakina, Japin Li Discussion: https://postgr.es/m/CAMbWs4_X1mN=ic+SxcyymUqFx9bB8pqSLTGJ-F=MHy4PW3eRXw@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/joinpath.c42
-rw-r--r--src/backend/optimizer/path/joinrels.c3
-rw-r--r--src/backend/optimizer/path/pathkeys.c3
3 files changed, 36 insertions, 12 deletions
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 5be8da9e095..40eb58341c1 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -288,8 +288,8 @@ add_paths_to_joinrel(PlannerInfo *root,
* sorted. This includes both nestloops and mergejoins where the outer
* path is already ordered. Again, skip this if we can't mergejoin.
* (That's okay because we know that nestloop can't handle
- * right/right-anti/full joins at all, so it wouldn't work in the
- * prohibited cases either.)
+ * right/right-anti/right-semi/full joins at all, so it wouldn't work in
+ * the prohibited cases either.)
*/
if (mergejoin_allowed)
match_unsorted_outer(root, joinrel, outerrel, innerrel,
@@ -1729,6 +1729,13 @@ match_unsorted_outer(PlannerInfo *root,
ListCell *lc1;
/*
+ * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
+ * join.
+ */
+ if (jointype == JOIN_RIGHT_SEMI)
+ return;
+
+ /*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
* are doing a right, right-anti or full mergejoin, we must use *all* the
* mergeclauses as join clauses, else we will not have a valid plan.
@@ -2297,12 +2304,13 @@ hash_inner_and_outer(PlannerInfo *root,
* total inner path will also be parallel-safe, but if not, we'll
* have to search for the cheapest safe, unparameterized inner
* path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
- * inner path. If full, right, or right-anti join, we can't use
- * parallelism (building the hash table in each backend) because
- * no one process has all the match bits.
+ * inner path. If full, right, right-semi or right-anti join, we
+ * can't use parallelism (building the hash table in each backend)
+ * because no one process has all the match bits.
*/
if (save_jointype == JOIN_FULL ||
save_jointype == JOIN_RIGHT ||
+ save_jointype == JOIN_RIGHT_SEMI ||
save_jointype == JOIN_RIGHT_ANTI)
cheapest_safe_inner = NULL;
else if (cheapest_total_inner->parallel_safe)
@@ -2327,13 +2335,13 @@ hash_inner_and_outer(PlannerInfo *root,
* Returns a list of RestrictInfo nodes for those clauses.
*
* *mergejoin_allowed is normally set to true, but it is set to false if
- * this is a right/right-anti/full join and there are nonmergejoinable join
- * clauses. The executor's mergejoin machinery cannot handle such cases, so
- * we have to avoid generating a mergejoin plan. (Note that this flag does
- * NOT consider whether there are actually any mergejoinable clauses. This is
- * correct because in some cases we need to build a clauseless mergejoin.
- * Simply returning NIL is therefore not enough to distinguish safe from
- * unsafe cases.)
+ * this is a right-semi join, or this is a right/right-anti/full join and
+ * there are nonmergejoinable join clauses. The executor's mergejoin
+ * machinery cannot handle such cases, so we have to avoid generating a
+ * mergejoin plan. (Note that this flag does NOT consider whether there are
+ * actually any mergejoinable clauses. This is correct because in some
+ * cases we need to build a clauseless mergejoin. Simply returning NIL is
+ * therefore not enough to distinguish safe from unsafe cases.)
*
* We also mark each selected RestrictInfo to show which side is currently
* being considered as outer. These are transient markings that are only
@@ -2357,6 +2365,16 @@ select_mergejoin_clauses(PlannerInfo *root,
bool have_nonmergeable_joinclause = false;
ListCell *l;
+ /*
+ * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
+ * swapping inputs tends to be small here.
+ */
+ if (jointype == JOIN_RIGHT_SEMI)
+ {
+ *mergejoin_allowed = false;
+ return NIL;
+ }
+
foreach(l, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index db475e25b15..a3677f824fe 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -991,6 +991,9 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
add_paths_to_joinrel(root, joinrel, rel1, rel2,
JOIN_SEMI, sjinfo,
restrictlist);
+ add_paths_to_joinrel(root, joinrel, rel2, rel1,
+ JOIN_RIGHT_SEMI, sjinfo,
+ restrictlist);
}
/*
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 416fc4e240b..e25798972f6 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1294,6 +1294,9 @@ build_join_pathkeys(PlannerInfo *root,
JoinType jointype,
List *outer_pathkeys)
{
+ /* RIGHT_SEMI should not come here */
+ Assert(jointype != JOIN_RIGHT_SEMI);
+
if (jointype == JOIN_FULL ||
jointype == JOIN_RIGHT ||
jointype == JOIN_RIGHT_ANTI)