aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/utils/adt/selfuncs.c70
-rw-r--r--src/test/regress/expected/join.out22
-rw-r--r--src/test/regress/sql/join.sql9
3 files changed, 85 insertions, 16 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index edff6da4109..ea95b8068d9 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2767,29 +2767,67 @@ neqjoinsel(PG_FUNCTION_ARGS)
List *args = (List *) PG_GETARG_POINTER(2);
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
- Oid eqop;
float8 result;
- /*
- * We want 1 - eqjoinsel() where the equality operator is the one
- * associated with this != operator, that is, its negator.
- */
- eqop = get_negator(operator);
- if (eqop)
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
{
- result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
- PointerGetDatum(root),
- ObjectIdGetDatum(eqop),
- PointerGetDatum(args),
- Int16GetDatum(jointype),
- PointerGetDatum(sjinfo)));
+ /*
+ * For semi-joins, if there is more than one distinct value in the RHS
+ * relation then every non-null LHS row must find a row to join since
+ * it can only be equal to one of them. We'll assume that there is
+ * always more than one distinct RHS value for the sake of stability,
+ * though in theory we could have special cases for empty RHS
+ * (selectivity = 0) and single-distinct-value RHS (selectivity =
+ * fraction of LHS that has the same value as the single RHS value).
+ *
+ * For anti-joins, if we use the same assumption that there is more
+ * than one distinct key in the RHS relation, then every non-null LHS
+ * row must be suppressed by the anti-join.
+ *
+ * So either way, the selectivity estimate should be 1 - nullfrac.
+ */
+ VariableStatData leftvar;
+ VariableStatData rightvar;
+ bool reversed;
+ HeapTuple statsTuple;
+ double nullfrac;
+
+ get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
+ statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
+ if (HeapTupleIsValid(statsTuple))
+ nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
+ else
+ nullfrac = 0.0;
+ ReleaseVariableStats(leftvar);
+ ReleaseVariableStats(rightvar);
+
+ result = 1.0 - nullfrac;
}
else
{
- /* Use default selectivity (should we raise an error instead?) */
- result = DEFAULT_EQ_SEL;
+ /*
+ * We want 1 - eqjoinsel() where the equality operator is the one
+ * associated with this != operator, that is, its negator.
+ */
+ Oid eqop = get_negator(operator);
+
+ if (eqop)
+ {
+ result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(eqop),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
+ }
+ else
+ {
+ /* Use default selectivity (should we raise an error instead?) */
+ result = DEFAULT_EQ_SEL;
+ }
+ result = 1.0 - result;
}
- result = 1.0 - result;
+
PG_RETURN_FLOAT8(result);
}
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index f02ef3f9787..b7d17900978 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1846,6 +1846,28 @@ SELECT '' AS "xxx", *
(1 row)
--
+-- semijoin selectivity for <>
+--
+explain (costs off)
+select * from int4_tbl i4, tenk1 a
+where exists(select * from tenk1 b
+ where a.twothousand = b.twothousand and a.fivethous <> b.fivethous)
+ and i4.f1 = a.tenthous;
+ QUERY PLAN
+----------------------------------------------
+ Hash Semi Join
+ Hash Cond: (a.twothousand = b.twothousand)
+ Join Filter: (a.fivethous <> b.fivethous)
+ -> Hash Join
+ Hash Cond: (a.tenthous = i4.f1)
+ -> Seq Scan on tenk1 a
+ -> Hash
+ -> Seq Scan on int4_tbl i4
+ -> Hash
+ -> Seq Scan on tenk1 b
+(10 rows)
+
+--
-- More complicated constructs
--
--
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index cbb71c5b1be..c6d4a513e86 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -193,6 +193,15 @@ SELECT '' AS "xxx", *
SELECT '' AS "xxx", *
FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (i = 1);
+--
+-- semijoin selectivity for <>
+--
+explain (costs off)
+select * from int4_tbl i4, tenk1 a
+where exists(select * from tenk1 b
+ where a.twothousand = b.twothousand and a.fivethous <> b.fivethous)
+ and i4.f1 = a.tenthous;
+
--
-- More complicated constructs