aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/utils/adt/selfuncs.c254
-rw-r--r--src/test/regress/expected/partition_join.out23
2 files changed, 147 insertions, 130 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 73fbb4ac86b..ffca0fe5bb8 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -164,10 +164,20 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt, bool iseq,
Datum constval, Oid consttype);
-static double eqjoinsel_inner(Oid operator,
- VariableStatData *vardata1, VariableStatData *vardata2);
-static double eqjoinsel_semi(Oid operator,
+static double eqjoinsel_inner(Oid opfuncoid,
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ double nd1, double nd2,
+ bool isdefault1, bool isdefault2,
+ AttStatsSlot *sslot1, AttStatsSlot *sslot2,
+ Form_pg_statistic stats1, Form_pg_statistic stats2,
+ bool have_mcvs1, bool have_mcvs2);
+static double eqjoinsel_semi(Oid opfuncoid,
VariableStatData *vardata1, VariableStatData *vardata2,
+ double nd1, double nd2,
+ bool isdefault1, bool isdefault2,
+ AttStatsSlot *sslot1, AttStatsSlot *sslot2,
+ Form_pg_statistic stats1, Form_pg_statistic stats2,
+ bool have_mcvs1, bool have_mcvs2,
RelOptInfo *inner_rel);
static bool estimate_multivariate_ndistinct(PlannerInfo *root,
RelOptInfo *rel, List **varinfos, double *ndistinct);
@@ -2290,20 +2300,69 @@ eqjoinsel(PG_FUNCTION_ARGS)
#endif
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
double selec;
+ double selec_inner;
VariableStatData vardata1;
VariableStatData vardata2;
+ double nd1;
+ double nd2;
+ bool isdefault1;
+ bool isdefault2;
+ Oid opfuncoid;
+ AttStatsSlot sslot1;
+ AttStatsSlot sslot2;
+ Form_pg_statistic stats1 = NULL;
+ Form_pg_statistic stats2 = NULL;
+ bool have_mcvs1 = false;
+ bool have_mcvs2 = false;
bool join_is_reversed;
RelOptInfo *inner_rel;
get_join_variables(root, args, sjinfo,
&vardata1, &vardata2, &join_is_reversed);
+ nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
+ nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
+
+ opfuncoid = get_opcode(operator);
+
+ memset(&sslot1, 0, sizeof(sslot1));
+ memset(&sslot2, 0, sizeof(sslot2));
+
+ if (HeapTupleIsValid(vardata1.statsTuple))
+ {
+ /* note we allow use of nullfrac regardless of security check */
+ stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
+ if (statistic_proc_security_check(&vardata1, opfuncoid))
+ have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
+ }
+
+ if (HeapTupleIsValid(vardata2.statsTuple))
+ {
+ /* note we allow use of nullfrac regardless of security check */
+ stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
+ if (statistic_proc_security_check(&vardata2, opfuncoid))
+ have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
+ }
+
+ /* We need to compute the inner-join selectivity in all cases */
+ selec_inner = eqjoinsel_inner(opfuncoid,
+ &vardata1, &vardata2,
+ nd1, nd2,
+ isdefault1, isdefault2,
+ &sslot1, &sslot2,
+ stats1, stats2,
+ have_mcvs1, have_mcvs2);
+
switch (sjinfo->jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_FULL:
- selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
+ selec = selec_inner;
break;
case JOIN_SEMI:
case JOIN_ANTI:
@@ -2317,12 +2376,40 @@ eqjoinsel(PG_FUNCTION_ARGS)
inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
if (!join_is_reversed)
- selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
+ selec = eqjoinsel_semi(opfuncoid,
+ &vardata1, &vardata2,
+ nd1, nd2,
+ isdefault1, isdefault2,
+ &sslot1, &sslot2,
+ stats1, stats2,
+ have_mcvs1, have_mcvs2,
inner_rel);
else
- selec = eqjoinsel_semi(get_commutator(operator),
+ {
+ Oid commop = get_commutator(operator);
+ Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
+
+ selec = eqjoinsel_semi(commopfuncoid,
&vardata2, &vardata1,
+ nd2, nd1,
+ isdefault2, isdefault1,
+ &sslot2, &sslot1,
+ stats2, stats1,
+ have_mcvs2, have_mcvs1,
inner_rel);
+ }
+
+ /*
+ * We should never estimate the output of a semijoin to be more
+ * rows than we estimate for an inner join with the same input
+ * rels and join condition; it's obviously impossible for that to
+ * happen. The former estimate is N1 * Ssemi while the latter is
+ * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner. Doing
+ * this is worthwhile because of the shakier estimation rules we
+ * use in eqjoinsel_semi, particularly in cases where it has to
+ * punt entirely.
+ */
+ selec = Min(selec, inner_rel->rows * selec_inner);
break;
default:
/* other values not expected here */
@@ -2332,6 +2419,9 @@ eqjoinsel(PG_FUNCTION_ARGS)
break;
}
+ free_attstatsslot(&sslot1);
+ free_attstatsslot(&sslot2);
+
ReleaseVariableStats(vardata1);
ReleaseVariableStats(vardata2);
@@ -2347,49 +2437,15 @@ eqjoinsel(PG_FUNCTION_ARGS)
* that it's worth trying to distinguish them here.
*/
static double
-eqjoinsel_inner(Oid operator,
- VariableStatData *vardata1, VariableStatData *vardata2)
+eqjoinsel_inner(Oid opfuncoid,
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ double nd1, double nd2,
+ bool isdefault1, bool isdefault2,
+ AttStatsSlot *sslot1, AttStatsSlot *sslot2,
+ Form_pg_statistic stats1, Form_pg_statistic stats2,
+ bool have_mcvs1, bool have_mcvs2)
{
double selec;
- double nd1;
- double nd2;
- bool isdefault1;
- bool isdefault2;
- Oid opfuncoid;
- Form_pg_statistic stats1 = NULL;
- Form_pg_statistic stats2 = NULL;
- bool have_mcvs1 = false;
- bool have_mcvs2 = false;
- AttStatsSlot sslot1;
- AttStatsSlot sslot2;
-
- nd1 = get_variable_numdistinct(vardata1, &isdefault1);
- nd2 = get_variable_numdistinct(vardata2, &isdefault2);
-
- opfuncoid = get_opcode(operator);
-
- memset(&sslot1, 0, sizeof(sslot1));
- memset(&sslot2, 0, sizeof(sslot2));
-
- if (HeapTupleIsValid(vardata1->statsTuple))
- {
- /* note we allow use of nullfrac regardless of security check */
- stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
- if (statistic_proc_security_check(vardata1, opfuncoid))
- have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
- STATISTIC_KIND_MCV, InvalidOid,
- ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
- }
-
- if (HeapTupleIsValid(vardata2->statsTuple))
- {
- /* note we allow use of nullfrac regardless of security check */
- stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
- if (statistic_proc_security_check(vardata2, opfuncoid))
- have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
- STATISTIC_KIND_MCV, InvalidOid,
- ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
- }
if (have_mcvs1 && have_mcvs2)
{
@@ -2423,8 +2479,8 @@ eqjoinsel_inner(Oid operator,
nmatches;
fmgr_info(opfuncoid, &eqproc);
- hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
- hasmatch2 = (bool *) palloc0(sslot2.nvalues * sizeof(bool));
+ hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
+ hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
/*
* Note we assume that each MCV will match at most one member of the
@@ -2434,21 +2490,21 @@ eqjoinsel_inner(Oid operator,
*/
matchprodfreq = 0.0;
nmatches = 0;
- for (i = 0; i < sslot1.nvalues; i++)
+ for (i = 0; i < sslot1->nvalues; i++)
{
int j;
- for (j = 0; j < sslot2.nvalues; j++)
+ for (j = 0; j < sslot2->nvalues; j++)
{
if (hasmatch2[j])
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
DEFAULT_COLLATION_OID,
- sslot1.values[i],
- sslot2.values[j])))
+ sslot1->values[i],
+ sslot2->values[j])))
{
hasmatch1[i] = hasmatch2[j] = true;
- matchprodfreq += sslot1.numbers[i] * sslot2.numbers[j];
+ matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
nmatches++;
break;
}
@@ -2457,22 +2513,22 @@ eqjoinsel_inner(Oid operator,
CLAMP_PROBABILITY(matchprodfreq);
/* Sum up frequencies of matched and unmatched MCVs */
matchfreq1 = unmatchfreq1 = 0.0;
- for (i = 0; i < sslot1.nvalues; i++)
+ for (i = 0; i < sslot1->nvalues; i++)
{
if (hasmatch1[i])
- matchfreq1 += sslot1.numbers[i];
+ matchfreq1 += sslot1->numbers[i];
else
- unmatchfreq1 += sslot1.numbers[i];
+ unmatchfreq1 += sslot1->numbers[i];
}
CLAMP_PROBABILITY(matchfreq1);
CLAMP_PROBABILITY(unmatchfreq1);
matchfreq2 = unmatchfreq2 = 0.0;
- for (i = 0; i < sslot2.nvalues; i++)
+ for (i = 0; i < sslot2->nvalues; i++)
{
if (hasmatch2[i])
- matchfreq2 += sslot2.numbers[i];
+ matchfreq2 += sslot2->numbers[i];
else
- unmatchfreq2 += sslot2.numbers[i];
+ unmatchfreq2 += sslot2->numbers[i];
}
CLAMP_PROBABILITY(matchfreq2);
CLAMP_PROBABILITY(unmatchfreq2);
@@ -2497,15 +2553,15 @@ eqjoinsel_inner(Oid operator,
* MCVs plus non-MCV values.
*/
totalsel1 = matchprodfreq;
- if (nd2 > sslot2.nvalues)
- totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2.nvalues);
+ if (nd2 > sslot2->nvalues)
+ totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
if (nd2 > nmatches)
totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
(nd2 - nmatches);
/* Same estimate from the point of view of relation 2. */
totalsel2 = matchprodfreq;
- if (nd1 > sslot1.nvalues)
- totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1.nvalues);
+ if (nd1 > sslot1->nvalues)
+ totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
if (nd1 > nmatches)
totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
(nd1 - nmatches);
@@ -2550,9 +2606,6 @@ eqjoinsel_inner(Oid operator,
selec /= nd2;
}
- free_attstatsslot(&sslot1);
- free_attstatsslot(&sslot2);
-
return selec;
}
@@ -2561,32 +2614,19 @@ eqjoinsel_inner(Oid operator,
*
* (Also used for anti join, which we are supposed to estimate the same way.)
* Caller has ensured that vardata1 is the LHS variable.
- * Unlike eqjoinsel_inner, we have to cope with operator being InvalidOid.
+ * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
*/
static double
-eqjoinsel_semi(Oid operator,
+eqjoinsel_semi(Oid opfuncoid,
VariableStatData *vardata1, VariableStatData *vardata2,
+ double nd1, double nd2,
+ bool isdefault1, bool isdefault2,
+ AttStatsSlot *sslot1, AttStatsSlot *sslot2,
+ Form_pg_statistic stats1, Form_pg_statistic stats2,
+ bool have_mcvs1, bool have_mcvs2,
RelOptInfo *inner_rel)
{
double selec;
- double nd1;
- double nd2;
- bool isdefault1;
- bool isdefault2;
- Oid opfuncoid;
- Form_pg_statistic stats1 = NULL;
- bool have_mcvs1 = false;
- bool have_mcvs2 = false;
- AttStatsSlot sslot1;
- AttStatsSlot sslot2;
-
- nd1 = get_variable_numdistinct(vardata1, &isdefault1);
- nd2 = get_variable_numdistinct(vardata2, &isdefault2);
-
- opfuncoid = OidIsValid(operator) ? get_opcode(operator) : InvalidOid;
-
- memset(&sslot1, 0, sizeof(sslot1));
- memset(&sslot2, 0, sizeof(sslot2));
/*
* We clamp nd2 to be not more than what we estimate the inner relation's
@@ -2621,26 +2661,7 @@ eqjoinsel_semi(Oid operator,
isdefault2 = false;
}
- if (HeapTupleIsValid(vardata1->statsTuple))
- {
- /* note we allow use of nullfrac regardless of security check */
- stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
- if (statistic_proc_security_check(vardata1, opfuncoid))
- have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
- STATISTIC_KIND_MCV, InvalidOid,
- ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
- }
-
- if (HeapTupleIsValid(vardata2->statsTuple) &&
- statistic_proc_security_check(vardata2, opfuncoid))
- {
- have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
- STATISTIC_KIND_MCV, InvalidOid,
- ATTSTATSSLOT_VALUES);
- /* note: currently don't need stanumbers from RHS */
- }
-
- if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
+ if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
{
/*
* We have most-common-value lists for both relations. Run through
@@ -2663,15 +2684,15 @@ eqjoinsel_semi(Oid operator,
/*
* The clamping above could have resulted in nd2 being less than
- * sslot2.nvalues; in which case, we assume that precisely the nd2
+ * sslot2->nvalues; in which case, we assume that precisely the nd2
* most common values in the relation will appear in the join input,
* and so compare to only the first nd2 members of the MCV list. Of
* course this is frequently wrong, but it's the best bet we can make.
*/
- clamped_nvalues2 = Min(sslot2.nvalues, nd2);
+ clamped_nvalues2 = Min(sslot2->nvalues, nd2);
fmgr_info(opfuncoid, &eqproc);
- hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
+ hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
/*
@@ -2681,7 +2702,7 @@ eqjoinsel_semi(Oid operator,
* and because the math wouldn't add up...
*/
nmatches = 0;
- for (i = 0; i < sslot1.nvalues; i++)
+ for (i = 0; i < sslot1->nvalues; i++)
{
int j;
@@ -2691,8 +2712,8 @@ eqjoinsel_semi(Oid operator,
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
DEFAULT_COLLATION_OID,
- sslot1.values[i],
- sslot2.values[j])))
+ sslot1->values[i],
+ sslot2->values[j])))
{
hasmatch1[i] = hasmatch2[j] = true;
nmatches++;
@@ -2702,10 +2723,10 @@ eqjoinsel_semi(Oid operator,
}
/* Sum up frequencies of matched MCVs */
matchfreq1 = 0.0;
- for (i = 0; i < sslot1.nvalues; i++)
+ for (i = 0; i < sslot1->nvalues; i++)
{
if (hasmatch1[i])
- matchfreq1 += sslot1.numbers[i];
+ matchfreq1 += sslot1->numbers[i];
}
CLAMP_PROBABILITY(matchfreq1);
pfree(hasmatch1);
@@ -2760,9 +2781,6 @@ eqjoinsel_semi(Oid operator,
selec = 0.5 * (1.0 - nullfrac1);
}
- free_attstatsslot(&sslot1);
- free_attstatsslot(&sslot2);
-
return selec;
}
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 3ba3aaf2d86..c55de5d4765 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -801,8 +801,8 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHER
EXPLAIN (COSTS OFF)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
- QUERY PLAN
--------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------
Sort
Sort Key: t1.a
-> Append
@@ -831,19 +831,18 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (
Index Cond: (a = t1_4.b)
Filter: (b = 0)
-> Nested Loop
- -> Unique
- -> Sort
- Sort Key: t1_5.b
- -> Hash Semi Join
- Hash Cond: (t1_5.b = ((t1_8.a + t1_8.b) / 2))
- -> Seq Scan on prt2_p3 t1_5
- -> Hash
- -> Seq Scan on prt1_e_p3 t1_8
- Filter: (c = 0)
+ -> HashAggregate
+ Group Key: t1_5.b
+ -> Hash Semi Join
+ Hash Cond: (t1_5.b = ((t1_8.a + t1_8.b) / 2))
+ -> Seq Scan on prt2_p3 t1_5
+ -> Hash
+ -> Seq Scan on prt1_e_p3 t1_8
+ Filter: (c = 0)
-> Index Scan using iprt1_p3_a on prt1_p3 t1_2
Index Cond: (a = t1_5.b)
Filter: (b = 0)
-(40 rows)
+(39 rows)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
a | b | c