aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/selfuncs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r-- src/backend/utils/adt/selfuncs.c 49
1 file changed, 32 insertions, 17 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 5c7e8325a19..bbc344f16bd 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2329,7 +2329,9 @@ eqjoinsel_semi(Oid operator,
bool *hasmatch1;
bool *hasmatch2;
double nullfrac1 = stats1->stanullfrac;
- double matchfreq1;
+ double matchfreq1,
+ uncertainfrac,
+ uncertain;
int i,
nmatches;
@@ -2382,18 +2384,26 @@ eqjoinsel_semi(Oid operator,
* the uncertain rows that a fraction nd2/nd1 have join partners. We
* can discount the known-matched MCVs from the distinct-values counts
* before doing the division.
+ *
+ * Crude as the above is, it's completely useless if we don't have
+ * reliable ndistinct values for both sides. Hence, if either nd1
+ * or nd2 is default, punt and assume half of the uncertain rows
+ * have join partners.
*/
- nd1 -= nmatches;
- nd2 -= nmatches;
- if (nd1 <= nd2 || nd2 <= 0)
- selec = Max(matchfreq1, 1.0 - nullfrac1);
- else
+ if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
{
- double uncertain = 1.0 - matchfreq1 - nullfrac1;
-
- CLAMP_PROBABILITY(uncertain);
- selec = matchfreq1 + (nd2 / nd1) * uncertain;
+ nd1 -= nmatches;
+ nd2 -= nmatches;
+ if (nd1 <= nd2 || nd2 <= 0)
+ uncertainfrac = 1.0;
+ else
+ uncertainfrac = nd2 / nd1;
}
+ else
+ uncertainfrac = 0.5;
+ uncertain = 1.0 - matchfreq1 - nullfrac1;
+ CLAMP_PROBABILITY(uncertain);
+ selec = matchfreq1 + uncertainfrac * uncertain;
}
else
{
@@ -2403,15 +2413,20 @@ eqjoinsel_semi(Oid operator,
*/
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
- if (vardata1->rel)
- nd1 = Min(nd1, vardata1->rel->rows);
- if (vardata2->rel)
- nd2 = Min(nd2, vardata2->rel->rows);
+ if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+ {
+ if (vardata1->rel)
+ nd1 = Min(nd1, vardata1->rel->rows);
+ if (vardata2->rel)
+ nd2 = Min(nd2, vardata2->rel->rows);
- if (nd1 <= nd2 || nd2 <= 0)
- selec = 1.0 - nullfrac1;
+ if (nd1 <= nd2 || nd2 <= 0)
+ selec = 1.0 - nullfrac1;
+ else
+ selec = (nd2 / nd1) * (1.0 - nullfrac1);
+ }
else
- selec = (nd2 / nd1) * (1.0 - nullfrac1);
+ selec = 0.5 * (1.0 - nullfrac1);
}
if (have_mcvs1)