aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/selfuncs.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2011-08-31 16:04:48 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2011-08-31 16:05:43 -0400
commit97930cf578e28c01f67fe4006ffcdbb5aedf18c2 (patch)
tree440c121a7408e8f2f3521d621a9b5187d36639ed /src/backend/utils/adt/selfuncs.c
parent5cfe33fe7bb5f5a29e9c2f6780c8278b8a7e5735 (diff)
downloadpostgresql-97930cf578e28c01f67fe4006ffcdbb5aedf18c2.tar.gz
postgresql-97930cf578e28c01f67fe4006ffcdbb5aedf18c2.zip
Improve eqjoinsel's ndistinct clamping to work for multiple levels of join.
This patch fixes an oversight in my commit 7f3eba30c9d622d1981b1368f2d79ba0999cdff2 of 2008-10-23. That patch accounted for baserel restriction clauses that reduced the number of rows coming out of a table (and hence the number of possibly-distinct values of a join variable), but not for join restriction clauses that might have been applied at a lower level of join. To account for the latter, look up the sizes of the min_lefthand and min_righthand inputs of the current join, and clamp with those in the same way as for the base relations. Noted while investigating a complaint from Ben Chobot, although this in itself doesn't seem to explain his report. Back-patch to 8.4; previous versions used different estimation methods for which this heuristic isn't relevant.
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r--src/backend/utils/adt/selfuncs.c81
1 files changed, 73 insertions, 8 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index e0658265ecb..18bbfe9b8d7 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -142,9 +142,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
FmgrInfo *opproc, bool isgt,
Datum constval, Oid consttype);
static double eqjoinsel_inner(Oid operator,
- VariableStatData *vardata1, VariableStatData *vardata2);
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ RelOptInfo *rel1, RelOptInfo *rel2);
static double eqjoinsel_semi(Oid operator,
- VariableStatData *vardata1, VariableStatData *vardata2);
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ RelOptInfo *rel1, RelOptInfo *rel2);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
@@ -173,6 +175,7 @@ static bool get_actual_variable_range(PlannerInfo *root,
VariableStatData *vardata,
Oid sortop,
Datum *min, Datum *max);
+static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
static Selectivity prefix_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon);
@@ -2008,24 +2011,47 @@ eqjoinsel(PG_FUNCTION_ARGS)
VariableStatData vardata1;
VariableStatData vardata2;
bool join_is_reversed;
+ RelOptInfo *rel1;
+ RelOptInfo *rel2;
get_join_variables(root, args, sjinfo,
&vardata1, &vardata2, &join_is_reversed);
+ /*
+ * Identify the join's direct input relations. We use the min lefthand
+ * and min righthand as the inputs, even though the join might actually
+ * get done with larger input relations. The min inputs are guaranteed to
+ * have been formed by now, though, and always using them ensures
+ * consistency of estimates.
+ */
+ if (!join_is_reversed)
+ {
+ rel1 = find_join_input_rel(root, sjinfo->min_lefthand);
+ rel2 = find_join_input_rel(root, sjinfo->min_righthand);
+ }
+ else
+ {
+ rel1 = find_join_input_rel(root, sjinfo->min_righthand);
+ rel2 = find_join_input_rel(root, sjinfo->min_lefthand);
+ }
+
switch (sjinfo->jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_FULL:
- selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
+ selec = eqjoinsel_inner(operator, &vardata1, &vardata2,
+ rel1, rel2);
break;
case JOIN_SEMI:
case JOIN_ANTI:
if (!join_is_reversed)
- selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
+ selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
+ rel1, rel2);
else
selec = eqjoinsel_semi(get_commutator(operator),
- &vardata2, &vardata1);
+ &vardata2, &vardata1,
+ rel2, rel1);
break;
default:
/* other values not expected here */
@@ -2051,7 +2077,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
*/
static double
eqjoinsel_inner(Oid operator,
- VariableStatData *vardata1, VariableStatData *vardata2)
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ RelOptInfo *rel1, RelOptInfo *rel2)
{
double selec;
double nd1;
@@ -2252,15 +2279,19 @@ eqjoinsel_inner(Oid operator,
* be, providing a crude correction for the selectivity of restriction
* clauses on those relations. (We don't do that in the other path
* since there we are comparing the nd values to stats for the whole
- * relations.)
+ * relations.) We can apply this clamp both with respect to the base
+ * relations from which the join variables come, and to the immediate
+ * input relations of the current join.
*/
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
if (vardata1->rel)
nd1 = Min(nd1, vardata1->rel->rows);
+ nd1 = Min(nd1, rel1->rows);
if (vardata2->rel)
nd2 = Min(nd2, vardata2->rel->rows);
+ nd2 = Min(nd2, rel2->rows);
selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
if (nd1 > nd2)
@@ -2287,7 +2318,8 @@ eqjoinsel_inner(Oid operator,
*/
static double
eqjoinsel_semi(Oid operator,
- VariableStatData *vardata1, VariableStatData *vardata2)
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ RelOptInfo *rel1, RelOptInfo *rel2)
{
double selec;
double nd1;
@@ -2435,8 +2467,10 @@ eqjoinsel_semi(Oid operator,
{
if (vardata1->rel)
nd1 = Min(nd1, vardata1->rel->rows);
+ nd1 = Min(nd1, rel1->rows);
if (vardata2->rel)
nd2 = Min(nd2, vardata2->rel->rows);
+ nd2 = Min(nd2, rel2->rows);
if (nd1 <= nd2 || nd2 <= 0)
selec = 1.0 - nullfrac1;
@@ -4759,6 +4793,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
return have_data;
}
+/*
+ * find_join_input_rel
+ * Look up the input relation for a join.
+ *
+ * We assume that the input relation's RelOptInfo must have been constructed
+ * already.
+ */
+static RelOptInfo *
+find_join_input_rel(PlannerInfo *root, Relids relids)
+{
+ RelOptInfo *rel = NULL;
+
+ switch (bms_membership(relids))
+ {
+ case BMS_EMPTY_SET:
+ /* should not happen */
+ break;
+ case BMS_SINGLETON:
+ rel = find_base_rel(root, bms_singleton_member(relids));
+ break;
+ case BMS_MULTIPLE:
+ rel = find_join_rel(root, relids);
+ break;
+ }
+
+ if (rel == NULL)
+ elog(ERROR, "could not find RelOptInfo for given relids");
+
+ return rel;
+}
+
/*-------------------------------------------------------------------------
*