diff options
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 292 |
1 files changed, 269 insertions, 23 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index edcfe91a47d..6f08deec5a1 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.103 2002/01/03 04:02:34 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.104 2002/03/01 04:09:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -92,11 +92,13 @@ #include "parser/parsetree.h" #include "utils/builtins.h" #include "utils/date.h" +#include "utils/datum.h" #include "utils/int8.h" #include "utils/lsyscache.h" #include "utils/selfuncs.h" #include "utils/syscache.h" + /* * Note: the default selectivity estimates are not chosen entirely at random. * We want them to be small enough to ensure that indexscans will be used if @@ -137,6 +139,7 @@ } while (0) +static bool get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -419,7 +422,9 @@ neqsel(PG_FUNCTION_ARGS) * * This is the guts of both scalarltsel and scalargtsel. The caller has * commuted the clause, if necessary, so that we can treat the Var as - * being on the left. + * being on the left. The caller must also make sure that the other side + * of the clause is a non-null Const, and dissect same into a value and + * datatype. * * This routine works for any datatype (or pair of datatypes) known to * convert_to_scalar(). If it is applied to some other datatype, @@ -427,11 +432,9 @@ neqsel(PG_FUNCTION_ARGS) */ static double scalarineqsel(Query *root, Oid operator, bool isgt, - Var *var, Node *other) + Var *var, Datum constval, Oid consttype) { Oid relid; - Datum constval; - Oid consttype; HeapTuple statsTuple; Form_pg_statistic stats; FmgrInfo opproc; @@ -454,22 +457,6 @@ scalarineqsel(Query *root, Oid operator, bool isgt, if (relid == InvalidOid) return DEFAULT_INEQ_SEL; - /* - * Can't do anything useful if the something is not a constant, - * either. - */ - if (!IsA(other, Const)) - return DEFAULT_INEQ_SEL; - - /* - * If the constant is NULL, assume operator is strict and return zero, - * ie, operator will never return TRUE. - */ - if (((Const *) other)->constisnull) - return 0.0; - constval = ((Const *) other)->constvalue; - consttype = ((Const *) other)->consttype; - /* get stats for the attribute */ statsTuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), @@ -697,6 +684,8 @@ scalarltsel(PG_FUNCTION_ARGS) int varRelid = PG_GETARG_INT32(3); Var *var; Node *other; + Datum constval; + Oid consttype; bool varonleft; bool isgt; double selec; @@ -711,6 +700,22 @@ scalarltsel(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); /* + * Can't do anything useful if the something is not a constant, + * either. + */ + if (!IsA(other, Const)) + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); + + /* + * If the constant is NULL, assume operator is strict and return zero, + * ie, operator will never return TRUE. + */ + if (((Const *) other)->constisnull) + PG_RETURN_FLOAT8(0.0); + constval = ((Const *) other)->constvalue; + consttype = ((Const *) other)->consttype; + + /* * Force the var to be on the left to simplify logic in scalarineqsel. */ if (varonleft) @@ -730,7 +735,7 @@ scalarltsel(PG_FUNCTION_ARGS) isgt = true; } - selec = scalarineqsel(root, operator, isgt, var, other); + selec = scalarineqsel(root, operator, isgt, var, constval, consttype); PG_RETURN_FLOAT8((float8) selec); } @@ -747,6 +752,8 @@ scalargtsel(PG_FUNCTION_ARGS) int varRelid = PG_GETARG_INT32(3); Var *var; Node *other; + Datum constval; + Oid consttype; bool varonleft; bool isgt; double selec; @@ -761,6 +768,22 @@ scalargtsel(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); /* + * Can't do anything useful if the something is not a constant, + * either. + */ + if (!IsA(other, Const)) + PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); + + /* + * If the constant is NULL, assume operator is strict and return zero, + * ie, operator will never return TRUE. + */ + if (((Const *) other)->constisnull) + PG_RETURN_FLOAT8(0.0); + constval = ((Const *) other)->constvalue; + consttype = ((Const *) other)->consttype; + + /* * Force the var to be on the left to simplify logic in scalarineqsel. */ if (varonleft) @@ -780,7 +803,7 @@ scalargtsel(PG_FUNCTION_ARGS) isgt = false; } - selec = scalarineqsel(root, operator, isgt, var, other); + selec = scalarineqsel(root, operator, isgt, var, constval, consttype); PG_RETURN_FLOAT8((float8) selec); } @@ -1696,6 +1719,229 @@ icnlikejoinsel(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(result); } +/* + * mergejoinscansel - Scan selectivity of merge join. + * + * A merge join will stop as soon as it exhausts either input stream. + * Therefore, if we can estimate the ranges of both input variables, + * we can estimate how much of the input will actually be read. This + * can have a considerable impact on the cost when using indexscans. + * + * clause should be a clause already known to be mergejoinable. + * + * *leftscan is set to the fraction of the left-hand variable expected + * to be scanned (0 to 1), and similarly *rightscan for the right-hand + * variable. + */ +void +mergejoinscansel(Query *root, Node *clause, + Selectivity *leftscan, + Selectivity *rightscan) +{ + Var *left, + *right; + Oid opno, + lsortop, + rsortop, + ltop, + gtop, + revltop; + Datum leftmax, + rightmax; + double selec; + + /* Set default results if we can't figure anything out. */ + *leftscan = *rightscan = 1.0; + + /* Deconstruct the merge clause */ + if (!is_opclause(clause)) + return; /* shouldn't happen */ + opno = ((Oper *) ((Expr *) clause)->oper)->opno; + left = get_leftop((Expr *) clause); + right = get_rightop((Expr *) clause); + if (!right) + return; /* shouldn't happen */ + + /* Can't do anything if inputs are not Vars */ + if (!IsA(left, Var) ||!IsA(right, Var)) + return; + + /* Verify mergejoinability and get left and right "<" operators */ + if (!op_mergejoinable(opno, + left->vartype, + right->vartype, + &lsortop, + &rsortop)) + return; /* shouldn't happen */ + + /* Try to get maximum values of both vars */ + if (!get_var_maximum(root, left, lsortop, &leftmax)) + return; /* no max available from stats */ + + if (!get_var_maximum(root, right, rsortop, &rightmax)) + return; /* no max available from stats */ + + /* Look up the "left < right" and "left > right" operators */ + op_mergejoin_crossops(opno, <op, >op, NULL, NULL); + + /* Look up the "right < left" operator */ + revltop = get_commutator(gtop); + if (!OidIsValid(revltop)) + return; /* shouldn't happen */ + + /* + * Now, the fraction of the left variable that will be scanned is the + * fraction that's <= the right-side maximum value. But only believe + * non-default estimates, else stick with our 1.0. + */ + selec = scalarineqsel(root, ltop, false, left, + rightmax, right->vartype); + if (selec != DEFAULT_INEQ_SEL) + *leftscan = selec; + + /* And similarly for the right variable. */ + selec = scalarineqsel(root, revltop, false, right, + leftmax, left->vartype); + if (selec != DEFAULT_INEQ_SEL) + *rightscan = selec; + + /* + * Only one of the two fractions can really be less than 1.0; believe + * the smaller estimate and reset the other one to exactly 1.0. + */ + if (*leftscan > *rightscan) + *leftscan = 1.0; + else + *rightscan = 1.0; +} + +/* + * get_var_maximum + * Estimate the maximum value of the specified variable. + * If successful, store value in *max and return TRUE. + * If no data available, return FALSE. + * + * sortop is the "<" comparison operator to use. (To extract the + * minimum instead of the maximum, just pass the ">" operator instead.) + */ +static bool +get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max) +{ + Datum tmax = 0; + bool have_max = false; + Oid relid; + HeapTuple statsTuple; + Form_pg_statistic stats; + int16 typLen; + bool typByVal; + Datum *values; + int nvalues; + int i; + + relid = getrelid(var->varno, root->rtable); + if (relid == InvalidOid) + return false; + + /* get stats for the attribute */ + statsTuple = SearchSysCache(STATRELATT, + ObjectIdGetDatum(relid), + Int16GetDatum(var->varattno), + 0, 0); + if (!HeapTupleIsValid(statsTuple)) + { + /* no stats available, so default result */ + return false; + } + stats = (Form_pg_statistic) GETSTRUCT(statsTuple); + + get_typlenbyval(var->vartype, &typLen, &typByVal); + + /* + * If there is a histogram, grab the last or first value as appropriate. + * + * If there is a histogram that is sorted with some other operator + * than the one we want, fail --- this suggests that there is data + * we can't use. + */ + if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod, + STATISTIC_KIND_HISTOGRAM, sortop, + &values, &nvalues, + NULL, NULL)) + { + if (nvalues > 0) + { + tmax = datumCopy(values[nvalues-1], typByVal, typLen); + have_max = true; + } + free_attstatsslot(var->vartype, values, nvalues, NULL, 0); + } + else + { + Oid rsortop = get_commutator(sortop); + + if (OidIsValid(rsortop) && + get_attstatsslot(statsTuple, var->vartype, var->vartypmod, + STATISTIC_KIND_HISTOGRAM, rsortop, + &values, &nvalues, + NULL, NULL)) + { + if (nvalues > 0) + { + tmax = datumCopy(values[0], typByVal, typLen); + have_max = true; + } + free_attstatsslot(var->vartype, values, nvalues, NULL, 0); + } + else if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + &values, &nvalues, + NULL, NULL)) + { + free_attstatsslot(var->vartype, values, nvalues, NULL, 0); + ReleaseSysCache(statsTuple); + return false; + } + } + + /* + * If we have most-common-values info, look for a large MCV. This + * is needed even if we also have a histogram, since the histogram + * excludes the MCVs. However, usually the MCVs will not be the + * extreme values, so avoid unnecessary data copying. + */ + if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod, + STATISTIC_KIND_MCV, InvalidOid, + &values, &nvalues, + NULL, NULL)) + { + bool large_mcv = false; + FmgrInfo opproc; + + fmgr_info(get_opcode(sortop), &opproc); + + for (i = 0; i < nvalues; i++) + { + if (!have_max) + { + tmax = values[i]; + large_mcv = have_max = true; + } + else if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i]))) + { + tmax = values[i]; + large_mcv = true; + } + } + if (large_mcv) + tmax = datumCopy(tmax, typByVal, typLen); + free_attstatsslot(var->vartype, values, nvalues, NULL, 0); + } + + ReleaseSysCache(statsTuple); + + *max = tmax; + return have_max; +} /* * convert_to_scalar |