aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/selfuncs.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2001-06-25 21:11:45 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2001-06-25 21:11:45 +0000
commit4d58a7ca878bbb0a252e1d8175a7ea3a385c1d9e (patch)
treee4e6dae9e84149ea8ffe2e6d7c35cf101b6b10ab /src/backend/utils/adt/selfuncs.c
parentc31545af2720591b21ad0039b3160225acd328b9 (diff)
downloadpostgresql-4d58a7ca878bbb0a252e1d8175a7ea3a385c1d9e.tar.gz
postgresql-4d58a7ca878bbb0a252e1d8175a7ea3a385c1d9e.zip
Optimizer can now estimate selectivity of IS NULL, IS NOT NULL,
IS TRUE, etc, with some degree of verisimilitude. Split out selectivity support functions from builtins.h into a new header file selfuncs.h, so as to reduce the number of header files builtins.h must depend on. Fix a few missing inclusions exposed thereby. From Joe Conway, with some kibitzing from Tom Lane.
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r--src/backend/utils/adt/selfuncs.c328
1 files changed, 327 insertions, 1 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 099cc37e9dd..3ab3881c257 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.93 2001/06/09 22:16:18 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.94 2001/06/25 21:11:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -93,6 +93,7 @@
#include "utils/date.h"
#include "utils/int8.h"
#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
#include "utils/syscache.h"
/*
@@ -117,6 +118,10 @@
/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT 200
+/*
+ * default selectivity estimate for boolean and null test nodes
+ *
+ * These are the fallbacks used by booltestsel() and nulltestsel() when the
+ * tested argument is not a usable Var or no pg_statistic tuple is found:
+ *	DEFAULT_UNK_SEL		- IS UNKNOWN / IS NULL
+ *	DEFAULT_NOT_UNK_SEL	- IS NOT UNKNOWN / IS NOT NULL (complement of the
+ *						  above, so the two always sum to 1.0)
+ *	DEFAULT_BOOL_SEL	- IS [NOT] TRUE / IS [NOT] FALSE on a Var that has
+ *						  no statistics
+ */
+#define DEFAULT_UNK_SEL 0.005
+#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
+#define DEFAULT_BOOL_SEL 0.5
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
@@ -934,6 +939,327 @@ icnlikesel(PG_FUNCTION_ARGS)
}
/*
+ *		booltestsel		- Selectivity of BooleanTest Node.
+ *
+ * Estimates the fraction of rows for which "arg IS [NOT] TRUE / FALSE /
+ * UNKNOWN" holds.
+ *
+ *	root:		query being planned; its rtable is used to map the Var's
+ *				varno to a relation OID.
+ *	clause:		the BooleanTest node to estimate.
+ *	varRelid:	when nonzero, the argument is treated as a Var only if its
+ *				varno equals varRelid; otherwise the non-Var path is taken.
+ *
+ * Three sources of information are tried, in order of preference:
+ *	1. pg_statistic tuple with an MCV slot: null fraction plus the frequency
+ *	   of the first most-common value give exact true/false/null fractions.
+ *	2. pg_statistic tuple without MCV slot: null fraction is known, the
+ *	   non-null rows are assumed to split evenly between TRUE and FALSE.
+ *	3. no statistics: fixed defaults (DEFAULT_UNK_SEL and friends).
+ * For a non-Var argument, clause_selectivity() of the argument is used for
+ * the TRUE/FALSE tests and the defaults for the UNKNOWN tests.
+ *
+ * Returns a Selectivity; the statistics-based paths clamp it to [0, 1].
+ */
+Selectivity
+booltestsel(Query *root, BooleanTest *clause, int varRelid)
+{
+	Var		   *var;
+	Node	   *arg;
+	Oid			relid;
+	HeapTuple	statsTuple;
+	Datum	   *values;
+	int			nvalues;
+	float4	   *numbers;
+	int			nnumbers;
+	double		selec;
+
+	Assert(clause && IsA(clause, BooleanTest));
+
+	arg = (Node *) clause->arg;
+
+	/*
+	 * Ignore any binary-compatible relabeling (probably unnecessary,
+	 * but can't hurt)
+	 */
+	if (IsA(arg, RelabelType))
+		arg = ((RelabelType *) arg)->arg;
+
+	if (IsA(arg, Var) && (varRelid == 0 || varRelid == ((Var *) arg)->varno))
+		var = (Var *) arg;
+	else
+	{
+		/*
+		 * If argument is not a Var, we can't get statistics for it, but
+		 * perhaps clause_selectivity can do something with it.  We ignore
+		 * the possibility of a NULL value when using clause_selectivity,
+		 * and just assume the value is either TRUE or FALSE.
+		 */
+		switch (clause->booltesttype)
+		{
+			case IS_UNKNOWN:
+				selec = DEFAULT_UNK_SEL;
+				break;
+			case IS_NOT_UNKNOWN:
+				selec = DEFAULT_NOT_UNK_SEL;
+				break;
+			case IS_TRUE:
+			case IS_NOT_FALSE:
+				selec = (double) clause_selectivity(root, arg, varRelid);
+				break;
+			case IS_FALSE:
+			case IS_NOT_TRUE:
+				selec = 1.0 - (double) clause_selectivity(root, arg, varRelid);
+				break;
+			default:
+				elog(ERROR, "booltestsel: unexpected booltesttype %d",
+					 (int) clause->booltesttype);
+				selec = 0.0;	/* Keep compiler quiet */
+				break;
+		}
+		return (Selectivity) selec;
+	}
+
+	/* get stats for the attribute, if available */
+	relid = getrelid(var->varno, root->rtable);
+	if (relid == InvalidOid)
+		statsTuple = NULL;
+	else
+		statsTuple = SearchSysCache(STATRELATT,
+									ObjectIdGetDatum(relid),
+									Int16GetDatum(var->varattno),
+									0, 0);
+
+	if (HeapTupleIsValid(statsTuple))
+	{
+		Form_pg_statistic stats;
+		double		freq_null;
+
+		stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
+
+		freq_null = stats->stanullfrac;
+
+		if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
+							 STATISTIC_KIND_MCV, InvalidOid,
+							 &values, &nvalues,
+							 &numbers, &nnumbers)
+			&& nnumbers > 0)
+		{
+			double		freq_true;
+			double		freq_false;
+
+			/*
+			 * Get first MCV frequency and derive frequency for true.
+			 * If the first MCV is FALSE, TRUE is whatever remains after
+			 * removing that frequency and the NULLs.
+			 */
+			if (DatumGetBool(values[0]))
+				freq_true = numbers[0];
+			else
+				freq_true = 1.0 - numbers[0] - freq_null;
+
+			/*
+			 * Next derive frequency for false.
+			 * Then use these as appropriate to derive frequency for each case.
+			 */
+			freq_false = 1.0 - freq_true - freq_null;
+
+			switch (clause->booltesttype)
+			{
+				case IS_UNKNOWN:
+					/* select only NULL values */
+					selec = freq_null;
+					break;
+				case IS_NOT_UNKNOWN:
+					/* select non-NULL values */
+					selec = 1.0 - freq_null;
+					break;
+				case IS_TRUE:
+					/* select only TRUE values */
+					selec = freq_true;
+					break;
+				case IS_NOT_TRUE:
+					/* select non-TRUE values (FALSE and NULL) */
+					selec = 1.0 - freq_true;
+					break;
+				case IS_FALSE:
+					/* select only FALSE values */
+					selec = freq_false;
+					break;
+				case IS_NOT_FALSE:
+					/* select non-FALSE values (TRUE and NULL) */
+					selec = 1.0 - freq_false;
+					break;
+				default:
+					elog(ERROR, "booltestsel: unexpected booltesttype %d",
+						 (int) clause->booltesttype);
+					selec = 0.0;	/* Keep compiler quiet */
+					break;
+			}
+
+			free_attstatsslot(var->vartype, values, nvalues,
+							  numbers, nnumbers);
+		}
+		else
+		{
+			/*
+			 * No most-common-value info available.
+			 * Still have null fraction information,
+			 * so use it for IS [NOT] UNKNOWN.
+			 * Otherwise adjust for null fraction and
+			 * assume an even split of TRUE and FALSE
+			 * among the non-NULL values.
+			 */
+			switch (clause->booltesttype)
+			{
+				case IS_UNKNOWN:
+					/*
+					 * Use freq_null directly.
+					 */
+					selec = freq_null;
+					break;
+				case IS_NOT_UNKNOWN:
+					/*
+					 * Select not unknown (not null) values.
+					 * Calculate from freq_null.
+					 */
+					selec = 1.0 - freq_null;
+					break;
+				case IS_TRUE:
+				case IS_FALSE:
+					/*
+					 * Assume we select half of the non-NULL values.
+					 */
+					selec = (1.0 - freq_null) / 2.0;
+					break;
+				case IS_NOT_TRUE:
+				case IS_NOT_FALSE:
+					/*
+					 * These tests also accept NULL, so we select the NULLs
+					 * plus half of the non-NULL values; this keeps the
+					 * estimate consistent with the MCV case above.
+					 * Equivalent to freq_null + (1.0 - freq_null) / 2.0.
+					 */
+					selec = (freq_null + 1.0) / 2.0;
+					break;
+				default:
+					elog(ERROR, "booltestsel: unexpected booltesttype %d",
+						 (int) clause->booltesttype);
+					selec = 0.0;	/* Keep compiler quiet */
+					break;
+			}
+		}
+
+		ReleaseSysCache(statsTuple);
+	}
+	else
+	{
+		/*
+		 * No VACUUM ANALYZE stats available, so use a default value.
+		 * (Note: not much point in recursing to clause_selectivity here.)
+		 */
+		switch (clause->booltesttype)
+		{
+			case IS_UNKNOWN:
+				selec = DEFAULT_UNK_SEL;
+				break;
+			case IS_NOT_UNKNOWN:
+				selec = DEFAULT_NOT_UNK_SEL;
+				break;
+			case IS_TRUE:
+			case IS_NOT_TRUE:
+			case IS_FALSE:
+			case IS_NOT_FALSE:
+				selec = DEFAULT_BOOL_SEL;
+				break;
+			default:
+				elog(ERROR, "booltestsel: unexpected booltesttype %d",
+					 (int) clause->booltesttype);
+				selec = 0.0;	/* Keep compiler quiet */
+				break;
+		}
+	}
+
+	/* result should be in range, but make sure... */
+	if (selec < 0.0)
+		selec = 0.0;
+	else if (selec > 1.0)
+		selec = 1.0;
+
+	return (Selectivity) selec;
+}
+
+/*
+ *		nulltestsel		- Selectivity of NullTest Node.
+ *
+ * Estimates the fraction of rows for which "arg IS NULL" or
+ * "arg IS NOT NULL" holds.
+ *
+ *	root:		query being planned; its rtable maps the Var's varno to a
+ *				relation OID.
+ *	clause:		the NullTest node to estimate.
+ *	varRelid:	when nonzero, only a Var whose varno equals varRelid is
+ *				looked up in the statistics; anything else gets the default.
+ *
+ * When a pg_statistic tuple exists for the column, its stanullfrac is used
+ * (directly for IS NULL, complemented for IS NOT NULL).  In every other
+ * situation the fixed default for the test type is returned.
+ */
+Selectivity
+nulltestsel(Query *root, NullTest *clause, int varRelid)
+{
+	Node	   *testarg;
+	Var		   *testvar;
+	Oid			reloid;
+	HeapTuple	tuple;
+	double		fallback;
+	double		result;
+
+	Assert(clause && IsA(clause, NullTest));
+
+	/*
+	 * Choose the no-statistics default up front.  This also rejects any
+	 * unknown test type, so the code below only has to distinguish
+	 * IS_NULL from IS_NOT_NULL.
+	 */
+	if (clause->nulltesttype == IS_NULL)
+		fallback = DEFAULT_UNK_SEL;
+	else if (clause->nulltesttype == IS_NOT_NULL)
+		fallback = DEFAULT_NOT_UNK_SEL;
+	else
+	{
+		elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
+			 (int) clause->nulltesttype);
+		return (Selectivity) 0;		/* keep compiler quiet */
+	}
+
+	/* Look through a binary-compatible relabeling, if there is one */
+	testarg = (Node *) clause->arg;
+	if (IsA(testarg, RelabelType))
+		testarg = ((RelabelType *) testarg)->arg;
+
+	/* Anything other than a Var of the relation of interest: punt */
+	if (!IsA(testarg, Var))
+		return (Selectivity) fallback;
+	testvar = (Var *) testarg;
+	if (varRelid != 0 && varRelid != testvar->varno)
+		return (Selectivity) fallback;
+
+	/* No underlying relation to look statistics up for: punt */
+	reloid = getrelid(testvar->varno, root->rtable);
+	if (reloid == InvalidOid)
+		return (Selectivity) fallback;
+
+	/* Fetch the column's statistics tuple, if VACUUM ANALYZE made one */
+	tuple = SearchSysCache(STATRELATT,
+						   ObjectIdGetDatum(reloid),
+						   Int16GetDatum(testvar->varattno),
+						   0, 0);
+	if (!HeapTupleIsValid(tuple))
+	{
+		/* Nothing recorded for this column, so make a guess */
+		result = fallback;
+	}
+	else
+	{
+		double		nullfrac;
+
+		nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
+
+		/* IS NULL picks the NULLs, IS NOT NULL picks everything else */
+		result = (clause->nulltesttype == IS_NULL) ? nullfrac : 1.0 - nullfrac;
+
+		ReleaseSysCache(tuple);
+	}
+
+	/* Should already be a valid fraction, but clamp to be safe */
+	if (result < 0.0)
+		result = 0.0;
+	else if (result > 1.0)
+		result = 1.0;
+
+	return (Selectivity) result;
+}
+
+/*
* eqjoinsel - Join selectivity of "="
*/
Datum