diff options
Diffstat (limited to 'src/backend/optimizer/path/clausesel.c')
-rw-r--r-- | src/backend/optimizer/path/clausesel.c | 245 |
1 files changed, 235 insertions, 10 deletions
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index d3a494f9bc9..edce3d21291 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.29 2000/01/24 07:16:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,23 @@ #include "utils/lsyscache.h" +/* + * Data structure for accumulating info about possible range-query + * clause pairs in clauselist_selectivity. + */ +typedef struct RangeQueryClause { + struct RangeQueryClause *next; /* next in linked list */ + Node *var; /* The common variable of the clauses */ + bool have_lobound; /* found a low-bound clause yet? */ + bool have_hibound; /* found a high-bound clause yet? */ + Selectivity lobound; /* Selectivity of a var > something clause */ + Selectivity hibound; /* Selectivity of a var < something clause */ +} RangeQueryClause; + +static void addRangeClause(RangeQueryClause **rqlist, Node *clause, + int flag, bool isLTsel, Selectivity s2); + + /**************************************************************************** * ROUTINES TO COMPUTE SELECTIVITIES ****************************************************************************/ @@ -55,30 +72,238 @@ restrictlist_selectivity(Query *root, * must be returned. * * See clause_selectivity() for the meaning of the varRelid parameter. + * + * Our basic approach is to take the product of the selectivities of the + * subclauses. However, that's only right if the subclauses have independent + * probabilities, and in reality they are often NOT independent. So, + * we want to be smarter where we can. + + * Currently, the only extra smarts we have is to recognize "range queries", + * such as "x > 34 AND x < 42". Clauses are recognized as possible range + * query components if they are restriction opclauses whose operators have + * scalarltsel() or scalargtsel() as their restriction selectivity estimator. + * We pair up clauses of this form that refer to the same variable. An + * unpairable clause of this kind is simply multiplied into the selectivity + * product in the normal way. But when we find a pair, we know that the + * selectivities represent the relative positions of the low and high bounds + * within the column's range, so instead of figuring the selectivity as + * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this, + * see that hisel is the fraction of the range below the high bound, while + * losel is the fraction above the low bound; so hisel can be interpreted + * directly as a 0..1 value but we need to convert losel to 1-losel before + * interpreting it as a value. Then the available range is 1-losel to hisel.) + * If the calculation yields zero or negative, however, we chicken out and + * use the default interpretation; that probably means that one or both + * selectivities is a default estimate rather than an actual range value. + * Of course this is all very dependent on the behavior of + * scalarltsel/scalargtsel; perhaps some day we can generalize the approach. */ Selectivity clauselist_selectivity(Query *root, List *clauses, int varRelid) { - Selectivity s1 = 1.0; - List *clause; + Selectivity s1 = 1.0; + RangeQueryClause *rqlist = NULL; + List *clist; - /* Use the product of the selectivities of the subclauses. - * XXX this is too optimistic, since the subclauses - * are very likely not independent... + /* + * Initial scan over clauses. Anything that doesn't look like a + * potential rangequery clause gets multiplied into s1 and forgotten. + * Anything that does gets inserted into an rqlist entry. */ - foreach(clause, clauses) + foreach(clist, clauses) { - Selectivity s2 = clause_selectivity(root, - (Node *) lfirst(clause), - varRelid); + Node *clause = (Node *) lfirst(clist); + Selectivity s2; + + /* + * See if it looks like a restriction clause with a constant. + * (If it's not a constant we can't really trust the selectivity!) + * NB: for consistency of results, this fragment of code had + * better match what clause_selectivity() would do. + */ + if (varRelid != 0 || NumRelids(clause) == 1) + { + int relidx; + AttrNumber attno; + Datum constval; + int flag; + + get_relattval(clause, varRelid, + &relidx, &attno, &constval, &flag); + if (relidx != 0 && (flag & SEL_CONSTANT)) + { + /* if get_relattval succeeded, it must be an opclause */ + Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno; + RegProcedure oprrest = get_oprrest(opno); + + if (!oprrest) + s2 = (Selectivity) 0.5; + else + s2 = restriction_selectivity(oprrest, opno, + getrelid(relidx, + root->rtable), + attno, + constval, flag); + /* + * If we reach here, we have computed the same result + * that clause_selectivity would, so we can just use s2 + * if it's the wrong oprrest. But if it's the right + * oprrest, add the clause to rqlist for later processing. + */ + switch (oprrest) + { + case F_SCALARLTSEL: + addRangeClause(&rqlist, clause, flag, true, s2); + break; + case F_SCALARGTSEL: + addRangeClause(&rqlist, clause, flag, false, s2); + break; + default: + /* Just merge the selectivity in generically */ + s1 = s1 * s2; + break; + } + continue; /* drop to loop bottom */ + } + } + /* Not the right form, so treat it generically. */ + s2 = clause_selectivity(root, clause, varRelid); s1 = s1 * s2; } + + /* + * Now scan the rangequery pair list. + */ + while (rqlist != NULL) + { + RangeQueryClause *rqnext; + + if (rqlist->have_lobound && rqlist->have_hibound) + { + /* Successfully matched a pair of range clauses */ + Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0; + + if (s2 > 0.0) + { + /* All our hard work has paid off! */ + s1 *= s2; + } + else + { + /* One or both is probably a default estimate, + * so punt and just merge them in generically. + */ + s1 *= rqlist->hibound * rqlist->lobound; + } + } + else + { + /* Only found one of a pair, merge it in generically */ + if (rqlist->have_lobound) + s1 *= rqlist->lobound; + else + s1 *= rqlist->hibound; + } + /* release storage and advance */ + rqnext = rqlist->next; + pfree(rqlist); + rqlist = rqnext; + } + return s1; } /* + * addRangeClause --- add a new range clause for clauselist_selectivity + * + * Here is where we try to match up pairs of range-query clauses + */ +static void +addRangeClause(RangeQueryClause **rqlist, Node *clause, + int flag, bool isLTsel, Selectivity s2) +{ + RangeQueryClause *rqelem; + Node *var; + bool is_lobound; + + /* get_relattval sets flag&SEL_RIGHT if the var is on the LEFT. */ + if (flag & SEL_RIGHT) + { + var = (Node *) get_leftop((Expr *) clause); + is_lobound = ! isLTsel; /* x < something is high bound */ + } + else + { + var = (Node *) get_rightop((Expr *) clause); + is_lobound = isLTsel; /* something < x is low bound */ + } + + for (rqelem = *rqlist; rqelem; rqelem = rqelem->next) + { + /* We use full equal() here because the "var" might be a function + * of one or more attributes of the same relation... + */ + if (! equal(var, rqelem->var)) + continue; + /* Found the right group to put this clause in */ + if (is_lobound) + { + if (! rqelem->have_lobound) + { + rqelem->have_lobound = true; + rqelem->lobound = s2; + } + else + { + /* We have found two similar clauses, such as + * x < y AND x < z. Keep only the more restrictive one. + */ + if (rqelem->lobound > s2) + rqelem->lobound = s2; + } + } + else + { + if (! rqelem->have_hibound) + { + rqelem->have_hibound = true; + rqelem->hibound = s2; + } + else + { + /* We have found two similar clauses, such as + * x > y AND x > z. Keep only the more restrictive one. + */ + if (rqelem->hibound > s2) + rqelem->hibound = s2; + } + } + return; + } + + /* No matching var found, so make a new clause-pair data structure */ + rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); + rqelem->var = var; + if (is_lobound) + { + rqelem->have_lobound = true; + rqelem->have_hibound = false; + rqelem->lobound = s2; + } + else + { + rqelem->have_lobound = false; + rqelem->have_hibound = true; + rqelem->hibound = s2; + } + rqelem->next = *rqlist; + *rqlist = rqelem; +} + + +/* * clause_selectivity - * Compute the selectivity of a general boolean expression clause. * |