aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/selfuncs.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2007-03-21 22:18:12 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2007-03-21 22:18:12 +0000
commit54d20024c1dad339acc8624d7ca902b762fe0844 (patch)
tree107270059759b4e114b661838d896da4f4df8ada /src/backend/utils/adt/selfuncs.c
parent2b49e5d3cb8b6a71797969c50f5c247f232de989 (diff)
downloadpostgresql-54d20024c1dad339acc8624d7ca902b762fe0844.tar.gz
postgresql-54d20024c1dad339acc8624d7ca902b762fe0844.zip
Fix some problems with selectivity estimation for partial indexes.
First, genericcostestimate() was being way too liberal about including partial-index conditions in its selectivity estimate, resulting in substantial underestimates for situations such as an indexqual "x = 42" used with an index on x "WHERE x >= 40 AND x < 50". While the code is intentionally set up to favor selecting partial indexes when available, this was too much... Second, choose_bitmap_and() was likewise easily fooled by cases of this type, since it would similarly think that the partial index had selectivity independent of the indexqual. Fixed by using predicate_implied_by() rather than simple equality checks to determine redundancy. This is a good deal more expensive but I don't see much alternative. At least the extra cost is only paid when there's actually a partial index under consideration. Per report from Jeff Davis. I'm not going to risk back-patching this, though.
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r--src/backend/utils/adt/selfuncs.c51
1 files changed, 27 insertions, 24 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index df61fea567b..a92317aeac1 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.229 2007/03/17 00:11:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.230 2007/03/21 22:18:12 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -86,6 +86,7 @@
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
+#include "optimizer/predtest.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
#include "parser/parse_coerce.h"
@@ -4775,36 +4776,38 @@ genericcostestimate(PlannerInfo *root,
List *selectivityQuals;
ListCell *l;
- /*
+ /*----------
* If the index is partial, AND the index predicate with the explicitly
* given indexquals to produce a more accurate idea of the index
- * selectivity. This may produce redundant clauses. We get rid of exact
- * duplicates in the code below. We expect that most cases of partial
- * redundancy (such as "x < 4" from the qual and "x < 5" from the
- * predicate) will be recognized and handled correctly by
- * clauselist_selectivity(). This assumption is somewhat fragile, since
- * it depends on predicate_implied_by() and clauselist_selectivity()
- * having similar capabilities, and there are certainly many cases where
- * we will end up with a too-low selectivity estimate. This will bias the
- * system in favor of using partial indexes where possible, which is not
- * necessarily a bad thing. But it'd be nice to do better someday.
+ * selectivity. However, we need to be careful not to insert redundant
+ * clauses, because clauselist_selectivity() is easily fooled into
+ * computing a too-low selectivity estimate. Our approach is to add
+ * only the index predicate clause(s) that cannot be proven to be implied
+ * by the given indexquals. This successfully handles cases such as a
+ * qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
+ * There are many other cases where we won't detect redundancy, leading
+ * to a too-low selectivity estimate, which will bias the system in favor
+ * of using partial indexes where possible. That is not necessarily bad
+ * though.
*
- * Note that index->indpred and indexQuals are both in implicit-AND form,
- * so ANDing them together just takes merging the lists. However,
- * eliminating duplicates is a bit trickier because indexQuals contains
- * RestrictInfo nodes and the indpred does not. It is okay to pass a
- * mixed list to clauselist_selectivity, but we have to work a bit to
- * generate a list without logical duplicates. (We could just list_union
- * indpred and strippedQuals, but then we'd not get caching of per-qual
- * selectivity estimates.)
+ * Note that indexQuals contains RestrictInfo nodes while the indpred
+ * does not. This is OK for both predicate_implied_by() and
+ * clauselist_selectivity().
+ *----------
*/
if (index->indpred != NIL)
{
- List *strippedQuals;
- List *predExtraQuals;
+ List *predExtraQuals = NIL;
+
+ foreach(l, index->indpred)
+ {
+ Node *predQual = (Node *) lfirst(l);
+ List *oneQual = list_make1(predQual);
- strippedQuals = get_actual_clauses(indexQuals);
- predExtraQuals = list_difference(index->indpred, strippedQuals);
+ if (!predicate_implied_by(oneQual, indexQuals))
+ predExtraQuals = list_concat(predExtraQuals, oneQual);
+ }
+ /* list_concat avoids modifying the passed-in indexQuals list */
selectivityQuals = list_concat(predExtraQuals, indexQuals);
}
else