aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/prep/prepqual.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/prep/prepqual.c')
-rw-r--r--src/backend/optimizer/prep/prepqual.c230
1 files changed, 220 insertions, 10 deletions
diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c
index 72e26f35074..41288bd3687 100644
--- a/src/backend/optimizer/prep/prepqual.c
+++ b/src/backend/optimizer/prep/prepqual.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.19 1999/09/12 18:08:17 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.20 1999/09/13 00:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -29,6 +29,9 @@ static Expr *find_ors(Expr *qual);
static Expr *or_normalize(List *orlist);
static Expr *find_ands(Expr *qual);
static Expr *and_normalize(List *andlist);
+static Expr *qual_cleanup(Expr *qual);
+static List *remove_duplicates(List *list);
+static int count_bool_nodes(Expr *qual);
/*****************************************************************************
*
@@ -37,21 +40,125 @@ static Expr *and_normalize(List *andlist);
* These routines convert an arbitrary boolean expression into
* conjunctive normal form or disjunctive normal form.
*
- * The result of these routines differs from a "true" CNF/DNF in that
- * we do not bother to detect common subexpressions; e.g., ("AND" A A)
- * does not get simplified to A. Testing for identical subexpressions
- * is a waste of time if the query is written intelligently, and it
- * takes an unreasonable amount of time if there are many subexpressions
- * (since it's roughly O(N^2) in the number of subexpressions).
+ * Normalization is only carried out in the top AND/OR/NOT portion
+ * of the given tree; we do not attempt to normalize boolean expressions
+ * that may appear as arguments of operators or functions in the tree.
*
- * Because of that restriction, it would be unwise to apply dnfify()
- * to the result of cnfify() or vice versa. Instead apply both to
- * the original user-written qual expression.
+ * Query qualifications (WHERE clauses) are ordinarily transformed into
+ * CNF, ie, AND-of-ORs form, because then the optimizer can use any one
+ * of the independent AND clauses as a filtering qualification. However,
+ * quals that are naturally expressed as OR-of-ANDs can suffer an
+ * exponential growth in size in this transformation, so we also consider
+ * converting to DNF (OR-of-ANDs), and we may also leave well enough alone
+ * if both transforms cause unreasonable growth. The OR-of-ANDs format
+ * is useful for indexscan implementation, so we prefer that format when
+ * there is just one relation involved.
*
+ * canonicalize_qual() does "smart" conversion to either CNF or DNF, per
+ * the above considerations, while cnfify() and dnfify() simply perform
+ * the demanded transformation. The latter two may become dead code
+ * eventually.
*****************************************************************************/
/*
+ * canonicalize_qual
+ * Convert a qualification to the most useful normalized form.
+ *
+ * Returns the modified qualification.
+ *
+ * If 'removeAndFlag' is true then it removes explicit AND at the top level,
+ * producing a list of implicitly-ANDed conditions. Otherwise, a regular
+ * boolean expression is returned. Since most callers pass 'true', we
+ * prefer to declare the result as List *, not Expr *.
+ *
+ * XXX This code could be much smarter, at the cost of also being slower,
+ * if we tried to compute selectivities and/or see whether there are
+ * actually indexes to support an indexscan implementation of a DNF qual.
+ * We could even try converting the CNF clauses that mention a single
+ * relation into a single DNF clause to see if that looks cheaper to
+ * implement. For now, though, we just try to avoid doing anything
+ * quite as stupid as unconditionally converting to CNF was...
+ */
+List *
+canonicalize_qual(Expr *qual, bool removeAndFlag)
+{
+ Expr *newqual,
+ *cnfqual,
+ *dnfqual;
+ int qualcnt,
+ cnfcnt,
+ dnfcnt;
+
+ if (qual == NULL)
+ return NIL;
+
+ /* Flatten AND and OR groups throughout the tree.
+ * This improvement is always worthwhile, so do it unconditionally.
+ */
+ qual = flatten_andors(qual);
+ /* Push down NOTs. We do this only in the top-level boolean
+ * expression, without examining arguments of operators/functions.
+ * Even so, it might not be a win if we are unable to find negators
+ * for all the operators involved; so we keep the flattened-but-not-
+ * NOT-pushed qual as the reference point for comparsions.
+ */
+ newqual = find_nots(qual);
+ /*
+ * Generate both CNF and DNF forms from newqual.
+ */
+ /* Normalize into conjunctive normal form, and clean up the result. */
+ cnfqual = qual_cleanup(find_ors(newqual));
+ /* Likewise for DNF */
+ dnfqual = qual_cleanup(find_ands(newqual));
+
+ /*
+ * Now, choose whether to return qual, cnfqual, or dnfqual.
+ *
+ * First heuristic is to forget about either CNF or DNF if it shows
+ * unreasonable growth compared to the original form of the qual,
+ * where we define "unreasonable" a tad arbitrarily as 4x more
+ * operators.
+ */
+ qualcnt = count_bool_nodes(qual);
+ cnfcnt = count_bool_nodes(cnfqual);
+ dnfcnt = count_bool_nodes(dnfqual);
+ if (cnfcnt >= 4 * qualcnt)
+ cnfqual = NULL; /* mark CNF not usable */
+ if (dnfcnt >= 4 * qualcnt)
+ dnfqual = NULL; /* mark DNF not usable */
+
+ /*
+ * Second heuristic is to prefer DNF if only one relation is mentioned
+ * and it is smaller than the CNF representation.
+ */
+ if (dnfqual && dnfcnt < cnfcnt && NumRelids((Node *) dnfqual) == 1)
+ cnfqual = NULL;
+
+ /*
+ * Otherwise, we prefer CNF.
+ *
+ * XXX obviously, these rules could be improved upon.
+ */
+
+ /* pick preferred survivor */
+ if (cnfqual)
+ newqual = cnfqual;
+ else if (dnfqual)
+ newqual = dnfqual;
+ else
+ newqual = qual;
+
+ /* Convert to implicit-AND list if requested */
+ if (removeAndFlag)
+ {
+ newqual = (Expr *) make_ands_implicit(newqual);
+ }
+
+ return (List *) newqual;
+}
+
+/*
* cnfify
* Convert a qualification to conjunctive normal form by applying
* successive normalizations.
@@ -81,6 +188,8 @@ cnfify(Expr *qual, bool removeAndFlag)
newqual = find_nots(newqual);
/* Normalize into conjunctive normal form. */
newqual = find_ors(newqual);
+ /* Clean up the result. */
+ newqual = qual_cleanup(newqual);
if (removeAndFlag)
{
@@ -118,6 +227,8 @@ dnfify(Expr *qual)
newqual = find_nots(newqual);
/* Normalize into disjunctive normal form. */
newqual = find_ands(newqual);
+ /* Clean up the result. */
+ newqual = qual_cleanup(newqual);
return newqual;
}
@@ -641,3 +752,102 @@ and_normalize(List *andlist)
/* pull_ors is needed in case any sub-and_normalize succeeded */
return make_orclause(pull_ors(orclauses));
}
+
+/*
+ * qual_cleanup
+ * Fix up a qualification by removing duplicate entries (which could be
+ * created during normalization, if identical subexpressions from different
+ * parts of the tree are brought together). Also, check for AND and OR
+ * clauses with only one remaining subexpression, and simplify.
+ *
+ * Returns the modified qualification.
+ */
+static Expr *
+qual_cleanup(Expr *qual)
+{
+ if (qual == NULL)
+ return NULL;
+
+ if (and_clause((Node *) qual))
+ {
+ List *andlist = NIL;
+ List *temp;
+
+ foreach(temp, qual->args)
+ andlist = lappend(andlist, qual_cleanup(lfirst(temp)));
+
+ andlist = remove_duplicates(pull_ands(andlist));
+
+ if (length(andlist) > 1)
+ return make_andclause(andlist);
+ else
+ return lfirst(andlist);
+ }
+ else if (or_clause((Node *) qual))
+ {
+ List *orlist = NIL;
+ List *temp;
+
+ foreach(temp, qual->args)
+ orlist = lappend(orlist, qual_cleanup(lfirst(temp)));
+
+ orlist = remove_duplicates(pull_ors(orlist));
+
+ if (length(orlist) > 1)
+ return make_orclause(orlist);
+ else
+ return lfirst(orlist);
+ }
+ else if (not_clause((Node *) qual))
+ return make_notclause(qual_cleanup(get_notclausearg(qual)));
+ else
+ return qual;
+}
+
+/*
+ * remove_duplicates
+ */
+static List *
+remove_duplicates(List *list)
+{
+ List *result = NIL;
+ List *i;
+
+ if (length(list) <= 1)
+ return list;
+
+ foreach(i, list)
+ {
+ if (! member(lfirst(i), result))
+ result = lappend(result, lfirst(i));
+ }
+ return result;
+}
+
+/*
+ * count_bool_nodes
+ * Support for heuristics in canonicalize_qual(): count the
+ * number of nodes in the top level AND/OR/NOT part of a qual tree
+ */
+static int
+count_bool_nodes(Expr *qual)
+{
+ if (qual == NULL)
+ return 0;
+
+ if (and_clause((Node *) qual) ||
+ or_clause((Node *) qual))
+ {
+ int sum = 1; /* for the and/or itself */
+ List *temp;
+
+ foreach(temp, qual->args)
+ sum += count_bool_nodes(lfirst(temp));
+
+ return sum;
+ }
+ else if (not_clause((Node *) qual))
+ return count_bool_nodes(get_notclausearg(qual)) + 1;
+ else
+ return 1; /* anything else counts 1 for my purposes */
+}