diff options
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r-- | src/backend/optimizer/path/clausesel.c | 38 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 125 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 8 | ||||
-rw-r--r-- | src/backend/optimizer/plan/subselect.c | 4 | ||||
-rw-r--r-- | src/backend/optimizer/util/plancat.c | 13 |
5 files changed, 113 insertions, 75 deletions
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 84041a566d1..9df0a794782 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause, Selectivity restrictlist_selectivity(Query *root, List *restrictinfo_list, - int varRelid) + int varRelid, + JoinType jointype) { List *clauselist = get_actual_clauses(restrictinfo_list); Selectivity result; - result = clauselist_selectivity(root, clauselist, varRelid); + result = clauselist_selectivity(root, clauselist, varRelid, jointype); freeList(clauselist); return result; } @@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root, * expression clauses. The list can be empty, in which case 1.0 * must be returned. * - * See clause_selectivity() for the meaning of the varRelid parameter. + * See clause_selectivity() for the meaning of the additional parameters. * * Our basic approach is to take the product of the selectivities of the * subclauses. However, that's only right if the subclauses have independent @@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root, Selectivity clauselist_selectivity(Query *root, List *clauses, - int varRelid) + int varRelid, + JoinType jointype) { Selectivity s1 = 1.0; RangeQueryClause *rqlist = NULL; @@ -184,7 +186,7 @@ clauselist_selectivity(Query *root, } } /* Not the right form, so treat it generically. */ - s2 = clause_selectivity(root, clause, varRelid); + s2 = clause_selectivity(root, clause, varRelid, jointype); s1 = s1 * s2; } @@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause, * * When varRelid is 0, all variables are treated as variables. This * is appropriate for ordinary join clauses and restriction clauses. + * + * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER + * if the clause isn't a join clause or the context is uncertain. */ Selectivity clause_selectivity(Query *root, Node *clause, - int varRelid) + int varRelid, + JoinType jointype) { Selectivity s1 = 1.0; /* default for any unhandled clause type */ @@ -424,14 +430,16 @@ clause_selectivity(Query *root, /* inverse of the selectivity of the underlying clause */ s1 = 1.0 - clause_selectivity(root, (Node *) get_notclausearg((Expr *) clause), - varRelid); + varRelid, + jointype); } else if (and_clause(clause)) { /* share code with clauselist_selectivity() */ s1 = clauselist_selectivity(root, ((BoolExpr *) clause)->args, - varRelid); + varRelid, + jointype); } else if (or_clause(clause)) { @@ -447,7 +455,8 @@ clause_selectivity(Query *root, { Selectivity s2 = clause_selectivity(root, (Node *) lfirst(arg), - varRelid); + varRelid, + jointype); s1 = s1 + s2 - s1 * s2; } @@ -479,7 +488,8 @@ clause_selectivity(Query *root, { /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, - ((OpExpr *) clause)->args); + ((OpExpr *) clause)->args, + jointype); } else { @@ -519,14 +529,16 @@ clause_selectivity(Query *root, s1 = booltestsel(root, ((BooleanTest *) clause)->booltesttype, (Node *) ((BooleanTest *) clause)->arg, - varRelid); + varRelid, + jointype); } else if (IsA(clause, RelabelType)) { /* Not sure this case is needed, but it can't hurt */ s1 = clause_selectivity(root, (Node *) ((RelabelType *) clause)->arg, - varRelid); + varRelid, + jointype); } #ifdef SELECTIVITY_DEBUG diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d18e29ad6f4..56282406129 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -49,7 +49,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -104,7 +104,8 @@ bool enable_hashjoin = true; static Selectivity estimate_hash_bucketsize(Query *root, Var *var, int nbuckets); static bool cost_qual_eval_walker(Node *node, QualCost *total); -static Selectivity approx_selectivity(Query *root, List *quals); +static Selectivity approx_selectivity(Query *root, List *quals, + JoinType jointype); static void set_rel_width(Query *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root) */ if (path->jointype == JOIN_IN) { - Selectivity qual_selec = approx_selectivity(root, restrictlist); + Selectivity qual_selec = approx_selectivity(root, restrictlist, + path->jointype); double qptuples; qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows); @@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root) * Note: it's probably bogus to use the normal selectivity calculation * here when either the outer or inner path is a UniquePath. */ - merge_selec = approx_selectivity(root, mergeclauses); + merge_selec = approx_selectivity(root, mergeclauses, + path->jpath.jointype); cost_qual_eval(&merge_qual_cost, mergeclauses); qpquals = set_ptrDifference(restrictlist, mergeclauses); - qp_selec = approx_selectivity(root, qpquals); + qp_selec = approx_selectivity(root, qpquals, + path->jpath.jointype); cost_qual_eval(&qp_qual_cost, qpquals); freeList(qpquals); @@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root) * Note: it's probably bogus to use the normal selectivity calculation * here when either the outer or inner path is a UniquePath. */ - hash_selec = approx_selectivity(root, hashclauses); + hash_selec = approx_selectivity(root, hashclauses, + path->jpath.jointype); cost_qual_eval(&hash_qual_cost, hashclauses); qpquals = set_ptrDifference(restrictlist, hashclauses); - qp_selec = approx_selectivity(root, qpquals); + qp_selec = approx_selectivity(root, qpquals, + path->jpath.jointype); cost_qual_eval(&qp_qual_cost, qpquals); freeList(qpquals); @@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root) * Determine bucketsize fraction for inner relation. We use the * smallest bucketsize estimated for any individual hashclause; * this is undoubtedly conservative. + * + * BUT: if inner relation has been unique-ified, we can assume it's + * good for hashing. This is important both because it's the right + * answer, and because we avoid contaminating the cache with a value + * that's wrong for non-unique-ified paths. */ - innerbucketsize = 1.0; - foreach(hcl, hashclauses) + if (IsA(inner_path, UniquePath)) + innerbucketsize = 1.0 / virtualbuckets; + else { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl); - Selectivity thisbucketsize; + innerbucketsize = 1.0; + foreach(hcl, hashclauses) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl); + Selectivity thisbucketsize; - Assert(IsA(restrictinfo, RestrictInfo)); + Assert(IsA(restrictinfo, RestrictInfo)); - /* - * First we have to figure out which side of the hashjoin clause is the - * inner side. - * - * Since we tend to visit the same clauses over and over when planning - * a large query, we cache the bucketsize estimate in the RestrictInfo - * node to avoid repeated lookups of statistics. - */ - if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids)) - { - /* righthand side is inner */ - thisbucketsize = restrictinfo->right_bucketsize; - if (thisbucketsize < 0) + /* + * First we have to figure out which side of the hashjoin clause + * is the inner side. + * + * Since we tend to visit the same clauses over and over when + * planning a large query, we cache the bucketsize estimate in the + * RestrictInfo node to avoid repeated lookups of statistics. + */ + if (is_subseti(restrictinfo->right_relids, + inner_path->parent->relids)) { - /* not cached yet */ - thisbucketsize = estimate_hash_bucketsize(root, + /* righthand side is inner */ + thisbucketsize = restrictinfo->right_bucketsize; + if (thisbucketsize < 0) + { + /* not cached yet */ + thisbucketsize = + estimate_hash_bucketsize(root, (Var *) get_rightop(restrictinfo->clause), - virtualbuckets); - restrictinfo->right_bucketsize = thisbucketsize; + virtualbuckets); + restrictinfo->right_bucketsize = thisbucketsize; + } } - } - else - { - Assert(is_subseti(restrictinfo->left_relids, - inner_path->parent->relids)); - /* lefthand side is inner */ - thisbucketsize = restrictinfo->left_bucketsize; - if (thisbucketsize < 0) + else { - /* not cached yet */ - thisbucketsize = estimate_hash_bucketsize(root, + Assert(is_subseti(restrictinfo->left_relids, + inner_path->parent->relids)); + /* lefthand side is inner */ + thisbucketsize = restrictinfo->left_bucketsize; + if (thisbucketsize < 0) + { + /* not cached yet */ + thisbucketsize = + estimate_hash_bucketsize(root, (Var *) get_leftop(restrictinfo->clause), - virtualbuckets); - restrictinfo->left_bucketsize = thisbucketsize; + virtualbuckets); + restrictinfo->left_bucketsize = thisbucketsize; + } } - } - if (innerbucketsize > thisbucketsize) - innerbucketsize = thisbucketsize; + if (innerbucketsize > thisbucketsize) + innerbucketsize = thisbucketsize; + } } /* @@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total) * seems OK to live with the approximation. */ static Selectivity -approx_selectivity(Query *root, List *quals) +approx_selectivity(Query *root, List *quals, JoinType jointype) { Selectivity total = 1.0; List *l; @@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals) restrictinfo->this_selec = clause_selectivity(root, (Node *) restrictinfo->clause, - 0); + 0, + jointype); selec = restrictinfo->this_selec; } else { /* If it's a bare expression, must always do it the hard way */ - selec = clause_selectivity(root, qual, 0); + selec = clause_selectivity(root, qual, 0, jointype); } total *= selec; } @@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel) temp = rel->tuples * restrictlist_selectivity(root, rel->baserestrictinfo, - lfirsti(rel->relids)); + lfirsti(rel->relids), + JOIN_INNER); /* * Force estimate to be at least one row, to make explain output look @@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, */ selec = restrictlist_selectivity(root, restrictlist, - 0); + 0, + jointype); /* * Basically, we multiply size of Cartesian product by selectivity. @@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel, * For JOIN_IN and variants, the Cartesian product is figured with * respect to a unique-ified input, and then we can clamp to the size * of the other input. - * XXX it's not at all clear that the ordinary selectivity calculation - * is appropriate in this case. */ switch (jointype) { @@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel) temp = rel->tuples * restrictlist_selectivity(root, rel->baserestrictinfo, - lfirsti(rel->relids)); + lfirsti(rel->relids), + JOIN_INNER); /* * Force estimate to be at least one row, to make explain output look diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 443d54c6473..98e4d59f2df 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.133 2003/01/24 03:58:34 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.134 2003/01/28 22:13:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1599,12 +1599,16 @@ make_innerjoin_index_path(Query *root, * selectivity. However, since RestrictInfo nodes aren't copied when * linking them into different lists, it should be sufficient to use * pointer comparison to remove duplicates.) + * + * Always assume the join type is JOIN_INNER; even if some of the + * join clauses come from other contexts, that's not our problem. */ pathnode->rows = rel->tuples * restrictlist_selectivity(root, set_ptrUnion(rel->baserestrictinfo, clausegroup), - lfirsti(rel->relids)); + lfirsti(rel->relids), + JOIN_INNER); /* Like costsize.c, force estimate to be at least one row */ if (pathnode->rows < 1.0) pathnode->rows = 1.0; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 5f420f37250..9f56a9f38d5 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.68 2003/01/20 18:54:53 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.69 2003/01/28 22:13:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -351,7 +351,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual) qualsel = clauselist_selectivity(subquery, plan->qual, - 0); + 0, JOIN_INNER); /* Is 10% selectivity a good threshold?? */ use_material = qualsel < 0.10; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 15120fafcd8..4a9f63312c3 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.75 2002/11/24 21:52:14 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.76 2003/01/28 22:13:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -196,8 +196,7 @@ find_secondary_indexes(Oid relationObjectId) * This code executes registered procedures stored in the * operator relation, by calling the function manager. * - * varRelid is either 0 or a rangetable index. See clause_selectivity() - * for details about its meaning. + * See clause_selectivity() for the meaning of the additional parameters. */ Selectivity restriction_selectivity(Query *root, @@ -237,7 +236,8 @@ restriction_selectivity(Query *root, Selectivity join_selectivity(Query *root, Oid operator, - List *args) + List *args, + JoinType jointype) { RegProcedure oprjoin = get_oprjoin(operator); float8 result; @@ -249,10 +249,11 @@ join_selectivity(Query *root, if (!oprjoin) return (Selectivity) 0.5; - result = DatumGetFloat8(OidFunctionCall3(oprjoin, + result = DatumGetFloat8(OidFunctionCall4(oprjoin, PointerGetDatum(root), ObjectIdGetDatum(operator), - PointerGetDatum(args))); + PointerGetDatum(args), + Int16GetDatum(jointype))); if (result < 0.0 || result > 1.0) elog(ERROR, "join_selectivity: bad value %f", result); |