about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2003-01-28 22:13:41 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2003-01-28 22:13:41 +0000
commit: 2e46b762eb1c15de2bcda785469a753a753747fb (patch)
tree: 54bb20d645df6e5bede49e6c405a93b137c65a20 /src
parent: 955a1f81a702489102b2526e24631b9f51e14247 (diff)
download: postgresql-2e46b762eb1c15de2bcda785469a753a753747fb.tar.gz
          postgresql-2e46b762eb1c15de2bcda785469a753a753747fb.zip
Extend join-selectivity API (oprjoin interface) so that join type is
passed to join selectivity estimators. Make use of this in eqjoinsel to derive non-bogus selectivity for IN clauses. Further tweaking of cost estimation for IN. initdb forced because of pg_proc.h changes.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/catalog/pg_operator.c | 7
-rw-r--r-- src/backend/optimizer/path/clausesel.c | 38
-rw-r--r-- src/backend/optimizer/path/costsize.c | 125
-rw-r--r-- src/backend/optimizer/path/indxpath.c | 8
-rw-r--r-- src/backend/optimizer/plan/subselect.c | 4
-rw-r--r-- src/backend/optimizer/util/plancat.c | 13
-rw-r--r-- src/backend/utils/adt/selfuncs.c | 75
-rw-r--r-- src/include/catalog/catversion.h | 4
-rw-r--r-- src/include/catalog/pg_proc.h | 32
-rw-r--r-- src/include/optimizer/cost.h | 17
-rw-r--r-- src/include/optimizer/plancat.h | 7
-rw-r--r-- src/include/utils/selfuncs.h | 4
-rw-r--r-- src/test/regress/expected/opr_sanity.out | 7
-rw-r--r-- src/test/regress/expected/subselect.out | 4
-rw-r--r-- src/test/regress/sql/opr_sanity.sql | 7
15 files changed, 218 insertions, 134 deletions
diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c
index 941212a649f..4c09a40b1d7 100644
--- a/src/backend/catalog/pg_operator.c
+++ b/src/backend/catalog/pg_operator.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.77 2002/09/04 20:31:14 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.78 2003/01/28 22:13:25 tgl Exp $
*
* NOTES
* these routines moved here from commands/define.c and somewhat cleaned up.
@@ -485,10 +485,11 @@ OperatorCreate(const char *operatorName,
typeId[0] = INTERNALOID; /* Query */
typeId[1] = OIDOID; /* operator OID */
typeId[2] = INTERNALOID; /* args list */
+ typeId[3] = INT2OID; /* jointype */
- joinOid = LookupFuncName(joinName, 3, typeId);
+ joinOid = LookupFuncName(joinName, 4, typeId);
if (!OidIsValid(joinOid))
- func_error("OperatorDef", joinName, 3, typeId, NULL);
+ func_error("OperatorDef", joinName, 4, typeId, NULL);
}
else
joinOid = InvalidOid;
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 84041a566d1..9df0a794782 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
Selectivity
restrictlist_selectivity(Query *root,
List *restrictinfo_list,
- int varRelid)
+ int varRelid,
+ JoinType jointype)
{
List *clauselist = get_actual_clauses(restrictinfo_list);
Selectivity result;
- result = clauselist_selectivity(root, clauselist, varRelid);
+ result = clauselist_selectivity(root, clauselist, varRelid, jointype);
freeList(clauselist);
return result;
}
@@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root,
* expression clauses. The list can be empty, in which case 1.0
* must be returned.
*
- * See clause_selectivity() for the meaning of the varRelid parameter.
+ * See clause_selectivity() for the meaning of the additional parameters.
*
* Our basic approach is to take the product of the selectivities of the
* subclauses. However, that's only right if the subclauses have independent
@@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root,
Selectivity
clauselist_selectivity(Query *root,
List *clauses,
- int varRelid)
+ int varRelid,
+ JoinType jointype)
{
Selectivity s1 = 1.0;
RangeQueryClause *rqlist = NULL;
@@ -184,7 +186,7 @@ clauselist_selectivity(Query *root,
}
}
/* Not the right form, so treat it generically. */
- s2 = clause_selectivity(root, clause, varRelid);
+ s2 = clause_selectivity(root, clause, varRelid, jointype);
s1 = s1 * s2;
}
@@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
*
* When varRelid is 0, all variables are treated as variables. This
* is appropriate for ordinary join clauses and restriction clauses.
+ *
+ * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
+ * if the clause isn't a join clause or the context is uncertain.
*/
Selectivity
clause_selectivity(Query *root,
Node *clause,
- int varRelid)
+ int varRelid,
+ JoinType jointype)
{
Selectivity s1 = 1.0; /* default for any unhandled clause type */
@@ -424,14 +430,16 @@ clause_selectivity(Query *root,
/* inverse of the selectivity of the underlying clause */
s1 = 1.0 - clause_selectivity(root,
(Node *) get_notclausearg((Expr *) clause),
- varRelid);
+ varRelid,
+ jointype);
}
else if (and_clause(clause))
{
/* share code with clauselist_selectivity() */
s1 = clauselist_selectivity(root,
((BoolExpr *) clause)->args,
- varRelid);
+ varRelid,
+ jointype);
}
else if (or_clause(clause))
{
@@ -447,7 +455,8 @@ clause_selectivity(Query *root,
{
Selectivity s2 = clause_selectivity(root,
(Node *) lfirst(arg),
- varRelid);
+ varRelid,
+ jointype);
s1 = s1 + s2 - s1 * s2;
}
@@ -479,7 +488,8 @@ clause_selectivity(Query *root,
{
/* Estimate selectivity for a join clause. */
s1 = join_selectivity(root, opno,
- ((OpExpr *) clause)->args);
+ ((OpExpr *) clause)->args,
+ jointype);
}
else
{
@@ -519,14 +529,16 @@ clause_selectivity(Query *root,
s1 = booltestsel(root,
((BooleanTest *) clause)->booltesttype,
(Node *) ((BooleanTest *) clause)->arg,
- varRelid);
+ varRelid,
+ jointype);
}
else if (IsA(clause, RelabelType))
{
/* Not sure this case is needed, but it can't hurt */
s1 = clause_selectivity(root,
(Node *) ((RelabelType *) clause)->arg,
- varRelid);
+ varRelid,
+ jointype);
}
#ifdef SELECTIVITY_DEBUG
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index d18e29ad6f4..56282406129 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -49,7 +49,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -104,7 +104,8 @@ bool enable_hashjoin = true;
static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
int nbuckets);
static bool cost_qual_eval_walker(Node *node, QualCost *total);
-static Selectivity approx_selectivity(Query *root, List *quals);
+static Selectivity approx_selectivity(Query *root, List *quals,
+ JoinType jointype);
static void set_rel_width(Query *root, RelOptInfo *rel);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
*/
if (path->jointype == JOIN_IN)
{
- Selectivity qual_selec = approx_selectivity(root, restrictlist);
+ Selectivity qual_selec = approx_selectivity(root, restrictlist,
+ path->jointype);
double qptuples;
qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
* Note: it's probably bogus to use the normal selectivity calculation
* here when either the outer or inner path is a UniquePath.
*/
- merge_selec = approx_selectivity(root, mergeclauses);
+ merge_selec = approx_selectivity(root, mergeclauses,
+ path->jpath.jointype);
cost_qual_eval(&merge_qual_cost, mergeclauses);
qpquals = set_ptrDifference(restrictlist, mergeclauses);
- qp_selec = approx_selectivity(root, qpquals);
+ qp_selec = approx_selectivity(root, qpquals,
+ path->jpath.jointype);
cost_qual_eval(&qp_qual_cost, qpquals);
freeList(qpquals);
@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
* Note: it's probably bogus to use the normal selectivity calculation
* here when either the outer or inner path is a UniquePath.
*/
- hash_selec = approx_selectivity(root, hashclauses);
+ hash_selec = approx_selectivity(root, hashclauses,
+ path->jpath.jointype);
cost_qual_eval(&hash_qual_cost, hashclauses);
qpquals = set_ptrDifference(restrictlist, hashclauses);
- qp_selec = approx_selectivity(root, qpquals);
+ qp_selec = approx_selectivity(root, qpquals,
+ path->jpath.jointype);
cost_qual_eval(&qp_qual_cost, qpquals);
freeList(qpquals);
@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
* Determine bucketsize fraction for inner relation. We use the
* smallest bucketsize estimated for any individual hashclause;
* this is undoubtedly conservative.
+ *
+ * BUT: if inner relation has been unique-ified, we can assume it's
+ * good for hashing. This is important both because it's the right
+ * answer, and because we avoid contaminating the cache with a value
+ * that's wrong for non-unique-ified paths.
*/
- innerbucketsize = 1.0;
- foreach(hcl, hashclauses)
+ if (IsA(inner_path, UniquePath))
+ innerbucketsize = 1.0 / virtualbuckets;
+ else
{
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
- Selectivity thisbucketsize;
+ innerbucketsize = 1.0;
+ foreach(hcl, hashclauses)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
+ Selectivity thisbucketsize;
- Assert(IsA(restrictinfo, RestrictInfo));
+ Assert(IsA(restrictinfo, RestrictInfo));
- /*
- * First we have to figure out which side of the hashjoin clause is the
- * inner side.
- *
- * Since we tend to visit the same clauses over and over when planning
- * a large query, we cache the bucketsize estimate in the RestrictInfo
- * node to avoid repeated lookups of statistics.
- */
- if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids))
- {
- /* righthand side is inner */
- thisbucketsize = restrictinfo->right_bucketsize;
- if (thisbucketsize < 0)
+ /*
+ * First we have to figure out which side of the hashjoin clause
+ * is the inner side.
+ *
+ * Since we tend to visit the same clauses over and over when
+ * planning a large query, we cache the bucketsize estimate in the
+ * RestrictInfo node to avoid repeated lookups of statistics.
+ */
+ if (is_subseti(restrictinfo->right_relids,
+ inner_path->parent->relids))
{
- /* not cached yet */
- thisbucketsize = estimate_hash_bucketsize(root,
+ /* righthand side is inner */
+ thisbucketsize = restrictinfo->right_bucketsize;
+ if (thisbucketsize < 0)
+ {
+ /* not cached yet */
+ thisbucketsize =
+ estimate_hash_bucketsize(root,
(Var *) get_rightop(restrictinfo->clause),
- virtualbuckets);
- restrictinfo->right_bucketsize = thisbucketsize;
+ virtualbuckets);
+ restrictinfo->right_bucketsize = thisbucketsize;
+ }
}
- }
- else
- {
- Assert(is_subseti(restrictinfo->left_relids,
- inner_path->parent->relids));
- /* lefthand side is inner */
- thisbucketsize = restrictinfo->left_bucketsize;
- if (thisbucketsize < 0)
+ else
{
- /* not cached yet */
- thisbucketsize = estimate_hash_bucketsize(root,
+ Assert(is_subseti(restrictinfo->left_relids,
+ inner_path->parent->relids));
+ /* lefthand side is inner */
+ thisbucketsize = restrictinfo->left_bucketsize;
+ if (thisbucketsize < 0)
+ {
+ /* not cached yet */
+ thisbucketsize =
+ estimate_hash_bucketsize(root,
(Var *) get_leftop(restrictinfo->clause),
- virtualbuckets);
- restrictinfo->left_bucketsize = thisbucketsize;
+ virtualbuckets);
+ restrictinfo->left_bucketsize = thisbucketsize;
+ }
}
- }
- if (innerbucketsize > thisbucketsize)
- innerbucketsize = thisbucketsize;
+ if (innerbucketsize > thisbucketsize)
+ innerbucketsize = thisbucketsize;
+ }
}
/*
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
* seems OK to live with the approximation.
*/
static Selectivity
-approx_selectivity(Query *root, List *quals)
+approx_selectivity(Query *root, List *quals, JoinType jointype)
{
Selectivity total = 1.0;
List *l;
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
restrictinfo->this_selec =
clause_selectivity(root,
(Node *) restrictinfo->clause,
- 0);
+ 0,
+ jointype);
selec = restrictinfo->this_selec;
}
else
{
/* If it's a bare expression, must always do it the hard way */
- selec = clause_selectivity(root, qual, 0);
+ selec = clause_selectivity(root, qual, 0, jointype);
}
total *= selec;
}
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
temp = rel->tuples *
restrictlist_selectivity(root,
rel->baserestrictinfo,
- lfirsti(rel->relids));
+ lfirsti(rel->relids),
+ JOIN_INNER);
/*
* Force estimate to be at least one row, to make explain output look
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
*/
selec = restrictlist_selectivity(root,
restrictlist,
- 0);
+ 0,
+ jointype);
/*
* Basically, we multiply size of Cartesian product by selectivity.
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
* For JOIN_IN and variants, the Cartesian product is figured with
* respect to a unique-ified input, and then we can clamp to the size
* of the other input.
- * XXX it's not at all clear that the ordinary selectivity calculation
- * is appropriate in this case.
*/
switch (jointype)
{
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
temp = rel->tuples *
restrictlist_selectivity(root,
rel->baserestrictinfo,
- lfirsti(rel->relids));
+ lfirsti(rel->relids),
+ JOIN_INNER);
/*
* Force estimate to be at least one row, to make explain output look
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 443d54c6473..98e4d59f2df 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.133 2003/01/24 03:58:34 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.134 2003/01/28 22:13:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1599,12 +1599,16 @@ make_innerjoin_index_path(Query *root,
* selectivity. However, since RestrictInfo nodes aren't copied when
* linking them into different lists, it should be sufficient to use
* pointer comparison to remove duplicates.)
+ *
+ * Always assume the join type is JOIN_INNER; even if some of the
+ * join clauses come from other contexts, that's not our problem.
*/
pathnode->rows = rel->tuples *
restrictlist_selectivity(root,
set_ptrUnion(rel->baserestrictinfo,
clausegroup),
- lfirsti(rel->relids));
+ lfirsti(rel->relids),
+ JOIN_INNER);
/* Like costsize.c, force estimate to be at least one row */
if (pathnode->rows < 1.0)
pathnode->rows = 1.0;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 5f420f37250..9f56a9f38d5 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.68 2003/01/20 18:54:53 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.69 2003/01/28 22:13:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -351,7 +351,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
qualsel = clauselist_selectivity(subquery,
plan->qual,
- 0);
+ 0, JOIN_INNER);
/* Is 10% selectivity a good threshold?? */
use_material = qualsel < 0.10;
}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 15120fafcd8..4a9f63312c3 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.75 2002/11/24 21:52:14 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.76 2003/01/28 22:13:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -196,8 +196,7 @@ find_secondary_indexes(Oid relationObjectId)
* This code executes registered procedures stored in the
* operator relation, by calling the function manager.
*
- * varRelid is either 0 or a rangetable index. See clause_selectivity()
- * for details about its meaning.
+ * See clause_selectivity() for the meaning of the additional parameters.
*/
Selectivity
restriction_selectivity(Query *root,
@@ -237,7 +236,8 @@ restriction_selectivity(Query *root,
Selectivity
join_selectivity(Query *root,
Oid operator,
- List *args)
+ List *args,
+ JoinType jointype)
{
RegProcedure oprjoin = get_oprjoin(operator);
float8 result;
@@ -249,10 +249,11 @@ join_selectivity(Query *root,
if (!oprjoin)
return (Selectivity) 0.5;
- result = DatumGetFloat8(OidFunctionCall3(oprjoin,
+ result = DatumGetFloat8(OidFunctionCall4(oprjoin,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
- PointerGetDatum(args)));
+ PointerGetDatum(args),
+ Int16GetDatum(jointype)));
if (result < 0.0 || result > 1.0)
elog(ERROR, "join_selectivity: bad value %f", result);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 8fb4e84ad77..d099262c46f 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.130 2003/01/27 20:51:54 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.131 2003/01/28 22:13:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -56,13 +56,18 @@
* float8 oprrest (internal, oid, internal, int4);
*
* The call convention for a join estimator (oprjoin function) is similar
- * except that varRelid is not needed:
+ * except that varRelid is not needed, and instead the join type is
+ * supplied:
*
* Selectivity oprjoin (Query *root,
* Oid operator,
- * List *args);
+ * List *args,
+ * JoinType jointype);
+ *
+ * float8 oprjoin (internal, oid, internal, int2);
*
- * float8 oprjoin (internal, oid, internal);
+ * (We deliberately make the SQL signature different to facilitate
+ * catching errors.)
*----------
*/
@@ -1009,7 +1014,8 @@ icnlikesel(PG_FUNCTION_ARGS)
* booltestsel - Selectivity of BooleanTest Node.
*/
Selectivity
-booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid)
+booltestsel(Query *root, BoolTestType booltesttype, Node *arg,
+ int varRelid, JoinType jointype)
{
Var *var;
Oid relid;
@@ -1047,11 +1053,13 @@ booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid)
break;
case IS_TRUE:
case IS_NOT_FALSE:
- selec = (double) clause_selectivity(root, arg, varRelid);
+ selec = (double) clause_selectivity(root, arg,
+ varRelid, jointype);
break;
case IS_FALSE:
case IS_NOT_TRUE:
- selec = 1.0 - (double) clause_selectivity(root, arg, varRelid);
+ selec = 1.0 - (double) clause_selectivity(root, arg,
+ varRelid, jointype);
break;
default:
elog(ERROR, "booltestsel: unexpected booltesttype %d",
@@ -1321,6 +1329,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
Query *root = (Query *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
+ JoinType jointype = (JoinType) PG_GETARG_INT16(3);
Var *var1;
Var *var2;
double selec;
@@ -1421,6 +1430,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
FmgrInfo eqproc;
bool *hasmatch1;
bool *hasmatch2;
+ double nullfrac1 = stats1->stanullfrac;
+ double nullfrac2 = stats2->stanullfrac;
double matchprodfreq,
matchfreq1,
matchfreq2,
@@ -1434,10 +1445,36 @@ eqjoinsel(PG_FUNCTION_ARGS)
nmatches;
fmgr_info(get_opcode(operator), &eqproc);
- hasmatch1 = (bool *) palloc(nvalues1 * sizeof(bool));
- memset(hasmatch1, 0, nvalues1 * sizeof(bool));
- hasmatch2 = (bool *) palloc(nvalues2 * sizeof(bool));
- memset(hasmatch2, 0, nvalues2 * sizeof(bool));
+ hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
+ hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+
+ /*
+ * If we are doing any variant of JOIN_IN, pretend all the values
+ * of the righthand relation are unique (ie, act as if it's been
+ * DISTINCT'd).
+ *
+ * NOTE: it might seem that we should unique-ify the lefthand
+ * input when considering JOIN_REVERSE_IN. But this is not so,
+ * because the join clause we've been handed has not been
+ * commuted from the way the parser originally wrote it. We know
+ * that the unique side of the IN clause is *always* on the right.
+ *
+ * NOTE: it would be dangerous to try to be smart about JOIN_LEFT
+ * or JOIN_RIGHT here, because we do not have enough information
+ * to determine which var is really on which side of the join.
+ * Perhaps someday we should pass in more information.
+ */
+ if (jointype == JOIN_IN ||
+ jointype == JOIN_REVERSE_IN ||
+ jointype == JOIN_UNIQUE_INNER ||
+ jointype == JOIN_UNIQUE_OUTER)
+ {
+ float4 oneovern = 1.0 / nd2;
+
+ for (i = 0; i < nvalues2; i++)
+ numbers2[i] = oneovern;
+ nullfrac2 = oneovern;
+ }
/*
* Note we assume that each MCV will match at most one member
@@ -1496,8 +1533,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
* Compute total frequency of non-null values that are not in
* the MCV lists.
*/
- otherfreq1 = 1.0 - stats1->stanullfrac - matchfreq1 - unmatchfreq1;
- otherfreq2 = 1.0 - stats2->stanullfrac - matchfreq2 - unmatchfreq2;
+ otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
+ otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
CLAMP_PROBABILITY(otherfreq1);
CLAMP_PROBABILITY(otherfreq2);
@@ -1585,6 +1622,7 @@ neqjoinsel(PG_FUNCTION_ARGS)
Query *root = (Query *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
+ JoinType jointype = (JoinType) PG_GETARG_INT16(3);
Oid eqop;
float8 result;
@@ -1595,11 +1633,11 @@ neqjoinsel(PG_FUNCTION_ARGS)
eqop = get_negator(operator);
if (eqop)
{
- result = DatumGetFloat8(DirectFunctionCall3(eqjoinsel,
+ result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel,
PointerGetDatum(root),
- ObjectIdGetDatum(eqop),
- PointerGetDatum(args)));
-
+ ObjectIdGetDatum(eqop),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype)));
}
else
{
@@ -3784,7 +3822,8 @@ genericcostestimate(Query *root, RelOptInfo *rel,
/* Estimate the fraction of main-table tuples that will be visited */
*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
- lfirsti(rel->relids));
+ lfirsti(rel->relids),
+ JOIN_INNER);
/*
* Estimate the number of tuples that will be visited. We do it in
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index b679fdb5ddc..d234eb32895 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: catversion.h,v 1.173 2003/01/23 23:39:04 petere Exp $
+ * $Id: catversion.h,v 1.174 2003/01/28 22:13:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200301241
+#define CATALOG_VERSION_NO 200301281
#endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index db907f745fa..d7b13a762eb 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pg_proc.h,v 1.281 2003/01/09 00:58:41 tgl Exp $
+ * $Id: pg_proc.h,v 1.282 2003/01/28 22:13:36 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
@@ -218,13 +218,13 @@ DATA(insert OID = 103 ( scalarltsel PGNSP PGUID 12 f f t f s 4 701 "2281 26
DESCR("restriction selectivity of < and related operators on scalar datatypes");
DATA(insert OID = 104 ( scalargtsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" scalargtsel - _null_ ));
DESCR("restriction selectivity of > and related operators on scalar datatypes");
-DATA(insert OID = 105 ( eqjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" eqjoinsel - _null_ ));
+DATA(insert OID = 105 ( eqjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" eqjoinsel - _null_ ));
DESCR("join selectivity of = and related operators");
-DATA(insert OID = 106 ( neqjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" neqjoinsel - _null_ ));
+DATA(insert OID = 106 ( neqjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" neqjoinsel - _null_ ));
DESCR("join selectivity of <> and related operators");
-DATA(insert OID = 107 ( scalarltjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" scalarltjoinsel - _null_ ));
+DATA(insert OID = 107 ( scalarltjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" scalarltjoinsel - _null_ ));
DESCR("join selectivity of < and related operators on scalar datatypes");
-DATA(insert OID = 108 ( scalargtjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" scalargtjoinsel - _null_ ));
+DATA(insert OID = 108 ( scalargtjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" scalargtjoinsel - _null_ ));
DESCR("join selectivity of > and related operators on scalar datatypes");
DATA(insert OID = 109 ( unknownin PGNSP PGUID 12 f f t f i 1 705 "2275" unknownin - _null_ ));
@@ -290,7 +290,7 @@ DATA(insert OID = 138 ( box_center PGNSP PGUID 12 f f t f i 1 600 "603" bo
DESCR("center of");
DATA(insert OID = 139 ( areasel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" areasel - _null_ ));
DESCR("restriction selectivity for area-comparison operators");
-DATA(insert OID = 140 ( areajoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" areajoinsel - _null_ ));
+DATA(insert OID = 140 ( areajoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" areajoinsel - _null_ ));
DESCR("join selectivity for area-comparison operators");
DATA(insert OID = 141 ( int4mul PGNSP PGUID 12 f f t f i 2 23 "23 23" int4mul - _null_ ));
DESCR("multiply");
@@ -1590,11 +1590,11 @@ DESCR("current transaction time");
DATA(insert OID = 1300 ( positionsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" positionsel - _null_ ));
DESCR("restriction selectivity for position-comparison operators");
-DATA(insert OID = 1301 ( positionjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" positionjoinsel - _null_ ));
+DATA(insert OID = 1301 ( positionjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" positionjoinsel - _null_ ));
DESCR("join selectivity for position-comparison operators");
DATA(insert OID = 1302 ( contsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" contsel - _null_ ));
DESCR("restriction selectivity for containment comparison operators");
-DATA(insert OID = 1303 ( contjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" contjoinsel - _null_ ));
+DATA(insert OID = 1303 ( contjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" contjoinsel - _null_ ));
DESCR("join selectivity for containment comparison operators");
DATA(insert OID = 1304 ( overlaps PGNSP PGUID 12 f f f f i 4 16 "1184 1184 1184 1184" overlaps_timestamp - _null_ ));
@@ -2545,9 +2545,9 @@ DATA(insert OID = 1814 ( iclikesel PGNSP PGUID 12 f f t f s 4 701 "2281 26 228
DESCR("restriction selectivity of ILIKE");
DATA(insert OID = 1815 ( icnlikesel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" icnlikesel - _null_ ));
DESCR("restriction selectivity of NOT ILIKE");
-DATA(insert OID = 1816 ( iclikejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" iclikejoinsel - _null_ ));
+DATA(insert OID = 1816 ( iclikejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" iclikejoinsel - _null_ ));
DESCR("join selectivity of ILIKE");
-DATA(insert OID = 1817 ( icnlikejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" icnlikejoinsel - _null_ ));
+DATA(insert OID = 1817 ( icnlikejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" icnlikejoinsel - _null_ ));
DESCR("join selectivity of NOT ILIKE");
DATA(insert OID = 1818 ( regexeqsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" regexeqsel - _null_ ));
DESCR("restriction selectivity of regex match");
@@ -2561,17 +2561,17 @@ DATA(insert OID = 1822 ( nlikesel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281
DESCR("restriction selectivity of NOT LIKE");
DATA(insert OID = 1823 ( icregexnesel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" icregexnesel - _null_ ));
DESCR("restriction selectivity of case-insensitive regex non-match");
-DATA(insert OID = 1824 ( regexeqjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" regexeqjoinsel - _null_ ));
+DATA(insert OID = 1824 ( regexeqjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" regexeqjoinsel - _null_ ));
DESCR("join selectivity of regex match");
-DATA(insert OID = 1825 ( likejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" likejoinsel - _null_ ));
+DATA(insert OID = 1825 ( likejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" likejoinsel - _null_ ));
DESCR("join selectivity of LIKE");
-DATA(insert OID = 1826 ( icregexeqjoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" icregexeqjoinsel - _null_ ));
+DATA(insert OID = 1826 ( icregexeqjoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" icregexeqjoinsel - _null_ ));
DESCR("join selectivity of case-insensitive regex match");
-DATA(insert OID = 1827 ( regexnejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" regexnejoinsel - _null_ ));
+DATA(insert OID = 1827 ( regexnejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" regexnejoinsel - _null_ ));
DESCR("join selectivity of regex non-match");
-DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" nlikejoinsel - _null_ ));
+DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" nlikejoinsel - _null_ ));
DESCR("join selectivity of NOT LIKE");
-DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 f f t f s 3 701 "2281 26 2281" icregexnejoinsel - _null_ ));
+DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 21" icregexnejoinsel - _null_ ));
DESCR("join selectivity of case-insensitive regex non-match");
/* Aggregate-related functions */
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index aca6097bc1c..0feb56dd7c9 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: cost.h,v 1.51 2003/01/27 20:51:54 tgl Exp $
+ * $Id: cost.h,v 1.52 2003/01/28 22:13:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -88,13 +88,16 @@ extern void set_function_size_estimates(Query *root, RelOptInfo *rel);
* routines to compute clause selectivities
*/
extern Selectivity restrictlist_selectivity(Query *root,
- List *restrictinfo_list,
- int varRelid);
+ List *restrictinfo_list,
+ int varRelid,
+ JoinType jointype);
extern Selectivity clauselist_selectivity(Query *root,
- List *clauses,
- int varRelid);
+ List *clauses,
+ int varRelid,
+ JoinType jointype);
extern Selectivity clause_selectivity(Query *root,
- Node *clause,
- int varRelid);
+ Node *clause,
+ int varRelid,
+ JoinType jointype);
#endif /* COST_H */
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index abd09871feb..255d196d7d7 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: plancat.h,v 1.27 2002/06/20 20:29:51 momjian Exp $
+ * $Id: plancat.h,v 1.28 2003/01/28 22:13:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -34,7 +34,8 @@ extern Selectivity restriction_selectivity(Query *root,
int varRelid);
extern Selectivity join_selectivity(Query *root,
- Oid operator,
- List *args);
+ Oid operator,
+ List *args,
+ JoinType jointype);
#endif /* PLANCAT_H */
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 037c2b2f5e3..757c0e1e1ac 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: selfuncs.h,v 1.11 2003/01/20 18:55:07 tgl Exp $
+ * $Id: selfuncs.h,v 1.12 2003/01/28 22:13:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -67,7 +67,7 @@ extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
extern Selectivity booltestsel(Query *root, BoolTestType booltesttype,
- Node *arg, int varRelid);
+ Node *arg, int varRelid, JoinType jointype);
extern Selectivity nulltestsel(Query *root, NullTestType nulltesttype,
Node *arg, int varRelid);
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 7ef807a95db..dcf295919c9 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -530,16 +530,17 @@ WHERE p1.oprrest = p2.oid AND
-- If oprjoin is set, the operator must be a binary boolean op,
-- and it must link to a proc with the right signature
-- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2)
SELECT p1.oid, p1.oprname, p2.oid, p2.proname
FROM pg_operator AS p1, pg_proc AS p2
WHERE p1.oprjoin = p2.oid AND
(p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
p2.prorettype != 'float8'::regtype OR p2.proretset OR
- p2.pronargs != 3 OR
+ p2.pronargs != 4 OR
p2.proargtypes[0] != 'internal'::regtype OR
p2.proargtypes[1] != 'oid'::regtype OR
- p2.proargtypes[2] != 'internal'::regtype);
+ p2.proargtypes[2] != 'internal'::regtype OR
+ p2.proargtypes[3] != 'int2'::regtype);
oid | oprname | oid | proname
-----+---------+-----+---------
(0 rows)
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 8d7597863fc..5a2ef11c21b 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -134,10 +134,10 @@ SELECT '' AS five, f1 AS "Correlated Field"
WHERE f3 IS NOT NULL);
five | Correlated Field
------+------------------
- | 1
- | 2
| 2
| 3
+ | 1
+ | 2
| 3
(5 rows)
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index 650073cccc1..8d543932a7c 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -444,17 +444,18 @@ WHERE p1.oprrest = p2.oid AND
-- If oprjoin is set, the operator must be a binary boolean op,
-- and it must link to a proc with the right signature
-- to be a join selectivity estimator.
--- The proc signature we want is: float8 proc(internal, oid, internal)
+-- The proc signature we want is: float8 proc(internal, oid, internal, int2)
SELECT p1.oid, p1.oprname, p2.oid, p2.proname
FROM pg_operator AS p1, pg_proc AS p2
WHERE p1.oprjoin = p2.oid AND
(p1.oprkind != 'b' OR p1.oprresult != 'bool'::regtype OR
p2.prorettype != 'float8'::regtype OR p2.proretset OR
- p2.pronargs != 3 OR
+ p2.pronargs != 4 OR
p2.proargtypes[0] != 'internal'::regtype OR
p2.proargtypes[1] != 'oid'::regtype OR
- p2.proargtypes[2] != 'internal'::regtype);
+ p2.proargtypes[2] != 'internal'::regtype OR
+ p2.proargtypes[3] != 'int2'::regtype);
-- **************** pg_aggregate ****************