about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us>, 2005-02-01 23:09:00 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us>, 2005-02-01 23:09:00 +0000
commit: 914db102894754802bdf5ad0ce94dfd29f45c1d9 (patch)
tree: 42bd69f76c66df4b82e5b63efc2e5579a253f174 /src
parent: 86206573ede6f6fa85af57f6b70549e6f1d3f56e (diff)
download: postgresql-914db102894754802bdf5ad0ce94dfd29f45c1d9.tar.gz
download: postgresql-914db102894754802bdf5ad0ce94dfd29f45c1d9.zip
Adjust estimate_num_groups() to not clamp per-relation group count
estimate to less than the number of values estimated for any one grouping Var, as suggested by Manfred. This is intuitively right, and what's more it puts the plan choices in the subselect regression test back the way they were before ...
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/adt/selfuncs.c | 18
-rw-r--r-- src/test/regress/expected/subselect.out | 4
2 files changed, 18 insertions, 4 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 2f2f2c4e5e6..1621b6ee294 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.169.4.1 2005/01/28 20:35:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.169.4.2 2005/02/01 23:08:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2043,6 +2043,7 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
RelOptInfo *rel = varinfo1->rel;
double reldistinct = varinfo1->ndistinct;
+ double relmaxndistinct = reldistinct;
int relvarcount = 1;
List *newvarinfos = NIL;
@@ -2057,6 +2058,8 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
if (varinfo2->rel == varinfo1->rel)
{
reldistinct *= varinfo2->ndistinct;
+ if (relmaxndistinct < varinfo2->ndistinct)
+ relmaxndistinct = varinfo2->ndistinct;
relvarcount++;
}
else
@@ -2075,12 +2078,23 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
/*
* Clamp to size of rel, or size of rel / 10 if multiple Vars.
* The fudge factor is because the Vars are probably correlated
- * but we don't know by how much.
+ * but we don't know by how much. We should never clamp to less
+ * than the largest ndistinct value for any of the Vars, though,
+ * since there will surely be at least that many groups.
*/
double clamp = rel->tuples;
if (relvarcount > 1)
+ {
clamp *= 0.1;
+ if (clamp < relmaxndistinct)
+ {
+ clamp = relmaxndistinct;
+ /* for sanity in case some ndistinct is too large: */
+ if (clamp > rel->tuples)
+ clamp = rel->tuples;
+ }
+ }
if (reldistinct > clamp)
reldistinct = clamp;
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 56bea035981..07e727de482 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -134,11 +134,11 @@ SELECT '' AS five, f1 AS "Correlated Field"
WHERE f3 IS NOT NULL);
five | Correlated Field
------+------------------
+ | 2
| 3
| 1
- | 3
- | 2
| 2
+ | 3
(5 rows)
--