about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/optimizer/path/costsize.c 75
-rw-r--r-- src/backend/utils/cache/lsyscache.c 78
-rw-r--r-- src/include/utils/lsyscache.h 4
3 files changed, 123 insertions, 34 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index bdfbbb18186..7dfe834b779 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -41,7 +41,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.72 2001/05/09 00:35:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -61,12 +61,6 @@
#include "utils/syscache.h"
-/*
- * The length of a variable-length field in bytes (stupid estimate...)
- */
-#define _DEFAULT_ATTRIBUTE_WIDTH_ 12
-
-
#define LOG2(x) (log(x) / 0.693147180559945)
#define LOG6(x) (log(x) / 1.79175946922805)
@@ -90,7 +84,6 @@ bool enable_hashjoin = true;
static bool cost_qual_eval_walker(Node *node, Cost *total);
static void set_rel_width(Query *root, RelOptInfo *rel);
-static int compute_attribute_width(TargetEntry *tlistentry);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);
@@ -1082,36 +1075,54 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
/*
* set_rel_width
* Set the estimated output width of the relation.
+ *
+ * NB: this works best on base relations because it prefers to look at
+ * real Vars. It will fail to make use of pg_statistic info when applied
+ * to a subquery relation, even if the subquery outputs are simple vars
+ * that we could have gotten info for. Is it worth trying to be smarter
+ * about subqueries?
*/
static void
set_rel_width(Query *root, RelOptInfo *rel)
{
- int tuple_width = 0;
- List *tle;
+ int32 tuple_width = 0;
+ List *tllist;
- foreach(tle, rel->targetlist)
- tuple_width += compute_attribute_width((TargetEntry *) lfirst(tle));
- Assert(tuple_width >= 0);
- rel->width = tuple_width;
-}
+ foreach(tllist, rel->targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(tllist);
+ int32 item_width;
-/*
- * compute_attribute_width
- * Given a target list entry, find the size in bytes of the attribute.
- *
- * If a field is variable-length, we make a default assumption. Would be
- * better if VACUUM recorded some stats about the average field width...
- * also, we have access to the atttypmod, but fail to use it...
- */
-static int
-compute_attribute_width(TargetEntry *tlistentry)
-{
- int width = get_typlen(tlistentry->resdom->restype);
+ /*
+ * If it's a Var, try to get statistical info from pg_statistic.
+ */
+ if (tle->expr && IsA(tle->expr, Var))
+ {
+ Var *var = (Var *) tle->expr;
+ Oid relid;
- if (width < 0)
- return _DEFAULT_ATTRIBUTE_WIDTH_;
- else
- return width;
+ relid = getrelid(var->varno, root->rtable);
+ if (relid != InvalidOid)
+ {
+ item_width = get_attavgwidth(relid, var->varattno);
+ if (item_width > 0)
+ {
+ tuple_width += item_width;
+ continue;
+ }
+ }
+ }
+ /*
+ * Not a Var, or can't find statistics for it. Estimate using
+ * just the type info.
+ */
+ item_width = get_typavgwidth(tle->resdom->restype,
+ tle->resdom->restypmod);
+ Assert(item_width > 0);
+ tuple_width += item_width;
+ }
+ Assert(tuple_width >= 0);
+ rel->width = tuple_width;
}
/*
@@ -1122,7 +1133,7 @@ compute_attribute_width(TargetEntry *tlistentry)
static double
relation_byte_size(double tuples, int width)
{
- return tuples * ((double) (width + sizeof(HeapTupleData)));
+ return tuples * ((double) MAXALIGN(width + sizeof(HeapTupleData)));
}
/*
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 3995de5d7a1..ee15a940cc5 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.54 2001/05/09 00:35:09 tgl Exp $
*
* NOTES
* Eventually, the index information should go through here, too.
@@ -750,6 +750,56 @@ get_typdefault(Oid typid)
}
/*
+ * get_typavgwidth
+ *
+ * Given a type OID and a typmod value (pass -1 if typmod is unknown),
+ * estimate the average width of values of the type. This is used by
+ * the planner, which doesn't require absolutely correct results;
+ * it's OK (and expected) to guess if we don't know for sure.
+ */
+int32
+get_typavgwidth(Oid typid, int32 typmod)
+{
+ int typlen = get_typlen(typid);
+ int32 maxwidth;
+
+ /*
+ * Easy if it's a fixed-width type
+ */
+ if (typlen > 0)
+ return typlen;
+ /*
+ * type_maximum_size knows the encoding of typmod for some datatypes;
+ * don't duplicate that knowledge here.
+ */
+ maxwidth = type_maximum_size(typid, typmod);
+ if (maxwidth > 0)
+ {
+ /*
+ * For BPCHAR, the max width is also the only width. Otherwise
+ * we need to guess about the typical data width given the max.
+ * A sliding scale for percentage of max width seems reasonable.
+ */
+ if (typid == BPCHAROID)
+ return maxwidth;
+ if (maxwidth <= 32)
+ return maxwidth; /* assume full width */
+ if (maxwidth < 1000)
+ return 32 + (maxwidth - 32) / 2; /* assume 50% */
+ /*
+ * At 1000 or beyond, assume we're looking at something like
+ * "varchar(10000)" where the limit isn't actually reached often,
+ * and use a fixed estimate.
+ */
+ return 32 + (1000 - 32) / 2;
+ }
+ /*
+ * Oops, we have no idea ... wild guess time.
+ */
+ return 32;
+}
+
+/*
* get_typtype
*
* Given the type OID, find if it is a basic type, a named relation
@@ -783,6 +833,32 @@ get_typtype(Oid typid)
/* ---------- STATISTICS CACHE ---------- */
/*
+ * get_attavgwidth
+ *
+ * Given the table and attribute number of a column, get the average
+ * width of entries in the column. Return zero if no data available.
+ */
+int32
+get_attavgwidth(Oid relid, AttrNumber attnum)
+{
+ HeapTuple tp;
+
+ tp = SearchSysCache(STATRELATT,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(attnum),
+ 0, 0);
+ if (HeapTupleIsValid(tp))
+ {
+ int32 stawidth = ((Form_pg_statistic) GETSTRUCT(tp))->stawidth;
+
+ ReleaseSysCache(tp);
+ if (stawidth > 0)
+ return stawidth;
+ }
+ return 0;
+}
+
+/*
* get_attstatsslot
*
* Extract the contents of a "slot" of a pg_statistic tuple.
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index 6b35deed286..3f18a4aea63 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
+ * $Id: lsyscache.h,v 1.32 2001/05/09 00:35:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -39,6 +39,8 @@ extern bool get_typbyval(Oid typid);
extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
extern char get_typstorage(Oid typid);
extern Datum get_typdefault(Oid typid);
+extern int32 get_typavgwidth(Oid typid, int32 typmod);
+extern int32 get_attavgwidth(Oid relid, AttrNumber attnum);
extern bool get_attstatsslot(HeapTuple statstuple,
Oid atttype, int32 atttypmod,
int reqkind, Oid reqop,