aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2011-08-23 17:11:41 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2011-08-23 17:13:30 -0400
commit48f545a3aef461a635b975443a99dd268269590a (patch)
tree0fa47ea0a32c671ad9e883056384fb34cbc75be6
parent26a092657ba4ac52212d43bae9a822a119efbd8a (diff)
downloadpostgresql-48f545a3aef461a635b975443a99dd268269590a.tar.gz
postgresql-48f545a3aef461a635b975443a99dd268269590a.zip
Fix overoptimistic assumptions in column width estimation for subqueries.
set_append_rel_pathlist supposed that, while computing per-column width estimates for the appendrel, it could ignore child rels for which the translated reltargetlist entry wasn't a Var. This gave rise to completely silly estimates in some common cases, such as constant outputs from some or all of the arms of a UNION ALL. Instead, fall back on get_typavgwidth to estimate from the value's datatype; which might be a poor estimate but at least it's not completely wacko. That problem was exposed by an Assert in set_subquery_size_estimates, which unfortunately was still overoptimistic even with that fix, since we don't compute attr_widths estimates for appendrels that are entirely excluded by constraints. So remove the Assert; we'll just fall back on get_typavgwidth in such cases. Also, since set_subquery_size_estimates calls set_baserel_size_estimates which calls set_rel_width, there's no need for set_subquery_size_estimates to call get_typavgwidth; set_rel_width will handle it for us if we just leave the estimate set to zero. Remove the unnecessary code. Per report from Erik Rijkers and subsequent investigation.
-rw-r--r--src/backend/optimizer/path/allpaths.c45
-rw-r--r--src/backend/optimizer/path/costsize.c21
2 files changed, 44 insertions, 22 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 6b43aee1833..a14b809a14c 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -403,7 +403,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
continue;
}
- /* CE failed, so finish copying targetlist and join quals */
+ /*
+ * CE failed, so finish copying/modifying targetlist and join quals.
+ *
+ * Note: the resulting childrel->reltargetlist may contain arbitrary
+ * expressions, which normally would not occur in a reltargetlist.
+ * That is okay because nothing outside of this routine will look at
+ * the child rel's reltargetlist. We do have to cope with the case
+ * while constructing attr_widths estimates below, though.
+ */
childrel->joininfo = (List *)
adjust_appendrel_attrs((Node *) rel->joininfo,
appinfo);
@@ -486,23 +494,36 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
parent_rows += childrel->rows;
parent_size += childrel->width * childrel->rows;
+ /*
+ * Accumulate per-column estimates too. We need not do anything
+ * for PlaceHolderVars in the parent list. If child expression
+ * isn't a Var, or we didn't record a width estimate for it, we
+ * have to fall back on a datatype-based estimate.
+ *
+ * By construction, child's reltargetlist is 1-to-1 with parent's.
+ */
forboth(parentvars, rel->reltargetlist,
childvars, childrel->reltargetlist)
{
Var *parentvar = (Var *) lfirst(parentvars);
- Var *childvar = (Var *) lfirst(childvars);
-
- /*
- * Accumulate per-column estimates too. Whole-row Vars and
- * PlaceHolderVars can be ignored here.
- */
- if (IsA(parentvar, Var) &&
- IsA(childvar, Var))
+ Node *childvar = (Node *) lfirst(childvars);
+
+ if (IsA(parentvar, Var))
{
int pndx = parentvar->varattno - rel->min_attr;
- int cndx = childvar->varattno - childrel->min_attr;
-
- parent_attrsizes[pndx] += childrel->attr_widths[cndx] * childrel->rows;
+ int32 child_width = 0;
+
+ if (IsA(childvar, Var))
+ {
+ int cndx = ((Var *) childvar)->varattno - childrel->min_attr;
+
+ child_width = childrel->attr_widths[cndx];
+ }
+ if (child_width <= 0)
+ child_width = get_typavgwidth(exprType(childvar),
+ exprTypmod(childvar));
+ Assert(child_width > 0);
+ parent_attrsizes[pndx] += child_width * childrel->rows;
}
}
}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1b264a29e29..c853bd8dcb6 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3238,14 +3238,14 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
/*
* Compute per-output-column width estimates by examining the subquery's
* targetlist. For any output that is a plain Var, get the width estimate
- * that was made while planning the subquery. Otherwise, fall back on a
- * datatype-based estimate.
+ * that was made while planning the subquery. Otherwise, we leave it to
+ * set_rel_width to fill in a datatype-based default estimate.
*/
foreach(lc, subroot->parse->targetList)
{
TargetEntry *te = (TargetEntry *) lfirst(lc);
Node *texpr = (Node *) te->expr;
- int32 item_width;
+ int32 item_width = 0;
Assert(IsA(te, TargetEntry));
/* junk columns aren't visible to upper query */
@@ -3256,8 +3256,14 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
* XXX This currently doesn't work for subqueries containing set
* operations, because the Vars in their tlists are bogus references
* to the first leaf subquery, which wouldn't give the right answer
- * even if we could still get to its PlannerInfo. So fall back on
- * datatype in that case.
+ * even if we could still get to its PlannerInfo.
+ *
+ * Also, the subquery could be an appendrel for which all branches are
+ * known empty due to constraint exclusion, in which case
+ * set_append_rel_pathlist will have left the attr_widths set to zero.
+ *
+ * In either case, we just leave the width estimate zero until
+ * set_rel_width fixes it.
*/
if (IsA(texpr, Var) &&
subroot->parse->setOperations == NULL)
@@ -3267,11 +3273,6 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
}
- else
- {
- item_width = get_typavgwidth(exprType(texpr), exprTypmod(texpr));
- }
- Assert(item_width > 0);
Assert(te->resno >= rel->min_attr && te->resno <= rel->max_attr);
rel->attr_widths[te->resno - rel->min_attr] = item_width;
}