diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-08-23 17:11:41 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-08-23 17:13:30 -0400 |
commit | 48f545a3aef461a635b975443a99dd268269590a (patch) | |
tree | 0fa47ea0a32c671ad9e883056384fb34cbc75be6 | |
parent | 26a092657ba4ac52212d43bae9a822a119efbd8a (diff) | |
download | postgresql-48f545a3aef461a635b975443a99dd268269590a.tar.gz postgresql-48f545a3aef461a635b975443a99dd268269590a.zip |
Fix overoptimistic assumptions in column width estimation for subqueries.
set_append_rel_pathlist supposed that, while computing per-column width
estimates for the appendrel, it could ignore child rels for which the
translated reltargetlist entry wasn't a Var. This gave rise to completely
silly estimates in some common cases, such as constant outputs from some or
all of the arms of a UNION ALL. Instead, fall back on get_typavgwidth to
estimate from the value's datatype; which might be a poor estimate but at
least it's not completely wacko.
That problem was exposed by an Assert in set_subquery_size_estimates, which
unfortunately was still overoptimistic even with that fix, since we don't
compute attr_widths estimates for appendrels that are entirely excluded by
constraints. So remove the Assert; we'll just fall back on get_typavgwidth
in such cases.
Also, since set_subquery_size_estimates calls set_baserel_size_estimates
which calls set_rel_width, there's no need for set_subquery_size_estimates
to call get_typavgwidth; set_rel_width will handle it for us if we just
leave the estimate set to zero. Remove the unnecessary code.
Per report from Erik Rijkers and subsequent investigation.
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 45 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 21 |
2 files changed, 44 insertions, 22 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 6b43aee1833..a14b809a14c 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -403,7 +403,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, continue; } - /* CE failed, so finish copying targetlist and join quals */ + /* + * CE failed, so finish copying/modifying targetlist and join quals. + * + * Note: the resulting childrel->reltargetlist may contain arbitrary + * expressions, which normally would not occur in a reltargetlist. + * That is okay because nothing outside of this routine will look at + * the child rel's reltargetlist. We do have to cope with the case + * while constructing attr_widths estimates below, though. + */ childrel->joininfo = (List *) adjust_appendrel_attrs((Node *) rel->joininfo, appinfo); @@ -486,23 +494,36 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, parent_rows += childrel->rows; parent_size += childrel->width * childrel->rows; + /* + * Accumulate per-column estimates too. We need not do anything + * for PlaceHolderVars in the parent list. If child expression + * isn't a Var, or we didn't record a width estimate for it, we + * have to fall back on a datatype-based estimate. + * + * By construction, child's reltargetlist is 1-to-1 with parent's. + */ forboth(parentvars, rel->reltargetlist, childvars, childrel->reltargetlist) { Var *parentvar = (Var *) lfirst(parentvars); - Var *childvar = (Var *) lfirst(childvars); - - /* - * Accumulate per-column estimates too. Whole-row Vars and - * PlaceHolderVars can be ignored here. - */ - if (IsA(parentvar, Var) && - IsA(childvar, Var)) + Node *childvar = (Node *) lfirst(childvars); + + if (IsA(parentvar, Var)) { int pndx = parentvar->varattno - rel->min_attr; - int cndx = childvar->varattno - childrel->min_attr; - - parent_attrsizes[pndx] += childrel->attr_widths[cndx] * childrel->rows; + int32 child_width = 0; + + if (IsA(childvar, Var)) + { + int cndx = ((Var *) childvar)->varattno - childrel->min_attr; + + child_width = childrel->attr_widths[cndx]; + } + if (child_width <= 0) + child_width = get_typavgwidth(exprType(childvar), + exprTypmod(childvar)); + Assert(child_width > 0); + parent_attrsizes[pndx] += child_width * childrel->rows; } } } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1b264a29e29..c853bd8dcb6 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -3238,14 +3238,14 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel, /* * Compute per-output-column width estimates by examining the subquery's * targetlist. For any output that is a plain Var, get the width estimate - * that was made while planning the subquery. Otherwise, fall back on a - * datatype-based estimate. + * that was made while planning the subquery. Otherwise, we leave it to + * set_rel_width to fill in a datatype-based default estimate. */ foreach(lc, subroot->parse->targetList) { TargetEntry *te = (TargetEntry *) lfirst(lc); Node *texpr = (Node *) te->expr; - int32 item_width; + int32 item_width = 0; Assert(IsA(te, TargetEntry)); /* junk columns aren't visible to upper query */ @@ -3256,8 +3256,14 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel, * XXX This currently doesn't work for subqueries containing set * operations, because the Vars in their tlists are bogus references * to the first leaf subquery, which wouldn't give the right answer - * even if we could still get to its PlannerInfo. So fall back on - * datatype in that case. + * even if we could still get to its PlannerInfo. + * + * Also, the subquery could be an appendrel for which all branches are + * known empty due to constraint exclusion, in which case + * set_append_rel_pathlist will have left the attr_widths set to zero. + * + * In either case, we just leave the width estimate zero until + * set_rel_width fixes it. */ if (IsA(texpr, Var) && subroot->parse->setOperations == NULL) @@ -3267,11 +3273,6 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel, item_width = subrel->attr_widths[var->varattno - subrel->min_attr]; } - else - { - item_width = get_typavgwidth(exprType(texpr), exprTypmod(texpr)); - } - Assert(item_width > 0); Assert(te->resno >= rel->min_attr && te->resno <= rel->max_attr); rel->attr_widths[te->resno - rel->min_attr] = item_width; } |