diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2016-08-24 14:37:50 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2016-08-24 14:37:50 -0400 |
commit | 25fe5f758c10c52b96497b04658382c2bb577e8f (patch) | |
tree | fb0a6a548d4d87b3a7cdbc1e0029eea6b15d60d6 | |
parent | da9659f87c868ae638ea34654737f9ef0409211f (diff) | |
download | postgresql-25fe5f758c10c52b96497b04658382c2bb577e8f.tar.gz postgresql-25fe5f758c10c52b96497b04658382c2bb577e8f.zip |
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated
hashtable neglected the possibility that the Agg node might reference
PARAM_EXEC Params that are not referenced by its input plan node. That's
okay if the Params are in upper tlist or qual expressions; but if one
appears in aggregate input expressions, then the hashtable contents need
to be recomputed when the Param's value changes.
To avoid unnecessary performance degradation in the case of a Param that
isn't within an aggregate input, add logic to the planner to determine
which Params are within aggregate inputs. This requires a new field in
struct Agg, but fortunately we never write plans to disk, so this isn't
an initdb-forcing change.
Per report from Jeevan Chalke. This has been broken since forever,
so back-patch to all supported branches.
Andrew Gierth, with minor adjustments by me
Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
-rw-r--r-- | src/backend/executor/nodeAgg.c | 10 | ||||
-rw-r--r-- | src/backend/nodes/copyfuncs.c | 1 | ||||
-rw-r--r-- | src/backend/nodes/outfuncs.c | 2 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 1 | ||||
-rw-r--r-- | src/backend/optimizer/plan/subselect.c | 48 | ||||
-rw-r--r-- | src/include/nodes/plannodes.h | 2 | ||||
-rw-r--r-- | src/test/regress/expected/aggregates.out | 75 | ||||
-rw-r--r-- | src/test/regress/sql/aggregates.sql | 22 |
8 files changed, 155 insertions, 6 deletions
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 2bf48c54e3c..4c82176671b 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -2666,11 +2666,13 @@ ExecReScanAgg(AggState *node) return; /* - * If we do have the hash table and the subplan does not have any - * parameter changes, then we can just rescan the existing hash table; - * no need to build it again. + * If we do have the hash table, and the subplan does not have any + * parameter changes, and none of our own parameter changes affect + * input expressions of the aggregated functions, then we can just + * rescan the existing hash table; no need to build it again. */ - if (outerPlan->chgParam == NULL) + if (outerPlan->chgParam == NULL && + !bms_overlap(node->ss.ps.chgParam, aggnode->aggParams)) { ResetTupleHashIterator(node->hashtable, &node->hashiter); return; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 3f19226b06a..db492a7dd1d 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -846,6 +846,7 @@ _copyAgg(const Agg *from) COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); + COPY_BITMAPSET_FIELD(aggParams); COPY_NODE_FIELD(groupingSets); COPY_NODE_FIELD(chain); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 2487f396cce..5c03e9f2aa8 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -683,7 +683,7 @@ _outAgg(StringInfo str, const Agg *node) appendStringInfo(str, " %u", node->grpOperators[i]); WRITE_LONG_FIELD(numGroups); - + WRITE_BITMAPSET_FIELD(aggParams); WRITE_NODE_FIELD(groupingSets); WRITE_NODE_FIELD(chain); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 40f801e41c8..6af68b08d27 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4535,6 +4535,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; node->numGroups = numGroups; + node->aggParams = NULL; /* SS_finalize_plan() will fill this */ copy_plan_costsize(plan, lefttree); /* only care about copying size */ cost_agg(&agg_path, root, diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 979802f70d5..e400ea983cc 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -81,6 +81,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root, Bitmapset *valid_params, Bitmapset *scan_params); static bool finalize_primnode(Node *node, finalize_primnode_context *context); +static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context); /* @@ -2558,6 +2559,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, locally_added_param); break; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + /* + * AGG_HASHED plans need to know which Params are referenced + * in aggregate calls. Do a separate scan to identify them. + */ + if (agg->aggstrategy == AGG_HASHED) + { + finalize_primnode_context aggcontext; + + aggcontext.root = root; + aggcontext.paramids = NULL; + finalize_agg_primnode((Node *) agg->plan.targetlist, + &aggcontext); + finalize_agg_primnode((Node *) agg->plan.qual, + &aggcontext); + agg->aggParams = aggcontext.paramids; + } + } + break; + case T_WindowAgg: finalize_primnode(((WindowAgg *) plan)->startOffset, &context); @@ -2566,7 +2590,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, break; case T_Hash: - case T_Agg: case T_Material: case T_Sort: case T_Unique: @@ -2711,6 +2734,29 @@ finalize_primnode(Node *node, finalize_primnode_context *context) } /* + * finalize_agg_primnode: find all Aggref nodes in the given expression tree, + * and add IDs of all PARAM_EXEC params appearing within their aggregated + * arguments to the result set. + */ +static bool +finalize_agg_primnode(Node *node, finalize_primnode_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Aggref *agg = (Aggref *) node; + + /* we should not consider the direct arguments, if any */ + finalize_primnode((Node *) agg->args, context); + finalize_primnode((Node *) agg->aggfilter, context); + return false; /* there can't be any Aggrefs below here */ + } + return expression_tree_walker(node, finalize_agg_primnode, + (void *) context); +} + +/* * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan * * The plan is expected to return a scalar value of the given type/collation. diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 7a6a3fea479..cfcb9944f97 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -721,6 +721,8 @@ typedef struct Agg AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ + Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ + /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */ List *groupingSets; /* grouping sets to use */ List *chain; /* chained Agg/Sort nodes */ } Agg; diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 8852051e932..99ca3271e20 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -366,6 +366,81 @@ from tenk1 o; 9999 (1 row) +-- Test handling of Params within aggregate arguments in hashed aggregation. +-- Per bug report from Jeevan Chalke. +explain (verbose, costs off) +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; + QUERY PLAN +------------------------------------------------------------------ + Sort + Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2))) + Sort Key: s1.s1, s2.s2 + -> Nested Loop + Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2))) + -> Function Scan on pg_catalog.generate_series s1 + Output: s1.s1 + Function Call: generate_series(1, 3) + -> HashAggregate + Output: s2.s2, sum((s1.s1 + s2.s2)) + Group Key: s2.s2 + -> Function Scan on pg_catalog.generate_series s2 + Output: s2.s2 + Function Call: generate_series(1, 3) +(14 rows) + +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; + s1 | s2 | sm +----+----+---- + 1 | 1 | 2 + 1 | 2 | 3 + 1 | 3 | 4 + 2 | 1 | 3 + 2 | 2 | 4 + 2 | 3 | 5 + 3 | 1 | 4 + 3 | 2 | 5 + 3 | 3 | 6 +(9 rows) + +explain (verbose, costs off) +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + QUERY PLAN +------------------------------------------------------------------- + Function Scan on pg_catalog.generate_series x + Output: (SubPlan 1) + Function Call: generate_series(1, 3) + SubPlan 1 + -> Sort + Output: (sum((x.x + y.y))), y.y + Sort Key: (sum((x.x + y.y))) + -> HashAggregate + Output: sum((x.x + y.y)), y.y + Group Key: y.y + -> Function Scan on pg_catalog.generate_series y + Output: y.y + Function Call: generate_series(1, 3) +(13 rows) + +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + array +--------- + {2,3,4} + {3,4,5} + {4,5,6} +(3 rows) + -- -- test for bitwise integer aggregates -- diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index a84327d24cc..c12a2d8b7b6 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -98,6 +98,28 @@ select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))) from tenk1 o; +-- Test handling of Params within aggregate arguments in hashed aggregation. +-- Per bug report from Jeevan Chalke. +explain (verbose, costs off) +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; + +explain (verbose, costs off) +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + -- -- test for bitwise integer aggregates -- |