aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2017-12-05 14:35:42 -0500
committerRobert Haas <rhaas@postgresql.org>2017-12-05 14:35:42 -0500
commit778e78ae9fa51e58f41cbdc72b293291d02d8984 (patch)
tree0ed3b8ba40f80b4ac6fa8909f9e49d1d944cd90f
parentff1473078d2523fa5af7d678dcb4002d9e7f32bd (diff)
downloadpostgresql-778e78ae9fa51e58f41cbdc72b293291d02d8984.tar.gz
postgresql-778e78ae9fa51e58f41cbdc72b293291d02d8984.zip
Fix accumulation of parallel worker instrumentation.
When a Gather or Gather Merge node is started and stopped multiple times, the old code wouldn't reset the shared state between executions, potentially resulting in dramatically inflated instrumentation data for nodes beneath it. (The per-worker instrumentation ended up OK, I think, but the overall totals were inflated.) Report by hubert depesz lubaczewski. Analysis and fix by Amit Kapila, reviewed and tweaked a bit by me. Discussion: http://postgr.es/m/20171127175631.GA405@depesz.com
-rw-r--r--src/backend/executor/execParallel.c51
-rw-r--r--src/test/regress/expected/select_parallel.out21
-rw-r--r--src/test/regress/sql/select_parallel.sql7
3 files changed, 66 insertions, 13 deletions
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 7dda399daf3..42c3e473a9a 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -612,6 +612,19 @@ ExecParallelReinitialize(PlanState *planstate,
/* Old workers must already be shut down */
Assert(pei->finished);
+ /* Clear the instrumentation space from the last round. */
+ if (pei->instrumentation)
+ {
+ Instrumentation *instrument;
+ SharedExecutorInstrumentation *sh_instr;
+ int i;
+
+ sh_instr = pei->instrumentation;
+ instrument = GetInstrumentationArray(sh_instr);
+ for (i = 0; i < sh_instr->num_workers * sh_instr->num_plan_nodes; ++i)
+ InstrInit(&instrument[i], pei->planstate->state->es_instrument);
+ }
+
ReinitializeParallelDSM(pei->pcxt);
pei->tqueue = ExecParallelSetupTupleQueues(pei->pcxt, true);
pei->reader = NULL;
@@ -699,21 +712,33 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
for (n = 0; n < instrumentation->num_workers; ++n)
InstrAggNode(planstate->instrument, &instrument[n]);
- /*
- * Also store the per-worker detail.
- *
- * Worker instrumentation should be allocated in the same context as the
- * regular instrumentation information, which is the per-query context.
- * Switch into per-query memory context.
- */
- oldcontext = MemoryContextSwitchTo(planstate->state->es_query_cxt);
- ibytes = mul_size(instrumentation->num_workers, sizeof(Instrumentation));
- planstate->worker_instrument =
- palloc(ibytes + offsetof(WorkerInstrumentation, instrument));
- MemoryContextSwitchTo(oldcontext);
+ if (!planstate->worker_instrument)
+ {
+ /*
+ * Allocate space for the per-worker detail.
+ *
+ * Worker instrumentation should be allocated in the same context as
+ * the regular instrumentation information, which is the per-query
+ * context. Switch into per-query memory context.
+ */
+ oldcontext = MemoryContextSwitchTo(planstate->state->es_query_cxt);
+ ibytes =
+ mul_size(instrumentation->num_workers, sizeof(Instrumentation));
+ planstate->worker_instrument =
+ palloc(ibytes + offsetof(WorkerInstrumentation, instrument));
+ MemoryContextSwitchTo(oldcontext);
+
+ for (n = 0; n < instrumentation->num_workers; ++n)
+ InstrInit(&planstate->worker_instrument->instrument[n],
+ planstate->state->es_instrument);
+ }
planstate->worker_instrument->num_workers = instrumentation->num_workers;
- memcpy(&planstate->worker_instrument->instrument, instrument, ibytes);
+
+ /* Accumulate the per-worker detail. */
+ for (n = 0; n < instrumentation->num_workers; ++n)
+ InstrAggNode(&planstate->worker_instrument->instrument[n],
+ &instrument[n]);
return planstate_tree_walker(planstate, ExecParallelRetrieveInstrumentation,
instrumentation);
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index ecabd35cca6..1fc29c1bbbb 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -300,7 +300,28 @@ select count(*) from bmscantest where a>1;
99999
(1 row)
+-- test accumulation of stats for parallel node
reset enable_seqscan;
+alter table tenk2 set (parallel_workers = 0);
+explain (analyze, timing off, summary off, costs off)
+ select count(*) from tenk1, tenk2 where tenk1.hundred > 1
+ and tenk2.thousand=0;
+ QUERY PLAN
+--------------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=1)
+ -> Nested Loop (actual rows=98000 loops=1)
+ -> Seq Scan on tenk2 (actual rows=10 loops=1)
+ Filter: (thousand = 0)
+ Rows Removed by Filter: 9990
+ -> Gather (actual rows=9800 loops=10)
+ Workers Planned: 4
+ Workers Launched: 4
+ -> Parallel Seq Scan on tenk1 (actual rows=1960 loops=50)
+ Filter: (hundred > 1)
+ Rows Removed by Filter: 40
+(11 rows)
+
+alter table tenk2 reset (parallel_workers);
reset enable_indexscan;
reset enable_hashjoin;
reset enable_mergejoin;
diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql
index 280c7876536..599a14f44fe 100644
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@@ -116,7 +116,14 @@ insert into bmscantest select r, 'fooooooooooooooooooooooooooooooooooooooooooooo
create index i_bmtest ON bmscantest(a);
select count(*) from bmscantest where a>1;
+-- test accumulation of stats for parallel node
reset enable_seqscan;
+alter table tenk2 set (parallel_workers = 0);
+explain (analyze, timing off, summary off, costs off)
+ select count(*) from tenk1, tenk2 where tenk1.hundred > 1
+ and tenk2.thousand=0;
+alter table tenk2 reset (parallel_workers);
+
reset enable_indexscan;
reset enable_hashjoin;
reset enable_mergejoin;