aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2018-09-25 12:54:29 -0700
committerAndres Freund <andres@anarazel.de>2018-09-25 13:12:44 -0700
commit33001fd7a7072d483272115a9376478fdc007fb9 (patch)
tree6a01a567ac1b665c34a2a7e97ece0bb2ea35b503 /src/backend
parent5e22171310f8d7c82219a6b978440e5144e88683 (diff)
downloadpostgresql-33001fd7a7072d483272115a9376478fdc007fb9.tar.gz
postgresql-33001fd7a7072d483272115a9376478fdc007fb9.zip
Collect JIT instrumentation from workers.
Previously, when using parallel query, EXPLAIN (ANALYZE)'s JIT compilation timings did not include the overhead from doing so on the workers. Fix that. We do so by simply aggregating the cost of doing JIT compilation on workers and the leader together. Arguably that's not quite accurate, because the total time spend doing so is spent in parallel - but it's hard to do much better. For additional detail, when VERBOSE is specified, the stats for workers are displayed separately. Author: Amit Khandekar and Andres Freund Discussion: https://postgr.es/m/CAJ3gD9eLrz51RK_gTkod+71iDcjpB_N8eC6vU2AW-VicsAERpQ@mail.gmail.com Backpatch: 11-
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/commands/explain.c87
-rw-r--r--src/backend/executor/execMain.c16
-rw-r--r--src/backend/executor/execParallel.c82
-rw-r--r--src/backend/jit/jit.c11
-rw-r--r--src/backend/jit/llvm/llvmjit.c14
-rw-r--r--src/backend/jit/llvm/llvmjit_expr.c2
6 files changed, 176 insertions, 36 deletions
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 01520154e48..ed6afe79a9f 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -563,9 +563,9 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
* depending on build options. Might want to separate that out from COSTS
* at a later stage.
*/
- if (queryDesc->estate->es_jit && es->costs &&
- queryDesc->estate->es_jit->created_functions > 0)
- ExplainPrintJIT(es, queryDesc);
+ if (es->costs)
+ ExplainPrintJIT(es, queryDesc->estate->es_jit_flags,
+ queryDesc->estate->es_jit_combined_instr, -1);
/*
* Close down the query and free resources. Include time for this in the
@@ -691,19 +691,27 @@ ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc)
/*
* ExplainPrintJIT -
* Append information about JITing to es->str.
+ *
+ * Can be used to print the JIT instrumentation of the backend (worker_num =
+ * -1) or that of a specific worker (worker_num = ...).
*/
void
-ExplainPrintJIT(ExplainState *es, QueryDesc *queryDesc)
+ExplainPrintJIT(ExplainState *es, int jit_flags,
+ JitInstrumentation *ji, int worker_num)
{
- JitContext *jc = queryDesc->estate->es_jit;
instr_time total_time;
+ bool for_workers = (worker_num >= 0);
+
+ /* don't print information if no JITing happened */
+ if (!ji || ji->created_functions == 0)
+ return;
/* calculate total time */
INSTR_TIME_SET_ZERO(total_time);
- INSTR_TIME_ADD(total_time, jc->generation_counter);
- INSTR_TIME_ADD(total_time, jc->inlining_counter);
- INSTR_TIME_ADD(total_time, jc->optimization_counter);
- INSTR_TIME_ADD(total_time, jc->emission_counter);
+ INSTR_TIME_ADD(total_time, ji->generation_counter);
+ INSTR_TIME_ADD(total_time, ji->inlining_counter);
+ INSTR_TIME_ADD(total_time, ji->optimization_counter);
+ INSTR_TIME_ADD(total_time, ji->emission_counter);
ExplainOpenGroup("JIT", "JIT", true, es);
@@ -711,27 +719,30 @@ ExplainPrintJIT(ExplainState *es, QueryDesc *queryDesc)
if (es->format == EXPLAIN_FORMAT_TEXT)
{
appendStringInfoSpaces(es->str, es->indent * 2);
- appendStringInfo(es->str, "JIT:\n");
+ if (for_workers)
+ appendStringInfo(es->str, "JIT for worker %u:\n", worker_num);
+ else
+ appendStringInfo(es->str, "JIT:\n");
es->indent += 1;
- ExplainPropertyInteger("Functions", NULL, jc->created_functions, es);
+ ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);
appendStringInfoSpaces(es->str, es->indent * 2);
appendStringInfo(es->str, "Options: %s %s, %s %s, %s %s, %s %s\n",
- "Inlining", jc->flags & PGJIT_INLINE ? "true" : "false",
- "Optimization", jc->flags & PGJIT_OPT3 ? "true" : "false",
- "Expressions", jc->flags & PGJIT_EXPR ? "true" : "false",
- "Deforming", jc->flags & PGJIT_DEFORM ? "true" : "false");
+ "Inlining", jit_flags & PGJIT_INLINE ? "true" : "false",
+ "Optimization", jit_flags & PGJIT_OPT3 ? "true" : "false",
+ "Expressions", jit_flags & PGJIT_EXPR ? "true" : "false",
+ "Deforming", jit_flags & PGJIT_DEFORM ? "true" : "false");
if (es->analyze && es->timing)
{
appendStringInfoSpaces(es->str, es->indent * 2);
appendStringInfo(es->str,
"Timing: %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms\n",
- "Generation", 1000.0 * INSTR_TIME_GET_DOUBLE(jc->generation_counter),
- "Inlining", 1000.0 * INSTR_TIME_GET_DOUBLE(jc->inlining_counter),
- "Optimization", 1000.0 * INSTR_TIME_GET_DOUBLE(jc->optimization_counter),
- "Emission", 1000.0 * INSTR_TIME_GET_DOUBLE(jc->emission_counter),
+ "Generation", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
+ "Inlining", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
+ "Optimization", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
+ "Emission", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
"Total", 1000.0 * INSTR_TIME_GET_DOUBLE(total_time));
}
@@ -739,13 +750,14 @@ ExplainPrintJIT(ExplainState *es, QueryDesc *queryDesc)
}
else
{
- ExplainPropertyInteger("Functions", NULL, jc->created_functions, es);
+ ExplainPropertyInteger("Worker Number", NULL, worker_num, es);
+ ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);
ExplainOpenGroup("Options", "Options", true, es);
- ExplainPropertyBool("Inlining", jc->flags & PGJIT_INLINE, es);
- ExplainPropertyBool("Optimization", jc->flags & PGJIT_OPT3, es);
- ExplainPropertyBool("Expressions", jc->flags & PGJIT_EXPR, es);
- ExplainPropertyBool("Deforming", jc->flags & PGJIT_DEFORM, es);
+ ExplainPropertyBool("Inlining", jit_flags & PGJIT_INLINE, es);
+ ExplainPropertyBool("Optimization", jit_flags & PGJIT_OPT3, es);
+ ExplainPropertyBool("Expressions", jit_flags & PGJIT_EXPR, es);
+ ExplainPropertyBool("Deforming", jit_flags & PGJIT_DEFORM, es);
ExplainCloseGroup("Options", "Options", true, es);
if (es->analyze && es->timing)
@@ -753,16 +765,16 @@ ExplainPrintJIT(ExplainState *es, QueryDesc *queryDesc)
ExplainOpenGroup("Timing", "Timing", true, es);
ExplainPropertyFloat("Generation", "ms",
- 1000.0 * INSTR_TIME_GET_DOUBLE(jc->generation_counter),
+ 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
3, es);
ExplainPropertyFloat("Inlining", "ms",
- 1000.0 * INSTR_TIME_GET_DOUBLE(jc->inlining_counter),
+ 1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
3, es);
ExplainPropertyFloat("Optimization", "ms",
- 1000.0 * INSTR_TIME_GET_DOUBLE(jc->optimization_counter),
+ 1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
3, es);
ExplainPropertyFloat("Emission", "ms",
- 1000.0 * INSTR_TIME_GET_DOUBLE(jc->emission_counter),
+ 1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
3, es);
ExplainPropertyFloat("Total", "ms",
1000.0 * INSTR_TIME_GET_DOUBLE(total_time),
@@ -1554,6 +1566,25 @@ ExplainNode(PlanState *planstate, List *ancestors,
ExplainPropertyInteger("Workers Launched", NULL,
nworkers, es);
}
+
+ /*
+ * Print per-worker Jit instrumentation. Use same conditions
+ * as for the leader's JIT instrumentation, see comment there.
+ */
+ if (es->costs && es->verbose &&
+ outerPlanState(planstate)->worker_jit_instrument)
+ {
+ PlanState *child = outerPlanState(planstate);
+ int n;
+ SharedJitInstrumentation *w = child->worker_jit_instrument;
+
+ for (n = 0; n < w->num_workers; ++n)
+ {
+ ExplainPrintJIT(es, child->state->es_jit_flags,
+ &w->jit_instr[n], n);
+ }
+ }
+
if (gather->single_copy || es->format != EXPLAIN_FORMAT_TEXT)
ExplainPropertyBool("Single Copy", gather->single_copy, es);
}
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 85d980356b7..04f14c91787 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -48,6 +48,7 @@
#include "executor/execdebug.h"
#include "executor/nodeSubplan.h"
#include "foreign/fdwapi.h"
+#include "jit/jit.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
@@ -494,6 +495,21 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
ExecEndPlan(queryDesc->planstate, estate);
+ /*
+ * If this process has done JIT, either merge stats into worker stats, or
+ * use this process' stats as the global stats if no parallelism was used
+ * / no workers did JIT.
+ */
+ if (estate->es_instrument && queryDesc->estate->es_jit)
+ {
+ if (queryDesc->estate->es_jit_combined_instr)
+ InstrJitAgg(queryDesc->estate->es_jit_combined_instr,
+ &queryDesc->estate->es_jit->instr);
+ else
+ queryDesc->estate->es_jit_combined_instr =
+ &queryDesc->estate->es_jit->instr;
+ }
+
/* do away with our snapshots */
UnregisterSnapshot(estate->es_snapshot);
UnregisterSnapshot(estate->es_crosscheck_snapshot);
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index c93084e4d2a..838ec842991 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -37,6 +37,7 @@
#include "executor/nodeSort.h"
#include "executor/nodeSubplan.h"
#include "executor/tqueue.h"
+#include "jit/jit.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
@@ -62,6 +63,7 @@
#define PARALLEL_KEY_INSTRUMENTATION UINT64CONST(0xE000000000000006)
#define PARALLEL_KEY_DSA UINT64CONST(0xE000000000000007)
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xE000000000000008)
+#define PARALLEL_KEY_JIT_INSTRUMENTATION UINT64CONST(0xE000000000000009)
#define PARALLEL_TUPLE_QUEUE_SIZE 65536
@@ -573,9 +575,11 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
char *paramlistinfo_space;
BufferUsage *bufusage_space;
SharedExecutorInstrumentation *instrumentation = NULL;
+ SharedJitInstrumentation *jit_instrumentation = NULL;
int pstmt_len;
int paramlistinfo_len;
int instrumentation_len = 0;
+ int jit_instrumentation_len = 0;
int instrument_offset = 0;
Size dsa_minsize = dsa_minimum_size();
char *query_string;
@@ -669,6 +673,16 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
mul_size(e.nnodes, nworkers));
shm_toc_estimate_chunk(&pcxt->estimator, instrumentation_len);
shm_toc_estimate_keys(&pcxt->estimator, 1);
+
+ /* Estimate space for JIT instrumentation, if required. */
+ if (estate->es_jit_flags != PGJIT_NONE)
+ {
+ jit_instrumentation_len =
+ offsetof(SharedJitInstrumentation, jit_instr) +
+ sizeof(JitInstrumentation) * nworkers;
+ shm_toc_estimate_chunk(&pcxt->estimator, jit_instrumentation_len);
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+ }
}
/* Estimate space for DSA area. */
@@ -742,6 +756,18 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTRUMENTATION,
instrumentation);
pei->instrumentation = instrumentation;
+
+ if (estate->es_jit_flags != PGJIT_NONE)
+ {
+ jit_instrumentation = shm_toc_allocate(pcxt->toc,
+ jit_instrumentation_len);
+ jit_instrumentation->num_workers = nworkers;
+ memset(jit_instrumentation->jit_instr, 0,
+ sizeof(JitInstrumentation) * nworkers);
+ shm_toc_insert(pcxt->toc, PARALLEL_KEY_JIT_INSTRUMENTATION,
+ jit_instrumentation);
+ pei->jit_instrumentation = jit_instrumentation;
+ }
}
/*
@@ -1004,6 +1030,46 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
}
/*
+ * Add up the workers' JIT instrumentation from dynamic shared memory.
+ */
+static void
+ExecParallelRetrieveJitInstrumentation(PlanState *planstate,
+ SharedJitInstrumentation *shared_jit)
+{
+ JitInstrumentation *combined;
+ int ibytes;
+
+ int n;
+
+ /*
+ * Accumulate worker JIT instrumentation into the combined JIT
+ * instrumentation, allocating it if required. Note this is kept separate
+ * from the leader's own instrumentation.
+ */
+ if (!planstate->state->es_jit_combined_instr)
+ planstate->state->es_jit_combined_instr =
+ MemoryContextAllocZero(planstate->state->es_query_cxt, sizeof(JitInstrumentation));
+ combined = planstate->state->es_jit_combined_instr;
+
+ /* Accummulate all the workers' instrumentations. */
+ for (n = 0; n < shared_jit->num_workers; ++n)
+ InstrJitAgg(combined, &shared_jit->jit_instr[n]);
+
+ /*
+ * Store the per-worker detail.
+ *
+ * Similar to ExecParallelRetrieveInstrumentation(), allocate the
+ * instrumentation in per-query context.
+ */
+ ibytes = offsetof(SharedJitInstrumentation, jit_instr)
+ + mul_size(shared_jit->num_workers, sizeof(JitInstrumentation));
+ planstate->worker_jit_instrument =
+ MemoryContextAlloc(planstate->state->es_query_cxt, ibytes);
+
+ memcpy(planstate->worker_jit_instrument, shared_jit, ibytes);
+}
+
+/*
* Finish parallel execution. We wait for parallel workers to finish, and
* accumulate their buffer usage.
*/
@@ -1068,6 +1134,11 @@ ExecParallelCleanup(ParallelExecutorInfo *pei)
ExecParallelRetrieveInstrumentation(pei->planstate,
pei->instrumentation);
+ /* Accumulate JIT instrumentation, if any. */
+ if (pei->jit_instrumentation)
+ ExecParallelRetrieveJitInstrumentation(pei->planstate,
+ pei->jit_instrumentation);
+
/* Free any serialized parameters. */
if (DsaPointerIsValid(pei->param_exec))
{
@@ -1274,6 +1345,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
DestReceiver *receiver;
QueryDesc *queryDesc;
SharedExecutorInstrumentation *instrumentation;
+ SharedJitInstrumentation *jit_instrumentation;
int instrument_options = 0;
void *area_space;
dsa_area *area;
@@ -1287,6 +1359,8 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, true);
if (instrumentation != NULL)
instrument_options = instrumentation->instrument_options;
+ jit_instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_JIT_INSTRUMENTATION,
+ true);
queryDesc = ExecParallelGetQueryDesc(toc, receiver, instrument_options);
/* Setting debug_query_string for individual workers */
@@ -1350,6 +1424,14 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
ExecParallelReportInstrumentation(queryDesc->planstate,
instrumentation);
+ /* Report JIT instrumentation data if any */
+ if (queryDesc->estate->es_jit && jit_instrumentation != NULL)
+ {
+ Assert(ParallelWorkerNumber < jit_instrumentation->num_workers);
+ jit_instrumentation->jit_instr[ParallelWorkerNumber] =
+ queryDesc->estate->es_jit->instr;
+ }
+
/* Must do this after capturing instrumentation. */
ExecutorEnd(queryDesc);
diff --git a/src/backend/jit/jit.c b/src/backend/jit/jit.c
index 7d3883d3bf4..5d1f2e57be1 100644
--- a/src/backend/jit/jit.c
+++ b/src/backend/jit/jit.c
@@ -182,6 +182,17 @@ jit_compile_expr(struct ExprState *state)
return false;
}
+/* Aggregate JIT instrumentation information */
+void
+InstrJitAgg(JitInstrumentation *dst, JitInstrumentation *add)
+{
+ dst->created_functions += add->created_functions;
+ INSTR_TIME_ADD(dst->generation_counter, add->generation_counter);
+ INSTR_TIME_ADD(dst->inlining_counter, add->inlining_counter);
+ INSTR_TIME_ADD(dst->optimization_counter, add->optimization_counter);
+ INSTR_TIME_ADD(dst->emission_counter, add->emission_counter);
+}
+
static bool
file_exists(const char *name)
{
diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c
index 640c27fc408..7510698f863 100644
--- a/src/backend/jit/llvm/llvmjit.c
+++ b/src/backend/jit/llvm/llvmjit.c
@@ -224,7 +224,7 @@ llvm_expand_funcname(struct LLVMJitContext *context, const char *basename)
{
Assert(context->module != NULL);
- context->base.created_functions++;
+ context->base.instr.created_functions++;
/*
* Previously we used dots to separate, but turns out some tools, e.g.
@@ -504,7 +504,7 @@ llvm_compile_module(LLVMJitContext *context)
INSTR_TIME_SET_CURRENT(starttime);
llvm_inline(context->module);
INSTR_TIME_SET_CURRENT(endtime);
- INSTR_TIME_ACCUM_DIFF(context->base.inlining_counter,
+ INSTR_TIME_ACCUM_DIFF(context->base.instr.inlining_counter,
endtime, starttime);
}
@@ -524,7 +524,7 @@ llvm_compile_module(LLVMJitContext *context)
INSTR_TIME_SET_CURRENT(starttime);
llvm_optimize_module(context, context->module);
INSTR_TIME_SET_CURRENT(endtime);
- INSTR_TIME_ACCUM_DIFF(context->base.optimization_counter,
+ INSTR_TIME_ACCUM_DIFF(context->base.instr.optimization_counter,
endtime, starttime);
if (jit_dump_bitcode)
@@ -575,7 +575,7 @@ llvm_compile_module(LLVMJitContext *context)
}
#endif
INSTR_TIME_SET_CURRENT(endtime);
- INSTR_TIME_ACCUM_DIFF(context->base.emission_counter,
+ INSTR_TIME_ACCUM_DIFF(context->base.instr.emission_counter,
endtime, starttime);
context->module = NULL;
@@ -596,9 +596,9 @@ llvm_compile_module(LLVMJitContext *context)
ereport(DEBUG1,
(errmsg("time to inline: %.3fs, opt: %.3fs, emit: %.3fs",
- INSTR_TIME_GET_DOUBLE(context->base.inlining_counter),
- INSTR_TIME_GET_DOUBLE(context->base.optimization_counter),
- INSTR_TIME_GET_DOUBLE(context->base.emission_counter)),
+ INSTR_TIME_GET_DOUBLE(context->base.instr.inlining_counter),
+ INSTR_TIME_GET_DOUBLE(context->base.instr.optimization_counter),
+ INSTR_TIME_GET_DOUBLE(context->base.instr.emission_counter)),
errhidestmt(true),
errhidecontext(true)));
}
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c
index 0f3109334e8..7454d05acaf 100644
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -2557,7 +2557,7 @@ llvm_compile_expr(ExprState *state)
llvm_leave_fatal_on_oom();
INSTR_TIME_SET_CURRENT(endtime);
- INSTR_TIME_ACCUM_DIFF(context->base.generation_counter,
+ INSTR_TIME_ACCUM_DIFF(context->base.instr.generation_counter,
endtime, starttime);
return true;