Diffstat (limited to 'src/backend')
 src/backend/commands/explain.c      |  60
 src/backend/executor/execParallel.c |  43
 src/backend/executor/execProcnode.c |   3
 src/backend/executor/nodeHash.c     | 104
 4 files changed, 184 insertions(+), 26 deletions(-)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 447f69d044e..7e4fbafc535 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -19,7 +19,7 @@
#include "commands/createas.h"
#include "commands/defrem.h"
#include "commands/prepare.h"
-#include "executor/hashjoin.h"
+#include "executor/nodeHash.h"
#include "foreign/fdwapi.h"
#include "nodes/extensible.h"
#include "nodes/nodeFuncs.h"
@@ -2379,34 +2379,62 @@ show_sort_info(SortState *sortstate, ExplainState *es)
 static void
 show_hash_info(HashState *hashstate, ExplainState *es)
 {
-    HashJoinTable hashtable;
+    HashInstrumentation *hinstrument = NULL;
 
-    hashtable = hashstate->hashtable;
+    /*
+     * In a parallel query, the leader process may or may not have run the
+     * hash join, and even if it did it may not have built a hash table due to
+     * timing (if it started late it might have seen no tuples in the outer
+     * relation and skipped building the hash table).  Therefore we have to be
+     * prepared to get instrumentation data from a worker if there is no hash
+     * table.
+     */
+    if (hashstate->hashtable)
+    {
+        hinstrument = (HashInstrumentation *)
+            palloc(sizeof(HashInstrumentation));
+        ExecHashGetInstrumentation(hinstrument, hashstate->hashtable);
+    }
+    else if (hashstate->shared_info)
+    {
+        SharedHashInfo *shared_info = hashstate->shared_info;
+        int         i;
+
+        /* Find the first worker that built a hash table. */
+        for (i = 0; i < shared_info->num_workers; ++i)
+        {
+            if (shared_info->hinstrument[i].nbatch > 0)
+            {
+                hinstrument = &shared_info->hinstrument[i];
+                break;
+            }
+        }
+    }
 
-    if (hashtable)
+    if (hinstrument)
     {
-        long        spacePeakKb = (hashtable->spacePeak + 1023) / 1024;
+        long        spacePeakKb = (hinstrument->space_peak + 1023) / 1024;
 
         if (es->format != EXPLAIN_FORMAT_TEXT)
         {
-            ExplainPropertyLong("Hash Buckets", hashtable->nbuckets, es);
+            ExplainPropertyLong("Hash Buckets", hinstrument->nbuckets, es);
             ExplainPropertyLong("Original Hash Buckets",
-                                hashtable->nbuckets_original, es);
-            ExplainPropertyLong("Hash Batches", hashtable->nbatch, es);
+                                hinstrument->nbuckets_original, es);
+            ExplainPropertyLong("Hash Batches", hinstrument->nbatch, es);
             ExplainPropertyLong("Original Hash Batches",
-                                hashtable->nbatch_original, es);
+                                hinstrument->nbatch_original, es);
             ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es);
         }
-        else if (hashtable->nbatch_original != hashtable->nbatch ||
-                 hashtable->nbuckets_original != hashtable->nbuckets)
+        else if (hinstrument->nbatch_original != hinstrument->nbatch ||
+                 hinstrument->nbuckets_original != hinstrument->nbuckets)
         {
             appendStringInfoSpaces(es->str, es->indent * 2);
             appendStringInfo(es->str,
                              "Buckets: %d (originally %d)  Batches: %d (originally %d)  Memory Usage: %ldkB\n",
-                             hashtable->nbuckets,
-                             hashtable->nbuckets_original,
-                             hashtable->nbatch,
-                             hashtable->nbatch_original,
+                             hinstrument->nbuckets,
+                             hinstrument->nbuckets_original,
+                             hinstrument->nbatch,
+                             hinstrument->nbatch_original,
                              spacePeakKb);
         }
         else
@@ -2414,7 +2442,7 @@ show_hash_info(HashState *hashstate, ExplainState *es)
             appendStringInfoSpaces(es->str, es->indent * 2);
             appendStringInfo(es->str,
                              "Buckets: %d  Batches: %d  Memory Usage: %ldkB\n",
-                             hashtable->nbuckets, hashtable->nbatch,
+                             hinstrument->nbuckets, hinstrument->nbatch,
                              spacePeakKb);
         }
     }
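(Annotation: because this view is limited to src/backend, the commit's accompanying header changes are not shown. The HashInstrumentation and SharedHashInfo types used above are declared in the executor headers under src/include. As a reading aid only, here is a sketch of declarations consistent with the field accesses in this diff; the real declarations may differ in detail.)

/* Sketch inferred from usage in this diff, not the authoritative header. */
typedef struct HashInstrumentation
{
    int         nbuckets;           /* number of buckets at end of execution */
    int         nbuckets_original;  /* planned number of buckets */
    int         nbatch;             /* number of batches at end of execution */
    int         nbatch_original;    /* planned number of batches */
    size_t      space_peak;         /* peak memory usage in bytes */
} HashInstrumentation;

typedef struct SharedHashInfo
{
    int         num_workers;
    HashInstrumentation hinstrument[FLEXIBLE_ARRAY_MEMBER];
} SharedHashInfo;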
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 53c5254be13..0aca00b0e68 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -29,6 +29,7 @@
#include "executor/nodeBitmapHeapscan.h"
#include "executor/nodeCustom.h"
#include "executor/nodeForeignscan.h"
+#include "executor/nodeHash.h"
#include "executor/nodeIndexscan.h"
#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeSeqscan.h"
@@ -259,8 +260,12 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
ExecBitmapHeapEstimate((BitmapHeapScanState *) planstate,
e->pcxt);
break;
+ case T_HashState:
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
+ ExecHashEstimate((HashState *) planstate, e->pcxt);
+ break;
case T_SortState:
- /* even when not parallel-aware */
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
ExecSortEstimate((SortState *) planstate, e->pcxt);
break;
@@ -458,8 +463,12 @@ ExecParallelInitializeDSM(PlanState *planstate,
ExecBitmapHeapInitializeDSM((BitmapHeapScanState *) planstate,
d->pcxt);
break;
+ case T_HashState:
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
+ ExecHashInitializeDSM((HashState *) planstate, d->pcxt);
+ break;
case T_SortState:
- /* even when not parallel-aware */
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
ExecSortInitializeDSM((SortState *) planstate, d->pcxt);
break;
@@ -872,8 +881,12 @@ ExecParallelReInitializeDSM(PlanState *planstate,
ExecBitmapHeapReInitializeDSM((BitmapHeapScanState *) planstate,
pcxt);
break;
+ case T_HashState:
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
+ ExecHashReInitializeDSM((HashState *) planstate, pcxt);
+ break;
case T_SortState:
- /* even when not parallel-aware */
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
ExecSortReInitializeDSM((SortState *) planstate, pcxt);
break;
@@ -928,12 +941,18 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
planstate->worker_instrument->num_workers = instrumentation->num_workers;
memcpy(&planstate->worker_instrument->instrument, instrument, ibytes);
- /*
- * Perform any node-type-specific work that needs to be done. Currently,
- * only Sort nodes need to do anything here.
- */
- if (IsA(planstate, SortState))
- ExecSortRetrieveInstrumentation((SortState *) planstate);
+ /* Perform any node-type-specific work that needs to be done. */
+ switch (nodeTag(planstate))
+ {
+ case T_SortState:
+ ExecSortRetrieveInstrumentation((SortState *) planstate);
+ break;
+ case T_HashState:
+ ExecHashRetrieveInstrumentation((HashState *) planstate);
+ break;
+ default:
+ break;
+ }
return planstate_tree_walker(planstate, ExecParallelRetrieveInstrumentation,
instrumentation);
@@ -1160,8 +1179,12 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt)
ExecBitmapHeapInitializeWorker((BitmapHeapScanState *) planstate,
pwcxt);
break;
+ case T_HashState:
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
+ ExecHashInitializeWorker((HashState *) planstate, pwcxt);
+ break;
case T_SortState:
- /* even when not parallel-aware */
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
ExecSortInitializeWorker((SortState *) planstate, pwcxt);
break;
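(Annotation: the pattern above is identical to the one Sort instrumentation already uses. The leader reserves a chunk holding one instrumentation slot per worker, keyed in the shm_toc by plan_node_id, and each worker later writes only its own slot. The following standalone C program, with plain malloc standing in for shm_toc_allocate and hypothetical Fake* names throughout, is a minimal sketch of the sizing and indexing scheme; it is an illustration, not PostgreSQL code.)

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-ins for HashInstrumentation / SharedHashInfo. */
typedef struct FakeInstrumentation
{
    int     nbatch;
    long    space_peak;
} FakeInstrumentation;

typedef struct FakeSharedInfo
{
    int     num_workers;
    FakeInstrumentation hinstrument[];  /* flexible array member */
} FakeSharedInfo;

int
main(void)
{
    int     nworkers = 4;

    /*
     * Size the chunk the way ExecHashEstimate/ExecHashInitializeDSM do:
     * the header via offsetof(), plus one slot per worker.
     */
    size_t  size = offsetof(FakeSharedInfo, hinstrument) +
        nworkers * sizeof(FakeInstrumentation);
    FakeSharedInfo *shared = malloc(size);  /* stands in for shm_toc_allocate */

    memset(shared, 0, size);
    shared->num_workers = nworkers;

    /*
     * Each worker writes only its own slot, indexed the way
     * ExecHashInitializeWorker uses ParallelWorkerNumber; here worker 2
     * pretends to have built a one-batch hash table.
     */
    shared->hinstrument[2].nbatch = 1;
    shared->hinstrument[2].space_peak = 65536;

    /*
     * The leader scans for the first worker that built a hash table,
     * as show_hash_info does (nbatch > 0 means a table was built).
     */
    for (int i = 0; i < shared->num_workers; i++)
    {
        if (shared->hinstrument[i].nbatch > 0)
        {
            printf("worker %d: batches=%d, peak=%ldkB\n",
                   i, shared->hinstrument[i].nbatch,
                   (shared->hinstrument[i].space_peak + 1023) / 1024);
            break;
        }
    }

    free(shared);
    return 0;
}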
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index c1aa5064c90..9befca90161 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -751,6 +751,9 @@ ExecShutdownNode(PlanState *node)
         case T_GatherMergeState:
             ExecShutdownGatherMerge((GatherMergeState *) node);
             break;
+        case T_HashState:
+            ExecShutdownHash((HashState *) node);
+            break;
         default:
             break;
     }
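(Annotation: ExecShutdownNode runs while the leader and workers are still attached to the DSM segment, which is what makes it a valid place for the new ExecShutdownHash to copy statistics out of the live hash table; as the comment in nodeHash.c below notes, ExecEndHash would run too late.)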
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index f7cd8fb3472..6fe5d69d558 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -1638,6 +1638,110 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
 }
 
 /*
+ * Reserve space in the DSM segment for instrumentation data.
+ */
+void
+ExecHashEstimate(HashState *node, ParallelContext *pcxt)
+{
+    size_t      size;
+
+    size = mul_size(pcxt->nworkers, sizeof(HashInstrumentation));
+    size = add_size(size, offsetof(SharedHashInfo, hinstrument));
+    shm_toc_estimate_chunk(&pcxt->estimator, size);
+    shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/*
+ * Set up a space in the DSM for all workers to record instrumentation data
+ * about their hash table.
+ */
+void
+ExecHashInitializeDSM(HashState *node, ParallelContext *pcxt)
+{
+    size_t      size;
+
+    size = offsetof(SharedHashInfo, hinstrument) +
+        pcxt->nworkers * sizeof(HashInstrumentation);
+    node->shared_info = (SharedHashInfo *) shm_toc_allocate(pcxt->toc, size);
+    memset(node->shared_info, 0, size);
+    node->shared_info->num_workers = pcxt->nworkers;
+    shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id,
+                   node->shared_info);
+}
+
+/*
+ * Reset shared state before beginning a fresh scan.
+ */
+void
+ExecHashReInitializeDSM(HashState *node, ParallelContext *pcxt)
+{
+    if (node->shared_info != NULL)
+    {
+        memset(node->shared_info->hinstrument, 0,
+               node->shared_info->num_workers * sizeof(HashInstrumentation));
+    }
+}
+
+/*
+ * Locate the DSM space for hash table instrumentation data that we'll write
+ * to at shutdown time.
+ */
+void
+ExecHashInitializeWorker(HashState *node, ParallelWorkerContext *pwcxt)
+{
+    SharedHashInfo *shared_info;
+
+    shared_info = (SharedHashInfo *)
+        shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, true);
+    node->hinstrument = &shared_info->hinstrument[ParallelWorkerNumber];
+}
+
+/*
+ * Copy instrumentation data from this worker's hash table (if it built one)
+ * to DSM memory so the leader can retrieve it.  This must be done in an
+ * ExecShutdownHash() rather than ExecEndHash() because the latter runs after
+ * we've detached from the DSM segment.
+ */
+void
+ExecShutdownHash(HashState *node)
+{
+    if (node->hinstrument && node->hashtable)
+        ExecHashGetInstrumentation(node->hinstrument, node->hashtable);
+}
+
+/*
+ * Retrieve instrumentation data from workers before the DSM segment is
+ * detached, so that EXPLAIN can access it.
+ */
+void
+ExecHashRetrieveInstrumentation(HashState *node)
+{
+    SharedHashInfo *shared_info = node->shared_info;
+    size_t      size;
+
+    /* Replace node->shared_info with a copy in backend-local memory. */
+    size = offsetof(SharedHashInfo, hinstrument) +
+        shared_info->num_workers * sizeof(HashInstrumentation);
+    node->shared_info = palloc(size);
+    memcpy(node->shared_info, shared_info, size);
+}
+
+/*
+ * Copy the instrumentation data from 'hashtable' into a HashInstrumentation
+ * struct.
+ */
+void
+ExecHashGetInstrumentation(HashInstrumentation *instrument,
+                           HashJoinTable hashtable)
+{
+    instrument->nbuckets = hashtable->nbuckets;
+    instrument->nbuckets_original = hashtable->nbuckets_original;
+    instrument->nbatch = hashtable->nbatch;
+    instrument->nbatch_original = hashtable->nbatch_original;
+    instrument->space_peak = hashtable->spacePeak;
+}
+
+/*
  * Allocate 'size' bytes from the currently active HashMemoryChunk
  */
 static void *
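(Annotation: taken together, the new nodeHash.c hooks mirror the existing Sort instrumentation life cycle. The leader calls ExecHashEstimate and ExecHashInitializeDSM while setting up the parallel context; each worker binds node->hinstrument to its own slot in ExecHashInitializeWorker; ExecShutdownHash snapshots the worker's hash table statistics into that slot before the segment is detached; and ExecHashRetrieveInstrumentation copies the whole SharedHashInfo into leader-local memory so that show_hash_info in explain.c can still read it after the workers and their shared memory are gone.)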