Diffstat (limited to 'src/backend')
-rw-r--r--   src/backend/commands/explain.c       |  60
-rw-r--r--   src/backend/executor/execParallel.c  |  43
-rw-r--r--   src/backend/executor/execProcnode.c  |   3
-rw-r--r--   src/backend/executor/nodeHash.c      | 104
4 files changed, 184 insertions, 26 deletions
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 447f69d044e..7e4fbafc535 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -19,7 +19,7 @@
 #include "commands/createas.h"
 #include "commands/defrem.h"
 #include "commands/prepare.h"
-#include "executor/hashjoin.h"
+#include "executor/nodeHash.h"
 #include "foreign/fdwapi.h"
 #include "nodes/extensible.h"
 #include "nodes/nodeFuncs.h"
@@ -2379,34 +2379,62 @@ show_sort_info(SortState *sortstate, ExplainState *es)
 static void
 show_hash_info(HashState *hashstate, ExplainState *es)
 {
-    HashJoinTable hashtable;
+    HashInstrumentation *hinstrument = NULL;
 
-    hashtable = hashstate->hashtable;
+    /*
+     * In a parallel query, the leader process may or may not have run the
+     * hash join, and even if it did it may not have built a hash table due to
+     * timing (if it started late it might have seen no tuples in the outer
+     * relation and skipped building the hash table).  Therefore we have to be
+     * prepared to get instrumentation data from a worker if there is no hash
+     * table.
+     */
+    if (hashstate->hashtable)
+    {
+        hinstrument = (HashInstrumentation *)
+            palloc(sizeof(HashInstrumentation));
+        ExecHashGetInstrumentation(hinstrument, hashstate->hashtable);
+    }
+    else if (hashstate->shared_info)
+    {
+        SharedHashInfo *shared_info = hashstate->shared_info;
+        int         i;
+
+        /* Find the first worker that built a hash table. */
+        for (i = 0; i < shared_info->num_workers; ++i)
+        {
+            if (shared_info->hinstrument[i].nbatch > 0)
+            {
+                hinstrument = &shared_info->hinstrument[i];
+                break;
+            }
+        }
+    }
 
-    if (hashtable)
+    if (hinstrument)
     {
-        long        spacePeakKb = (hashtable->spacePeak + 1023) / 1024;
+        long        spacePeakKb = (hinstrument->space_peak + 1023) / 1024;
 
         if (es->format != EXPLAIN_FORMAT_TEXT)
         {
-            ExplainPropertyLong("Hash Buckets", hashtable->nbuckets, es);
+            ExplainPropertyLong("Hash Buckets", hinstrument->nbuckets, es);
             ExplainPropertyLong("Original Hash Buckets",
-                                hashtable->nbuckets_original, es);
-            ExplainPropertyLong("Hash Batches", hashtable->nbatch, es);
+                                hinstrument->nbuckets_original, es);
+            ExplainPropertyLong("Hash Batches", hinstrument->nbatch, es);
             ExplainPropertyLong("Original Hash Batches",
-                                hashtable->nbatch_original, es);
+                                hinstrument->nbatch_original, es);
             ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es);
         }
-        else if (hashtable->nbatch_original != hashtable->nbatch ||
-                 hashtable->nbuckets_original != hashtable->nbuckets)
+        else if (hinstrument->nbatch_original != hinstrument->nbatch ||
+                 hinstrument->nbuckets_original != hinstrument->nbuckets)
         {
             appendStringInfoSpaces(es->str, es->indent * 2);
             appendStringInfo(es->str,
                              "Buckets: %d (originally %d)  Batches: %d (originally %d)  Memory Usage: %ldkB\n",
-                             hashtable->nbuckets,
-                             hashtable->nbuckets_original,
-                             hashtable->nbatch,
-                             hashtable->nbatch_original,
+                             hinstrument->nbuckets,
+                             hinstrument->nbuckets_original,
+                             hinstrument->nbatch,
+                             hinstrument->nbatch_original,
                              spacePeakKb);
         }
         else
@@ -2414,7 +2442,7 @@ show_hash_info(HashState *hashstate, ExplainState *es)
             appendStringInfoSpaces(es->str, es->indent * 2);
             appendStringInfo(es->str,
                              "Buckets: %d  Batches: %d  Memory Usage: %ldkB\n",
-                             hashtable->nbuckets, hashtable->nbatch,
+                             hinstrument->nbuckets, hinstrument->nbatch,
                              spacePeakKb);
         }
     }
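Note on the change above: show_hash_info() now falls back to a worker's copy of the instrumentation, using nbatch > 0 as the "this worker actually built a hash table" test (a zero-filled DSM slot has nbatch == 0, while a real hash table always starts with at least one batch). The reported memory figure is a ceiling division to whole kilobytes; a minimal standalone check of that arithmetic (plain C, not PostgreSQL code):

#include <assert.h>

/* Integer ceiling of bytes/1024, as computed for spacePeakKb above. */
static long
bytes_to_kb(long bytes)
{
    return (bytes + 1023) / 1024;
}

int
main(void)
{
    assert(bytes_to_kb(0) == 0);
    assert(bytes_to_kb(1) == 1);    /* any nonzero peak reports at least 1kB */
    assert(bytes_to_kb(1024) == 1);
    assert(bytes_to_kb(1025) == 2);
    return 0;
}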
"executor/nodeBitmapHeapscan.h" #include "executor/nodeCustom.h" #include "executor/nodeForeignscan.h" +#include "executor/nodeHash.h" #include "executor/nodeIndexscan.h" #include "executor/nodeIndexonlyscan.h" #include "executor/nodeSeqscan.h" @@ -259,8 +260,12 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) ExecBitmapHeapEstimate((BitmapHeapScanState *) planstate, e->pcxt); break; + case T_HashState: + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecHashEstimate((HashState *) planstate, e->pcxt); + break; case T_SortState: - /* even when not parallel-aware */ + /* even when not parallel-aware, for EXPLAIN ANALYZE */ ExecSortEstimate((SortState *) planstate, e->pcxt); break; @@ -458,8 +463,12 @@ ExecParallelInitializeDSM(PlanState *planstate, ExecBitmapHeapInitializeDSM((BitmapHeapScanState *) planstate, d->pcxt); break; + case T_HashState: + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecHashInitializeDSM((HashState *) planstate, d->pcxt); + break; case T_SortState: - /* even when not parallel-aware */ + /* even when not parallel-aware, for EXPLAIN ANALYZE */ ExecSortInitializeDSM((SortState *) planstate, d->pcxt); break; @@ -872,8 +881,12 @@ ExecParallelReInitializeDSM(PlanState *planstate, ExecBitmapHeapReInitializeDSM((BitmapHeapScanState *) planstate, pcxt); break; + case T_HashState: + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecHashReInitializeDSM((HashState *) planstate, pcxt); + break; case T_SortState: - /* even when not parallel-aware */ + /* even when not parallel-aware, for EXPLAIN ANALYZE */ ExecSortReInitializeDSM((SortState *) planstate, pcxt); break; @@ -928,12 +941,18 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, planstate->worker_instrument->num_workers = instrumentation->num_workers; memcpy(&planstate->worker_instrument->instrument, instrument, ibytes); - /* - * Perform any node-type-specific work that needs to be done. Currently, - * only Sort nodes need to do anything here. - */ - if (IsA(planstate, SortState)) - ExecSortRetrieveInstrumentation((SortState *) planstate); + /* Perform any node-type-specific work that needs to be done. 
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index c1aa5064c90..9befca90161 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -751,6 +751,9 @@ ExecShutdownNode(PlanState *node)
         case T_GatherMergeState:
             ExecShutdownGatherMerge((GatherMergeState *) node);
             break;
+        case T_HashState:
+            ExecShutdownHash((HashState *) node);
+            break;
         default:
             break;
     }
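This new ExecShutdownNode case is what lets the Hash node write its numbers out in time: the shutdown walk runs while the parallel DSM segment is still attached, whereas the end-of-plan walk runs after detach (the comment on ExecShutdownHash in the next file spells this out). A toy sketch of that ordering constraint, illustrative only and not the executor's real control flow:

#include <stdio.h>

static int dsm_attached = 1;    /* stands in for the parallel DSM segment */

static void
shutdown_hash(void)
{
    if (dsm_attached)
        printf("instrumentation copied to shared memory\n");
}

static void
end_hash(void)
{
    if (!dsm_attached)
        printf("too late to copy anything: segment already detached\n");
}

int
main(void)
{
    shutdown_hash();            /* ExecShutdownNode walk: still attached */
    dsm_attached = 0;           /* parallel context torn down */
    end_hash();                 /* ExecEndNode walk: would be too late */
    return 0;
}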
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index f7cd8fb3472..6fe5d69d558 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -1638,6 +1638,110 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
 }
 
 /*
+ * Reserve space in the DSM segment for instrumentation data.
+ */
+void
+ExecHashEstimate(HashState *node, ParallelContext *pcxt)
+{
+    size_t      size;
+
+    size = mul_size(pcxt->nworkers, sizeof(HashInstrumentation));
+    size = add_size(size, offsetof(SharedHashInfo, hinstrument));
+    shm_toc_estimate_chunk(&pcxt->estimator, size);
+    shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/*
+ * Set up a space in the DSM for all workers to record instrumentation data
+ * about their hash table.
+ */
+void
+ExecHashInitializeDSM(HashState *node, ParallelContext *pcxt)
+{
+    size_t      size;
+
+    size = offsetof(SharedHashInfo, hinstrument) +
+        pcxt->nworkers * sizeof(HashInstrumentation);
+    node->shared_info = (SharedHashInfo *) shm_toc_allocate(pcxt->toc, size);
+    memset(node->shared_info, 0, size);
+    node->shared_info->num_workers = pcxt->nworkers;
+    shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id,
+                   node->shared_info);
+}
+
+/*
+ * Reset shared state before beginning a fresh scan.
+ */
+void
+ExecHashReInitializeDSM(HashState *node, ParallelContext *pcxt)
+{
+    if (node->shared_info != NULL)
+    {
+        memset(node->shared_info->hinstrument, 0,
+               node->shared_info->num_workers * sizeof(HashInstrumentation));
+    }
+}
+
+/*
+ * Locate the DSM space for hash table instrumentation data that we'll write
+ * to at shutdown time.
+ */
+void
+ExecHashInitializeWorker(HashState *node, ParallelWorkerContext *pwcxt)
+{
+    SharedHashInfo *shared_info;
+
+    shared_info = (SharedHashInfo *)
+        shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, true);
+    node->hinstrument = &shared_info->hinstrument[ParallelWorkerNumber];
+}
+
+/*
+ * Copy instrumentation data from this worker's hash table (if it built one)
+ * to DSM memory so the leader can retrieve it.  This must be done in an
+ * ExecShutdownHash() rather than ExecEndHash() because the latter runs after
+ * we've detached from the DSM segment.
+ */
+void
+ExecShutdownHash(HashState *node)
+{
+    if (node->hinstrument && node->hashtable)
+        ExecHashGetInstrumentation(node->hinstrument, node->hashtable);
+}
+
+/*
+ * Retrieve instrumentation data from workers before the DSM segment is
+ * detached, so that EXPLAIN can access it.
+ */
+void
+ExecHashRetrieveInstrumentation(HashState *node)
+{
+    SharedHashInfo *shared_info = node->shared_info;
+    size_t      size;
+
+    /* Replace node->shared_info with a copy in backend-local memory. */
+    size = offsetof(SharedHashInfo, hinstrument) +
+        shared_info->num_workers * sizeof(HashInstrumentation);
+    node->shared_info = palloc(size);
+    memcpy(node->shared_info, shared_info, size);
+}
+
+/*
+ * Copy the instrumentation data from 'hashtable' into a HashInstrumentation
+ * struct.
+ */
+void
+ExecHashGetInstrumentation(HashInstrumentation *instrument,
+                           HashJoinTable hashtable)
+{
+    instrument->nbuckets = hashtable->nbuckets;
+    instrument->nbuckets_original = hashtable->nbuckets_original;
+    instrument->nbatch = hashtable->nbatch;
+    instrument->nbatch_original = hashtable->nbatch_original;
+    instrument->space_peak = hashtable->spacePeak;
+}
+
+/*
  * Allocate 'size' bytes from the currently active HashMemoryChunk
  */
 static void *
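ExecHashEstimate and ExecHashInitializeDSM compute the same layout twice: a SharedHashInfo header followed by one HashInstrumentation slot per worker, sized via offsetof over a flexible array member (the Estimate path additionally goes through add_size/mul_size, which error out on overflow). A standalone sketch of that layout arithmetic, with struct shapes inferred from the usage above rather than copied from the headers (which this diffstat, limited to src/backend, doesn't show):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Field layout inferred from ExecHashGetInstrumentation() above. */
typedef struct HashInstrumentation
{
    int     nbuckets;
    int     nbuckets_original;
    int     nbatch;
    int     nbatch_original;
    size_t  space_peak;
} HashInstrumentation;

typedef struct SharedHashInfo
{
    int                 num_workers;
    HashInstrumentation hinstrument[];  /* one slot per worker */
} SharedHashInfo;

int
main(void)
{
    int     nworkers = 4;
    size_t  size = offsetof(SharedHashInfo, hinstrument) +
        nworkers * sizeof(HashInstrumentation);
    SharedHashInfo *shared = calloc(1, size);   /* zeroed, like the DSM chunk */

    shared->num_workers = nworkers;
    printf("header %zu bytes + %d slots x %zu bytes = %zu bytes\n",
           offsetof(SharedHashInfo, hinstrument),
           nworkers, sizeof(HashInstrumentation), size);
    free(shared);
    return 0;
}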