author     Andres Freund <andres@anarazel.de>    2017-12-05 10:55:56 -0800
committer  Andres Freund <andres@anarazel.de>    2017-12-05 10:55:56 -0800
commit     5bcf389ecfd40daf92238e1abbff4fc4d3f18b33 (patch)
tree       085fae86ad5cb6cd155e4a9e2612d56fcae19f97 /src/backend/executor/nodeHash.c
parent     82c5c533d1f55dfbf3eacc61aff64fedc8915d78 (diff)
Fix EXPLAIN ANALYZE of hash join when the leader doesn't participate.
If a hash join appears in a parallel query, there may be no hash table
available for explain.c to inspect even though a hash table may have
been built in other processes. This could happen either because
parallel_leader_participation was set to off or because the leader
happened to hit the end of the outer relation immediately (even though
the complete relation is not empty) and decided not to build the hash
table.
Commit bf11e7ee introduced a way for workers to exchange
instrumentation via the DSM segment for Sort nodes even though they
are not parallel-aware. This commit does the same for Hash nodes, so
that explain.c has a way to find instrumentation data from an
arbitrary participant that actually built the hash table.
Author: Thomas Munro
Reviewed-By: Andres Freund
Discussion: https://postgr.es/m/CAEepm%3D3DUQC2-z252N55eOcZBer6DPdM%3DFzrxH9dZc5vYLsjaA%40mail.gmail.com
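For context while reading the diff: the shared-memory layout it relies on is defined in headers outside this diffstat. The sketch below is reconstructed from the size arithmetic (offsetof(SharedHashInfo, hinstrument) plus nworkers slots) and from the field copies in ExecHashGetInstrumentation; it is not quoted from the commit's header changes, and the field comments are the editor's reading, not the author's.

    /*
     * Sketch of the per-node shared instrumentation area, as implied by the
     * diff below.  One HashInstrumentation slot per worker; the area is
     * zero-initialized by ExecHashInitializeDSM.
     */
    typedef struct HashInstrumentation
    {
        int         nbuckets;           /* buckets at end of execution */
        int         nbuckets_original;  /* planned number of buckets */
        int         nbatch;             /* batches at end of execution */
        int         nbatch_original;    /* planned number of batches */
        size_t      space_peak;         /* peak memory usage in bytes */
    } HashInstrumentation;

    typedef struct SharedHashInfo
    {
        int         num_workers;        /* length of hinstrument[] */
        HashInstrumentation hinstrument[FLEXIBLE_ARRAY_MEMBER];
    } SharedHashInfo;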
Diffstat (limited to 'src/backend/executor/nodeHash.c')
-rw-r--r--  src/backend/executor/nodeHash.c | 104
1 file changed, 104 insertions, 0 deletions
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index f7cd8fb3472..6fe5d69d558 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -1638,6 +1638,110 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
 }
 
 /*
+ * Reserve space in the DSM segment for instrumentation data.
+ */
+void
+ExecHashEstimate(HashState *node, ParallelContext *pcxt)
+{
+	size_t		size;
+
+	size = mul_size(pcxt->nworkers, sizeof(HashInstrumentation));
+	size = add_size(size, offsetof(SharedHashInfo, hinstrument));
+	shm_toc_estimate_chunk(&pcxt->estimator, size);
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/*
+ * Set up a space in the DSM for all workers to record instrumentation data
+ * about their hash table.
+ */
+void
+ExecHashInitializeDSM(HashState *node, ParallelContext *pcxt)
+{
+	size_t		size;
+
+	size = offsetof(SharedHashInfo, hinstrument) +
+		pcxt->nworkers * sizeof(HashInstrumentation);
+	node->shared_info = (SharedHashInfo *) shm_toc_allocate(pcxt->toc, size);
+	memset(node->shared_info, 0, size);
+	node->shared_info->num_workers = pcxt->nworkers;
+	shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id,
+				   node->shared_info);
+}
+
+/*
+ * Reset shared state before beginning a fresh scan.
+ */
+void
+ExecHashReInitializeDSM(HashState *node, ParallelContext *pcxt)
+{
+	if (node->shared_info != NULL)
+	{
+		memset(node->shared_info->hinstrument, 0,
+			   node->shared_info->num_workers * sizeof(HashInstrumentation));
+	}
+}
+
+/*
+ * Locate the DSM space for hash table instrumentation data that we'll write
+ * to at shutdown time.
+ */
+void
+ExecHashInitializeWorker(HashState *node, ParallelWorkerContext *pwcxt)
+{
+	SharedHashInfo *shared_info;
+
+	shared_info = (SharedHashInfo *)
+		shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, true);
+	node->hinstrument = &shared_info->hinstrument[ParallelWorkerNumber];
+}
+
+/*
+ * Copy instrumentation data from this worker's hash table (if it built one)
+ * to DSM memory so the leader can retrieve it.  This must be done in
+ * ExecShutdownHash() rather than ExecEndHash() because the latter runs after
+ * we've detached from the DSM segment.
+ */
+void
+ExecShutdownHash(HashState *node)
+{
+	if (node->hinstrument && node->hashtable)
+		ExecHashGetInstrumentation(node->hinstrument, node->hashtable);
+}
+
+/*
+ * Retrieve instrumentation data from workers before the DSM segment is
+ * detached, so that EXPLAIN can access it.
+ */
+void
+ExecHashRetrieveInstrumentation(HashState *node)
+{
+	SharedHashInfo *shared_info = node->shared_info;
+	size_t		size;
+
+	/* Replace node->shared_info with a copy in backend-local memory. */
+	size = offsetof(SharedHashInfo, hinstrument) +
+		shared_info->num_workers * sizeof(HashInstrumentation);
+	node->shared_info = palloc(size);
+	memcpy(node->shared_info, shared_info, size);
+}
+
+/*
+ * Copy the instrumentation data from 'hashtable' into a HashInstrumentation
+ * struct.
+ */
+void
+ExecHashGetInstrumentation(HashInstrumentation *instrument,
+						   HashJoinTable hashtable)
+{
+	instrument->nbuckets = hashtable->nbuckets;
+	instrument->nbuckets_original = hashtable->nbuckets_original;
+	instrument->nbatch = hashtable->nbatch;
+	instrument->nbatch_original = hashtable->nbatch_original;
+	instrument->space_peak = hashtable->spacePeak;
+}
+
+/*
  * Allocate 'size' bytes from the currently active HashMemoryChunk
  */
 static void *
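The EXPLAIN-side consumer lives in explain.c and is therefore outside this diffstat. A minimal sketch of how the leader can find "an arbitrary participant that actually built the hash table": the helper name find_hash_instrumentation is hypothetical, but the approach follows from the diff above, since ExecHashInitializeDSM zeroes the array and ExecShutdownHash fills in a slot only when that participant built a table (in which case nbatch >= 1).

    /*
     * Hypothetical sketch (not the commit's actual explain.c change).
     * Returns instrumentation from some participant that built a hash
     * table, or NULL if none did.  'local' is caller-provided scratch
     * space for the leader's own snapshot.
     */
    static HashInstrumentation *
    find_hash_instrumentation(HashState *hashstate, HashInstrumentation *local)
    {
        if (hashstate->hashtable)
        {
            /* The leader built its own hash table; snapshot it directly. */
            ExecHashGetInstrumentation(local, hashstate->hashtable);
            return local;
        }
        if (hashstate->shared_info)
        {
            SharedHashInfo *shared_info = hashstate->shared_info;
            int         i;

            for (i = 0; i < shared_info->num_workers; i++)
            {
                /* A still-zeroed slot means this worker built no table. */
                if (shared_info->hinstrument[i].nbatch > 0)
                    return &shared_info->hinstrument[i];
            }
        }
        return NULL;            /* no participant built a hash table */
    }

Note that this only works because ExecHashRetrieveInstrumentation copies the shared array into backend-local memory before the DSM segment goes away; the returned pointer must not reference the detached segment.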