aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/nodeHash.c
diff options
context:
space:
mode:
authorThomas Munro <tmunro@postgresql.org>2021-03-18 00:35:04 +1300
committerThomas Munro <tmunro@postgresql.org>2021-03-18 01:10:55 +1300
commit7f7f25f15edb6eacec58179ef5285e874aa4435b (patch)
tree93e05bd9b9d7ce29ccb0835d74104147f62b313a /src/backend/executor/nodeHash.c
parent9fd2952cf4920d563e9cea51634c5b364d57f71a (diff)
downloadpostgresql-7f7f25f15edb6eacec58179ef5285e874aa4435b.tar.gz
postgresql-7f7f25f15edb6eacec58179ef5285e874aa4435b.zip
Revert "Fix race in Parallel Hash Join batch cleanup."
This reverts commit 378802e3713c6c0fce31d2390c134cd5d7c30157. This reverts commit 3b8981b6e1a2aea0f18384c803e21e9391de669a. Discussion: https://postgr.es/m/CA%2BhUKGJmcqAE3MZeDCLLXa62cWM0AJbKmp2JrJYaJ86bz36LFA%40mail.gmail.com
Diffstat (limited to 'src/backend/executor/nodeHash.c')
-rw-r--r--src/backend/executor/nodeHash.c99
1 files changed, 41 insertions, 58 deletions
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 98db9c2fb0d..c5f2d1d22b1 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -246,10 +246,10 @@ MultiExecParallelHash(HashState *node)
*/
pstate = hashtable->parallel_state;
build_barrier = &pstate->build_barrier;
- Assert(BarrierPhase(build_barrier) >= PHJ_BUILD_ALLOCATE);
+ Assert(BarrierPhase(build_barrier) >= PHJ_BUILD_ALLOCATING);
switch (BarrierPhase(build_barrier))
{
- case PHJ_BUILD_ALLOCATE:
+ case PHJ_BUILD_ALLOCATING:
/*
* Either I just allocated the initial hash table in
@@ -259,7 +259,7 @@ MultiExecParallelHash(HashState *node)
BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_ALLOCATE);
/* Fall through. */
- case PHJ_BUILD_HASH_INNER:
+ case PHJ_BUILD_HASHING_INNER:
/*
* It's time to begin hashing, or if we just arrived here then
@@ -271,10 +271,10 @@ MultiExecParallelHash(HashState *node)
* below.
*/
if (PHJ_GROW_BATCHES_PHASE(BarrierAttach(&pstate->grow_batches_barrier)) !=
- PHJ_GROW_BATCHES_ELECT)
+ PHJ_GROW_BATCHES_ELECTING)
ExecParallelHashIncreaseNumBatches(hashtable);
if (PHJ_GROW_BUCKETS_PHASE(BarrierAttach(&pstate->grow_buckets_barrier)) !=
- PHJ_GROW_BUCKETS_ELECT)
+ PHJ_GROW_BUCKETS_ELECTING)
ExecParallelHashIncreaseNumBuckets(hashtable);
ExecParallelHashEnsureBatchAccessors(hashtable);
ExecParallelHashTableSetCurrentBatch(hashtable, 0);
@@ -333,22 +333,15 @@ MultiExecParallelHash(HashState *node)
hashtable->nbuckets = pstate->nbuckets;
hashtable->log2_nbuckets = my_log2(hashtable->nbuckets);
hashtable->totalTuples = pstate->total_tuples;
-
- /*
- * Unless we're completely done and the batch state has been freed, make
- * sure we have accessors.
- */
- if (BarrierPhase(build_barrier) < PHJ_BUILD_FREE)
- ExecParallelHashEnsureBatchAccessors(hashtable);
+ ExecParallelHashEnsureBatchAccessors(hashtable);
/*
* The next synchronization point is in ExecHashJoin's HJ_BUILD_HASHTABLE
- * case, which will bring the build phase to PHJ_BUILD_RUN (if it isn't
+ * case, which will bring the build phase to PHJ_BUILD_DONE (if it isn't
* there already).
*/
- Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER ||
- BarrierPhase(build_barrier) == PHJ_BUILD_RUN ||
- BarrierPhase(build_barrier) == PHJ_BUILD_FREE);
+ Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER ||
+ BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
}
/* ----------------------------------------------------------------
@@ -596,8 +589,8 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
* Attach to the build barrier. The corresponding detach operation is
* in ExecHashTableDetach. Note that we won't attach to the
* batch_barrier for batch 0 yet. We'll attach later and start it out
- * in PHJ_BATCH_PROBE phase, because batch 0 is allocated up front and
- * then loaded while hashing (the standard hybrid hash join
+ * in PHJ_BATCH_PROBING phase, because batch 0 is allocated up front
+ * and then loaded while hashing (the standard hybrid hash join
* algorithm), and we'll coordinate that using build_barrier.
*/
build_barrier = &pstate->build_barrier;
@@ -610,7 +603,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
* SharedHashJoinBatch objects and the hash table for batch 0. One
* backend will be elected to do that now if necessary.
*/
- if (BarrierPhase(build_barrier) == PHJ_BUILD_ELECT &&
+ if (BarrierPhase(build_barrier) == PHJ_BUILD_ELECTING &&
BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_ELECT))
{
pstate->nbatch = nbatch;
@@ -631,7 +624,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
/*
* The next Parallel Hash synchronization point is in
* MultiExecParallelHash(), which will progress it all the way to
- * PHJ_BUILD_RUN. The caller must not return control from this
+ * PHJ_BUILD_DONE. The caller must not return control from this
* executor node between now and then.
*/
}
@@ -1067,7 +1060,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
ParallelHashJoinState *pstate = hashtable->parallel_state;
int i;
- Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASH_INNER);
+ Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASHING_INNER);
/*
* It's unlikely, but we need to be prepared for new participants to show
@@ -1076,7 +1069,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
*/
switch (PHJ_GROW_BATCHES_PHASE(BarrierPhase(&pstate->grow_batches_barrier)))
{
- case PHJ_GROW_BATCHES_ELECT:
+ case PHJ_GROW_BATCHES_ELECTING:
/*
* Elect one participant to prepare to grow the number of batches.
@@ -1194,13 +1187,13 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
}
/* Fall through. */
- case PHJ_GROW_BATCHES_REALLOCATE:
+ case PHJ_GROW_BATCHES_ALLOCATING:
/* Wait for the above to be finished. */
BarrierArriveAndWait(&pstate->grow_batches_barrier,
- WAIT_EVENT_HASH_GROW_BATCHES_REALLOCATE);
+ WAIT_EVENT_HASH_GROW_BATCHES_ALLOCATE);
/* Fall through. */
- case PHJ_GROW_BATCHES_REPARTITION:
+ case PHJ_GROW_BATCHES_REPARTITIONING:
/* Make sure that we have the current dimensions and buckets. */
ExecParallelHashEnsureBatchAccessors(hashtable);
ExecParallelHashTableSetCurrentBatch(hashtable, 0);
@@ -1213,7 +1206,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
WAIT_EVENT_HASH_GROW_BATCHES_REPARTITION);
/* Fall through. */
- case PHJ_GROW_BATCHES_DECIDE:
+ case PHJ_GROW_BATCHES_DECIDING:
/*
* Elect one participant to clean up and decide whether further
@@ -1268,7 +1261,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
}
/* Fall through. */
- case PHJ_GROW_BATCHES_FINISH:
+ case PHJ_GROW_BATCHES_FINISHING:
/* Wait for the above to complete. */
BarrierArriveAndWait(&pstate->grow_batches_barrier,
WAIT_EVENT_HASH_GROW_BATCHES_FINISH);
@@ -1508,7 +1501,7 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable)
HashMemoryChunk chunk;
dsa_pointer chunk_s;
- Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASH_INNER);
+ Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASHING_INNER);
/*
* It's unlikely, but we need to be prepared for new participants to show
@@ -1517,7 +1510,7 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable)
*/
switch (PHJ_GROW_BUCKETS_PHASE(BarrierPhase(&pstate->grow_buckets_barrier)))
{
- case PHJ_GROW_BUCKETS_ELECT:
+ case PHJ_GROW_BUCKETS_ELECTING:
/* Elect one participant to prepare to increase nbuckets. */
if (BarrierArriveAndWait(&pstate->grow_buckets_barrier,
WAIT_EVENT_HASH_GROW_BUCKETS_ELECT))
@@ -1546,13 +1539,13 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable)
}
/* Fall through. */
- case PHJ_GROW_BUCKETS_REALLOCATE:
+ case PHJ_GROW_BUCKETS_ALLOCATING:
/* Wait for the above to complete. */
BarrierArriveAndWait(&pstate->grow_buckets_barrier,
- WAIT_EVENT_HASH_GROW_BUCKETS_REALLOCATE);
+ WAIT_EVENT_HASH_GROW_BUCKETS_ALLOCATE);
/* Fall through. */
- case PHJ_GROW_BUCKETS_REINSERT:
+ case PHJ_GROW_BUCKETS_REINSERTING:
/* Reinsert all tuples into the hash table. */
ExecParallelHashEnsureBatchAccessors(hashtable);
ExecParallelHashTableSetCurrentBatch(hashtable, 0);
@@ -1708,7 +1701,7 @@ retry:
/* Try to load it into memory. */
Assert(BarrierPhase(&hashtable->parallel_state->build_barrier) ==
- PHJ_BUILD_HASH_INNER);
+ PHJ_BUILD_HASHING_INNER);
hashTuple = ExecParallelHashTupleAlloc(hashtable,
HJTUPLE_OVERHEAD + tuple->t_len,
&shared);
@@ -2862,7 +2855,7 @@ ExecParallelHashTupleAlloc(HashJoinTable hashtable, size_t size,
if (pstate->growth != PHJ_GROWTH_DISABLED)
{
Assert(curbatch == 0);
- Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASH_INNER);
+ Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_HASHING_INNER);
/*
* Check if our space limit would be exceeded. To avoid choking on
@@ -2982,7 +2975,7 @@ ExecParallelHashJoinSetUpBatches(HashJoinTable hashtable, int nbatch)
{
/* Batch 0 doesn't need to be loaded. */
BarrierAttach(&shared->batch_barrier);
- while (BarrierPhase(&shared->batch_barrier) < PHJ_BATCH_PROBE)
+ while (BarrierPhase(&shared->batch_barrier) < PHJ_BATCH_PROBING)
BarrierArriveAndWait(&shared->batch_barrier, 0);
BarrierDetach(&shared->batch_barrier);
}
@@ -3055,11 +3048,14 @@ ExecParallelHashEnsureBatchAccessors(HashJoinTable hashtable)
}
/*
- * We should never see a state where the batch-tracking array is freed,
- * because we should have given up sooner if we join when the build
- * barrier has reached the PHJ_BUILD_FREE phase.
+ * It's possible for a backend to start up very late so that the whole
+ * join is finished and the shm state for tracking batches has already
+ * been freed by ExecHashTableDetach(). In that case we'll just leave
+ * hashtable->batches as NULL so that ExecParallelHashJoinNewBatch() gives
+ * up early.
*/
- Assert(DsaPointerIsValid(pstate->batches));
+ if (!DsaPointerIsValid(pstate->batches))
+ return;
/* Use hash join memory context. */
oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);
@@ -3140,7 +3136,7 @@ ExecHashTableDetachBatch(HashJoinTable hashtable)
* longer attached, but since there is no way it's moving after
* this point it seems safe to make the following assertion.
*/
- Assert(BarrierPhase(&batch->batch_barrier) == PHJ_BATCH_FREE);
+ Assert(BarrierPhase(&batch->batch_barrier) == PHJ_BATCH_DONE);
/* Free shared chunks and buckets. */
while (DsaPointerIsValid(batch->chunks))
@@ -3179,17 +3175,9 @@ ExecHashTableDetachBatch(HashJoinTable hashtable)
void
ExecHashTableDetach(HashJoinTable hashtable)
{
- ParallelHashJoinState *pstate = hashtable->parallel_state;
-
- /*
- * If we're involved in a parallel query, we must either have got all the
- * way to PHJ_BUILD_RUN, or joined too late and be in PHJ_BUILD_FREE.
- */
- Assert(!pstate ||
- BarrierPhase(&pstate->build_barrier) >= PHJ_BUILD_RUN);
-
- if (pstate && BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_RUN)
+ if (hashtable->parallel_state)
{
+ ParallelHashJoinState *pstate = hashtable->parallel_state;
int i;
/* Make sure any temporary files are closed. */
@@ -3205,22 +3193,17 @@ ExecHashTableDetach(HashJoinTable hashtable)
}
/* If we're last to detach, clean up shared memory. */
- if (BarrierArriveAndDetach(&pstate->build_barrier))
+ if (BarrierDetach(&pstate->build_barrier))
{
- /*
- * Late joining processes will see this state and give up
- * immediately.
- */
- Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_FREE);
-
if (DsaPointerIsValid(pstate->batches))
{
dsa_free(hashtable->area, pstate->batches);
pstate->batches = InvalidDsaPointer;
}
}
+
+ hashtable->parallel_state = NULL;
}
- hashtable->parallel_state = NULL;
}
/*