diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2009-03-21 00:04:40 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2009-03-21 00:04:40 +0000 |
commit | 596efd27edce20bba706f50de99a0f15bcc2a567 (patch) | |
tree | 63c07c3c310e1b072f0a29a79220c81254dba3d8 /src/backend/executor/nodeHashjoin.c | |
parent | 249d936bed069877923f0369bd2ce51a6f8f925e (diff) | |
download | postgresql-596efd27edce20bba706f50de99a0f15bcc2a567.tar.gz postgresql-596efd27edce20bba706f50de99a0f15bcc2a567.zip |
Optimize multi-batch hash joins when the outer relation has a nonuniform
distribution, by creating a special fast path for the (first few) most common
values (MCVs) of the outer relation. Tuples having hash values matching the MCVs
are effectively forced to be in the first batch, so that we never write
them out to the batch temp files.
Bryce Cutt and Ramon Lawrence, with some editorialization by me.
Diffstat (limited to 'src/backend/executor/nodeHashjoin.c')
-rw-r--r-- | src/backend/executor/nodeHashjoin.c | 29 |
1 file changed, 24 insertions, 5 deletions
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index ad0c302e029..aea2ab3e4fd 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.97 2009/01/01 17:23:41 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.98 2009/03/21 00:04:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -198,19 +198,23 @@ ExecHashJoin(HashJoinState *node) node->hj_MatchedOuter = false; /* - * now we have an outer tuple, find the corresponding bucket for - * this tuple from the hash table + * Now we have an outer tuple; find the corresponding bucket for + * this tuple in the main hash table or skew hash table. */ node->hj_CurHashValue = hashvalue; ExecHashGetBucketAndBatch(hashtable, hashvalue, &node->hj_CurBucketNo, &batchno); + node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable, + hashvalue); node->hj_CurTuple = NULL; /* * Now we've got an outer tuple and the corresponding hash bucket, - * but this tuple may not belong to the current batch. + * but it might not belong to the current batch, or it might + * match a skew bucket. */ - if (batchno != hashtable->curbatch) + if (batchno != hashtable->curbatch && + node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO) { /* * Need to postpone this outer tuple to a later batch. 
Save it @@ -452,6 +456,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) hjstate->hj_CurHashValue = 0; hjstate->hj_CurBucketNo = 0; + hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO; hjstate->hj_CurTuple = NULL; /* @@ -651,6 +656,19 @@ start_over: BufFileClose(hashtable->outerBatchFile[curbatch]); hashtable->outerBatchFile[curbatch] = NULL; } + else /* we just finished the first batch */ + { + /* + * Reset some of the skew optimization state variables, since we + * no longer need to consider skew tuples after the first batch. + * The memory context reset we are about to do will release the + * skew hashtable itself. + */ + hashtable->skewEnabled = false; + hashtable->skewBucket = NULL; + hashtable->skewBucketNums = NULL; + hashtable->spaceUsedSkew = 0; + } /* * We can always skip over any batches that are completely empty on both @@ -880,6 +898,7 @@ ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt) /* Always reset intra-tuple state */ node->hj_CurHashValue = 0; node->hj_CurBucketNo = 0; + node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO; node->hj_CurTuple = NULL; node->js.ps.ps_TupFromTlist = false; |