author     Tom Lane <tgl@sss.pgh.pa.us>  2009-03-21 00:04:40 +0000
committer  Tom Lane <tgl@sss.pgh.pa.us>  2009-03-21 00:04:40 +0000
commit     596efd27edce20bba706f50de99a0f15bcc2a567 (patch)
tree       63c07c3c310e1b072f0a29a79220c81254dba3d8 /src/backend/executor/nodeHashjoin.c
parent     249d936bed069877923f0369bd2ce51a6f8f925e (diff)
Optimize multi-batch hash joins when the outer relation has a nonuniform
distribution, by creating a special fast path for the (first few) most common
values of the outer relation. Tuples having hash values matching the MCVs are
effectively forced to be in the first batch, so that we never write them out
to the batch temp files.

Bryce Cutt and Ramon Lawrence, with some editorialization by me.
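[Editor's note] To make the mechanism concrete: during the first batch the join keeps a small in-memory "skew" hash table holding the hash values of the outer join key's most common values (MCVs). An outer tuple whose hash value hits one of those skew buckets is joined immediately in memory; any other tuple is either probed against the current batch's main hash table or written to the temp file of a later batch. The standalone C sketch below illustrates only that routing decision; the names (route_outer_tuple, skew_bucket_for, SKEW_TABLE_LEN, the modulo batch assignment) are hypothetical and are not the functions this diff actually calls (ExecHashGetSkewBucket, ExecHashGetBucketAndBatch).

/*
 * Illustrative sketch only: route an outer tuple's hash value either to an
 * in-memory MCV ("skew") bucket, to the current batch's main hash table, or
 * to a spill file for a later batch.  Structures and policies here are
 * hypothetical, not PostgreSQL's.
 */
#include <stdint.h>
#include <stdio.h>

#define NBATCH           4      /* pretend the join was split into 4 batches */
#define SKEW_TABLE_LEN   8      /* power of 2; holds hash values of MCVs     */
#define INVALID_SKEW_NO (-1)

static uint32_t skew_hashes[SKEW_TABLE_LEN];    /* 0 means "empty slot" here */

/* Linear-probing lookup: slot index if hashvalue is an MCV hash, else -1. */
static int
skew_bucket_for(uint32_t hashvalue)
{
    int slot = hashvalue & (SKEW_TABLE_LEN - 1);

    for (int i = 0; i < SKEW_TABLE_LEN; i++)
    {
        if (skew_hashes[slot] == 0)
            return INVALID_SKEW_NO;         /* empty slot: not an MCV */
        if (skew_hashes[slot] == hashvalue)
            return slot;                    /* MCV fast path */
        slot = (slot + 1) & (SKEW_TABLE_LEN - 1);
    }
    return INVALID_SKEW_NO;
}

/* Decide what to do with one outer tuple, given its hash value. */
static void
route_outer_tuple(uint32_t hashvalue, int curbatch)
{
    int batchno = hashvalue % NBATCH;       /* which batch the tuple falls in */
    int skewno  = skew_bucket_for(hashvalue);

    if (skewno != INVALID_SKEW_NO)
        printf("hash %08x: MCV, probe skew bucket %d in memory (batch 0)\n",
               (unsigned) hashvalue, skewno);
    else if (batchno == curbatch)
        printf("hash %08x: probe main hash table of batch %d\n",
               (unsigned) hashvalue, batchno);
    else
        printf("hash %08x: spill to temp file of batch %d\n",
               (unsigned) hashvalue, batchno);
}

int
main(void)
{
    /* Pretend these two hash values belong to the outer relation's MCVs. */
    skew_hashes[0xdeadbeefu & (SKEW_TABLE_LEN - 1)] = 0xdeadbeefu;
    skew_hashes[0x12345678u & (SKEW_TABLE_LEN - 1)] = 0x12345678u;

    route_outer_tuple(0xdeadbeefu, 0);  /* MCV: never written out           */
    route_outer_tuple(0x00000002u, 0);  /* belongs to batch 2, gets spilled */
    route_outer_tuple(0x00000000u, 0);  /* belongs to batch 0, probed now   */
    return 0;
}

Running the sketch prints one line per tuple showing which of the three paths it takes; in the real executor the MCV path is what keeps the most frequent outer values out of the batch temp files entirely.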
Diffstat (limited to 'src/backend/executor/nodeHashjoin.c')
-rw-r--r--   src/backend/executor/nodeHashjoin.c   29
1 file changed, 24 insertions, 5 deletions
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index ad0c302e029..aea2ab3e4fd 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.97 2009/01/01 17:23:41 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.98 2009/03/21 00:04:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -198,19 +198,23 @@ ExecHashJoin(HashJoinState *node)
node->hj_MatchedOuter = false;
/*
- * now we have an outer tuple, find the corresponding bucket for
- * this tuple from the hash table
+ * Now we have an outer tuple; find the corresponding bucket for
+ * this tuple in the main hash table or skew hash table.
*/
node->hj_CurHashValue = hashvalue;
ExecHashGetBucketAndBatch(hashtable, hashvalue,
&node->hj_CurBucketNo, &batchno);
+ node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
+ hashvalue);
node->hj_CurTuple = NULL;
/*
* Now we've got an outer tuple and the corresponding hash bucket,
- * but this tuple may not belong to the current batch.
+ * but it might not belong to the current batch, or it might
+ * match a skew bucket.
*/
- if (batchno != hashtable->curbatch)
+ if (batchno != hashtable->curbatch &&
+ node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
{
/*
* Need to postpone this outer tuple to a later batch. Save it
@@ -452,6 +456,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
hjstate->hj_CurHashValue = 0;
hjstate->hj_CurBucketNo = 0;
+ hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
hjstate->hj_CurTuple = NULL;
/*
@@ -651,6 +656,19 @@ start_over:
BufFileClose(hashtable->outerBatchFile[curbatch]);
hashtable->outerBatchFile[curbatch] = NULL;
}
+ else /* we just finished the first batch */
+ {
+ /*
+ * Reset some of the skew optimization state variables, since we
+ * no longer need to consider skew tuples after the first batch.
+ * The memory context reset we are about to do will release the
+ * skew hashtable itself.
+ */
+ hashtable->skewEnabled = false;
+ hashtable->skewBucket = NULL;
+ hashtable->skewBucketNums = NULL;
+ hashtable->spaceUsedSkew = 0;
+ }
/*
* We can always skip over any batches that are completely empty on both
@@ -880,6 +898,7 @@ ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
/* Always reset intra-tuple state */
node->hj_CurHashValue = 0;
node->hj_CurBucketNo = 0;
+ node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
node->hj_CurTuple = NULL;
node->js.ps.ps_TupFromTlist = false;
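
[Editor's note] For the hunk that fires when the first batch ends, here is a correspondingly hedged sketch of the teardown: the skew path is simply disabled so that later batches fall back to ordinary batch routing, while the skew hash table's memory is left for the surrounding memory-context reset to reclaim. The struct below is illustrative and is not the HashJoinTable definition from hashjoin.h.

/*
 * Illustrative sketch only: disabling the skew ("MCV") fast path after the
 * first batch.  The fields mirror the ones reset in the diff above, but the
 * struct itself is hypothetical.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct SketchHashTable
{
    bool    skewEnabled;        /* consult skew buckets at all?              */
    void   *skewBucket;         /* array of MCV buckets (owned by a memory   */
    int    *skewBucketNums;     /*   context that the caller will reset)     */
    size_t  spaceUsedSkew;      /* bytes currently used by skew buckets      */
} SketchHashTable;

void
disable_skew_optimization(SketchHashTable *hashtable)
{
    /* Later skew lookups see skewEnabled == false and report "no skew
     * bucket", so only ordinary batch routing remains. */
    hashtable->skewEnabled    = false;
    hashtable->skewBucket     = NULL;   /* no dangling pointers */
    hashtable->skewBucketNums = NULL;
    hashtable->spaceUsedSkew  = 0;
    /* The skew table's memory itself is reclaimed by the caller's
     * memory-context reset, not freed here. */
}

int
main(void)
{
    int             dummy[1] = {0};
    SketchHashTable ht = { true, dummy, dummy, 1234 };

    disable_skew_optimization(&ht);
    printf("skewEnabled=%d spaceUsedSkew=%zu\n",
           (int) ht.skewEnabled, ht.spaceUsedSkew);
    return 0;
}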