diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/executor/nodeHash.c | 151 | ||||
-rw-r--r-- | src/backend/executor/nodeHashjoin.c | 576 | ||||
-rw-r--r-- | src/backend/optimizer/path/equivclass.c | 7 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 145 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinrels.c | 12 | ||||
-rw-r--r-- | src/backend/optimizer/plan/initsplan.c | 10 | ||||
-rw-r--r-- | src/include/access/htup.h | 23 | ||||
-rw-r--r-- | src/include/executor/hashjoin.h | 2 | ||||
-rw-r--r-- | src/include/executor/nodeHash.h | 10 | ||||
-rw-r--r-- | src/include/nodes/execnodes.h | 23 |
10 files changed, 596 insertions, 363 deletions
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 5cd7332237b..934a283b8d2 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -105,7 +105,8 @@ MultiExecHash(HashState *node) break; /* We have to compute the hash value */ econtext->ecxt_innertuple = slot; - if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false, + if (ExecHashGetHashValue(hashtable, econtext, hashkeys, + false, hashtable->keepNulls, &hashvalue)) { int bucketNumber; @@ -231,7 +232,7 @@ ExecEndHash(HashState *node) * ---------------------------------------------------------------- */ HashJoinTable -ExecHashTableCreate(Hash *node, List *hashOperators) +ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls) { HashJoinTable hashtable; Plan *outerNode; @@ -273,6 +274,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators) hashtable->nbuckets = nbuckets; hashtable->log2_nbuckets = log2_nbuckets; hashtable->buckets = NULL; + hashtable->keepNulls = keepNulls; hashtable->skewEnabled = false; hashtable->skewBucket = NULL; hashtable->skewBucketLen = 0; @@ -712,13 +714,26 @@ ExecHashTableInsert(HashJoinTable hashtable, HashJoinTuple hashTuple; int hashTupleSize; + /* Create the HashJoinTuple */ hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len; hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt, hashTupleSize); hashTuple->hashvalue = hashvalue; memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len); + + /* + * We always reset the tuple-matched flag on insertion. This is okay + * even when reloading a tuple from a batch file, since the tuple + * could not possibly have been matched to an outer tuple before it + * went into the batch file. + */ + HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple)); + + /* Push it onto the front of the bucket's list */ hashTuple->next = hashtable->buckets[bucketno]; hashtable->buckets[bucketno] = hashTuple; + + /* Account for space used, and back off if we've used too much */ hashtable->spaceUsed += hashTupleSize; if (hashtable->spaceUsed > hashtable->spacePeak) hashtable->spacePeak = hashtable->spaceUsed; @@ -878,8 +893,12 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable, * scan a hash bucket for matches to the current outer tuple * * The current outer tuple must be stored in econtext->ecxt_outertuple. + * + * On success, the inner tuple is stored into hjstate->hj_CurTuple and + * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot + * for the latter. */ -HashJoinTuple +bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext) { @@ -920,7 +939,7 @@ ExecScanHashBucket(HashJoinState *hjstate, if (ExecQual(hjclauses, econtext, false)) { hjstate->hj_CurTuple = hashTuple; - return hashTuple; + return true; } } @@ -930,7 +949,99 @@ ExecScanHashBucket(HashJoinState *hjstate, /* * no match */ - return NULL; + return false; +} + +/* + * ExecPrepHashTableForUnmatched + * set up for a series of ExecScanHashTableForUnmatched calls + */ +void +ExecPrepHashTableForUnmatched(HashJoinState *hjstate) +{ + /* + *---------- + * During this scan we use the HashJoinState fields as follows: + * + * hj_CurBucketNo: next regular bucket to scan + * hj_CurSkewBucketNo: next skew bucket (an index into skewBucketNums) + * hj_CurTuple: last tuple returned, or NULL to start next bucket + *---------- + */ + hjstate->hj_CurBucketNo = 0; + hjstate->hj_CurSkewBucketNo = 0; + hjstate->hj_CurTuple = NULL; +} + +/* + * ExecScanHashTableForUnmatched + * scan the hash table for unmatched inner tuples + * + * On success, the inner tuple is stored into hjstate->hj_CurTuple and + * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot + * for the latter. + */ +bool +ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext) +{ + HashJoinTable hashtable = hjstate->hj_HashTable; + HashJoinTuple hashTuple = hjstate->hj_CurTuple; + + for (;;) + { + /* + * hj_CurTuple is the address of the tuple last returned from the + * current bucket, or NULL if it's time to start scanning a new + * bucket. + */ + if (hashTuple != NULL) + hashTuple = hashTuple->next; + else if (hjstate->hj_CurBucketNo < hashtable->nbuckets) + { + hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo]; + hjstate->hj_CurBucketNo++; + } + else if (hjstate->hj_CurSkewBucketNo < hashtable->nSkewBuckets) + { + int j = hashtable->skewBucketNums[hjstate->hj_CurSkewBucketNo]; + + hashTuple = hashtable->skewBucket[j]->tuples; + hjstate->hj_CurSkewBucketNo++; + } + else + break; /* finished all buckets */ + + while (hashTuple != NULL) + { + if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(hashTuple))) + { + TupleTableSlot *inntuple; + + /* insert hashtable's tuple into exec slot */ + inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple), + hjstate->hj_HashTupleSlot, + false); /* do not pfree */ + econtext->ecxt_innertuple = inntuple; + + /* + * Reset temp memory each time; although this function doesn't + * do any qual eval, the caller will, so let's keep it + * parallel to ExecScanHashBucket. + */ + ResetExprContext(econtext); + + hjstate->hj_CurTuple = hashTuple; + return true; + } + + hashTuple = hashTuple->next; + } + } + + /* + * no more unmatched tuples + */ + return false; } /* @@ -960,6 +1071,35 @@ ExecHashTableReset(HashJoinTable hashtable) MemoryContextSwitchTo(oldcxt); } +/* + * ExecHashTableResetMatchFlags + * Clear all the HeapTupleHeaderHasMatch flags in the table + */ +void +ExecHashTableResetMatchFlags(HashJoinTable hashtable) +{ + HashJoinTuple tuple; + int i; + + /* Reset all flags in the main table ... */ + for (i = 0; i < hashtable->nbuckets; i++) + { + for (tuple = hashtable->buckets[i]; tuple != NULL; tuple = tuple->next) + HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple)); + } + + /* ... and the same for the skew buckets, if any */ + for (i = 0; i < hashtable->nSkewBuckets; i++) + { + int j = hashtable->skewBucketNums[i]; + HashSkewBucket *skewBucket = hashtable->skewBucket[j]; + + for (tuple = skewBucket->tuples; tuple != NULL; tuple = tuple->next) + HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple)); + } +} + + void ExecReScanHash(HashState *node) { @@ -1203,6 +1343,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable, hashTupleSize); hashTuple->hashvalue = hashvalue; memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len); + HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple)); /* Push it onto the front of the skew bucket's list */ hashTuple->next = hashtable->skewBucket[bucketNumber]->tuples; diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index dfca8cac522..cae31ba43a9 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -22,8 +22,20 @@ #include "utils/memutils.h" -/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */ -#define HASHJOIN_IS_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL) +/* + * States of the ExecHashJoin state machine + */ +#define HJ_BUILD_HASHTABLE 1 +#define HJ_NEED_NEW_OUTER 2 +#define HJ_SCAN_BUCKET 3 +#define HJ_FILL_OUTER_TUPLE 4 +#define HJ_FILL_INNER_TUPLES 5 +#define HJ_NEED_NEW_BATCH 6 + +/* Returns true if doing null-fill on outer relation */ +#define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL) +/* Returns true if doing null-fill on inner relation */ +#define HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL) static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, @@ -32,7 +44,7 @@ static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot); -static int ExecHashJoinNewBatch(HashJoinState *hjstate); +static bool ExecHashJoinNewBatch(HashJoinState *hjstate); /* ---------------------------------------------------------------- @@ -52,11 +64,9 @@ ExecHashJoin(HashJoinState *node) HashState *hashNode; List *joinqual; List *otherqual; - TupleTableSlot *inntuple; ExprContext *econtext; ExprDoneCond isDone; HashJoinTable hashtable; - HashJoinTuple curtuple; TupleTableSlot *outerTupleSlot; uint32 hashvalue; int batchno; @@ -69,10 +79,6 @@ ExecHashJoin(HashJoinState *node) otherqual = node->js.ps.qual; hashNode = (HashState *) innerPlanState(node); outerNode = outerPlanState(node); - - /* - * get information from HashJoin state - */ hashtable = node->hj_HashTable; econtext = node->js.ps.ps_ExprContext; @@ -100,238 +106,300 @@ ExecHashJoin(HashJoinState *node) ResetExprContext(econtext); /* - * if this is the first call, build the hash table for inner relation + * run the hash join state machine */ - if (hashtable == NULL) + for (;;) { - /* - * If the outer relation is completely empty, we can quit without - * building the hash table. However, for an inner join it is only a - * win to check this when the outer relation's startup cost is less - * than the projected cost of building the hash table. Otherwise it's - * best to build the hash table first and see if the inner relation is - * empty. (When it's an outer join, we should always make this check, - * since we aren't going to be able to skip the join on the strength - * of an empty inner relation anyway.) - * - * If we are rescanning the join, we make use of information gained on - * the previous scan: don't bother to try the prefetch if the previous - * scan found the outer relation nonempty. This is not 100% reliable - * since with new parameters the outer relation might yield different - * results, but it's a good heuristic. - * - * The only way to make the check is to try to fetch a tuple from the - * outer plan node. If we succeed, we have to stash it away for later - * consumption by ExecHashJoinOuterGetTuple. - */ - if (HASHJOIN_IS_OUTER(node) || - (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost && - !node->hj_OuterNotEmpty)) + switch (node->hj_JoinState) { - node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode); - if (TupIsNull(node->hj_FirstOuterTupleSlot)) - { - node->hj_OuterNotEmpty = false; - return NULL; - } - else - node->hj_OuterNotEmpty = true; - } - else - node->hj_FirstOuterTupleSlot = NULL; + case HJ_BUILD_HASHTABLE: + /* + * First time through: build hash table for inner relation. + */ + Assert(hashtable == NULL); - /* - * create the hash table - */ - hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan, - node->hj_HashOperators); - node->hj_HashTable = hashtable; + /* + * If the outer relation is completely empty, and it's not + * right/full join, we can quit without building the hash + * table. However, for an inner join it is only a win to + * check this when the outer relation's startup cost is less + * than the projected cost of building the hash + * table. Otherwise it's best to build the hash table first + * and see if the inner relation is empty. (When it's a left + * join, we should always make this check, since we aren't + * going to be able to skip the join on the strength of an + * empty inner relation anyway.) + * + * If we are rescanning the join, we make use of information + * gained on the previous scan: don't bother to try the + * prefetch if the previous scan found the outer relation + * nonempty. This is not 100% reliable since with new + * parameters the outer relation might yield different + * results, but it's a good heuristic. + * + * The only way to make the check is to try to fetch a tuple + * from the outer plan node. If we succeed, we have to stash + * it away for later consumption by ExecHashJoinOuterGetTuple. + */ + if (HJ_FILL_INNER(node)) + { + /* no chance to not build the hash table */ + node->hj_FirstOuterTupleSlot = NULL; + } + else if (HJ_FILL_OUTER(node) || + (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost && + !node->hj_OuterNotEmpty)) + { + node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode); + if (TupIsNull(node->hj_FirstOuterTupleSlot)) + { + node->hj_OuterNotEmpty = false; + return NULL; + } + else + node->hj_OuterNotEmpty = true; + } + else + node->hj_FirstOuterTupleSlot = NULL; - /* - * execute the Hash node, to build the hash table - */ - hashNode->hashtable = hashtable; - (void) MultiExecProcNode((PlanState *) hashNode); + /* + * create the hash table + */ + hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan, + node->hj_HashOperators, + HJ_FILL_INNER(node)); + node->hj_HashTable = hashtable; - /* - * If the inner relation is completely empty, and we're not doing an - * outer join, we can quit without scanning the outer relation. - */ - if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node)) - return NULL; + /* + * execute the Hash node, to build the hash table + */ + hashNode->hashtable = hashtable; + (void) MultiExecProcNode((PlanState *) hashNode); - /* - * need to remember whether nbatch has increased since we began - * scanning the outer relation - */ - hashtable->nbatch_outstart = hashtable->nbatch; + /* + * If the inner relation is completely empty, and we're not + * doing a left outer join, we can quit without scanning the + * outer relation. + */ + if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node)) + return NULL; - /* - * Reset OuterNotEmpty for scan. (It's OK if we fetched a tuple - * above, because ExecHashJoinOuterGetTuple will immediately set it - * again.) - */ - node->hj_OuterNotEmpty = false; - } + /* + * need to remember whether nbatch has increased since we began + * scanning the outer relation + */ + hashtable->nbatch_outstart = hashtable->nbatch; - /* - * run the hash join process - */ - for (;;) - { - /* - * If we don't have an outer tuple, get the next one - */ - if (node->hj_NeedNewOuter) - { - outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode, - node, - &hashvalue); - if (TupIsNull(outerTupleSlot)) - { - /* end of join */ - return NULL; - } + /* + * Reset OuterNotEmpty for scan. (It's OK if we fetched a + * tuple above, because ExecHashJoinOuterGetTuple will + * immediately set it again.) + */ + node->hj_OuterNotEmpty = false; - econtext->ecxt_outertuple = outerTupleSlot; - node->hj_NeedNewOuter = false; - node->hj_MatchedOuter = false; + node->hj_JoinState = HJ_NEED_NEW_OUTER; - /* - * Now we have an outer tuple; find the corresponding bucket for - * this tuple in the main hash table or skew hash table. - */ - node->hj_CurHashValue = hashvalue; - ExecHashGetBucketAndBatch(hashtable, hashvalue, - &node->hj_CurBucketNo, &batchno); - node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable, - hashvalue); - node->hj_CurTuple = NULL; + /* FALL THRU */ - /* - * Now we've got an outer tuple and the corresponding hash bucket, - * but it might not belong to the current batch, or it might match - * a skew bucket. - */ - if (batchno != hashtable->curbatch && - node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO) - { + case HJ_NEED_NEW_OUTER: /* - * Need to postpone this outer tuple to a later batch. Save it - * in the corresponding outer-batch file. + * We don't have an outer tuple, try to get the next one */ - Assert(batchno > hashtable->curbatch); - ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot), - hashvalue, - &hashtable->outerBatchFile[batchno]); - node->hj_NeedNewOuter = true; - continue; /* loop around for a new outer tuple */ - } - } + outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode, + node, + &hashvalue); + if (TupIsNull(outerTupleSlot)) + { + /* end of batch, or maybe whole join */ + if (HJ_FILL_INNER(node)) + { + /* set up to scan for unmatched inner tuples */ + ExecPrepHashTableForUnmatched(node); + node->hj_JoinState = HJ_FILL_INNER_TUPLES; + } + else + node->hj_JoinState = HJ_NEED_NEW_BATCH; + continue; + } - /* - * OK, scan the selected hash bucket for matches - */ - for (;;) - { - curtuple = ExecScanHashBucket(node, econtext); - if (curtuple == NULL) - break; /* out of matches */ + econtext->ecxt_outertuple = outerTupleSlot; + node->hj_MatchedOuter = false; - /* - * we've got a match, but still need to test non-hashed quals - */ - inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple), - node->hj_HashTupleSlot, - false); /* don't pfree */ - econtext->ecxt_innertuple = inntuple; + /* + * Find the corresponding bucket for this tuple in the main + * hash table or skew hash table. + */ + node->hj_CurHashValue = hashvalue; + ExecHashGetBucketAndBatch(hashtable, hashvalue, + &node->hj_CurBucketNo, &batchno); + node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable, + hashvalue); + node->hj_CurTuple = NULL; - /* reset temp memory each time to avoid leaks from qual expr */ - ResetExprContext(econtext); + /* + * The tuple might not belong to the current batch (where + * "current batch" includes the skew buckets if any). + */ + if (batchno != hashtable->curbatch && + node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO) + { + /* + * Need to postpone this outer tuple to a later batch. + * Save it in the corresponding outer-batch file. + */ + Assert(batchno > hashtable->curbatch); + ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot), + hashvalue, + &hashtable->outerBatchFile[batchno]); + /* Loop around, staying in HJ_NEED_NEW_OUTER state */ + continue; + } - /* - * if we pass the qual, then save state for next call and have - * ExecProject form the projection, store it in the tuple table, - * and return the slot. - * - * Only the joinquals determine MatchedOuter status, but all quals - * must pass to actually return the tuple. - */ - if (joinqual == NIL || ExecQual(joinqual, econtext, false)) - { - node->hj_MatchedOuter = true; + /* OK, let's scan the bucket for matches */ + node->hj_JoinState = HJ_SCAN_BUCKET; + + /* FALL THRU */ - /* In an antijoin, we never return a matched tuple */ - if (node->js.jointype == JOIN_ANTI) + case HJ_SCAN_BUCKET: + /* + * Scan the selected hash bucket for matches to current outer + */ + if (!ExecScanHashBucket(node, econtext)) { - node->hj_NeedNewOuter = true; - break; /* out of loop over hash bucket */ + /* out of matches; check for possible outer-join fill */ + node->hj_JoinState = HJ_FILL_OUTER_TUPLE; + continue; } /* - * In a semijoin, we'll consider returning the first match, - * but after that we're done with this outer tuple. + * We've got a match, but still need to test non-hashed quals. + * ExecScanHashBucket already set up all the state needed to + * call ExecQual. + * + * If we pass the qual, then save state for next call and have + * ExecProject form the projection, store it in the tuple + * table, and return the slot. + * + * Only the joinquals determine tuple match status, but all + * quals must pass to actually return the tuple. */ - if (node->js.jointype == JOIN_SEMI) - node->hj_NeedNewOuter = true; - - if (otherqual == NIL || ExecQual(otherqual, econtext, false)) + if (joinqual == NIL || ExecQual(joinqual, econtext, false)) { - TupleTableSlot *result; + node->hj_MatchedOuter = true; + HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple)); - result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); + /* In an antijoin, we never return a matched tuple */ + if (node->js.jointype == JOIN_ANTI) + { + node->hj_JoinState = HJ_NEED_NEW_OUTER; + continue; + } - if (isDone != ExprEndResult) + /* + * In a semijoin, we'll consider returning the first match, + * but after that we're done with this outer tuple. + */ + if (node->js.jointype == JOIN_SEMI) + node->hj_JoinState = HJ_NEED_NEW_OUTER; + + if (otherqual == NIL || + ExecQual(otherqual, econtext, false)) { - node->js.ps.ps_TupFromTlist = - (isDone == ExprMultipleResult); - return result; + TupleTableSlot *result; + + result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); + + if (isDone != ExprEndResult) + { + node->js.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; + } } } + break; + case HJ_FILL_OUTER_TUPLE: /* - * If semijoin and we didn't return the tuple, we're still - * done with this outer tuple. + * The current outer tuple has run out of matches, so check + * whether to emit a dummy outer-join tuple. Whether we + * emit one or not, the next state is NEED_NEW_OUTER. */ - if (node->js.jointype == JOIN_SEMI) - break; /* out of loop over hash bucket */ - } - } + node->hj_JoinState = HJ_NEED_NEW_OUTER; - /* - * Now the current outer tuple has run out of matches, so check - * whether to emit a dummy outer-join tuple. If not, loop around to - * get a new outer tuple. - */ - node->hj_NeedNewOuter = true; + if (!node->hj_MatchedOuter && + HJ_FILL_OUTER(node)) + { + /* + * Generate a fake join tuple with nulls for the inner + * tuple, and return it if it passes the non-join quals. + */ + econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot; + + if (otherqual == NIL || + ExecQual(otherqual, econtext, false)) + { + TupleTableSlot *result; - if (!node->hj_MatchedOuter && - HASHJOIN_IS_OUTER(node)) - { - /* - * We are doing an outer join and there were no join matches for - * this outer tuple. Generate a fake join tuple with nulls for - * the inner tuple, and return it if it passes the non-join quals. - */ - econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot; + result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); - if (otherqual == NIL || ExecQual(otherqual, econtext, false)) - { + if (isDone != ExprEndResult) + { + node->js.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; + } + } + } + break; + + case HJ_FILL_INNER_TUPLES: /* - * qualification was satisfied so we project and return the - * slot containing the result tuple using ExecProject(). + * We have finished a batch, but we are doing right/full join, + * so any unmatched inner tuples in the hashtable have to be + * emitted before we continue to the next batch. */ - TupleTableSlot *result; + if (!ExecScanHashTableForUnmatched(node, econtext)) + { + /* no more unmatched tuples */ + node->hj_JoinState = HJ_NEED_NEW_BATCH; + continue; + } - result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); + /* + * Generate a fake join tuple with nulls for the outer tuple, + * and return it if it passes the non-join quals. + */ + econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot; - if (isDone != ExprEndResult) + if (otherqual == NIL || + ExecQual(otherqual, econtext, false)) { - node->js.ps.ps_TupFromTlist = - (isDone == ExprMultipleResult); - return result; + TupleTableSlot *result; + + result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); + + if (isDone != ExprEndResult) + { + node->js.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; + } } - } + break; + + case HJ_NEED_NEW_BATCH: + /* + * Try to advance to next batch. Done if there are no more. + */ + if (!ExecHashJoinNewBatch(node)) + return NULL; /* end of join */ + node->hj_JoinState = HJ_NEED_NEW_OUTER; + break; + + default: + elog(ERROR, "unrecognized hashjoin state: %d", + (int) node->hj_JoinState); } } } @@ -406,7 +474,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ExecInitResultTupleSlot(estate, &hjstate->js.ps); hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate); - /* note: HASHJOIN_IS_OUTER macro depends on this initialization */ + /* set up null tuples for outer joins, if needed */ switch (node->join.jointype) { case JOIN_INNER: @@ -418,6 +486,19 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ExecInitNullTupleSlot(estate, ExecGetResultType(innerPlanState(hjstate))); break; + case JOIN_RIGHT: + hjstate->hj_NullOuterTupleSlot = + ExecInitNullTupleSlot(estate, + ExecGetResultType(outerPlanState(hjstate))); + break; + case JOIN_FULL: + hjstate->hj_NullOuterTupleSlot = + ExecInitNullTupleSlot(estate, + ExecGetResultType(outerPlanState(hjstate))); + hjstate->hj_NullInnerTupleSlot = + ExecInitNullTupleSlot(estate, + ExecGetResultType(innerPlanState(hjstate))); + break; default: elog(ERROR, "unrecognized join type: %d", (int) node->join.jointype); @@ -425,7 +506,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) /* * now for some voodoo. our temporary tuple slot is actually the result - * tuple slot of the Hash node (which is our inner plan). we do this + * tuple slot of the Hash node (which is our inner plan). we can do this * because Hash nodes don't return tuples via ExecProcNode() -- instead * the hash join node uses ExecScanHashBucket() to get at the contents of * the hash table. -cim 6/9/91 @@ -485,7 +566,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses; hjstate->js.ps.ps_TupFromTlist = false; - hjstate->hj_NeedNewOuter = true; + hjstate->hj_JoinState = HJ_BUILD_HASHTABLE; hjstate->hj_MatchedOuter = false; hjstate->hj_OuterNotEmpty = false; @@ -533,12 +614,13 @@ ExecEndHashJoin(HashJoinState *node) * ExecHashJoinOuterGetTuple * * get the next outer tuple for hashjoin: either by - * executing a plan node in the first pass, or from + * executing the outer plan node in the first pass, or from * the temp files for the hashjoin batches. * - * Returns a null slot if no more outer tuples. On success, the tuple's - * hash value is stored at *hashvalue --- this is either originally computed, - * or re-read from the temp file. + * Returns a null slot if no more outer tuples (within the current batch). + * + * On success, the tuple's hash value is stored at *hashvalue --- this is + * either originally computed, or re-read from the temp file. */ static TupleTableSlot * ExecHashJoinOuterGetTuple(PlanState *outerNode, @@ -572,7 +654,7 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, if (ExecHashGetHashValue(hashtable, econtext, hjstate->hj_OuterHashKeys, true, /* outer tuple */ - HASHJOIN_IS_OUTER(hjstate), + HJ_FILL_OUTER(hjstate), hashvalue)) { /* remember outer relation is not empty for possible rescan */ @@ -587,31 +669,27 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, */ slot = ExecProcNode(outerNode); } + } + else if (curbatch < hashtable->nbatch) + { + BufFile *file = hashtable->outerBatchFile[curbatch]; /* - * We have just reached the end of the first pass. Try to switch to a - * saved batch. + * In outer-join cases, we could get here even though the batch file + * is empty. */ - curbatch = ExecHashJoinNewBatch(hjstate); - } + if (file == NULL) + return NULL; - /* - * Try to read from a temp file. Loop allows us to advance to new batches - * as needed. NOTE: nbatch could increase inside ExecHashJoinNewBatch, so - * don't try to optimize this loop. - */ - while (curbatch < hashtable->nbatch) - { slot = ExecHashJoinGetSavedTuple(hjstate, - hashtable->outerBatchFile[curbatch], + file, hashvalue, hjstate->hj_OuterTupleSlot); if (!TupIsNull(slot)) return slot; - curbatch = ExecHashJoinNewBatch(hjstate); } - /* Out of batches... */ + /* End of this batch */ return NULL; } @@ -619,10 +697,9 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, * ExecHashJoinNewBatch * switch to a new hashjoin batch * - * Returns the number of the new batch (1..nbatch-1), or nbatch if no more. - * We will never return a batch number that has an empty outer batch file. + * Returns true if successful, false if there are no more batches. */ -static int +static bool ExecHashJoinNewBatch(HashJoinState *hjstate) { HashJoinTable hashtable = hjstate->hj_HashTable; @@ -632,7 +709,6 @@ ExecHashJoinNewBatch(HashJoinState *hjstate) TupleTableSlot *slot; uint32 hashvalue; -start_over: nbatch = hashtable->nbatch; curbatch = hashtable->curbatch; @@ -657,6 +733,7 @@ start_over: hashtable->skewEnabled = false; hashtable->skewBucket = NULL; hashtable->skewBucketNums = NULL; + hashtable->nSkewBuckets = 0; hashtable->spaceUsedSkew = 0; } @@ -665,8 +742,9 @@ start_over: * sides. We can sometimes skip over batches that are empty on only one * side, but there are exceptions: * - * 1. In an outer join, we have to process outer batches even if the inner - * batch is empty. + * 1. In a left/full outer join, we have to process outer batches even if + * the inner batch is empty. Similarly, in a right/full outer join, we + * have to process inner batches even if the outer batch is empty. * * 2. If we have increased nbatch since the initial estimate, we have to * scan inner batches since they might contain tuples that need to be @@ -682,7 +760,10 @@ start_over: hashtable->innerBatchFile[curbatch] == NULL)) { if (hashtable->outerBatchFile[curbatch] && - HASHJOIN_IS_OUTER(hjstate)) + HJ_FILL_OUTER(hjstate)) + break; /* must process due to rule 1 */ + if (hashtable->innerBatchFile[curbatch] && + HJ_FILL_INNER(hjstate)) break; /* must process due to rule 1 */ if (hashtable->innerBatchFile[curbatch] && nbatch != hashtable->nbatch_original) @@ -702,7 +783,7 @@ start_over: } if (curbatch >= nbatch) - return curbatch; /* no more batches */ + return false; /* no more batches */ hashtable->curbatch = curbatch; @@ -741,20 +822,17 @@ start_over: } /* - * If there's no outer batch file, advance to next batch. + * Rewind outer batch file (if present), so that we can start reading it. */ - if (hashtable->outerBatchFile[curbatch] == NULL) - goto start_over; - - /* - * Rewind outer batch file, so that we can start reading it. - */ - if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not rewind hash-join temporary file: %m"))); + if (hashtable->outerBatchFile[curbatch] != NULL) + { + if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not rewind hash-join temporary file: %m"))); + } - return curbatch; + return true; } /* @@ -857,9 +935,16 @@ ExecReScanHashJoin(HashJoinState *node) node->js.ps.righttree->chgParam == NULL) { /* - * okay to reuse the hash table; needn't rescan inner, either. + * Okay to reuse the hash table; needn't rescan inner, either. * - * What we do need to do is reset our state about the emptiness of + * However, if it's a right/full join, we'd better reset the + * inner-tuple match flags contained in the table. + */ + if (HJ_FILL_INNER(node)) + ExecHashTableResetMatchFlags(node->hj_HashTable); + + /* + * Also, we need to reset our state about the emptiness of * the outer relation, so that the new scan of the outer will * update it correctly if it turns out to be empty this time. * (There's no harm in clearing it now because ExecHashJoin won't @@ -869,12 +954,16 @@ ExecReScanHashJoin(HashJoinState *node) * through.) */ node->hj_OuterNotEmpty = false; + + /* ExecHashJoin can skip the BUILD_HASHTABLE step */ + node->hj_JoinState = HJ_NEED_NEW_OUTER; } else { /* must destroy and rebuild hash table */ ExecHashTableDestroy(node->hj_HashTable); node->hj_HashTable = NULL; + node->hj_JoinState = HJ_BUILD_HASHTABLE; /* * if chgParam of subnode is not null then plan will be re-scanned @@ -892,7 +981,6 @@ ExecReScanHashJoin(HashJoinState *node) node->hj_CurTuple = NULL; node->js.ps.ps_TupFromTlist = false; - node->hj_NeedNewOuter = true; node->hj_MatchedOuter = false; node->hj_FirstOuterTupleSlot = NULL; diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 7dffe18c973..fd38e8bd7a9 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -889,8 +889,8 @@ generate_join_implied_equalities_normal(PlannerInfo *root, * combination for which an opfamily member operator exists. If we have * choices, we prefer simple Var members (possibly with RelabelType) since * these are (a) cheapest to compute at runtime and (b) most likely to - * have useful statistics. Also, if enable_hashjoin is on, we prefer - * operators that are also hashjoinable. + * have useful statistics. Also, prefer operators that are also + * hashjoinable. */ if (outer_members && inner_members) { @@ -925,8 +925,7 @@ generate_join_implied_equalities_normal(PlannerInfo *root, (IsA(inner_em->em_expr, RelabelType) && IsA(((RelabelType *) inner_em->em_expr)->arg, Var))) score++; - if (!enable_hashjoin || - op_hashjoinable(eq_op, + if (op_hashjoinable(eq_op, exprType((Node *) outer_em->em_expr))) score++; if (score > best_score) diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index bea134b3f1a..54257c05c50 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -41,7 +41,8 @@ static List *select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, - JoinType jointype); + JoinType jointype, + bool *have_nonmergeable_clause); /* @@ -77,12 +78,13 @@ add_paths_to_joinrel(PlannerInfo *root, List *restrictlist) { List *mergeclause_list = NIL; + bool have_nonmergeable_clause = false; /* * Find potential mergejoin clauses. We can skip this if we are not - * interested in doing a mergejoin. However, mergejoin is currently our - * only way of implementing full outer joins, so override mergejoin - * disable if it's a full join. + * interested in doing a mergejoin. However, mergejoin may be our only + * way of implementing a full outer join, so override enable_mergejoin if + * it's a full join. */ if (enable_mergejoin || jointype == JOIN_FULL) mergeclause_list = select_mergejoin_clauses(root, @@ -90,22 +92,27 @@ add_paths_to_joinrel(PlannerInfo *root, outerrel, innerrel, restrictlist, - jointype); + jointype, + &have_nonmergeable_clause); /* * 1. Consider mergejoin paths where both relations must be explicitly - * sorted. + * sorted. Skip this if we can't mergejoin. */ - sort_inner_and_outer(root, joinrel, outerrel, innerrel, - restrictlist, mergeclause_list, jointype, sjinfo); + if (!have_nonmergeable_clause) + sort_inner_and_outer(root, joinrel, outerrel, innerrel, + restrictlist, mergeclause_list, jointype, sjinfo); /* * 2. Consider paths where the outer relation need not be explicitly * sorted. This includes both nestloops and mergejoins where the outer - * path is already ordered. + * path is already ordered. Again, skip this if we can't mergejoin. + * (That's okay because we know that nestloop can't handle right/full + * joins at all, so it wouldn't work in those cases either.) */ - match_unsorted_outer(root, joinrel, outerrel, innerrel, - restrictlist, mergeclause_list, jointype, sjinfo); + if (!have_nonmergeable_clause) + match_unsorted_outer(root, joinrel, outerrel, innerrel, + restrictlist, mergeclause_list, jointype, sjinfo); #ifdef NOT_USED @@ -120,15 +127,17 @@ add_paths_to_joinrel(PlannerInfo *root, * those made by match_unsorted_outer when add_paths_to_joinrel() is * invoked with the two rels given in the other order. */ - match_unsorted_inner(root, joinrel, outerrel, innerrel, - restrictlist, mergeclause_list, jointype, sjinfo); + if (!have_nonmergeable_clause) + match_unsorted_inner(root, joinrel, outerrel, innerrel, + restrictlist, mergeclause_list, jointype, sjinfo); #endif /* * 4. Consider paths where both outer and inner relations must be hashed - * before being joined. + * before being joined. As above, disregard enable_hashjoin for full + * joins, because there may be no other alternative. */ - if (enable_hashjoin) + if (enable_hashjoin || jointype == JOIN_FULL) hash_inner_and_outer(root, joinrel, outerrel, innerrel, restrictlist, jointype, sjinfo); } @@ -189,38 +198,12 @@ sort_inner_and_outer(PlannerInfo *root, JoinType jointype, SpecialJoinInfo *sjinfo) { - bool useallclauses; Path *outer_path; Path *inner_path; List *all_pathkeys; ListCell *l; /* - * If we are doing a right or full join, we must use *all* the - * mergeclauses as join clauses, else we will not have a valid plan. - */ - switch (jointype) - { - case JOIN_INNER: - case JOIN_LEFT: - case JOIN_SEMI: - case JOIN_ANTI: - case JOIN_UNIQUE_OUTER: - case JOIN_UNIQUE_INNER: - useallclauses = false; - break; - case JOIN_RIGHT: - case JOIN_FULL: - useallclauses = true; - break; - default: - elog(ERROR, "unrecognized join type: %d", - (int) jointype); - useallclauses = false; /* keep compiler quiet */ - break; - } - - /* * We only consider the cheapest-total-cost input paths, since we are * assuming here that a sort is required. We will consider * cheapest-startup-cost input paths later, and only if they don't need a @@ -390,9 +373,9 @@ match_unsorted_outer(PlannerInfo *root, /* * Nestloop only supports inner, left, semi, and anti joins. Also, if we - * are doing a right or full join, we must use *all* the mergeclauses as - * join clauses, else we will not have a valid plan. (Although these two - * flags are currently inverses, keep them separate for clarity and + * are doing a right or full mergejoin, we must use *all* the mergeclauses + * as join clauses, else we will not have a valid plan. (Although these + * two flags are currently inverses, keep them separate for clarity and * possible future changes.) */ switch (jointype) @@ -574,8 +557,8 @@ match_unsorted_outer(PlannerInfo *root, * Special corner case: for "x FULL JOIN y ON true", there will be no * join clauses at all. Ordinarily we'd generate a clauseless * nestloop path, but since mergejoin is our only join type that - * supports FULL JOIN, it's necessary to generate a clauseless - * mergejoin path instead. + * supports FULL JOIN without any join clauses, it's necessary to + * generate a clauseless mergejoin path instead. */ if (mergeclauses == NIL) { @@ -781,30 +764,11 @@ hash_inner_and_outer(PlannerInfo *root, JoinType jointype, SpecialJoinInfo *sjinfo) { - bool isouterjoin; + bool isouterjoin = IS_OUTER_JOIN(jointype); List *hashclauses; ListCell *l; /* - * Hashjoin only supports inner, left, semi, and anti joins. - */ - switch (jointype) - { - case JOIN_INNER: - case JOIN_SEMI: - case JOIN_UNIQUE_OUTER: - case JOIN_UNIQUE_INNER: - isouterjoin = false; - break; - case JOIN_LEFT: - case JOIN_ANTI: - isouterjoin = true; - break; - default: - return; - } - - /* * We need to build only one hashpath for any given pair of outer and * inner relations; all of the hashable clauses will be used as keys. * @@ -963,6 +927,11 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, * Select mergejoin clauses that are usable for a particular join. * Returns a list of RestrictInfo nodes for those clauses. * + * *have_nonmergeable_clause is set TRUE if this is a right/full join and + * there are nonmergejoinable join clauses. The executor's mergejoin + * machinery cannot handle such cases, so we have to avoid generating a + * mergejoin plan. + * * We also mark each selected RestrictInfo to show which side is currently * being considered as outer. These are transient markings that are only * good for the duration of the current add_paths_to_joinrel() call! @@ -977,13 +946,15 @@ select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, - JoinType jointype) + JoinType jointype, + bool *have_nonmergeable_clause) { List *result_list = NIL; bool isouterjoin = IS_OUTER_JOIN(jointype); - bool have_nonmergeable_joinclause = false; ListCell *l; + *have_nonmergeable_clause = false; + foreach(l, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); @@ -991,7 +962,7 @@ select_mergejoin_clauses(PlannerInfo *root, /* * If processing an outer join, only use its own join clauses in the * merge. For inner joins we can use pushed-down clauses too. (Note: - * we don't set have_nonmergeable_joinclause here because pushed-down + * we don't set have_nonmergeable_clause here because pushed-down * clauses will become otherquals not joinquals.) */ if (isouterjoin && restrictinfo->is_pushed_down) @@ -1008,7 +979,7 @@ select_mergejoin_clauses(PlannerInfo *root, * FALSE.) */ if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const)) - have_nonmergeable_joinclause = true; + *have_nonmergeable_clause = true; continue; /* not mergejoinable */ } @@ -1017,7 +988,7 @@ select_mergejoin_clauses(PlannerInfo *root, */ if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) { - have_nonmergeable_joinclause = true; + *have_nonmergeable_clause = true; continue; /* no good for these input relations */ } @@ -1046,7 +1017,7 @@ select_mergejoin_clauses(PlannerInfo *root, if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) { - have_nonmergeable_joinclause = true; + *have_nonmergeable_clause = true; continue; /* can't handle redundant eclasses */ } @@ -1054,27 +1025,19 @@ select_mergejoin_clauses(PlannerInfo *root, } /* - * If it is a right/full join then *all* the explicit join clauses must be - * mergejoinable, else the executor will fail. If we are asked for a right - * join then just return NIL to indicate no mergejoin is possible (we can - * handle it as a left join instead). If we are asked for a full join then - * emit an error, because there is no fallback. + * If it is not a right/full join then we don't need to insist on all the + * joinclauses being mergejoinable, so reset the flag. This simplifies + * the logic in add_paths_to_joinrel. */ - if (have_nonmergeable_joinclause) + switch (jointype) { - switch (jointype) - { - case JOIN_RIGHT: - return NIL; /* not mergejoinable */ - case JOIN_FULL: - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("FULL JOIN is only supported with merge-joinable join conditions"))); - break; - default: - /* otherwise, it's OK to have nonmergeable join quals */ - break; - } + case JOIN_RIGHT: + case JOIN_FULL: + break; + default: + /* otherwise, it's OK to have nonmergeable join quals */ + *have_nonmergeable_clause = false; + break; } return result_list; diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 197c49d774c..605a32d52d6 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -658,6 +658,18 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_FULL, sjinfo, restrictlist); + + /* + * If there are join quals that aren't mergeable or hashable, we + * may not be able to build any valid plan. Complain here so that + * we can give a somewhat-useful error message. (Since we have no + * flexibility of planning for a full join, there's no chance of + * succeeding later with another pair of input rels.) + */ + if (joinrel->pathlist == NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions"))); break; case JOIN_SEMI: diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 6efb6dc4bce..c74a91f8a7f 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1328,10 +1328,9 @@ distribute_restrictinfo_to_rels(PlannerInfo *root, /* * Check for hashjoinable operators. (We don't bother setting the - * hashjoin info if we're not going to need it.) + * hashjoin info except in true join clauses.) */ - if (enable_hashjoin) - check_hashjoinable(restrictinfo); + check_hashjoinable(restrictinfo); /* * Add clause to the join lists of all the relevant relations. @@ -1458,10 +1457,9 @@ build_implied_join_equality(Oid opno, qualscope, /* required_relids */ NULL); /* nullable_relids */ - /* Set mergejoinability info always, and hashjoinability if enabled */ + /* Set mergejoinability/hashjoinability flags */ check_mergejoinable(restrictinfo); - if (enable_hashjoin) - check_hashjoinable(restrictinfo); + check_hashjoinable(restrictinfo); return restrictinfo; } diff --git a/src/include/access/htup.h b/src/include/access/htup.h index adf1321052c..f540966d684 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -196,6 +196,14 @@ typedef HeapTupleHeaderData *HeapTupleHeader; #define HEAP2_XACT_MASK 0xC000 /* visibility-related bits */ /* + * HEAP_TUPLE_HAS_MATCH is a temporary flag used during hash joins. It is + * only used in tuples that are in the hash table, and those don't need + * any visibility information, so we can overlay it on a visibility flag + * instead of using up a dedicated bit. + */ +#define HEAP_TUPLE_HAS_MATCH HEAP_ONLY_TUPLE /* tuple has a join match */ + +/* * HeapTupleHeader accessor macros * * Note: beware of multiple evaluations of "tup" argument. But the Set @@ -343,6 +351,21 @@ do { \ (tup)->t_infomask2 &= ~HEAP_ONLY_TUPLE \ ) +#define HeapTupleHeaderHasMatch(tup) \ +( \ + (tup)->t_infomask2 & HEAP_TUPLE_HAS_MATCH \ +) + +#define HeapTupleHeaderSetMatch(tup) \ +( \ + (tup)->t_infomask2 |= HEAP_TUPLE_HAS_MATCH \ +) + +#define HeapTupleHeaderClearMatch(tup) \ +( \ + (tup)->t_infomask2 &= ~HEAP_TUPLE_HAS_MATCH \ +) + #define HeapTupleHeaderGetNatts(tup) \ ((tup)->t_infomask2 & HEAP_NATTS_MASK) diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index 1b93f58af67..c4aa0d37268 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -113,6 +113,8 @@ typedef struct HashJoinTableData struct HashJoinTupleData **buckets; /* buckets array is per-batch storage, as are all the tuples */ + bool keepNulls; /* true to store unmatchable NULL tuples */ + bool skewEnabled; /* are we using skew optimization? */ HashSkewBucket **skewBucket; /* hashtable of skew buckets */ int skewBucketLen; /* size of skewBucket array (a power of 2!) */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 444a0137a2d..8517d1cf284 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -22,7 +22,8 @@ extern Node *MultiExecHash(HashState *node); extern void ExecEndHash(HashState *node); extern void ExecReScanHash(HashState *node); -extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators); +extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators, + bool keepNulls); extern void ExecHashTableDestroy(HashJoinTable hashtable); extern void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, @@ -37,9 +38,12 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno); -extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate, - ExprContext *econtext); +extern bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext); +extern void ExecPrepHashTableForUnmatched(HashJoinState *hjstate); +extern bool ExecScanHashTableForUnmatched(HashJoinState *hjstate, + ExprContext *econtext); extern void ExecHashTableReset(HashJoinTable hashtable); +extern void ExecHashTableResetMatchFlags(HashJoinTable hashtable); extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, int *numbuckets, int *numbatches, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index d669c24b981..6af4bb8d76c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1468,6 +1468,10 @@ typedef struct MergeJoinState /* ---------------- * HashJoinState information * + * hashclauses original form of the hashjoin condition + * hj_OuterHashKeys the outer hash keys in the hashjoin condition + * hj_InnerHashKeys the inner hash keys in the hashjoin condition + * hj_HashOperators the join operators in the hashjoin condition * hj_HashTable hash table for the hashjoin * (NULL if table not built yet) * hj_CurHashValue hash value for current outer tuple @@ -1477,14 +1481,12 @@ typedef struct MergeJoinState * tuple, or NULL if starting search * (hj_CurXXX variables are undefined if * OuterTupleSlot is empty!) - * hj_OuterHashKeys the outer hash keys in the hashjoin condition - * hj_InnerHashKeys the inner hash keys in the hashjoin condition - * hj_HashOperators the join operators in the hashjoin condition * hj_OuterTupleSlot tuple slot for outer tuples - * hj_HashTupleSlot tuple slot for hashed tuples - * hj_NullInnerTupleSlot prepared null tuple for left outer joins + * hj_HashTupleSlot tuple slot for inner (hashed) tuples + * hj_NullOuterTupleSlot prepared null tuple for right/full outer joins + * hj_NullInnerTupleSlot prepared null tuple for left/full outer joins * hj_FirstOuterTupleSlot first tuple retrieved from outer plan - * hj_NeedNewOuter true if need new outer tuple on next call + * hj_JoinState current state of ExecHashJoin state machine * hj_MatchedOuter true if found a join match for current outer * hj_OuterNotEmpty true if outer relation known not empty * ---------------- @@ -1498,19 +1500,20 @@ typedef struct HashJoinState { JoinState js; /* its first field is NodeTag */ List *hashclauses; /* list of ExprState nodes */ + List *hj_OuterHashKeys; /* list of ExprState nodes */ + List *hj_InnerHashKeys; /* list of ExprState nodes */ + List *hj_HashOperators; /* list of operator OIDs */ HashJoinTable hj_HashTable; uint32 hj_CurHashValue; int hj_CurBucketNo; int hj_CurSkewBucketNo; HashJoinTuple hj_CurTuple; - List *hj_OuterHashKeys; /* list of ExprState nodes */ - List *hj_InnerHashKeys; /* list of ExprState nodes */ - List *hj_HashOperators; /* list of operator OIDs */ TupleTableSlot *hj_OuterTupleSlot; TupleTableSlot *hj_HashTupleSlot; + TupleTableSlot *hj_NullOuterTupleSlot; TupleTableSlot *hj_NullInnerTupleSlot; TupleTableSlot *hj_FirstOuterTupleSlot; - bool hj_NeedNewOuter; + int hj_JoinState; bool hj_MatchedOuter; bool hj_OuterNotEmpty; } HashJoinState; |