10 files changed, 596 insertions, 363 deletions
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 5cd7332237b..934a283b8d2 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -105,7 +105,8 @@ MultiExecHash(HashState *node)
 			break;
 		/* We have to compute the hash value */
 		econtext->ecxt_innertuple = slot;
-		if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false,
+		if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
+								 false, hashtable->keepNulls,
 								 &hashvalue))
 		{
 			int			bucketNumber;
@@ -231,7 +232,7 @@ ExecEndHash(HashState *node)
  * ----------------------------------------------------------------
  */
 HashJoinTable
-ExecHashTableCreate(Hash *node, List *hashOperators)
+ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
 {
 	HashJoinTable hashtable;
 	Plan	   *outerNode;
@@ -273,6 +274,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
 	hashtable->nbuckets = nbuckets;
 	hashtable->log2_nbuckets = log2_nbuckets;
 	hashtable->buckets = NULL;
+	hashtable->keepNulls = keepNulls;
 	hashtable->skewEnabled = false;
 	hashtable->skewBucket = NULL;
 	hashtable->skewBucketLen = 0;
@@ -712,13 +714,26 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		HashJoinTuple hashTuple;
 		int			hashTupleSize;
 
+		/* Create the HashJoinTuple */
 		hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
 		hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
 													   hashTupleSize);
 		hashTuple->hashvalue = hashvalue;
 		memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
+
+		/*
+		 * We always reset the tuple-matched flag on insertion.  This is okay
+		 * even when reloading a tuple from a batch file, since the tuple
+		 * could not possibly have been matched to an outer tuple before it
+		 * went into the batch file.
+		 */
+		HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
+
+		/* Push it onto the front of the bucket's list */
 		hashTuple->next = hashtable->buckets[bucketno];
 		hashtable->buckets[bucketno] = hashTuple;
+
+		/* Account for space used, and back off if we've used too much */
 		hashtable->spaceUsed += hashTupleSize;
 		if (hashtable->spaceUsed > hashtable->spacePeak)
 			hashtable->spacePeak = hashtable->spaceUsed;
@@ -878,8 +893,12 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
  *		scan a hash bucket for matches to the current outer tuple
  *
  * The current outer tuple must be stored in econtext->ecxt_outertuple.
+ *
+ * On success, the inner tuple is stored into hjstate->hj_CurTuple and
+ * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
+ * for the latter.
  */
-HashJoinTuple
+bool
 ExecScanHashBucket(HashJoinState *hjstate,
 				   ExprContext *econtext)
 {
@@ -920,7 +939,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
 			if (ExecQual(hjclauses, econtext, false))
 			{
 				hjstate->hj_CurTuple = hashTuple;
-				return hashTuple;
+				return true;
 			}
 		}
 
@@ -930,7 +949,99 @@ ExecScanHashBucket(HashJoinState *hjstate,
 	/*
 	 * no match
 	 */
-	return NULL;
+	return false;
+}
+
+/*
+ * ExecPrepHashTableForUnmatched
+ *		set up for a series of ExecScanHashTableForUnmatched calls
+ */
+void
+ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
+{
+	/*
+	 *----------
+	 * During this scan we use the HashJoinState fields as follows:
+	 *
+	 * hj_CurBucketNo: next regular bucket to scan
+	 * hj_CurSkewBucketNo: next skew bucket (an index into skewBucketNums)
+	 * hj_CurTuple: last tuple returned, or NULL to start next bucket
+	 *----------
+	 */
+	hjstate->hj_CurBucketNo = 0;
+	hjstate->hj_CurSkewBucketNo = 0;
+	hjstate->hj_CurTuple = NULL;
+}
+
+/*
+ * ExecScanHashTableForUnmatched
+ *		scan the hash table for unmatched inner tuples
+ *
+ * On success, the inner tuple is stored into hjstate->hj_CurTuple and
+ * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
+ * for the latter.
+ */
+bool
+ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
+{
+	HashJoinTable hashtable = hjstate->hj_HashTable;
+	HashJoinTuple hashTuple = hjstate->hj_CurTuple;
+
+	for (;;)
+	{
+		/*
+		 * hj_CurTuple is the address of the tuple last returned from the
+		 * current bucket, or NULL if it's time to start scanning a new
+		 * bucket.
+		 */
+		if (hashTuple != NULL)
+			hashTuple = hashTuple->next;
+		else if (hjstate->hj_CurBucketNo < hashtable->nbuckets)
+		{
+			hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
+			hjstate->hj_CurBucketNo++;
+		}
+		else if (hjstate->hj_CurSkewBucketNo < hashtable->nSkewBuckets)
+		{
+			int		j = hashtable->skewBucketNums[hjstate->hj_CurSkewBucketNo];
+
+			hashTuple = hashtable->skewBucket[j]->tuples;
+			hjstate->hj_CurSkewBucketNo++;
+		}
+		else
+			break;				/* finished all buckets */
+
+		while (hashTuple != NULL)
+		{
+			if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(hashTuple)))
+			{
+				TupleTableSlot *inntuple;
+
+				/* insert hashtable's tuple into exec slot */
+				inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
+												 hjstate->hj_HashTupleSlot,
+												 false);	/* do not pfree */
+				econtext->ecxt_innertuple = inntuple;
+
+				/*
+				 * Reset temp memory each time; although this function doesn't
+				 * do any qual eval, the caller will, so let's keep it
+				 * parallel to ExecScanHashBucket.
+				 */
+				ResetExprContext(econtext);
+
+				hjstate->hj_CurTuple = hashTuple;
+				return true;
+			}
+
+			hashTuple = hashTuple->next;
+		}
+	}
+
+	/*
+	 * no more unmatched tuples
+	 */
+	return false;
 }
 
 /*
@@ -960,6 +1071,35 @@ ExecHashTableReset(HashJoinTable hashtable)
 	MemoryContextSwitchTo(oldcxt);
 }
 
+/*
+ * ExecHashTableResetMatchFlags
+ *		Clear all the HeapTupleHeaderHasMatch flags in the table
+ */
+void
+ExecHashTableResetMatchFlags(HashJoinTable hashtable)
+{
+	HashJoinTuple tuple;
+	int			i;
+
+	/* Reset all flags in the main table ... */
+	for (i = 0; i < hashtable->nbuckets; i++)
+	{
+		for (tuple = hashtable->buckets[i]; tuple != NULL; tuple = tuple->next)
+			HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
+	}
+
+	/* ... and the same for the skew buckets, if any */
+	for (i = 0; i < hashtable->nSkewBuckets; i++)
+	{
+		int		j = hashtable->skewBucketNums[i];
+		HashSkewBucket *skewBucket = hashtable->skewBucket[j];
+
+		for (tuple = skewBucket->tuples; tuple != NULL; tuple = tuple->next)
+			HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
+	}
+}
+
+
 void
 ExecReScanHash(HashState *node)
 {
@@ -1203,6 +1343,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable,
 												   hashTupleSize);
 	hashTuple->hashvalue = hashvalue;
 	memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
+	HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
 
 	/* Push it onto the front of the skew bucket's list */
 	hashTuple->next = hashtable->skewBucket[bucketNumber]->tuples;
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index dfca8cac522..cae31ba43a9 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -22,8 +22,20 @@
 #include "utils/memutils.h"
 
 
-/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
-#define HASHJOIN_IS_OUTER(hjstate)	((hjstate)->hj_NullInnerTupleSlot != NULL)
+/*
+ * States of the ExecHashJoin state machine
+ */
+#define HJ_BUILD_HASHTABLE		1
+#define HJ_NEED_NEW_OUTER		2
+#define HJ_SCAN_BUCKET			3
+#define HJ_FILL_OUTER_TUPLE		4
+#define HJ_FILL_INNER_TUPLES	5
+#define HJ_NEED_NEW_BATCH		6
+
+/* Returns true if doing null-fill on outer relation */
+#define HJ_FILL_OUTER(hjstate)	((hjstate)->hj_NullInnerTupleSlot != NULL)
+/* Returns true if doing null-fill on inner relation */
+#define HJ_FILL_INNER(hjstate)	((hjstate)->hj_NullOuterTupleSlot != NULL)
 
 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
 						  HashJoinState *hjstate,
@@ -32,7 +44,7 @@ static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
 						  BufFile *file,
 						  uint32 *hashvalue,
 						  TupleTableSlot *tupleSlot);
-static int	ExecHashJoinNewBatch(HashJoinState *hjstate);
+static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
 
 
 /* ----------------------------------------------------------------
@@ -52,11 +64,9 @@ ExecHashJoin(HashJoinState *node)
 	HashState  *hashNode;
 	List	   *joinqual;
 	List	   *otherqual;
-	TupleTableSlot *inntuple;
 	ExprContext *econtext;
 	ExprDoneCond isDone;
 	HashJoinTable hashtable;
-	HashJoinTuple curtuple;
 	TupleTableSlot *outerTupleSlot;
 	uint32		hashvalue;
 	int			batchno;
@@ -69,10 +79,6 @@ ExecHashJoin(HashJoinState *node)
 	otherqual = node->js.ps.qual;
 	hashNode = (HashState *) innerPlanState(node);
 	outerNode = outerPlanState(node);
-
-	/*
-	 * get information from HashJoin state
-	 */
 	hashtable = node->hj_HashTable;
 	econtext = node->js.ps.ps_ExprContext;
 
@@ -100,238 +106,300 @@ ExecHashJoin(HashJoinState *node)
 	ResetExprContext(econtext);
 
 	/*
-	 * if this is the first call, build the hash table for inner relation
+	 * run the hash join state machine
 	 */
-	if (hashtable == NULL)
+	for (;;)
 	{
-		/*
-		 * If the outer relation is completely empty, we can quit without
-		 * building the hash table.  However, for an inner join it is only a
-		 * win to check this when the outer relation's startup cost is less
-		 * than the projected cost of building the hash table.	Otherwise it's
-		 * best to build the hash table first and see if the inner relation is
-		 * empty.  (When it's an outer join, we should always make this check,
-		 * since we aren't going to be able to skip the join on the strength
-		 * of an empty inner relation anyway.)
-		 *
-		 * If we are rescanning the join, we make use of information gained on
-		 * the previous scan: don't bother to try the prefetch if the previous
-		 * scan found the outer relation nonempty.	This is not 100% reliable
-		 * since with new parameters the outer relation might yield different
-		 * results, but it's a good heuristic.
-		 *
-		 * The only way to make the check is to try to fetch a tuple from the
-		 * outer plan node.  If we succeed, we have to stash it away for later
-		 * consumption by ExecHashJoinOuterGetTuple.
-		 */
-		if (HASHJOIN_IS_OUTER(node) ||
-			(outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
-			 !node->hj_OuterNotEmpty))
+		switch (node->hj_JoinState)
 		{
-			node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
-			if (TupIsNull(node->hj_FirstOuterTupleSlot))
-			{
-				node->hj_OuterNotEmpty = false;
-				return NULL;
-			}
-			else
-				node->hj_OuterNotEmpty = true;
-		}
-		else
-			node->hj_FirstOuterTupleSlot = NULL;
+			case HJ_BUILD_HASHTABLE:
+				/*
+				 * First time through: build hash table for inner relation.
+				 */
+				Assert(hashtable == NULL);
 
-		/*
-		 * create the hash table
-		 */
-		hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
-										node->hj_HashOperators);
-		node->hj_HashTable = hashtable;
+				/*
+				 * If the outer relation is completely empty, and it's not
+				 * right/full join, we can quit without building the hash
+				 * table.  However, for an inner join it is only a win to
+				 * check this when the outer relation's startup cost is less
+				 * than the projected cost of building the hash
+				 * table.  Otherwise it's best to build the hash table first
+				 * and see if the inner relation is empty.  (When it's a left
+				 * join, we should always make this check, since we aren't
+				 * going to be able to skip the join on the strength of an
+				 * empty inner relation anyway.)
+				 *
+				 * If we are rescanning the join, we make use of information
+				 * gained on the previous scan: don't bother to try the
+				 * prefetch if the previous scan found the outer relation
+				 * nonempty. This is not 100% reliable since with new
+				 * parameters the outer relation might yield different
+				 * results, but it's a good heuristic.
+				 *
+				 * The only way to make the check is to try to fetch a tuple
+				 * from the outer plan node.  If we succeed, we have to stash
+				 * it away for later consumption by ExecHashJoinOuterGetTuple.
+				 */
+				if (HJ_FILL_INNER(node))
+				{
+					/* no chance to not build the hash table */
+					node->hj_FirstOuterTupleSlot = NULL;
+				}
+				else if (HJ_FILL_OUTER(node) ||
+						 (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
+						  !node->hj_OuterNotEmpty))
+				{
+					node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
+					if (TupIsNull(node->hj_FirstOuterTupleSlot))
+					{
+						node->hj_OuterNotEmpty = false;
+						return NULL;
+					}
+					else
+						node->hj_OuterNotEmpty = true;
+				}
+				else
+					node->hj_FirstOuterTupleSlot = NULL;
 
-		/*
-		 * execute the Hash node, to build the hash table
-		 */
-		hashNode->hashtable = hashtable;
-		(void) MultiExecProcNode((PlanState *) hashNode);
+				/*
+				 * create the hash table
+				 */
+				hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
+												node->hj_HashOperators,
+												HJ_FILL_INNER(node));
+				node->hj_HashTable = hashtable;
 
-		/*
-		 * If the inner relation is completely empty, and we're not doing an
-		 * outer join, we can quit without scanning the outer relation.
-		 */
-		if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node))
-			return NULL;
+				/*
+				 * execute the Hash node, to build the hash table
+				 */
+				hashNode->hashtable = hashtable;
+				(void) MultiExecProcNode((PlanState *) hashNode);
 
-		/*
-		 * need to remember whether nbatch has increased since we began
-		 * scanning the outer relation
-		 */
-		hashtable->nbatch_outstart = hashtable->nbatch;
+				/*
+				 * If the inner relation is completely empty, and we're not
+				 * doing a left outer join, we can quit without scanning the
+				 * outer relation.
+				 */
+				if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
+					return NULL;
 
-		/*
-		 * Reset OuterNotEmpty for scan.  (It's OK if we fetched a tuple
-		 * above, because ExecHashJoinOuterGetTuple will immediately set it
-		 * again.)
-		 */
-		node->hj_OuterNotEmpty = false;
-	}
+				/*
+				 * need to remember whether nbatch has increased since we began
+				 * scanning the outer relation
+				 */
+				hashtable->nbatch_outstart = hashtable->nbatch;
 
-	/*
-	 * run the hash join process
-	 */
-	for (;;)
-	{
-		/*
-		 * If we don't have an outer tuple, get the next one
-		 */
-		if (node->hj_NeedNewOuter)
-		{
-			outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
-													   node,
-													   &hashvalue);
-			if (TupIsNull(outerTupleSlot))
-			{
-				/* end of join */
-				return NULL;
-			}
+				/*
+				 * Reset OuterNotEmpty for scan.  (It's OK if we fetched a
+				 * tuple above, because ExecHashJoinOuterGetTuple will
+				 * immediately set it again.)
+				 */
+				node->hj_OuterNotEmpty = false;
 
-			econtext->ecxt_outertuple = outerTupleSlot;
-			node->hj_NeedNewOuter = false;
-			node->hj_MatchedOuter = false;
+				node->hj_JoinState = HJ_NEED_NEW_OUTER;
 
-			/*
-			 * Now we have an outer tuple; find the corresponding bucket for
-			 * this tuple in the main hash table or skew hash table.
-			 */
-			node->hj_CurHashValue = hashvalue;
-			ExecHashGetBucketAndBatch(hashtable, hashvalue,
-									  &node->hj_CurBucketNo, &batchno);
-			node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
-															 hashvalue);
-			node->hj_CurTuple = NULL;
+				/* FALL THRU */
 
-			/*
-			 * Now we've got an outer tuple and the corresponding hash bucket,
-			 * but it might not belong to the current batch, or it might match
-			 * a skew bucket.
-			 */
-			if (batchno != hashtable->curbatch &&
-				node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
-			{
+			case HJ_NEED_NEW_OUTER:
 				/*
-				 * Need to postpone this outer tuple to a later batch. Save it
-				 * in the corresponding outer-batch file.
+				 * We don't have an outer tuple, try to get the next one
 				 */
-				Assert(batchno > hashtable->curbatch);
-				ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
-									  hashvalue,
-									  &hashtable->outerBatchFile[batchno]);
-				node->hj_NeedNewOuter = true;
-				continue;		/* loop around for a new outer tuple */
-			}
-		}
+				outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
+														   node,
+														   &hashvalue);
+				if (TupIsNull(outerTupleSlot))
+				{
+					/* end of batch, or maybe whole join */
+					if (HJ_FILL_INNER(node))
+					{
+						/* set up to scan for unmatched inner tuples */
+						ExecPrepHashTableForUnmatched(node);
+						node->hj_JoinState = HJ_FILL_INNER_TUPLES;
+					}
+					else
+						node->hj_JoinState = HJ_NEED_NEW_BATCH;
+					continue;
+				}
 
-		/*
-		 * OK, scan the selected hash bucket for matches
-		 */
-		for (;;)
-		{
-			curtuple = ExecScanHashBucket(node, econtext);
-			if (curtuple == NULL)
-				break;			/* out of matches */
+				econtext->ecxt_outertuple = outerTupleSlot;
+				node->hj_MatchedOuter = false;
 
-			/*
-			 * we've got a match, but still need to test non-hashed quals
-			 */
-			inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
-											 node->hj_HashTupleSlot,
-											 false);	/* don't pfree */
-			econtext->ecxt_innertuple = inntuple;
+				/*
+				 * Find the corresponding bucket for this tuple in the main
+				 * hash table or skew hash table.
+				 */
+				node->hj_CurHashValue = hashvalue;
+				ExecHashGetBucketAndBatch(hashtable, hashvalue,
+										  &node->hj_CurBucketNo, &batchno);
+				node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
+																 hashvalue);
+				node->hj_CurTuple = NULL;
 
-			/* reset temp memory each time to avoid leaks from qual expr */
-			ResetExprContext(econtext);
+				/*
+				 * The tuple might not belong to the current batch (where
+				 * "current batch" includes the skew buckets if any).
+				 */
+				if (batchno != hashtable->curbatch &&
+					node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
+				{
+					/*
+					 * Need to postpone this outer tuple to a later batch.
+					 * Save it in the corresponding outer-batch file.
+					 */
+					Assert(batchno > hashtable->curbatch);
+					ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
+										  hashvalue,
+										  &hashtable->outerBatchFile[batchno]);
+					/* Loop around, staying in HJ_NEED_NEW_OUTER state */
+					continue;
+				}
 
-			/*
-			 * if we pass the qual, then save state for next call and have
-			 * ExecProject form the projection, store it in the tuple table,
-			 * and return the slot.
-			 *
-			 * Only the joinquals determine MatchedOuter status, but all quals
-			 * must pass to actually return the tuple.
-			 */
-			if (joinqual == NIL || ExecQual(joinqual, econtext, false))
-			{
-				node->hj_MatchedOuter = true;
+				/* OK, let's scan the bucket for matches */
+				node->hj_JoinState = HJ_SCAN_BUCKET;
+
+				/* FALL THRU */
 
-				/* In an antijoin, we never return a matched tuple */
-				if (node->js.jointype == JOIN_ANTI)
+			case HJ_SCAN_BUCKET:
+				/*
+				 * Scan the selected hash bucket for matches to current outer
+				 */
+				if (!ExecScanHashBucket(node, econtext))
 				{
-					node->hj_NeedNewOuter = true;
-					break;		/* out of loop over hash bucket */
+					/* out of matches; check for possible outer-join fill */
+					node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
+					continue;
 				}
 
 				/*
-				 * In a semijoin, we'll consider returning the first match,
-				 * but after that we're done with this outer tuple.
+				 * We've got a match, but still need to test non-hashed quals.
+				 * ExecScanHashBucket already set up all the state needed to
+				 * call ExecQual.
+				 *
+				 * If we pass the qual, then save state for next call and have
+				 * ExecProject form the projection, store it in the tuple
+				 * table, and return the slot.
+				 *
+				 * Only the joinquals determine tuple match status, but all
+				 * quals must pass to actually return the tuple.
 				 */
-				if (node->js.jointype == JOIN_SEMI)
-					node->hj_NeedNewOuter = true;
-
-				if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+				if (joinqual == NIL || ExecQual(joinqual, econtext, false))
 				{
-					TupleTableSlot *result;
+					node->hj_MatchedOuter = true;
+					HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
 
-					result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+					/* In an antijoin, we never return a matched tuple */
+					if (node->js.jointype == JOIN_ANTI)
+					{
+						node->hj_JoinState = HJ_NEED_NEW_OUTER;
+						continue;
+					}
 
-					if (isDone != ExprEndResult)
+					/*
+					 * In a semijoin, we'll consider returning the first match,
+					 * but after that we're done with this outer tuple.
+					 */
+					if (node->js.jointype == JOIN_SEMI)
+						node->hj_JoinState = HJ_NEED_NEW_OUTER;
+
+					if (otherqual == NIL ||
+						ExecQual(otherqual, econtext, false))
 					{
-						node->js.ps.ps_TupFromTlist =
-							(isDone == ExprMultipleResult);
-						return result;
+						TupleTableSlot *result;
+
+						result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+						if (isDone != ExprEndResult)
+						{
+							node->js.ps.ps_TupFromTlist =
+								(isDone == ExprMultipleResult);
+							return result;
+						}
 					}
 				}
+				break;
 
+			case HJ_FILL_OUTER_TUPLE:
 				/*
-				 * If semijoin and we didn't return the tuple, we're still
-				 * done with this outer tuple.
+				 * The current outer tuple has run out of matches, so check
+				 * whether to emit a dummy outer-join tuple.  Whether we
+				 * emit one or not, the next state is NEED_NEW_OUTER.
 				 */
-				if (node->js.jointype == JOIN_SEMI)
-					break;		/* out of loop over hash bucket */
-			}
-		}
+				node->hj_JoinState = HJ_NEED_NEW_OUTER;
 
-		/*
-		 * Now the current outer tuple has run out of matches, so check
-		 * whether to emit a dummy outer-join tuple. If not, loop around to
-		 * get a new outer tuple.
-		 */
-		node->hj_NeedNewOuter = true;
+				if (!node->hj_MatchedOuter &&
+					HJ_FILL_OUTER(node))
+				{
+					/*
+					 * Generate a fake join tuple with nulls for the inner
+					 * tuple, and return it if it passes the non-join quals.
+					 */
+					econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
+
+					if (otherqual == NIL ||
+						ExecQual(otherqual, econtext, false))
+					{
+						TupleTableSlot *result;
 
-		if (!node->hj_MatchedOuter &&
-			HASHJOIN_IS_OUTER(node))
-		{
-			/*
-			 * We are doing an outer join and there were no join matches for
-			 * this outer tuple.  Generate a fake join tuple with nulls for
-			 * the inner tuple, and return it if it passes the non-join quals.
-			 */
-			econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
+						result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
 
-			if (otherqual == NIL || ExecQual(otherqual, econtext, false))
-			{
+						if (isDone != ExprEndResult)
+						{
+							node->js.ps.ps_TupFromTlist =
+								(isDone == ExprMultipleResult);
+							return result;
+						}
+					}
+				}
+				break;
+
+			case HJ_FILL_INNER_TUPLES:
 				/*
-				 * qualification was satisfied so we project and return the
-				 * slot containing the result tuple using ExecProject().
+				 * We have finished a batch, but we are doing right/full join,
+				 * so any unmatched inner tuples in the hashtable have to be
+				 * emitted before we continue to the next batch.
 				 */
-				TupleTableSlot *result;
+				if (!ExecScanHashTableForUnmatched(node, econtext))
+				{
+					/* no more unmatched tuples */
+					node->hj_JoinState = HJ_NEED_NEW_BATCH;
+					continue;
+				}
 
-				result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+				/*
+				 * Generate a fake join tuple with nulls for the outer tuple,
+				 * and return it if it passes the non-join quals.
+				 */
+				econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
 
-				if (isDone != ExprEndResult)
+				if (otherqual == NIL ||
+					ExecQual(otherqual, econtext, false))
 				{
-					node->js.ps.ps_TupFromTlist =
-						(isDone == ExprMultipleResult);
-					return result;
+					TupleTableSlot *result;
+
+					result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+					if (isDone != ExprEndResult)
+					{
+						node->js.ps.ps_TupFromTlist =
+							(isDone == ExprMultipleResult);
+						return result;
+					}
 				}
-			}
+				break;
+
+			case HJ_NEED_NEW_BATCH:
+				/*
+				 * Try to advance to next batch.  Done if there are no more.
+				 */
+				if (!ExecHashJoinNewBatch(node))
+					return NULL;				/* end of join */
+				node->hj_JoinState = HJ_NEED_NEW_OUTER;
+				break;
+
+			default:
+				elog(ERROR, "unrecognized hashjoin state: %d",
+					 (int) node->hj_JoinState);
 		}
 	}
 }
@@ -406,7 +474,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	ExecInitResultTupleSlot(estate, &hjstate->js.ps);
 	hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
 
-	/* note: HASHJOIN_IS_OUTER macro depends on this initialization */
+	/* set up null tuples for outer joins, if needed */
 	switch (node->join.jointype)
 	{
 		case JOIN_INNER:
@@ -418,6 +486,19 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 				ExecInitNullTupleSlot(estate,
 								 ExecGetResultType(innerPlanState(hjstate)));
 			break;
+		case JOIN_RIGHT:
+			hjstate->hj_NullOuterTupleSlot =
+				ExecInitNullTupleSlot(estate,
+								 ExecGetResultType(outerPlanState(hjstate)));
+			break;
+		case JOIN_FULL:
+			hjstate->hj_NullOuterTupleSlot =
+				ExecInitNullTupleSlot(estate,
+								 ExecGetResultType(outerPlanState(hjstate)));
+			hjstate->hj_NullInnerTupleSlot =
+				ExecInitNullTupleSlot(estate,
+								 ExecGetResultType(innerPlanState(hjstate)));
+			break;
 		default:
 			elog(ERROR, "unrecognized join type: %d",
 				 (int) node->join.jointype);
@@ -425,7 +506,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 
 	/*
 	 * now for some voodoo.  our temporary tuple slot is actually the result
-	 * tuple slot of the Hash node (which is our inner plan).  we do this
+	 * tuple slot of the Hash node (which is our inner plan).  we can do this
 	 * because Hash nodes don't return tuples via ExecProcNode() -- instead
 	 * the hash join node uses ExecScanHashBucket() to get at the contents of
 	 * the hash table.	-cim 6/9/91
@@ -485,7 +566,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
 
 	hjstate->js.ps.ps_TupFromTlist = false;
-	hjstate->hj_NeedNewOuter = true;
+	hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
 	hjstate->hj_MatchedOuter = false;
 	hjstate->hj_OuterNotEmpty = false;
 
@@ -533,12 +614,13 @@ ExecEndHashJoin(HashJoinState *node)
  * ExecHashJoinOuterGetTuple
  *
  *		get the next outer tuple for hashjoin: either by
- *		executing a plan node in the first pass, or from
+ *		executing the outer plan node in the first pass, or from
  *		the temp files for the hashjoin batches.
  *
- * Returns a null slot if no more outer tuples.  On success, the tuple's
- * hash value is stored at *hashvalue --- this is either originally computed,
- * or re-read from the temp file.
+ * Returns a null slot if no more outer tuples (within the current batch).
+ *
+ * On success, the tuple's hash value is stored at *hashvalue --- this is
+ * either originally computed, or re-read from the temp file.
  */
 static TupleTableSlot *
 ExecHashJoinOuterGetTuple(PlanState *outerNode,
@@ -572,7 +654,7 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
 			if (ExecHashGetHashValue(hashtable, econtext,
 									 hjstate->hj_OuterHashKeys,
 									 true,		/* outer tuple */
-									 HASHJOIN_IS_OUTER(hjstate),
+									 HJ_FILL_OUTER(hjstate),
 									 hashvalue))
 			{
 				/* remember outer relation is not empty for possible rescan */
@@ -587,31 +669,27 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
 			 */
 			slot = ExecProcNode(outerNode);
 		}
+	}
+	else if (curbatch < hashtable->nbatch)
+	{
+		BufFile    *file = hashtable->outerBatchFile[curbatch];
 
 		/*
-		 * We have just reached the end of the first pass. Try to switch to a
-		 * saved batch.
+		 * In outer-join cases, we could get here even though the batch file
+		 * is empty.
 		 */
-		curbatch = ExecHashJoinNewBatch(hjstate);
-	}
+		if (file == NULL)
+			return NULL;
 
-	/*
-	 * Try to read from a temp file. Loop allows us to advance to new batches
-	 * as needed.  NOTE: nbatch could increase inside ExecHashJoinNewBatch, so
-	 * don't try to optimize this loop.
-	 */
-	while (curbatch < hashtable->nbatch)
-	{
 		slot = ExecHashJoinGetSavedTuple(hjstate,
-										 hashtable->outerBatchFile[curbatch],
+										 file,
 										 hashvalue,
 										 hjstate->hj_OuterTupleSlot);
 		if (!TupIsNull(slot))
 			return slot;
-		curbatch = ExecHashJoinNewBatch(hjstate);
 	}
 
-	/* Out of batches... */
+	/* End of this batch */
 	return NULL;
 }
 
@@ -619,10 +697,9 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
  * ExecHashJoinNewBatch
  *		switch to a new hashjoin batch
  *
- * Returns the number of the new batch (1..nbatch-1), or nbatch if no more.
- * We will never return a batch number that has an empty outer batch file.
+ * Returns true if successful, false if there are no more batches.
  */
-static int
+static bool
 ExecHashJoinNewBatch(HashJoinState *hjstate)
 {
 	HashJoinTable hashtable = hjstate->hj_HashTable;
@@ -632,7 +709,6 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
 	TupleTableSlot *slot;
 	uint32		hashvalue;
 
-start_over:
 	nbatch = hashtable->nbatch;
 	curbatch = hashtable->curbatch;
 
@@ -657,6 +733,7 @@ start_over:
 		hashtable->skewEnabled = false;
 		hashtable->skewBucket = NULL;
 		hashtable->skewBucketNums = NULL;
+		hashtable->nSkewBuckets = 0;
 		hashtable->spaceUsedSkew = 0;
 	}
 
@@ -665,8 +742,9 @@ start_over:
 	 * sides.  We can sometimes skip over batches that are empty on only one
 	 * side, but there are exceptions:
 	 *
-	 * 1. In an outer join, we have to process outer batches even if the inner
-	 * batch is empty.
+	 * 1. In a left/full outer join, we have to process outer batches even if
+	 * the inner batch is empty.  Similarly, in a right/full outer join, we
+	 * have to process inner batches even if the outer batch is empty.
 	 *
 	 * 2. If we have increased nbatch since the initial estimate, we have to
 	 * scan inner batches since they might contain tuples that need to be
@@ -682,7 +760,10 @@ start_over:
 			hashtable->innerBatchFile[curbatch] == NULL))
 	{
 		if (hashtable->outerBatchFile[curbatch] &&
-			HASHJOIN_IS_OUTER(hjstate))
+			HJ_FILL_OUTER(hjstate))
+			break;				/* must process due to rule 1 */
+		if (hashtable->innerBatchFile[curbatch] &&
+			HJ_FILL_INNER(hjstate))
 			break;				/* must process due to rule 1 */
 		if (hashtable->innerBatchFile[curbatch] &&
 			nbatch != hashtable->nbatch_original)
@@ -702,7 +783,7 @@ start_over:
 	}
 
 	if (curbatch >= nbatch)
-		return curbatch;		/* no more batches */
+		return false;		/* no more batches */
 
 	hashtable->curbatch = curbatch;
 
@@ -741,20 +822,17 @@ start_over:
 	}
 
 	/*
-	 * If there's no outer batch file, advance to next batch.
+	 * Rewind outer batch file (if present), so that we can start reading it.
 	 */
-	if (hashtable->outerBatchFile[curbatch] == NULL)
-		goto start_over;
-
-	/*
-	 * Rewind outer batch file, so that we can start reading it.
-	 */
-	if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not rewind hash-join temporary file: %m")));
+	if (hashtable->outerBatchFile[curbatch] != NULL)
+	{
+		if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not rewind hash-join temporary file: %m")));
+	}
 
-	return curbatch;
+	return true;
 }
 
 /*
@@ -857,9 +935,16 @@ ExecReScanHashJoin(HashJoinState *node)
 			node->js.ps.righttree->chgParam == NULL)
 		{
 			/*
-			 * okay to reuse the hash table; needn't rescan inner, either.
+			 * Okay to reuse the hash table; needn't rescan inner, either.
 			 *
-			 * What we do need to do is reset our state about the emptiness of
+			 * However, if it's a right/full join, we'd better reset the
+			 * inner-tuple match flags contained in the table.
+			 */
+			if (HJ_FILL_INNER(node))
+				ExecHashTableResetMatchFlags(node->hj_HashTable);
+
+			/*
+			 * Also, we need to reset our state about the emptiness of
 			 * the outer relation, so that the new scan of the outer will
 			 * update it correctly if it turns out to be empty this time.
 			 * (There's no harm in clearing it now because ExecHashJoin won't
@@ -869,12 +954,16 @@ ExecReScanHashJoin(HashJoinState *node)
 			 * through.)
 			 */
 			node->hj_OuterNotEmpty = false;
+
+			/* ExecHashJoin can skip the BUILD_HASHTABLE step */
+			node->hj_JoinState = HJ_NEED_NEW_OUTER;
 		}
 		else
 		{
 			/* must destroy and rebuild hash table */
 			ExecHashTableDestroy(node->hj_HashTable);
 			node->hj_HashTable = NULL;
+			node->hj_JoinState = HJ_BUILD_HASHTABLE;
 
 			/*
 			 * if chgParam of subnode is not null then plan will be re-scanned
@@ -892,7 +981,6 @@ ExecReScanHashJoin(HashJoinState *node)
 	node->hj_CurTuple = NULL;
 
 	node->js.ps.ps_TupFromTlist = false;
-	node->hj_NeedNewOuter = true;
 	node->hj_MatchedOuter = false;
 	node->hj_FirstOuterTupleSlot = NULL;
 
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 7dffe18c973..fd38e8bd7a9 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -889,8 +889,8 @@ generate_join_implied_equalities_normal(PlannerInfo *root,
 	 * combination for which an opfamily member operator exists.  If we have
 	 * choices, we prefer simple Var members (possibly with RelabelType) since
 	 * these are (a) cheapest to compute at runtime and (b) most likely to
-	 * have useful statistics.	Also, if enable_hashjoin is on, we prefer
-	 * operators that are also hashjoinable.
+	 * have useful statistics. Also, prefer operators that are also
+	 * hashjoinable.
 	 */
 	if (outer_members && inner_members)
 	{
@@ -925,8 +925,7 @@ generate_join_implied_equalities_normal(PlannerInfo *root,
 					(IsA(inner_em->em_expr, RelabelType) &&
 					 IsA(((RelabelType *) inner_em->em_expr)->arg, Var)))
 					score++;
-				if (!enable_hashjoin ||
-					op_hashjoinable(eq_op,
+				if (op_hashjoinable(eq_op,
 									exprType((Node *) outer_em->em_expr)))
 					score++;
 				if (score > best_score)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index bea134b3f1a..54257c05c50 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -41,7 +41,8 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
 						 RelOptInfo *outerrel,
 						 RelOptInfo *innerrel,
 						 List *restrictlist,
-						 JoinType jointype);
+						 JoinType jointype,
+						 bool *have_nonmergeable_clause);
 
 
 /*
@@ -77,12 +78,13 @@ add_paths_to_joinrel(PlannerInfo *root,
 					 List *restrictlist)
 {
 	List	   *mergeclause_list = NIL;
+	bool		have_nonmergeable_clause = false;
 
 	/*
 	 * Find potential mergejoin clauses.  We can skip this if we are not
-	 * interested in doing a mergejoin.  However, mergejoin is currently our
-	 * only way of implementing full outer joins, so override mergejoin
-	 * disable if it's a full join.
+	 * interested in doing a mergejoin.  However, mergejoin may be our only
+	 * way of implementing a full outer join, so override enable_mergejoin if
+	 * it's a full join.
 	 */
 	if (enable_mergejoin || jointype == JOIN_FULL)
 		mergeclause_list = select_mergejoin_clauses(root,
@@ -90,22 +92,27 @@ add_paths_to_joinrel(PlannerInfo *root,
 													outerrel,
 													innerrel,
 													restrictlist,
-													jointype);
+													jointype,
+													&have_nonmergeable_clause);
 
 	/*
 	 * 1. Consider mergejoin paths where both relations must be explicitly
-	 * sorted.
+	 * sorted.  Skip this if we can't mergejoin.
 	 */
-	sort_inner_and_outer(root, joinrel, outerrel, innerrel,
-						 restrictlist, mergeclause_list, jointype, sjinfo);
+	if (!have_nonmergeable_clause)
+		sort_inner_and_outer(root, joinrel, outerrel, innerrel,
+							 restrictlist, mergeclause_list, jointype, sjinfo);
 
 	/*
 	 * 2. Consider paths where the outer relation need not be explicitly
 	 * sorted. This includes both nestloops and mergejoins where the outer
-	 * path is already ordered.
+	 * path is already ordered.  Again, skip this if we can't mergejoin.
+	 * (That's okay because we know that nestloop can't handle right/full
+	 * joins at all, so it wouldn't work in those cases either.)
 	 */
-	match_unsorted_outer(root, joinrel, outerrel, innerrel,
-						 restrictlist, mergeclause_list, jointype, sjinfo);
+	if (!have_nonmergeable_clause)
+		match_unsorted_outer(root, joinrel, outerrel, innerrel,
+							 restrictlist, mergeclause_list, jointype, sjinfo);
 
 #ifdef NOT_USED
 
@@ -120,15 +127,17 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 * those made by match_unsorted_outer when add_paths_to_joinrel() is
 	 * invoked with the two rels given in the other order.
 	 */
-	match_unsorted_inner(root, joinrel, outerrel, innerrel,
-						 restrictlist, mergeclause_list, jointype, sjinfo);
+	if (!have_nonmergeable_clause)
+		match_unsorted_inner(root, joinrel, outerrel, innerrel,
+							 restrictlist, mergeclause_list, jointype, sjinfo);
 #endif
 
 	/*
 	 * 4. Consider paths where both outer and inner relations must be hashed
-	 * before being joined.
+	 * before being joined.  As above, disregard enable_hashjoin for full
+	 * joins, because there may be no other alternative.
 	 */
-	if (enable_hashjoin)
+	if (enable_hashjoin || jointype == JOIN_FULL)
 		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
 							 restrictlist, jointype, sjinfo);
 }
@@ -189,38 +198,12 @@ sort_inner_and_outer(PlannerInfo *root,
 					 JoinType jointype,
 					 SpecialJoinInfo *sjinfo)
 {
-	bool		useallclauses;
 	Path	   *outer_path;
 	Path	   *inner_path;
 	List	   *all_pathkeys;
 	ListCell   *l;
 
 	/*
-	 * If we are doing a right or full join, we must use *all* the
-	 * mergeclauses as join clauses, else we will not have a valid plan.
-	 */
-	switch (jointype)
-	{
-		case JOIN_INNER:
-		case JOIN_LEFT:
-		case JOIN_SEMI:
-		case JOIN_ANTI:
-		case JOIN_UNIQUE_OUTER:
-		case JOIN_UNIQUE_INNER:
-			useallclauses = false;
-			break;
-		case JOIN_RIGHT:
-		case JOIN_FULL:
-			useallclauses = true;
-			break;
-		default:
-			elog(ERROR, "unrecognized join type: %d",
-				 (int) jointype);
-			useallclauses = false;		/* keep compiler quiet */
-			break;
-	}
-
-	/*
 	 * We only consider the cheapest-total-cost input paths, since we are
 	 * assuming here that a sort is required.  We will consider
 	 * cheapest-startup-cost input paths later, and only if they don't need a
@@ -390,9 +373,9 @@ match_unsorted_outer(PlannerInfo *root,
 
 	/*
 	 * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
-	 * are doing a right or full join, we must use *all* the mergeclauses as
-	 * join clauses, else we will not have a valid plan.  (Although these two
-	 * flags are currently inverses, keep them separate for clarity and
+	 * are doing a right or full mergejoin, we must use *all* the mergeclauses
+	 * as join clauses, else we will not have a valid plan.  (Although these
+	 * two flags are currently inverses, keep them separate for clarity and
 	 * possible future changes.)
 	 */
 	switch (jointype)
@@ -574,8 +557,8 @@ match_unsorted_outer(PlannerInfo *root,
 		 * Special corner case: for "x FULL JOIN y ON true", there will be no
 		 * join clauses at all.  Ordinarily we'd generate a clauseless
 		 * nestloop path, but since mergejoin is our only join type that
-		 * supports FULL JOIN, it's necessary to generate a clauseless
-		 * mergejoin path instead.
+		 * supports FULL JOIN without any join clauses, it's necessary to
+		 * generate a clauseless mergejoin path instead.
 		 */
 		if (mergeclauses == NIL)
 		{
@@ -781,30 +764,11 @@ hash_inner_and_outer(PlannerInfo *root,
 					 JoinType jointype,
 					 SpecialJoinInfo *sjinfo)
 {
-	bool		isouterjoin;
+	bool		isouterjoin = IS_OUTER_JOIN(jointype);
 	List	   *hashclauses;
 	ListCell   *l;
 
 	/*
-	 * Hashjoin only supports inner, left, semi, and anti joins.
-	 */
-	switch (jointype)
-	{
-		case JOIN_INNER:
-		case JOIN_SEMI:
-		case JOIN_UNIQUE_OUTER:
-		case JOIN_UNIQUE_INNER:
-			isouterjoin = false;
-			break;
-		case JOIN_LEFT:
-		case JOIN_ANTI:
-			isouterjoin = true;
-			break;
-		default:
-			return;
-	}
-
-	/*
 	 * We need to build only one hashpath for any given pair of outer and
 	 * inner relations; all of the hashable clauses will be used as keys.
 	 *
@@ -963,6 +927,11 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
  *	  Select mergejoin clauses that are usable for a particular join.
  *	  Returns a list of RestrictInfo nodes for those clauses.
  *
+ * *have_nonmergeable_clause is set TRUE if this is a right/full join and
+ * there are nonmergejoinable join clauses.  The executor's mergejoin
+ * machinery cannot handle such cases, so we have to avoid generating a
+ * mergejoin plan.
+ *
  * We also mark each selected RestrictInfo to show which side is currently
  * being considered as outer.  These are transient markings that are only
  * good for the duration of the current add_paths_to_joinrel() call!
@@ -977,13 +946,15 @@ select_mergejoin_clauses(PlannerInfo *root,
 						 RelOptInfo *outerrel,
 						 RelOptInfo *innerrel,
 						 List *restrictlist,
-						 JoinType jointype)
+						 JoinType jointype,
+						 bool *have_nonmergeable_clause)
 {
 	List	   *result_list = NIL;
 	bool		isouterjoin = IS_OUTER_JOIN(jointype);
-	bool		have_nonmergeable_joinclause = false;
 	ListCell   *l;
 
+	*have_nonmergeable_clause = false;
+
 	foreach(l, restrictlist)
 	{
 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
@@ -991,7 +962,7 @@ select_mergejoin_clauses(PlannerInfo *root,
 		/*
 		 * If processing an outer join, only use its own join clauses in the
 		 * merge.  For inner joins we can use pushed-down clauses too. (Note:
-		 * we don't set have_nonmergeable_joinclause here because pushed-down
+		 * we don't set have_nonmergeable_clause here because pushed-down
 		 * clauses will become otherquals not joinquals.)
 		 */
 		if (isouterjoin && restrictinfo->is_pushed_down)
@@ -1008,7 +979,7 @@ select_mergejoin_clauses(PlannerInfo *root,
 			 * FALSE.)
 			 */
 			if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
-				have_nonmergeable_joinclause = true;
+				*have_nonmergeable_clause = true;
 			continue;			/* not mergejoinable */
 		}
 
@@ -1017,7 +988,7 @@ select_mergejoin_clauses(PlannerInfo *root,
 		 */
 		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
 		{
-			have_nonmergeable_joinclause = true;
+			*have_nonmergeable_clause = true;
 			continue;			/* no good for these input relations */
 		}
 
@@ -1046,7 +1017,7 @@ select_mergejoin_clauses(PlannerInfo *root,
 		if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
 			EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
 		{
-			have_nonmergeable_joinclause = true;
+			*have_nonmergeable_clause = true;
 			continue;			/* can't handle redundant eclasses */
 		}
 
@@ -1054,27 +1025,19 @@ select_mergejoin_clauses(PlannerInfo *root,
 	}
 
 	/*
-	 * If it is a right/full join then *all* the explicit join clauses must be
-	 * mergejoinable, else the executor will fail. If we are asked for a right
-	 * join then just return NIL to indicate no mergejoin is possible (we can
-	 * handle it as a left join instead). If we are asked for a full join then
-	 * emit an error, because there is no fallback.
+	 * If it is not a right/full join then we don't need to insist on all the
+	 * joinclauses being mergejoinable, so reset the flag.  This simplifies
+	 * the logic in add_paths_to_joinrel.
 	 */
-	if (have_nonmergeable_joinclause)
+	switch (jointype)
 	{
-		switch (jointype)
-		{
-			case JOIN_RIGHT:
-				return NIL;		/* not mergejoinable */
-			case JOIN_FULL:
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
-				break;
-			default:
-				/* otherwise, it's OK to have nonmergeable join quals */
-				break;
-		}
+		case JOIN_RIGHT:
+		case JOIN_FULL:
+			break;
+		default:
+			/* otherwise, it's OK to have nonmergeable join quals */
+			*have_nonmergeable_clause = false;
+			break;
 	}
 
 	return result_list;
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 197c49d774c..605a32d52d6 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -658,6 +658,18 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 			add_paths_to_joinrel(root, joinrel, rel2, rel1,
 								 JOIN_FULL, sjinfo,
 								 restrictlist);
+
+			/*
+			 * If there are join quals that aren't mergeable or hashable, we
+			 * may not be able to build any valid plan.  Complain here so that
+			 * we can give a somewhat-useful error message.  (Since we have no
+			 * flexibility of planning for a full join, there's no chance of
+			 * succeeding later with another pair of input rels.)
+			 */
+			if (joinrel->pathlist == NIL)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
 			break;
 		case JOIN_SEMI:
 
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 6efb6dc4bce..c74a91f8a7f 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -1328,10 +1328,9 @@ distribute_restrictinfo_to_rels(PlannerInfo *root,
 
 			/*
 			 * Check for hashjoinable operators.  (We don't bother setting the
-			 * hashjoin info if we're not going to need it.)
+			 * hashjoin info except in true join clauses.)
 			 */
-			if (enable_hashjoin)
-				check_hashjoinable(restrictinfo);
+			check_hashjoinable(restrictinfo);
 
 			/*
 			 * Add clause to the join lists of all the relevant relations.
@@ -1458,10 +1457,9 @@ build_implied_join_equality(Oid opno,
 									 qualscope, /* required_relids */
 									 NULL);		/* nullable_relids */
 
-	/* Set mergejoinability info always, and hashjoinability if enabled */
+	/* Set mergejoinability/hashjoinability flags */
 	check_mergejoinable(restrictinfo);
-	if (enable_hashjoin)
-		check_hashjoinable(restrictinfo);
+	check_hashjoinable(restrictinfo);
 
 	return restrictinfo;
 }
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index adf1321052c..f540966d684 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -196,6 +196,14 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
 #define HEAP2_XACT_MASK			0xC000	/* visibility-related bits */
 
 /*
+ * HEAP_TUPLE_HAS_MATCH is a temporary flag used during hash joins.  It is
+ * only used in tuples that are in the hash table, and those don't need
+ * any visibility information, so we can overlay it on a visibility flag
+ * instead of using up a dedicated bit.
+ */
+#define HEAP_TUPLE_HAS_MATCH	HEAP_ONLY_TUPLE	/* tuple has a join match */
+
+/*
  * HeapTupleHeader accessor macros
  *
  * Note: beware of multiple evaluations of "tup" argument.	But the Set
@@ -343,6 +351,21 @@ do { \
   (tup)->t_infomask2 &= ~HEAP_ONLY_TUPLE \
 )
 
+#define HeapTupleHeaderHasMatch(tup) \
+( \
+  (tup)->t_infomask2 & HEAP_TUPLE_HAS_MATCH \
+)
+
+#define HeapTupleHeaderSetMatch(tup) \
+( \
+  (tup)->t_infomask2 |= HEAP_TUPLE_HAS_MATCH \
+)
+
+#define HeapTupleHeaderClearMatch(tup) \
+( \
+  (tup)->t_infomask2 &= ~HEAP_TUPLE_HAS_MATCH \
+)
+
 #define HeapTupleHeaderGetNatts(tup) \
 	((tup)->t_infomask2 & HEAP_NATTS_MASK)
 
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 1b93f58af67..c4aa0d37268 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -113,6 +113,8 @@ typedef struct HashJoinTableData
 	struct HashJoinTupleData **buckets;
 	/* buckets array is per-batch storage, as are all the tuples */
 
+	bool		keepNulls;		/* true to store unmatchable NULL tuples */
+
 	bool		skewEnabled;	/* are we using skew optimization? */
 	HashSkewBucket **skewBucket;	/* hashtable of skew buckets */
 	int			skewBucketLen;	/* size of skewBucket array (a power of 2!) */
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index 444a0137a2d..8517d1cf284 100644
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -22,7 +22,8 @@ extern Node *MultiExecHash(HashState *node);
 extern void ExecEndHash(HashState *node);
 extern void ExecReScanHash(HashState *node);
 
-extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators);
+extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators,
+										 bool keepNulls);
 extern void ExecHashTableDestroy(HashJoinTable hashtable);
 extern void ExecHashTableInsert(HashJoinTable hashtable,
 					TupleTableSlot *slot,
@@ -37,9 +38,12 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
 						  uint32 hashvalue,
 						  int *bucketno,
 						  int *batchno);
-extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate,
-				   ExprContext *econtext);
+extern bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext);
+extern void ExecPrepHashTableForUnmatched(HashJoinState *hjstate);
+extern bool ExecScanHashTableForUnmatched(HashJoinState *hjstate,
+										  ExprContext *econtext);
 extern void ExecHashTableReset(HashJoinTable hashtable);
+extern void ExecHashTableResetMatchFlags(HashJoinTable hashtable);
 extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 						int *numbuckets,
 						int *numbatches,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index d669c24b981..6af4bb8d76c 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1468,6 +1468,10 @@ typedef struct MergeJoinState
 /* ----------------
  *	 HashJoinState information
  *
+ *		hashclauses				original form of the hashjoin condition
+ *		hj_OuterHashKeys		the outer hash keys in the hashjoin condition
+ *		hj_InnerHashKeys		the inner hash keys in the hashjoin condition
+ *		hj_HashOperators		the join operators in the hashjoin condition
  *		hj_HashTable			hash table for the hashjoin
  *								(NULL if table not built yet)
  *		hj_CurHashValue			hash value for current outer tuple
@@ -1477,14 +1481,12 @@ typedef struct MergeJoinState
  *								tuple, or NULL if starting search
  *								(hj_CurXXX variables are undefined if
  *								OuterTupleSlot is empty!)
- *		hj_OuterHashKeys		the outer hash keys in the hashjoin condition
- *		hj_InnerHashKeys		the inner hash keys in the hashjoin condition
- *		hj_HashOperators		the join operators in the hashjoin condition
  *		hj_OuterTupleSlot		tuple slot for outer tuples
- *		hj_HashTupleSlot		tuple slot for hashed tuples
- *		hj_NullInnerTupleSlot	prepared null tuple for left outer joins
+ *		hj_HashTupleSlot		tuple slot for inner (hashed) tuples
+ *		hj_NullOuterTupleSlot	prepared null tuple for right/full outer joins
+ *		hj_NullInnerTupleSlot	prepared null tuple for left/full outer joins
  *		hj_FirstOuterTupleSlot	first tuple retrieved from outer plan
- *		hj_NeedNewOuter			true if need new outer tuple on next call
+ *		hj_JoinState			current state of ExecHashJoin state machine
  *		hj_MatchedOuter			true if found a join match for current outer
  *		hj_OuterNotEmpty		true if outer relation known not empty
  * ----------------
@@ -1498,19 +1500,20 @@ typedef struct HashJoinState
 {
 	JoinState	js;				/* its first field is NodeTag */
 	List	   *hashclauses;	/* list of ExprState nodes */
+	List	   *hj_OuterHashKeys;		/* list of ExprState nodes */
+	List	   *hj_InnerHashKeys;		/* list of ExprState nodes */
+	List	   *hj_HashOperators;		/* list of operator OIDs */
 	HashJoinTable hj_HashTable;
 	uint32		hj_CurHashValue;
 	int			hj_CurBucketNo;
 	int			hj_CurSkewBucketNo;
 	HashJoinTuple hj_CurTuple;
-	List	   *hj_OuterHashKeys;		/* list of ExprState nodes */
-	List	   *hj_InnerHashKeys;		/* list of ExprState nodes */
-	List	   *hj_HashOperators;		/* list of operator OIDs */
 	TupleTableSlot *hj_OuterTupleSlot;
 	TupleTableSlot *hj_HashTupleSlot;
+	TupleTableSlot *hj_NullOuterTupleSlot;
 	TupleTableSlot *hj_NullInnerTupleSlot;
 	TupleTableSlot *hj_FirstOuterTupleSlot;
-	bool		hj_NeedNewOuter;
+	int			hj_JoinState;
 	bool		hj_MatchedOuter;
 	bool		hj_OuterNotEmpty;
 } HashJoinState;