1 files changed, 27 insertions, 13 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 23e5526a8e1..81b0bc37d27 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -142,6 +142,7 @@
 #include "utils/pg_locale.h"
 #include "utils/rel.h"
 #include "utils/selfuncs.h"
+#include "utils/snapmgr.h"
 #include "utils/spccache.h"
 #include "utils/syscache.h"
 #include "utils/timestamp.h"
@@ -5328,7 +5329,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
 			HeapTuple	tup;
 			Datum		values[INDEX_MAX_KEYS];
 			bool		isnull[INDEX_MAX_KEYS];
-			SnapshotData SnapshotDirty;
+			SnapshotData SnapshotNonVacuumable;
 
 			estate = CreateExecutorState();
 			econtext = GetPerTupleExprContext(estate);
@@ -5351,7 +5352,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
 			slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
 			econtext->ecxt_scantuple = slot;
 			get_typlenbyval(vardata->atttype, &typLen, &typByVal);
-			InitDirtySnapshot(SnapshotDirty);
+			InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);
 
 			/* set up an IS NOT NULL scan key so that we ignore nulls */
 			ScanKeyEntryInitialize(&scankeys[0],
@@ -5373,17 +5374,29 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
 				 * active snapshot, which is the best approximation we've got
 				 * to what the query will see when executed.  But that won't
 				 * be exact if a new snap is taken before running the query,
-				 * and it can be very expensive if a lot of uncommitted rows
-				 * exist at the end of the index (because we'll laboriously
-				 * fetch each one and reject it).  What seems like a good
-				 * compromise is to use SnapshotDirty.  That will accept
-				 * uncommitted rows, and thus avoid fetching multiple heap
-				 * tuples in this scenario.  On the other hand, it will reject
-				 * known-dead rows, and thus not give a bogus answer when the
-				 * extreme value has been deleted; that case motivates not
-				 * using SnapshotAny here.
+				 * and it can be very expensive if a lot of recently-dead or
+				 * uncommitted rows exist at the beginning or end of the index
+				 * (because we'll laboriously fetch each one and reject it).
+				 * Instead, we use SnapshotNonVacuumable.  That will accept
+				 * recently-dead and uncommitted rows as well as normal
+				 * visible rows.  On the other hand, it will reject known-dead
+				 * rows, and thus not give a bogus answer when the extreme
+				 * value has been deleted (unless the deletion was quite
+				 * recent); that case motivates not using SnapshotAny here.
+				 *
+				 * A crucial point here is that SnapshotNonVacuumable, with
+				 * RecentGlobalXmin as horizon, yields the inverse of the
+				 * condition that the indexscan will use to decide that index
+				 * entries are killable (see heap_hot_search_buffer()).
+				 * Therefore, if the snapshot rejects a tuple and we have to
+				 * continue scanning past it, we know that the indexscan will
+				 * mark that index entry killed.  That means that the next
+				 * get_actual_variable_range() call will not have to visit
+				 * that heap entry.  In this way we avoid repetitive work when
+				 * this function is used a lot during planning.
 				 */
-				index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
+				index_scan = index_beginscan(heapRel, indexRel,
+											 &SnapshotNonVacuumable,
 											 1, 0);
 				index_rescan(index_scan, scankeys, 1, NULL, 0);
 
@@ -5415,7 +5428,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
 			/* If max is requested, and we didn't find the index is empty */
 			if (max && have_data)
 			{
-				index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
+				index_scan = index_beginscan(heapRel, indexRel,
+											 &SnapshotNonVacuumable,
 											 1, 0);
 				index_rescan(index_scan, scankeys, 1, NULL, 0);