aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/heap/heapam.c147
-rw-r--r--src/backend/access/heap/heapam_handler.c3
-rw-r--r--src/backend/commands/explain.c23
-rw-r--r--src/backend/executor/Makefile1
-rw-r--r--src/backend/executor/execAmi.c6
-rw-r--r--src/backend/executor/execCurrent.c1
-rw-r--r--src/backend/executor/execProcnode.c10
-rw-r--r--src/backend/executor/nodeTidrangescan.c413
-rw-r--r--src/backend/nodes/copyfuncs.c24
-rw-r--r--src/backend/nodes/outfuncs.c14
-rw-r--r--src/backend/optimizer/README1
-rw-r--r--src/backend/optimizer/path/costsize.c95
-rw-r--r--src/backend/optimizer/path/tidpath.c119
-rw-r--r--src/backend/optimizer/plan/createplan.c98
-rw-r--r--src/backend/optimizer/plan/setrefs.c16
-rw-r--r--src/backend/optimizer/plan/subselect.c6
-rw-r--r--src/backend/optimizer/util/pathnode.c29
-rw-r--r--src/backend/optimizer/util/plancat.c6
-rw-r--r--src/backend/optimizer/util/relnode.c3
-rw-r--r--src/backend/storage/page/itemptr.c59
-rw-r--r--src/include/access/heapam.h6
-rw-r--r--src/include/access/relscan.h4
-rw-r--r--src/include/access/tableam.h97
-rw-r--r--src/include/catalog/pg_operator.dat6
-rw-r--r--src/include/executor/nodeTidrangescan.h24
-rw-r--r--src/include/nodes/execnodes.h18
-rw-r--r--src/include/nodes/nodes.h3
-rw-r--r--src/include/nodes/pathnodes.h18
-rw-r--r--src/include/nodes/plannodes.h13
-rw-r--r--src/include/optimizer/cost.h3
-rw-r--r--src/include/optimizer/pathnode.h4
-rw-r--r--src/include/storage/itemptr.h2
-rw-r--r--src/test/regress/expected/tidrangescan.out300
-rw-r--r--src/test/regress/parallel_schedule2
-rw-r--r--src/test/regress/serial_schedule1
-rw-r--r--src/test/regress/sql/tidrangescan.sql101
36 files changed, 1654 insertions, 22 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9c1d590dc71..3b435c107d0 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1391,6 +1391,153 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s
return true;
}
+void
+heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
+ ItemPointer maxtid)
+{
+ HeapScanDesc scan = (HeapScanDesc) sscan;
+ BlockNumber startBlk;
+ BlockNumber numBlks;
+ ItemPointerData highestItem;
+ ItemPointerData lowestItem;
+
+ /*
+ * For relations without any pages, we can simply leave the TID range
+ * unset. There will be no tuples to scan, therefore no tuples outside
+ * the given TID range.
+ */
+ if (scan->rs_nblocks == 0)
+ return;
+
+ /*
+ * Set up some ItemPointers which point to the first and last possible
+ * tuples in the heap.
+ */
+ ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
+ ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
+
+ /*
+ * If the given maximum TID is below the highest possible TID in the
+ * relation, then restrict the range to that, otherwise we scan to the end
+ * of the relation.
+ */
+ if (ItemPointerCompare(maxtid, &highestItem) < 0)
+ ItemPointerCopy(maxtid, &highestItem);
+
+ /*
+ * If the given minimum TID is above the lowest possible TID in the
+ * relation, then restrict the range to only scan for TIDs above that.
+ */
+ if (ItemPointerCompare(mintid, &lowestItem) > 0)
+ ItemPointerCopy(mintid, &lowestItem);
+
+ /*
+ * Check for an empty range and protect from would be negative results
+ * from the numBlks calculation below.
+ */
+ if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
+ {
+ /* Set an empty range of blocks to scan */
+ heap_setscanlimits(sscan, 0, 0);
+ return;
+ }
+
+ /*
+ * Calculate the first block and the number of blocks we must scan. We
+ * could be more aggressive here and perform some more validation to try
+ * and further narrow the scope of blocks to scan by checking if the
+ * lowerItem has an offset above MaxOffsetNumber. In this case, we could
+ * advance startBlk by one. Likewise, if highestItem has an offset of 0
+ * we could scan one fewer blocks. However, such an optimization does not
+ * seem worth troubling over, currently.
+ */
+ startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
+
+ numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
+ ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
+
+ /* Set the start block and number of blocks to scan */
+ heap_setscanlimits(sscan, startBlk, numBlks);
+
+ /* Finally, set the TID range in sscan */
+ ItemPointerCopy(&lowestItem, &sscan->rs_mintid);
+ ItemPointerCopy(&highestItem, &sscan->rs_maxtid);
+}
+
+bool
+heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
+ TupleTableSlot *slot)
+{
+ HeapScanDesc scan = (HeapScanDesc) sscan;
+ ItemPointer mintid = &sscan->rs_mintid;
+ ItemPointer maxtid = &sscan->rs_maxtid;
+
+ /* Note: no locking manipulations needed */
+ for (;;)
+ {
+ if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
+ heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
+ else
+ heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
+
+ if (scan->rs_ctup.t_data == NULL)
+ {
+ ExecClearTuple(slot);
+ return false;
+ }
+
+ /*
+ * heap_set_tidrange will have used heap_setscanlimits to limit the
+ * range of pages we scan to only ones that can contain the TID range
+ * we're scanning for. Here we must filter out any tuples from these
+ * pages that are outwith that range.
+ */
+ if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
+ {
+ ExecClearTuple(slot);
+
+ /*
+ * When scanning backwards, the TIDs will be in descending order.
+ * Future tuples in this direction will be lower still, so we can
+ * just return false to indicate there will be no more tuples.
+ */
+ if (ScanDirectionIsBackward(direction))
+ return false;
+
+ continue;
+ }
+
+ /*
+ * Likewise for the final page, we must filter out TIDs greater than
+ * maxtid.
+ */
+ if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
+ {
+ ExecClearTuple(slot);
+
+ /*
+ * When scanning forward, the TIDs will be in ascending order.
+ * Future tuples in this direction will be higher still, so we can
+ * just return false to indicate there will be no more tuples.
+ */
+ if (ScanDirectionIsForward(direction))
+ return false;
+ continue;
+ }
+
+ break;
+ }
+
+ /*
+ * if we get here it means we have a new current scan tuple, so point to
+ * the proper return buffer and return the tuple.
+ */
+ pgstat_count_heap_getnext(scan->rs_base.rs_rd);
+
+ ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
+ return true;
+}
+
/*
* heap_fetch - retrieve tuple with given tid
*
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 4a70e20a143..bd5faf0c1fb 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2542,6 +2542,9 @@ static const TableAmRoutine heapam_methods = {
.scan_rescan = heap_rescan,
.scan_getnextslot = heap_getnextslot,
+ .scan_set_tidrange = heap_set_tidrange,
+ .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
+
.parallelscan_estimate = table_block_parallelscan_estimate,
.parallelscan_initialize = table_block_parallelscan_initialize,
.parallelscan_reinitialize = table_block_parallelscan_reinitialize,
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index f80e379973a..afc45429ba4 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1057,6 +1057,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_SubqueryScan:
case T_FunctionScan:
case T_TableFuncScan:
@@ -1223,6 +1224,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_TidScan:
pname = sname = "Tid Scan";
break;
+ case T_TidRangeScan:
+ pname = sname = "Tid Range Scan";
+ break;
case T_SubqueryScan:
pname = sname = "Subquery Scan";
break;
@@ -1417,6 +1421,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_SampleScan:
case T_BitmapHeapScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_SubqueryScan:
case T_FunctionScan:
case T_TableFuncScan:
@@ -1871,6 +1876,23 @@ ExplainNode(PlanState *planstate, List *ancestors,
planstate, es);
}
break;
+ case T_TidRangeScan:
+ {
+ /*
+ * The tidrangequals list has AND semantics, so be sure to
+ * show it as an AND condition.
+ */
+ List *tidquals = ((TidRangeScan *) plan)->tidrangequals;
+
+ if (list_length(tidquals) > 1)
+ tidquals = list_make1(make_andclause(tidquals));
+ show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es);
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ }
+ break;
case T_ForeignScan:
show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
if (plan->qual)
@@ -3558,6 +3580,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_ForeignScan:
case T_CustomScan:
case T_ModifyTable:
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index f990c6473a3..74ac59faa13 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -67,6 +67,7 @@ OBJS = \
nodeSubplan.o \
nodeSubqueryscan.o \
nodeTableFuncscan.o \
+ nodeTidrangescan.o \
nodeTidscan.o \
nodeUnique.o \
nodeValuesscan.o \
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 23bdb53cd10..4543ac79edf 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -51,6 +51,7 @@
#include "executor/nodeSubplan.h"
#include "executor/nodeSubqueryscan.h"
#include "executor/nodeTableFuncscan.h"
+#include "executor/nodeTidrangescan.h"
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
#include "executor/nodeValuesscan.h"
@@ -197,6 +198,10 @@ ExecReScan(PlanState *node)
ExecReScanTidScan((TidScanState *) node);
break;
+ case T_TidRangeScanState:
+ ExecReScanTidRangeScan((TidRangeScanState *) node);
+ break;
+
case T_SubqueryScanState:
ExecReScanSubqueryScan((SubqueryScanState *) node);
break;
@@ -562,6 +567,7 @@ ExecSupportsBackwardScan(Plan *node)
case T_SeqScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_FunctionScan:
case T_ValuesScan:
case T_CteScan:
diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c
index 33221a4d6ce..4f430fb1603 100644
--- a/src/backend/executor/execCurrent.c
+++ b/src/backend/executor/execCurrent.c
@@ -336,6 +336,7 @@ search_plan_tree(PlanState *node, Oid table_oid,
case T_IndexOnlyScanState:
case T_BitmapHeapScanState:
case T_TidScanState:
+ case T_TidRangeScanState:
case T_ForeignScanState:
case T_CustomScanState:
{
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 414df50a054..29766d8196f 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -109,6 +109,7 @@
#include "executor/nodeSubplan.h"
#include "executor/nodeSubqueryscan.h"
#include "executor/nodeTableFuncscan.h"
+#include "executor/nodeTidrangescan.h"
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
#include "executor/nodeValuesscan.h"
@@ -238,6 +239,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
estate, eflags);
break;
+ case T_TidRangeScan:
+ result = (PlanState *) ExecInitTidRangeScan((TidRangeScan *) node,
+ estate, eflags);
+ break;
+
case T_SubqueryScan:
result = (PlanState *) ExecInitSubqueryScan((SubqueryScan *) node,
estate, eflags);
@@ -637,6 +643,10 @@ ExecEndNode(PlanState *node)
ExecEndTidScan((TidScanState *) node);
break;
+ case T_TidRangeScanState:
+ ExecEndTidRangeScan((TidRangeScanState *) node);
+ break;
+
case T_SubqueryScanState:
ExecEndSubqueryScan((SubqueryScanState *) node);
break;
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
new file mode 100644
index 00000000000..2b0d205d7dd
--- /dev/null
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -0,0 +1,413 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeTidrangescan.c
+ * Routines to support TID range scans of relations
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/executor/nodeTidrangescan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "catalog/pg_operator.h"
+#include "executor/execdebug.h"
+#include "executor/nodeTidrangescan.h"
+#include "nodes/nodeFuncs.h"
+#include "storage/bufmgr.h"
+#include "utils/rel.h"
+
+
+#define IsCTIDVar(node) \
+ ((node) != NULL && \
+ IsA((node), Var) && \
+ ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
+ ((Var *) (node))->varlevelsup == 0)
+
+typedef enum
+{
+ TIDEXPR_UPPER_BOUND,
+ TIDEXPR_LOWER_BOUND
+} TidExprType;
+
+/* Upper or lower range bound for scan */
+typedef struct TidOpExpr
+{
+ TidExprType exprtype; /* type of op; lower or upper */
+ ExprState *exprstate; /* ExprState for a TID-yielding subexpr */
+ bool inclusive; /* whether op is inclusive */
+} TidOpExpr;
+
+/*
+ * For the given 'expr', build and return an appropriate TidOpExpr taking into
+ * account the expr's operator and operand order.
+ */
+static TidOpExpr *
+MakeTidOpExpr(OpExpr *expr, TidRangeScanState *tidstate)
+{
+ Node *arg1 = get_leftop((Expr *) expr);
+ Node *arg2 = get_rightop((Expr *) expr);
+ ExprState *exprstate = NULL;
+ bool invert = false;
+ TidOpExpr *tidopexpr;
+
+ if (IsCTIDVar(arg1))
+ exprstate = ExecInitExpr((Expr *) arg2, &tidstate->ss.ps);
+ else if (IsCTIDVar(arg2))
+ {
+ exprstate = ExecInitExpr((Expr *) arg1, &tidstate->ss.ps);
+ invert = true;
+ }
+ else
+ elog(ERROR, "could not identify CTID variable");
+
+ tidopexpr = (TidOpExpr *) palloc(sizeof(TidOpExpr));
+ tidopexpr->inclusive = false; /* for now */
+
+ switch (expr->opno)
+ {
+ case TIDLessEqOperator:
+ tidopexpr->inclusive = true;
+ /* fall through */
+ case TIDLessOperator:
+ tidopexpr->exprtype = invert ? TIDEXPR_LOWER_BOUND : TIDEXPR_UPPER_BOUND;
+ break;
+ case TIDGreaterEqOperator:
+ tidopexpr->inclusive = true;
+ /* fall through */
+ case TIDGreaterOperator:
+ tidopexpr->exprtype = invert ? TIDEXPR_UPPER_BOUND : TIDEXPR_LOWER_BOUND;
+ break;
+ default:
+ elog(ERROR, "could not identify CTID operator");
+ }
+
+ tidopexpr->exprstate = exprstate;
+
+ return tidopexpr;
+}
+
+/*
+ * Extract the qual subexpressions that yield TIDs to search for,
+ * and compile them into ExprStates if they're ordinary expressions.
+ */
+static void
+TidExprListCreate(TidRangeScanState *tidrangestate)
+{
+ TidRangeScan *node = (TidRangeScan *) tidrangestate->ss.ps.plan;
+ List *tidexprs = NIL;
+ ListCell *l;
+
+ foreach(l, node->tidrangequals)
+ {
+ OpExpr *opexpr = lfirst(l);
+ TidOpExpr *tidopexpr;
+
+ if (!IsA(opexpr, OpExpr))
+ elog(ERROR, "could not identify CTID expression");
+
+ tidopexpr = MakeTidOpExpr(opexpr, tidrangestate);
+ tidexprs = lappend(tidexprs, tidopexpr);
+ }
+
+ tidrangestate->trss_tidexprs = tidexprs;
+}
+
+/* ----------------------------------------------------------------
+ * TidRangeEval
+ *
+ * Compute and set node's block and offset range to scan by evaluating
+ * the trss_tidexprs. Returns false if we detect the range cannot
+ * contain any tuples. Returns true if it's possible for the range to
+ * contain tuples.
+ * ----------------------------------------------------------------
+ */
+static bool
+TidRangeEval(TidRangeScanState *node)
+{
+ ExprContext *econtext = node->ss.ps.ps_ExprContext;
+ ItemPointerData lowerBound;
+ ItemPointerData upperBound;
+ ListCell *l;
+
+ /*
+ * Set the upper and lower bounds to the absolute limits of the range of
+ * the ItemPointer type. Below we'll try to narrow this range on either
+ * side by looking at the TidOpExprs.
+ */
+ ItemPointerSet(&lowerBound, 0, 0);
+ ItemPointerSet(&upperBound, InvalidBlockNumber, PG_UINT16_MAX);
+
+ foreach(l, node->trss_tidexprs)
+ {
+ TidOpExpr *tidopexpr = (TidOpExpr *) lfirst(l);
+ ItemPointer itemptr;
+ bool isNull;
+
+ /* Evaluate this bound. */
+ itemptr = (ItemPointer)
+ DatumGetPointer(ExecEvalExprSwitchContext(tidopexpr->exprstate,
+ econtext,
+ &isNull));
+
+ /* If the bound is NULL, *nothing* matches the qual. */
+ if (isNull)
+ return false;
+
+ if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND)
+ {
+ ItemPointerData lb;
+
+ ItemPointerCopy(itemptr, &lb);
+
+ /*
+ * Normalize non-inclusive ranges to become inclusive. The
+ * resulting ItemPointer here may not be a valid item pointer.
+ */
+ if (!tidopexpr->inclusive)
+ ItemPointerInc(&lb);
+
+ /* Check if we can narrow the range using this qual */
+ if (ItemPointerCompare(&lb, &lowerBound) > 0)
+ ItemPointerCopy(&lb, &lowerBound);
+ }
+
+ else if (tidopexpr->exprtype == TIDEXPR_UPPER_BOUND)
+ {
+ ItemPointerData ub;
+
+ ItemPointerCopy(itemptr, &ub);
+
+ /*
+ * Normalize non-inclusive ranges to become inclusive. The
+ * resulting ItemPointer here may not be a valid item pointer.
+ */
+ if (!tidopexpr->inclusive)
+ ItemPointerDec(&ub);
+
+ /* Check if we can narrow the range using this qual */
+ if (ItemPointerCompare(&ub, &upperBound) < 0)
+ ItemPointerCopy(&ub, &upperBound);
+ }
+ }
+
+ ItemPointerCopy(&lowerBound, &node->trss_mintid);
+ ItemPointerCopy(&upperBound, &node->trss_maxtid);
+
+ return true;
+}
+
+/* ----------------------------------------------------------------
+ * TidRangeNext
+ *
+ * Retrieve a tuple from the TidRangeScan node's currentRelation
+ * using the TIDs in the TidRangeScanState information.
+ *
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+TidRangeNext(TidRangeScanState *node)
+{
+ TableScanDesc scandesc;
+ EState *estate;
+ ScanDirection direction;
+ TupleTableSlot *slot;
+
+ /*
+ * extract necessary information from TID scan node
+ */
+ scandesc = node->ss.ss_currentScanDesc;
+ estate = node->ss.ps.state;
+ slot = node->ss.ss_ScanTupleSlot;
+ direction = estate->es_direction;
+
+ if (!node->trss_inScan)
+ {
+ /* First time through, compute TID range to scan */
+ if (!TidRangeEval(node))
+ return NULL;
+
+ if (scandesc == NULL)
+ {
+ scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
+ estate->es_snapshot,
+ &node->trss_mintid,
+ &node->trss_maxtid);
+ node->ss.ss_currentScanDesc = scandesc;
+ }
+ else
+ {
+ /* rescan with the updated TID range */
+ table_rescan_tidrange(scandesc, &node->trss_mintid,
+ &node->trss_maxtid);
+ }
+
+ node->trss_inScan = true;
+ }
+
+ /* Fetch the next tuple. */
+ if (!table_scan_getnextslot_tidrange(scandesc, direction, slot))
+ {
+ node->trss_inScan = false;
+ ExecClearTuple(slot);
+ }
+
+ return slot;
+}
+
+/*
+ * TidRangeRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot)
+{
+ return true;
+}
+
+/* ----------------------------------------------------------------
+ * ExecTidRangeScan(node)
+ *
+ * Scans the relation using tids and returns the next qualifying tuple.
+ * We call the ExecScan() routine and pass it the appropriate
+ * access method functions.
+ *
+ * Conditions:
+ * -- the "cursor" maintained by the AMI is positioned at the tuple
+ * returned previously.
+ *
+ * Initial States:
+ * -- the relation indicated is opened for TID range scanning.
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+ExecTidRangeScan(PlanState *pstate)
+{
+ TidRangeScanState *node = castNode(TidRangeScanState, pstate);
+
+ return ExecScan(&node->ss,
+ (ExecScanAccessMtd) TidRangeNext,
+ (ExecScanRecheckMtd) TidRangeRecheck);
+}
+
+/* ----------------------------------------------------------------
+ * ExecReScanTidRangeScan(node)
+ * ----------------------------------------------------------------
+ */
+void
+ExecReScanTidRangeScan(TidRangeScanState *node)
+{
+ /* mark scan as not in progress, and tid range list as not computed yet */
+ node->trss_inScan = false;
+
+ /*
+ * We must wait until TidRangeNext before calling table_rescan_tidrange.
+ */
+ ExecScanReScan(&node->ss);
+}
+
+/* ----------------------------------------------------------------
+ * ExecEndTidRangeScan
+ *
+ * Releases any storage allocated through C routines.
+ * Returns nothing.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndTidRangeScan(TidRangeScanState *node)
+{
+ TableScanDesc scan = node->ss.ss_currentScanDesc;
+
+ if (scan != NULL)
+ table_endscan(scan);
+
+ /*
+ * Free the exprcontext
+ */
+ ExecFreeExprContext(&node->ss.ps);
+
+ /*
+ * clear out tuple table slots
+ */
+ if (node->ss.ps.ps_ResultTupleSlot)
+ ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
+ ExecClearTuple(node->ss.ss_ScanTupleSlot);
+}
+
+/* ----------------------------------------------------------------
+ * ExecInitTidRangeScan
+ *
+ * Initializes the tid range scan's state information, creates
+ * scan keys, and opens the scan relation.
+ *
+ * Parameters:
+ * node: TidRangeScan node produced by the planner.
+ * estate: the execution state initialized in InitPlan.
+ * ----------------------------------------------------------------
+ */
+TidRangeScanState *
+ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
+{
+ TidRangeScanState *tidrangestate;
+ Relation currentRelation;
+
+ /*
+ * create state structure
+ */
+ tidrangestate = makeNode(TidRangeScanState);
+ tidrangestate->ss.ps.plan = (Plan *) node;
+ tidrangestate->ss.ps.state = estate;
+ tidrangestate->ss.ps.ExecProcNode = ExecTidRangeScan;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * create expression context for node
+ */
+ ExecAssignExprContext(estate, &tidrangestate->ss.ps);
+
+ /*
+ * mark scan as not in progress, and TID range as not computed yet
+ */
+ tidrangestate->trss_inScan = false;
+
+ /*
+ * open the scan relation
+ */
+ currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
+
+ tidrangestate->ss.ss_currentRelation = currentRelation;
+ tidrangestate->ss.ss_currentScanDesc = NULL; /* no table scan here */
+
+ /*
+ * get the scan type from the relation descriptor.
+ */
+ ExecInitScanTupleSlot(estate, &tidrangestate->ss,
+ RelationGetDescr(currentRelation),
+ table_slot_callbacks(currentRelation));
+
+ /*
+ * Initialize result type and projection.
+ */
+ ExecInitResultTypeTL(&tidrangestate->ss.ps);
+ ExecAssignScanProjectionInfo(&tidrangestate->ss);
+
+ /*
+ * initialize child expressions
+ */
+ tidrangestate->ss.ps.qual =
+ ExecInitQual(node->scan.plan.qual, (PlanState *) tidrangestate);
+
+ TidExprListCreate(tidrangestate);
+
+ /*
+ * all done.
+ */
+ return tidrangestate;
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 65bbc18ecba..aaba1ec2c4a 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -586,6 +586,27 @@ _copyTidScan(const TidScan *from)
}
/*
+ * _copyTidRangeScan
+ */
+static TidRangeScan *
+_copyTidRangeScan(const TidRangeScan *from)
+{
+ TidRangeScan *newnode = makeNode(TidRangeScan);
+
+ /*
+ * copy node superclass fields
+ */
+ CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+ /*
+ * copy remainder of node
+ */
+ COPY_NODE_FIELD(tidrangequals);
+
+ return newnode;
+}
+
+/*
* _copySubqueryScan
*/
static SubqueryScan *
@@ -4938,6 +4959,9 @@ copyObjectImpl(const void *from)
case T_TidScan:
retval = _copyTidScan(from);
break;
+ case T_TidRangeScan:
+ retval = _copyTidRangeScan(from);
+ break;
case T_SubqueryScan:
retval = _copySubqueryScan(from);
break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index f5dcedf6e89..8fc432bfe17 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -609,6 +609,16 @@ _outTidScan(StringInfo str, const TidScan *node)
}
static void
+_outTidRangeScan(StringInfo str, const TidRangeScan *node)
+{
+ WRITE_NODE_TYPE("TIDRANGESCAN");
+
+ _outScanInfo(str, (const Scan *) node);
+
+ WRITE_NODE_FIELD(tidrangequals);
+}
+
+static void
_outSubqueryScan(StringInfo str, const SubqueryScan *node)
{
WRITE_NODE_TYPE("SUBQUERYSCAN");
@@ -2314,6 +2324,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(subroot);
WRITE_NODE_FIELD(subplan_params);
WRITE_INT_FIELD(rel_parallel_workers);
+ WRITE_UINT_FIELD(amflags);
WRITE_OID_FIELD(serverid);
WRITE_OID_FIELD(userid);
WRITE_BOOL_FIELD(useridiscurrent);
@@ -3810,6 +3821,9 @@ outNode(StringInfo str, const void *obj)
case T_TidScan:
_outTidScan(str, obj);
break;
+ case T_TidRangeScan:
+ _outTidRangeScan(str, obj);
+ break;
case T_SubqueryScan:
_outSubqueryScan(str, obj);
break;
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index efb52858c88..4a6c3481623 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -374,6 +374,7 @@ RelOptInfo - a relation or joined relations
IndexPath - index scan
BitmapHeapPath - top of a bitmapped index scan
TidPath - scan by CTID
+ TidRangePath - scan a contiguous range of CTIDs
SubqueryScanPath - scan a subquery-in-FROM
ForeignPath - scan a foreign table, foreign join or foreign upper-relation
CustomPath - for custom scan providers
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index aab06c7d213..a25b674a192 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1284,6 +1284,101 @@ cost_tidscan(Path *path, PlannerInfo *root,
}
/*
+ * cost_tidrangescan
+ * Determines and sets the costs of scanning a relation using a range of
+ * TIDs for 'path'
+ *
+ * 'baserel' is the relation to be scanned
+ * 'tidrangequals' is the list of TID-checkable range quals
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_tidrangescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, List *tidrangequals,
+ ParamPathInfo *param_info)
+{
+ Selectivity selectivity;
+ double pages;
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ QualCost tid_qual_cost;
+ double ntuples;
+ double nseqpages;
+ double spc_random_page_cost;
+ double spc_seq_page_cost;
+
+ /* Should only be applied to base relations */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* Count how many tuples and pages we expect to scan */
+ selectivity = clauselist_selectivity(root, tidrangequals, baserel->relid,
+ JOIN_INNER, NULL);
+ pages = ceil(selectivity * baserel->pages);
+
+ if (pages <= 0.0)
+ pages = 1.0;
+
+ /*
+ * The first page in a range requires a random seek, but each subsequent
+ * page is just a normal sequential page read. NOTE: it's desirable for
+ * TID Range Scans to cost more than the equivalent Sequential Scans,
+ * because Seq Scans have some performance advantages such as scan
+ * synchronization and parallelizability, and we'd prefer one of them to
+ * be picked unless a TID Range Scan really is better.
+ */
+ ntuples = selectivity * baserel->tuples;
+ nseqpages = pages - 1.0;
+
+ if (!enable_tidscan)
+ startup_cost += disable_cost;
+
+ /*
+ * The TID qual expressions will be computed once, any other baserestrict
+ * quals once per retrieved tuple.
+ */
+ cost_qual_eval(&tid_qual_cost, tidrangequals, root);
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /* disk costs; 1 random page and the remainder as seq pages */
+ run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages;
+
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ /*
+ * XXX currently we assume TID quals are a subset of qpquals at this
+ * point; they will be removed (if possible) when we create the plan, so
+ * we subtract their cost from the total qpqual cost. (If the TID quals
+ * can't be removed, this is a mistake and we're going to underestimate
+ * the CPU cost a bit.)
+ */
+ startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
+ tid_qual_cost.per_tuple;
+ run_cost += cpu_per_tuple * ntuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
* cost_subqueryscan
* Determines and returns the cost of scanning a subquery RTE.
*
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c
index 0845b460e2c..0725d950c54 100644
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@@ -2,9 +2,9 @@
*
* tidpath.c
* Routines to determine which TID conditions are usable for scanning
- * a given relation, and create TidPaths accordingly.
+ * a given relation, and create TidPaths and TidRangePaths accordingly.
*
- * What we are looking for here is WHERE conditions of the form
+ * For TidPaths, we look for WHERE conditions of the form
* "CTID = pseudoconstant", which can be implemented by just fetching
* the tuple directly via heap_fetch(). We can also handle OR'd conditions
* such as (CTID = const1) OR (CTID = const2), as well as ScalarArrayOpExpr
@@ -23,6 +23,9 @@
* a function, but in practice it works better to keep the special node
* representation all the way through to execution.
*
+ * Additionally, TidRangePaths may be created for conditions of the form
+ * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=, and
+ * AND-clauses composed of such conditions.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@@ -63,14 +66,14 @@ IsCTIDVar(Var *var, RelOptInfo *rel)
/*
* Check to see if a RestrictInfo is of the form
- * CTID = pseudoconstant
+ * CTID OP pseudoconstant
* or
- * pseudoconstant = CTID
- * where the CTID Var belongs to relation "rel", and nothing on the
- * other side of the clause does.
+ * pseudoconstant OP CTID
+ * where OP is a binary operation, the CTID Var belongs to relation "rel",
+ * and nothing on the other side of the clause does.
*/
static bool
-IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
+IsBinaryTidClause(RestrictInfo *rinfo, RelOptInfo *rel)
{
OpExpr *node;
Node *arg1,
@@ -83,10 +86,9 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
return false;
node = (OpExpr *) rinfo->clause;
- /* Operator must be tideq */
- if (node->opno != TIDEqualOperator)
+ /* OpExpr must have two arguments */
+ if (list_length(node->args) != 2)
return false;
- Assert(list_length(node->args) == 2);
arg1 = linitial(node->args);
arg2 = lsecond(node->args);
@@ -118,6 +120,50 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
/*
* Check to see if a RestrictInfo is of the form
+ * CTID = pseudoconstant
+ * or
+ * pseudoconstant = CTID
+ * where the CTID Var belongs to relation "rel", and nothing on the
+ * other side of the clause does.
+ */
+static bool
+IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ if (!IsBinaryTidClause(rinfo, rel))
+ return false;
+
+ if (((OpExpr *) rinfo->clause)->opno == TIDEqualOperator)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check to see if a RestrictInfo is of the form
+ * CTID OP pseudoconstant
+ * or
+ * pseudoconstant OP CTID
+ * where OP is a range operator such as <, <=, >, or >=, the CTID Var belongs
+ * to relation "rel", and nothing on the other side of the clause does.
+ */
+static bool
+IsTidRangeClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ Oid opno;
+
+ if (!IsBinaryTidClause(rinfo, rel))
+ return false;
+ opno = ((OpExpr *) rinfo->clause)->opno;
+
+ if (opno == TIDLessOperator || opno == TIDLessEqOperator ||
+ opno == TIDGreaterOperator || opno == TIDGreaterEqOperator)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check to see if a RestrictInfo is of the form
* CTID = ANY (pseudoconstant_array)
* where the CTID Var belongs to relation "rel", and nothing on the
* other side of the clause does.
@@ -222,7 +268,7 @@ TidQualFromRestrictInfo(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel)
*
* Returns a List of CTID qual RestrictInfos for the specified rel (with
* implicit OR semantics across the list), or NIL if there are no usable
- * conditions.
+ * equality conditions.
*
* This function is just concerned with handling AND/OR recursion.
*/
@@ -302,6 +348,34 @@ TidQualFromRestrictInfoList(PlannerInfo *root, List *rlist, RelOptInfo *rel)
}
/*
+ * Extract a set of CTID range conditions from implicit-AND List of RestrictInfos
+ *
+ * Returns a List of CTID range qual RestrictInfos for the specified rel
+ * (with implicit AND semantics across the list), or NIL if there are no
+ * usable range conditions or if the rel's table AM does not support TID range
+ * scans.
+ */
+static List *
+TidRangeQualFromRestrictInfoList(List *rlist, RelOptInfo *rel)
+{
+ List *rlst = NIL;
+ ListCell *l;
+
+ if ((rel->amflags & AMFLAG_HAS_TID_RANGE) == 0)
+ return NIL;
+
+ foreach(l, rlist)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+ if (IsTidRangeClause(rinfo, rel))
+ rlst = lappend(rlst, rinfo);
+ }
+
+ return rlst;
+}
+
+/*
* Given a list of join clauses involving our rel, create a parameterized
* TidPath for each one that is a suitable TidEqual clause.
*
@@ -385,6 +459,7 @@ void
create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
{
List *tidquals;
+ List *tidrangequals;
/*
* If any suitable quals exist in the rel's baserestrict list, generate a
@@ -392,7 +467,7 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
*/
tidquals = TidQualFromRestrictInfoList(root, rel->baserestrictinfo, rel);
- if (tidquals)
+ if (tidquals != NIL)
{
/*
* This path uses no join clauses, but it could still have required
@@ -405,6 +480,26 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
}
/*
+ * If there are range quals in the baserestrict list, generate a
+ * TidRangePath.
+ */
+ tidrangequals = TidRangeQualFromRestrictInfoList(rel->baserestrictinfo,
+ rel);
+
+ if (tidrangequals != NIL)
+ {
+ /*
+ * This path uses no join clauses, but it could still have required
+ * parameterization due to LATERAL refs in its tlist.
+ */
+ Relids required_outer = rel->lateral_relids;
+
+ add_path(rel, (Path *) create_tidrangescan_path(root, rel,
+ tidrangequals,
+ required_outer));
+ }
+
+ /*
* Try to generate parameterized TidPaths using equality clauses extracted
* from EquivalenceClasses. (This is important since simple "t1.ctid =
* t2.ctid" clauses will turn into ECs.)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 6c8305c977e..906cab70532 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -129,6 +129,10 @@ static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual,
static void bitmap_subplan_mark_shared(Plan *plan);
static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
List *tlist, List *scan_clauses);
+static TidRangeScan *create_tidrangescan_plan(PlannerInfo *root,
+ TidRangePath *best_path,
+ List *tlist,
+ List *scan_clauses);
static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root,
SubqueryScanPath *best_path,
List *tlist, List *scan_clauses);
@@ -193,6 +197,8 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist,
Index scanrelid);
static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid,
List *tidquals);
+static TidRangeScan *make_tidrangescan(List *qptlist, List *qpqual,
+ Index scanrelid, List *tidrangequals);
static SubqueryScan *make_subqueryscan(List *qptlist,
List *qpqual,
Index scanrelid,
@@ -384,6 +390,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_SubqueryScan:
case T_FunctionScan:
case T_TableFuncScan:
@@ -679,6 +686,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags)
scan_clauses);
break;
+ case T_TidRangeScan:
+ plan = (Plan *) create_tidrangescan_plan(root,
+ (TidRangePath *) best_path,
+ tlist,
+ scan_clauses);
+ break;
+
case T_SubqueryScan:
plan = (Plan *) create_subqueryscan_plan(root,
(SubqueryScanPath *) best_path,
@@ -3437,6 +3451,71 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
}
/*
+ * create_tidrangescan_plan
+ * Returns a tidrangescan plan for the base relation scanned by 'best_path'
+ * with restriction clauses 'scan_clauses' and targetlist 'tlist'.
+ */
+static TidRangeScan *
+create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path,
+ List *tlist, List *scan_clauses)
+{
+ TidRangeScan *scan_plan;
+ Index scan_relid = best_path->path.parent->relid;
+ List *tidrangequals = best_path->tidrangequals;
+
+ /* it should be a base rel... */
+ Assert(scan_relid > 0);
+ Assert(best_path->path.parent->rtekind == RTE_RELATION);
+
+ /*
+ * The qpqual list must contain all restrictions not enforced by the
+ * tidrangequals list. tidrangequals has AND semantics, so we can simply
+ * remove any qual that appears in it.
+ */
+ {
+ List *qpqual = NIL;
+ ListCell *l;
+
+ foreach(l, scan_clauses)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+ if (rinfo->pseudoconstant)
+ continue; /* we may drop pseudoconstants here */
+ if (list_member_ptr(tidrangequals, rinfo))
+ continue; /* simple duplicate */
+ qpqual = lappend(qpqual, rinfo);
+ }
+ scan_clauses = qpqual;
+ }
+
+ /* Sort clauses into best execution order */
+ scan_clauses = order_qual_clauses(root, scan_clauses);
+
+ /* Reduce RestrictInfo lists to bare expressions; ignore pseudoconstants */
+ tidrangequals = extract_actual_clauses(tidrangequals, false);
+ scan_clauses = extract_actual_clauses(scan_clauses, false);
+
+ /* Replace any outer-relation variables with nestloop params */
+ if (best_path->path.param_info)
+ {
+ tidrangequals = (List *)
+ replace_nestloop_params(root, (Node *) tidrangequals);
+ scan_clauses = (List *)
+ replace_nestloop_params(root, (Node *) scan_clauses);
+ }
+
+ scan_plan = make_tidrangescan(tlist,
+ scan_clauses,
+ scan_relid,
+ tidrangequals);
+
+ copy_generic_path_info(&scan_plan->scan.plan, &best_path->path);
+
+ return scan_plan;
+}
+
+/*
* create_subqueryscan_plan
* Returns a subqueryscan plan for the base relation scanned by 'best_path'
* with restriction clauses 'scan_clauses' and targetlist 'tlist'.
@@ -5369,6 +5448,25 @@ make_tidscan(List *qptlist,
return node;
}
+static TidRangeScan *
+make_tidrangescan(List *qptlist,
+ List *qpqual,
+ Index scanrelid,
+ List *tidrangequals)
+{
+ TidRangeScan *node = makeNode(TidRangeScan);
+ Plan *plan = &node->scan.plan;
+
+ plan->targetlist = qptlist;
+ plan->qual = qpqual;
+ plan->lefttree = NULL;
+ plan->righttree = NULL;
+ node->scan.scanrelid = scanrelid;
+ node->tidrangequals = tidrangequals;
+
+ return node;
+}
+
static SubqueryScan *
make_subqueryscan(List *qptlist,
List *qpqual,
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index c3c36be13e1..42f088ad714 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -619,6 +619,22 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
rtoffset, 1);
}
break;
+ case T_TidRangeScan:
+ {
+ TidRangeScan *splan = (TidRangeScan *) plan;
+
+ splan->scan.scanrelid += rtoffset;
+ splan->scan.plan.targetlist =
+ fix_scan_list(root, splan->scan.plan.targetlist,
+ rtoffset, NUM_EXEC_TLIST(plan));
+ splan->scan.plan.qual =
+ fix_scan_list(root, splan->scan.plan.qual,
+ rtoffset, NUM_EXEC_QUAL(plan));
+ splan->tidrangequals =
+ fix_scan_list(root, splan->tidrangequals,
+ rtoffset, 1);
+ }
+ break;
case T_SubqueryScan:
/* Needs special treatment, see comments below */
return set_subqueryscan_references(root,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 54ef61bfb35..f3e46e0959e 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2367,6 +2367,12 @@ finalize_plan(PlannerInfo *root, Plan *plan,
context.paramids = bms_add_members(context.paramids, scan_params);
break;
+ case T_TidRangeScan:
+ finalize_primnode((Node *) ((TidRangeScan *) plan)->tidrangequals,
+ &context);
+ context.paramids = bms_add_members(context.paramids, scan_params);
+ break;
+
case T_SubqueryScan:
{
SubqueryScan *sscan = (SubqueryScan *) plan;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 9be0c4a6af5..69b83071cf2 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1204,6 +1204,35 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
}
/*
+ * create_tidrangescan_path
+ * Creates a path corresponding to a scan by a range of TIDs, returning
+ * the pathnode.
+ */
+TidRangePath *
+create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
+ List *tidrangequals, Relids required_outer)
+{
+ TidRangePath *pathnode = makeNode(TidRangePath);
+
+ pathnode->path.pathtype = T_TidRangeScan;
+ pathnode->path.parent = rel;
+ pathnode->path.pathtarget = rel->reltarget;
+ pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_workers = 0;
+ pathnode->path.pathkeys = NIL; /* always unordered */
+
+ pathnode->tidrangequals = tidrangequals;
+
+ cost_tidrangescan(&pathnode->path, root, rel, tidrangequals,
+ pathnode->path.param_info);
+
+ return pathnode;
+}
+
+/*
* create_append_path
* Creates a path corresponding to an Append plan, returning the
* pathnode.
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 177e6e336ab..c5947fa4185 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -467,6 +467,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
/* Collect info about relation's foreign keys, if relevant */
get_relation_foreign_keys(root, rel, relation, inhparent);
+ /* Collect info about functions implemented by the rel's table AM. */
+ if (relation->rd_tableam &&
+ relation->rd_tableam->scan_set_tidrange != NULL &&
+ relation->rd_tableam->scan_getnextslot_tidrange != NULL)
+ rel->amflags |= AMFLAG_HAS_TID_RANGE;
+
/*
* Collect info about relation's partitioning scheme, if any. Only
* inheritance parents may be partitioned.
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 731ff708b90..345c877aeb3 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -234,6 +234,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->subroot = NULL;
rel->subplan_params = NIL;
rel->rel_parallel_workers = -1; /* set up in get_relation_info */
+ rel->amflags = 0;
rel->serverid = InvalidOid;
rel->userid = rte->checkAsUser;
rel->useridiscurrent = false;
@@ -646,6 +647,7 @@ build_join_rel(PlannerInfo *root,
joinrel->subroot = NULL;
joinrel->subplan_params = NIL;
joinrel->rel_parallel_workers = -1;
+ joinrel->amflags = 0;
joinrel->serverid = InvalidOid;
joinrel->userid = InvalidOid;
joinrel->useridiscurrent = false;
@@ -826,6 +828,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->eclass_indexes = NULL;
joinrel->subroot = NULL;
joinrel->subplan_params = NIL;
+ joinrel->amflags = 0;
joinrel->serverid = InvalidOid;
joinrel->userid = InvalidOid;
joinrel->useridiscurrent = false;
diff --git a/src/backend/storage/page/itemptr.c b/src/backend/storage/page/itemptr.c
index 55759c383b6..f40d6c22a02 100644
--- a/src/backend/storage/page/itemptr.c
+++ b/src/backend/storage/page/itemptr.c
@@ -71,3 +71,62 @@ ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
else
return 0;
}
+
+/*
+ * ItemPointerInc
+ * Increment 'pointer' by 1 only paying attention to the ItemPointer's
+ * type's range limits and not MaxOffsetNumber and FirstOffsetNumber.
+ * This may result in 'pointer' becoming !OffsetNumberIsValid.
+ *
+ * If the pointer is already the maximum possible values permitted by the
+ * range of the ItemPointer's types, then do nothing.
+ */
+void
+ItemPointerInc(ItemPointer pointer)
+{
+ BlockNumber blk = ItemPointerGetBlockNumberNoCheck(pointer);
+ OffsetNumber off = ItemPointerGetOffsetNumberNoCheck(pointer);
+
+ if (off == PG_UINT16_MAX)
+ {
+ if (blk != InvalidBlockNumber)
+ {
+ off = 0;
+ blk++;
+ }
+ }
+ else
+ off++;
+
+ ItemPointerSet(pointer, blk, off);
+}
+
+/*
+ * ItemPointerDec
+ * Decrement 'pointer' by 1 only paying attention to the ItemPointer's
+ * type's range limits and not MaxOffsetNumber and FirstOffsetNumber.
+ * This may result in 'pointer' becoming !OffsetNumberIsValid.
+ *
+ * If the pointer is already the minimum possible values permitted by the
+ * range of the ItemPointer's types, then do nothing. This does rely on
+ * FirstOffsetNumber being 1 rather than 0.
+ */
+void
+ItemPointerDec(ItemPointer pointer)
+{
+ BlockNumber blk = ItemPointerGetBlockNumberNoCheck(pointer);
+ OffsetNumber off = ItemPointerGetOffsetNumberNoCheck(pointer);
+
+ if (off == 0)
+ {
+ if (blk != 0)
+ {
+ off = PG_UINT16_MAX;
+ blk--;
+ }
+ }
+ else
+ off--;
+
+ ItemPointerSet(pointer, blk, off);
+}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 60e5cd3109b..bc0936bc2de 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -121,7 +121,11 @@ extern void heap_endscan(TableScanDesc scan);
extern HeapTuple heap_getnext(TableScanDesc scan, ScanDirection direction);
extern bool heap_getnextslot(TableScanDesc sscan,
ScanDirection direction, struct TupleTableSlot *slot);
-
+extern void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
+ ItemPointer maxtid);
+extern bool heap_getnextslot_tidrange(TableScanDesc sscan,
+ ScanDirection direction,
+ TupleTableSlot *slot);
extern bool heap_fetch(Relation relation, Snapshot snapshot,
HeapTuple tuple, Buffer *userbuf);
extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 005f3fdd2b8..0ef6d8edf7f 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -36,6 +36,10 @@ typedef struct TableScanDescData
int rs_nkeys; /* number of scan keys */
struct ScanKeyData *rs_key; /* array of scan key descriptors */
+ /* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */
+ ItemPointerData rs_mintid;
+ ItemPointerData rs_maxtid;
+
/*
* Information about type and behaviour of the scan, a bitmask of members
* of the ScanOptions enum (see tableam.h).
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 33bffb6815b..414b6b4d578 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -49,18 +49,19 @@ typedef enum ScanOptions
SO_TYPE_BITMAPSCAN = 1 << 1,
SO_TYPE_SAMPLESCAN = 1 << 2,
SO_TYPE_TIDSCAN = 1 << 3,
- SO_TYPE_ANALYZE = 1 << 4,
+ SO_TYPE_TIDRANGESCAN = 1 << 4,
+ SO_TYPE_ANALYZE = 1 << 5,
/* several of SO_ALLOW_* may be specified */
/* allow or disallow use of access strategy */
- SO_ALLOW_STRAT = 1 << 5,
+ SO_ALLOW_STRAT = 1 << 6,
/* report location to syncscan logic? */
- SO_ALLOW_SYNC = 1 << 6,
+ SO_ALLOW_SYNC = 1 << 7,
/* verify visibility page-at-a-time? */
- SO_ALLOW_PAGEMODE = 1 << 7,
+ SO_ALLOW_PAGEMODE = 1 << 8,
/* unregister snapshot at scan end? */
- SO_TEMP_SNAPSHOT = 1 << 8
+ SO_TEMP_SNAPSHOT = 1 << 9
} ScanOptions;
/*
@@ -325,6 +326,34 @@ typedef struct TableAmRoutine
ScanDirection direction,
TupleTableSlot *slot);
+ /*-----------
+ * Optional functions to provide scanning for ranges of ItemPointers.
+ * Implementations must either provide both of these functions, or neither
+ * of them.
+ *
+ * Implementations of scan_set_tidrange must themselves handle
+ * ItemPointers of any value. i.e, they must handle each of the following:
+ *
+ * 1) mintid or maxtid is beyond the end of the table; and
+ * 2) mintid is above maxtid; and
+ * 3) item offset for mintid or maxtid is beyond the maximum offset
+ * allowed by the AM.
+ *
+ * Implementations can assume that scan_set_tidrange is always called
+ * before can_getnextslot_tidrange or after scan_rescan and before any
+ * further calls to scan_getnextslot_tidrange.
+ */
+ void (*scan_set_tidrange) (TableScanDesc scan,
+ ItemPointer mintid,
+ ItemPointer maxtid);
+
+ /*
+ * Return next tuple from `scan` that's in the range of TIDs defined by
+ * scan_set_tidrange.
+ */
+ bool (*scan_getnextslot_tidrange) (TableScanDesc scan,
+ ScanDirection direction,
+ TupleTableSlot *slot);
/* ------------------------------------------------------------------------
* Parallel table scan related functions.
@@ -1015,6 +1044,64 @@ table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableS
return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
}
+/* ----------------------------------------------------------------------------
+ * TID Range scanning related functions.
+ * ----------------------------------------------------------------------------
+ */
+
+/*
+ * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
+ * for a TID range scan.
+ */
+static inline TableScanDesc
+table_beginscan_tidrange(Relation rel, Snapshot snapshot,
+ ItemPointer mintid,
+ ItemPointer maxtid)
+{
+ TableScanDesc sscan;
+ uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;
+
+ sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
+
+ /* Set the range of TIDs to scan */
+ sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
+
+ return sscan;
+}
+
+/*
+ * table_rescan_tidrange resets the scan position and sets the minimum and
+ * maximum TID range to scan for a TableScanDesc created by
+ * table_beginscan_tidrange.
+ */
+static inline void
+table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid,
+ ItemPointer maxtid)
+{
+ /* Ensure table_beginscan_tidrange() was used. */
+ Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
+
+ sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
+ sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
+}
+
+/*
+ * Fetch the next tuple from `sscan` for a TID range scan created by
+ * table_beginscan_tidrange(). Stores the tuple in `slot` and returns true,
+ * or returns false if no more tuples exist in the range.
+ */
+static inline bool
+table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
+ TupleTableSlot *slot)
+{
+ /* Ensure table_beginscan_tidrange() was used. */
+ Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
+
+ return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
+ direction,
+ slot);
+}
+
/* ----------------------------------------------------------------------------
* Parallel table scan related functions.
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index 0d4eac8f963..85395a81eec 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -237,15 +237,15 @@
oprname => '<', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
oprcom => '>(tid,tid)', oprnegate => '>=(tid,tid)', oprcode => 'tidlt',
oprrest => 'scalarltsel', oprjoin => 'scalarltjoinsel' },
-{ oid => '2800', descr => 'greater than',
+{ oid => '2800', oid_symbol => 'TIDGreaterOperator', descr => 'greater than',
oprname => '>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
oprcom => '<(tid,tid)', oprnegate => '<=(tid,tid)', oprcode => 'tidgt',
oprrest => 'scalargtsel', oprjoin => 'scalargtjoinsel' },
-{ oid => '2801', descr => 'less than or equal',
+{ oid => '2801', oid_symbol => 'TIDLessEqOperator', descr => 'less than or equal',
oprname => '<=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
oprcom => '>=(tid,tid)', oprnegate => '>(tid,tid)', oprcode => 'tidle',
oprrest => 'scalarlesel', oprjoin => 'scalarlejoinsel' },
-{ oid => '2802', descr => 'greater than or equal',
+{ oid => '2802', oid_symbol => 'TIDGreaterEqOperator', descr => 'greater than or equal',
oprname => '>=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
oprcom => '<=(tid,tid)', oprnegate => '<(tid,tid)', oprcode => 'tidge',
oprrest => 'scalargesel', oprjoin => 'scalargejoinsel' },
diff --git a/src/include/executor/nodeTidrangescan.h b/src/include/executor/nodeTidrangescan.h
new file mode 100644
index 00000000000..a57a47ed37f
--- /dev/null
+++ b/src/include/executor/nodeTidrangescan.h
@@ -0,0 +1,24 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeTidrangescan.h
+ *
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/executor/nodeTidrangescan.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NODETIDRANGESCAN_H
+#define NODETIDRANGESCAN_H
+
+#include "nodes/execnodes.h"
+
+extern TidRangeScanState *ExecInitTidRangeScan(TidRangeScan *node,
+ EState *estate, int eflags);
+extern void ExecEndTidRangeScan(TidRangeScanState *node);
+extern void ExecReScanTidRangeScan(TidRangeScanState *node);
+
+#endif /* NODETIDRANGESCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 943931f65d0..e31ad6204e6 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1625,6 +1625,24 @@ typedef struct TidScanState
} TidScanState;
/* ----------------
+ * TidRangeScanState information
+ *
+ * trss_tidexprs list of TidOpExpr structs (see nodeTidrangescan.c)
+ * trss_mintid the lowest TID in the scan range
+ * trss_maxtid the highest TID in the scan range
+ * trss_inScan is a scan currently in progress?
+ * ----------------
+ */
+typedef struct TidRangeScanState
+{
+ ScanState ss; /* its first field is NodeTag */
+ List *trss_tidexprs;
+ ItemPointerData trss_mintid;
+ ItemPointerData trss_maxtid;
+ bool trss_inScan;
+} TidRangeScanState;
+
+/* ----------------
* SubqueryScanState information
*
* SubqueryScanState is used for scanning a sub-query in the range table.
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 40ae489c235..e22df890ef4 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -59,6 +59,7 @@ typedef enum NodeTag
T_BitmapIndexScan,
T_BitmapHeapScan,
T_TidScan,
+ T_TidRangeScan,
T_SubqueryScan,
T_FunctionScan,
T_ValuesScan,
@@ -116,6 +117,7 @@ typedef enum NodeTag
T_BitmapIndexScanState,
T_BitmapHeapScanState,
T_TidScanState,
+ T_TidRangeScanState,
T_SubqueryScanState,
T_FunctionScanState,
T_TableFuncScanState,
@@ -229,6 +231,7 @@ typedef enum NodeTag
T_BitmapAndPath,
T_BitmapOrPath,
T_TidPath,
+ T_TidRangePath,
T_SubqueryScanPath,
T_ForeignPath,
T_CustomPath,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 0ec93e648c4..b8a6e0fc9f4 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -621,6 +621,10 @@ typedef struct PartitionSchemeData *PartitionScheme;
* to simplify matching join clauses to those lists.
*----------
*/
+
+/* Bitmask of flags supported by table AMs */
+#define AMFLAG_HAS_TID_RANGE (1 << 0)
+
typedef enum RelOptKind
{
RELOPT_BASEREL,
@@ -710,6 +714,8 @@ typedef struct RelOptInfo
PlannerInfo *subroot; /* if subquery */
List *subplan_params; /* if subquery */
int rel_parallel_workers; /* wanted number of parallel workers */
+ uint32 amflags; /* Bitmask of optional features supported by
+ * the table AM */
/* Information about foreign tables and foreign joins */
Oid serverid; /* identifies server for the table or join */
@@ -1324,6 +1330,18 @@ typedef struct TidPath
} TidPath;
/*
+ * TidRangePath represents a scan by a continguous range of TIDs
+ *
+ * tidrangequals is an implicitly AND'ed list of qual expressions of the form
+ * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=.
+ */
+typedef struct TidRangePath
+{
+ Path path;
+ List *tidrangequals;
+} TidRangePath;
+
+/*
* SubqueryScanPath represents a scan of an unflattened subquery-in-FROM
*
* Note that the subpath comes from a different planning domain; for example
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 43160439f05..6e62104d0b7 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -486,6 +486,19 @@ typedef struct TidScan
} TidScan;
/* ----------------
+ * tid range scan node
+ *
+ * tidrangequals is an implicitly AND'ed list of qual expressions of the form
+ * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=.
+ * ----------------
+ */
+typedef struct TidRangeScan
+{
+ Scan scan;
+ List *tidrangequals; /* qual(s) involving CTID op something */
+} TidRangeScan;
+
+/* ----------------
* subquery scan node
*
* SubqueryScan is for scanning the output of a sub-query in the range table.
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index ed2e4af4be7..1be93be0983 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -83,6 +83,9 @@ extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root);
extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec);
extern void cost_tidscan(Path *path, PlannerInfo *root,
RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info);
+extern void cost_tidrangescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, List *tidrangequals,
+ ParamPathInfo *param_info);
extern void cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
RelOptInfo *baserel, ParamPathInfo *param_info);
extern void cost_functionscan(Path *path, PlannerInfo *root,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 8dfc36a4e15..54f4b782fc7 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -63,6 +63,10 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root,
List *bitmapquals);
extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
List *tidquals, Relids required_outer);
+extern TidRangePath *create_tidrangescan_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ List *tidrangequals,
+ Relids required_outer);
extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
List *subpaths, List *partial_subpaths,
List *pathkeys, Relids required_outer,
diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h
index 0e6990140b8..cd4b8fbacb2 100644
--- a/src/include/storage/itemptr.h
+++ b/src/include/storage/itemptr.h
@@ -202,5 +202,7 @@ typedef ItemPointerData *ItemPointer;
extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2);
extern int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2);
+extern void ItemPointerInc(ItemPointer pointer);
+extern void ItemPointerDec(ItemPointer pointer);
#endif /* ITEMPTR_H */
diff --git a/src/test/regress/expected/tidrangescan.out b/src/test/regress/expected/tidrangescan.out
new file mode 100644
index 00000000000..721f3b94e04
--- /dev/null
+++ b/src/test/regress/expected/tidrangescan.out
@@ -0,0 +1,300 @@
+-- tests for tidrangescans
+SET enable_seqscan TO off;
+CREATE TABLE tidrangescan(id integer, data text);
+-- empty table
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)';
+ QUERY PLAN
+-----------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (ctid < '(1,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)';
+ ctid
+------
+(0 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)';
+ QUERY PLAN
+-----------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (ctid > '(9,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)';
+ ctid
+------
+(0 rows)
+
+-- insert enough tuples to fill at least two pages
+INSERT INTO tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,200) AS s(i);
+-- remove all tuples after the 10th tuple on each page. Trying to ensure
+-- we get the same layout with all CPU architectures and smaller than standard
+-- page sizes.
+DELETE FROM tidrangescan
+WHERE substring(ctid::text FROM ',(\d+)\)')::integer > 10 OR substring(ctid::text FROM '\((\d+),')::integer > 2;
+VACUUM tidrangescan;
+-- range scans with upper bound
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+ QUERY PLAN
+-----------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (ctid < '(1,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+ ctid
+--------
+ (0,1)
+ (0,2)
+ (0,3)
+ (0,4)
+ (0,5)
+ (0,6)
+ (0,7)
+ (0,8)
+ (0,9)
+ (0,10)
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)';
+ QUERY PLAN
+------------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (ctid <= '(1,5)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)';
+ ctid
+--------
+ (0,1)
+ (0,2)
+ (0,3)
+ (0,4)
+ (0,5)
+ (0,6)
+ (0,7)
+ (0,8)
+ (0,9)
+ (0,10)
+ (1,1)
+ (1,2)
+ (1,3)
+ (1,4)
+ (1,5)
+(15 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+ QUERY PLAN
+-----------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (ctid < '(0,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+ ctid
+------
+(0 rows)
+
+-- range scans with lower bound
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)';
+ QUERY PLAN
+-----------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (ctid > '(2,8)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)';
+ ctid
+--------
+ (2,9)
+ (2,10)
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid;
+ QUERY PLAN
+-----------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: ('(2,8)'::tid < ctid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid;
+ ctid
+--------
+ (2,9)
+ (2,10)
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)';
+ QUERY PLAN
+------------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (ctid >= '(2,8)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)';
+ ctid
+--------
+ (2,8)
+ (2,9)
+ (2,10)
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)';
+ QUERY PLAN
+--------------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (ctid >= '(100,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)';
+ ctid
+------
+(0 rows)
+
+-- range scans with both bounds
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid;
+ QUERY PLAN
+----------------------------------------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: ((ctid > '(1,4)'::tid) AND ('(1,7)'::tid >= ctid))
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid;
+ ctid
+-------
+ (1,5)
+ (1,6)
+ (1,7)
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)';
+ QUERY PLAN
+----------------------------------------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (('(1,7)'::tid >= ctid) AND (ctid > '(1,4)'::tid))
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)';
+ ctid
+-------
+ (1,5)
+ (1,6)
+ (1,7)
+(3 rows)
+
+-- extreme offsets
+SELECT ctid FROM tidrangescan WHERE ctid > '(0,65535)' AND ctid < '(1,0)' LIMIT 1;
+ ctid
+------
+(0 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)' LIMIT 1;
+ ctid
+------
+(0 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(4294967295,65535)';
+ ctid
+------
+(0 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+ ctid
+------
+(0 rows)
+
+-- NULLs in the range cannot return tuples
+SELECT ctid FROM tidrangescan WHERE ctid >= (SELECT NULL::tid);
+ ctid
+------
+(0 rows)
+
+-- rescans
+EXPLAIN (COSTS OFF)
+SELECT t.ctid,t2.c FROM tidrangescan t,
+LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2
+WHERE t.ctid < '(1,0)';
+ QUERY PLAN
+-----------------------------------------------
+ Nested Loop
+ -> Tid Range Scan on tidrangescan t
+ TID Cond: (ctid < '(1,0)'::tid)
+ -> Aggregate
+ -> Tid Range Scan on tidrangescan t2
+ TID Cond: (ctid <= t.ctid)
+(6 rows)
+
+SELECT t.ctid,t2.c FROM tidrangescan t,
+LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2
+WHERE t.ctid < '(1,0)';
+ ctid | c
+--------+----
+ (0,1) | 1
+ (0,2) | 2
+ (0,3) | 3
+ (0,4) | 4
+ (0,5) | 5
+ (0,6) | 6
+ (0,7) | 7
+ (0,8) | 8
+ (0,9) | 9
+ (0,10) | 10
+(10 rows)
+
+-- cursors
+-- Ensure we get a TID Range scan without a Materialize node.
+EXPLAIN (COSTS OFF)
+DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+ QUERY PLAN
+-----------------------------------
+ Tid Range Scan on tidrangescan
+ TID Cond: (ctid < '(1,0)'::tid)
+(2 rows)
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+FETCH NEXT c;
+ ctid
+-------
+ (0,1)
+(1 row)
+
+FETCH NEXT c;
+ ctid
+-------
+ (0,2)
+(1 row)
+
+FETCH PRIOR c;
+ ctid
+-------
+ (0,1)
+(1 row)
+
+FETCH FIRST c;
+ ctid
+-------
+ (0,1)
+(1 row)
+
+FETCH LAST c;
+ ctid
+--------
+ (0,10)
+(1 row)
+
+COMMIT;
+DROP TABLE tidrangescan;
+RESET enable_seqscan;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 12bb67e4911..c77b0d7342f 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -80,7 +80,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview
# ----------
# Another group of parallel tests
# ----------
-test: create_table_like alter_generic alter_operator misc async dbsize misc_functions sysviews tsrf tid tidscan collate.icu.utf8 incremental_sort
+test: create_table_like alter_generic alter_operator misc async dbsize misc_functions sysviews tsrf tid tidscan tidrangescan collate.icu.utf8 incremental_sort
# rules cannot run concurrently with any test that creates
# a view or rule in the public schema
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 59b416fd80c..0264a97324c 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -138,6 +138,7 @@ test: sysviews
test: tsrf
test: tid
test: tidscan
+test: tidrangescan
test: collate.icu.utf8
test: rules
test: psql
diff --git a/src/test/regress/sql/tidrangescan.sql b/src/test/regress/sql/tidrangescan.sql
new file mode 100644
index 00000000000..ac09ebb6262
--- /dev/null
+++ b/src/test/regress/sql/tidrangescan.sql
@@ -0,0 +1,101 @@
+-- tests for tidrangescans
+
+SET enable_seqscan TO off;
+CREATE TABLE tidrangescan(id integer, data text);
+
+-- empty table
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)';
+SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)';
+SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)';
+
+-- insert enough tuples to fill at least two pages
+INSERT INTO tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,200) AS s(i);
+
+-- remove all tuples after the 10th tuple on each page. Trying to ensure
+-- we get the same layout with all CPU architectures and smaller than standard
+-- page sizes.
+DELETE FROM tidrangescan
+WHERE substring(ctid::text FROM ',(\d+)\)')::integer > 10 OR substring(ctid::text FROM '\((\d+),')::integer > 2;
+VACUUM tidrangescan;
+
+-- range scans with upper bound
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)';
+SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+
+-- range scans with lower bound
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)';
+SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid;
+SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid;
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)';
+SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)';
+SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)';
+
+-- range scans with both bounds
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid;
+SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid;
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)';
+SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)';
+
+-- extreme offsets
+SELECT ctid FROM tidrangescan WHERE ctid > '(0,65535)' AND ctid < '(1,0)' LIMIT 1;
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)' LIMIT 1;
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(4294967295,65535)';
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+
+-- NULLs in the range cannot return tuples
+SELECT ctid FROM tidrangescan WHERE ctid >= (SELECT NULL::tid);
+
+-- rescans
+EXPLAIN (COSTS OFF)
+SELECT t.ctid,t2.c FROM tidrangescan t,
+LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2
+WHERE t.ctid < '(1,0)';
+
+SELECT t.ctid,t2.c FROM tidrangescan t,
+LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2
+WHERE t.ctid < '(1,0)';
+
+-- cursors
+
+-- Ensure we get a TID Range scan without a Materialize node.
+EXPLAIN (COSTS OFF)
+DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+FETCH NEXT c;
+FETCH NEXT c;
+FETCH PRIOR c;
+FETCH FIRST c;
+FETCH LAST c;
+COMMIT;
+
+DROP TABLE tidrangescan;
+
+RESET enable_seqscan;