aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/nodeIndexscan.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-10-07 20:13:02 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-10-07 20:14:13 -0400
commit: a2822fb9337a21f98ac4ce850bb4145acf47ca27 (patch)
tree: c239fe9a32ff0225e906711a76348cee1567f0d8 /src/backend/executor/nodeIndexscan.c
parent: caa1054df8408b165e5f66ff25c87b6dd0a0a1e7 (diff)
download: postgresql-a2822fb9337a21f98ac4ce850bb4145acf47ca27.tar.gz
postgresql-a2822fb9337a21f98ac4ce850bb4145acf47ca27.zip
Support index-only scans using the visibility map to avoid heap fetches.
When a btree index contains all columns required by the query, and the visibility map shows that all tuples on a target heap page are visible-to-all, we don't need to fetch that heap page. This patch depends on the previous patches that made the visibility map reliable.

There's a fair amount left to do here, notably trying to figure out a less chintzy way of estimating the cost of an index-only scan, but the core functionality seems ready to commit.

Robert Haas and Ibrar Ahmed, with some previous work by Heikki Linnakangas.
Diffstat (limited to 'src/backend/executor/nodeIndexscan.c')
-rw-r--r-- src/backend/executor/nodeIndexscan.c 123
1 file changed, 114 insertions, 9 deletions
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index da25384e860..32ed65797ae 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -26,6 +26,7 @@
#include "access/nbtree.h"
#include "access/relscan.h"
+#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeIndexscan.h"
#include "optimizer/clauses.h"
@@ -36,6 +37,7 @@
static TupleTableSlot *IndexNext(IndexScanState *node);
+static void IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc);
/* ----------------------------------------------------------------
@@ -54,6 +56,7 @@ IndexNext(IndexScanState *node)
IndexScanDesc scandesc;
HeapTuple tuple;
TupleTableSlot *slot;
+ ItemPointer tid;
/*
* extract necessary information from index scan node
@@ -73,19 +76,63 @@ IndexNext(IndexScanState *node)
slot = node->ss.ss_ScanTupleSlot;
/*
- * ok, now that we have what we need, fetch the next tuple.
+ * OK, now that we have what we need, fetch the next TID.
*/
- while ((tuple = index_getnext(scandesc, direction)) != NULL)
+ while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
{
/*
- * Store the scanned tuple in the scan tuple slot of the scan state.
- * Note: we pass 'false' because tuples returned by amgetnext are
- * pointers onto disk pages and must not be pfree()'d.
+ * Attempt index-only scan, if possible. For this, we need to have
+ * gotten an index tuple from the AM, and we need the TID to reference
+ * a heap page on which all tuples are known visible to everybody.
+ * If that's the case, we don't need to visit the heap page for tuple
+ * visibility testing, and we don't need any column values that are
+ * not available from the index.
+ *
+ * Note: in the index-only path, we are still holding pin on the
+ * scan's xs_cbuf, ie, the previously visited heap page. It's not
+ * clear whether it'd be better to release that pin.
*/
- ExecStoreTuple(tuple, /* tuple to store */
- slot, /* slot to store in */
- scandesc->xs_cbuf, /* buffer containing tuple */
- false); /* don't pfree */
+ if (scandesc->xs_itup != NULL &&
+ visibilitymap_test(scandesc->heapRelation,
+ ItemPointerGetBlockNumber(tid),
+ &node->iss_VMBuffer))
+ {
+ /*
+ * Convert index tuple to look like a heap tuple, and store the
+ * results in the scan tuple slot.
+ */
+ IndexStoreHeapTuple(slot, scandesc);
+ }
+ else
+ {
+ /* Index-only approach not possible, so fetch heap tuple. */
+ tuple = index_fetch_heap(scandesc);
+
+ /* Tuple might not be visible. */
+ if (tuple == NULL)
+ continue;
+
+ /*
+ * Only MVCC snapshots are supported here, so there should be no
+ * need to keep following the HOT chain once a visible entry has
+ * been found. If we did want to allow that, we'd need to keep
+ * more state to remember not to call index_getnext_tid next time.
+ */
+ if (scandesc->xs_continue_hot)
+ elog(ERROR, "unsupported use of non-MVCC snapshot in executor");
+
+ /*
+ * Store the scanned tuple in the scan tuple slot of the scan
+ * state.
+ *
+ * Note: we pass 'false' because tuples returned by amgetnext are
+ * pointers onto disk pages and must not be pfree()'d.
+ */
+ ExecStoreTuple(tuple, /* tuple to store */
+ slot, /* slot to store in */
+ scandesc->xs_cbuf, /* buffer containing tuple */
+ false); /* don't pfree */
+ }
/*
* If the index was lossy, we have to recheck the index quals using
@@ -114,6 +161,53 @@ IndexNext(IndexScanState *node)
}
/*
+ * IndexStoreHeapTuple
+ *
+ * When performing an index-only scan, we build a faux heap tuple
+ * from the index tuple. Columns not present in the index are set to
+ * NULL, which is OK because we know they won't be referenced.
+ *
+ * The faux tuple is built as a virtual tuple that depends on the
+ * scandesc's xs_itup, so that must remain valid for as long as we
+ * need the slot contents.
+ */
+static void
+IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc)
+{
+ Form_pg_index indexForm = scandesc->indexRelation->rd_index; /* supplies indkey: the heap attribute number behind each index column */
+ TupleDesc indexDesc = RelationGetDescr(scandesc->indexRelation); /* descriptor used to decode xs_itup */
+ int nindexatts = indexDesc->natts; /* number of columns in the index tuple */
+ int nheapatts = slot->tts_tupleDescriptor->natts; /* number of columns in the slot's descriptor */
+ Datum *values = slot->tts_values; /* slot's per-column value array, filled in place below */
+ bool *isnull = slot->tts_isnull; /* slot's per-column null-flag array, filled in place below */
+ int i;
+
+ /* We must first set the slot to empty, and mark all columns as null */
+ ExecClearTuple(slot);
+
+ memset(isnull, true, nheapatts * sizeof(bool)); /* any column not overwritten by the loop below stays NULL */
+
+ /* Copy each index column into the slot position of the heap column it indexes. */
+ for (i = 0; i < nindexatts; i++)
+ {
+ int indexatt = indexForm->indkey.values[i]; /* 1-based heap attribute number; <= 0 means nothing to copy */
+
+ /* Ignore expression columns, as well as system attributes */
+ if (indexatt <= 0)
+ continue;
+
+ Assert(indexatt <= nheapatts); /* guards the values[]/isnull[] writes below against overrun */
+
+ values[indexatt - 1] = index_getattr(scandesc->xs_itup, i + 1, /* index column passed 1-based; slot arrays are 0-based */
+ indexDesc,
+ &isnull[indexatt - 1]); /* null flag written straight into the slot's array */
+ }
+
+ /* And now we can mark the slot as holding a virtual tuple. */
+ ExecStoreVirtualTuple(slot);
+}
+
+/*
* IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual
*/
static bool
@@ -399,6 +493,13 @@ ExecEndIndexScan(IndexScanState *node)
indexScanDesc = node->iss_ScanDesc;
relation = node->ss.ss_currentRelation;
+ /* Release VM buffer pin, if any. */
+ if (node->iss_VMBuffer != InvalidBuffer)
+ {
+ ReleaseBuffer(node->iss_VMBuffer);
+ node->iss_VMBuffer = InvalidBuffer;
+ }
+
/*
* Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
*/
@@ -611,6 +712,10 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
indexstate->iss_NumScanKeys,
indexstate->iss_NumOrderByKeys);
+ /* Prepare for possible index-only scan */
+ indexstate->iss_ScanDesc->xs_want_itup = node->indexonly;
+ indexstate->iss_VMBuffer = InvalidBuffer;
+
/*
* If no run-time keys to calculate, go ahead and pass the scankeys to the
* index AM.