Support index-only scans using the visibility map to avoid heap fetches.

When a btree index contains all columns required by the query, and the visibility map shows that all tuples on a target heap page are visible-to-all, we don't need to fetch that heap page. This patch depends on the previous patches that made the visibility map reliable. There's a fair amount left to do here, notably trying to figure out a less chintzy way of estimating the cost of an index-only scan, but the core functionality seems ready to commit. Robert Haas and Ibrar Ahmed, with some previous work by Heikki Linnakangas.
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-10-07 20:13:02 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-10-07 20:14:13 -0400
commit: a2822fb9337a21f98ac4ce850bb4145acf47ca27 (patch)
tree: c239fe9a32ff0225e906711a76348cee1567f0d8 /src/backend/optimizer/path/costsize.c
parent: caa1054df8408b165e5f66ff25c87b6dd0a0a1e7 (diff)
download: postgresql-a2822fb9337a21f98ac4ce850bb4145acf47ca27.tar.gz
postgresql-a2822fb9337a21f98ac4ce850bb4145acf47ca27.zip
1 files changed, 21 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 7812a8628fc..e480797ca8e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -110,6 +110,7 @@ Cost		disable_cost = 1.0e10;
 
 bool		enable_seqscan = true;
 bool		enable_indexscan = true;
+bool		enable_indexonlyscan = true;
 bool		enable_bitmapscan = true;
 bool		enable_tidscan = true;
 bool		enable_sort = true;
@@ -119,6 +120,9 @@ bool		enable_material = true;
 bool		enable_mergejoin = true;
 bool		enable_hashjoin = true;
 
+/* Possibly this should become a GUC too */
+static double visibility_fraction = 0.9;
+
 typedef struct
 {
 	PlannerInfo *root;
@@ -210,6 +214,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
  * 'index' is the index to be used
  * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
  * 'indexOrderBys' is the list of ORDER BY operators for amcanorderbyop indexes
+ * 'indexonly' is true if it's an index-only scan
  * 'outer_rel' is the outer relation when we are considering using the index
  *		scan as the inside of a nestloop join (hence, some of the indexQuals
  *		are join clauses, and we should expect repeated scans of the index);
@@ -232,6 +237,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
 		   IndexOptInfo *index,
 		   List *indexQuals,
 		   List *indexOrderBys,
+		   bool indexonly,
 		   RelOptInfo *outer_rel)
 {
 	RelOptInfo *baserel = index->rel;
@@ -314,6 +320,12 @@ cost_index(IndexPath *path, PlannerInfo *root,
 	 * For partially-correlated indexes, we ought to charge somewhere between
 	 * these two estimates.  We currently interpolate linearly between the
 	 * estimates based on the correlation squared (XXX is that appropriate?).
+	 *
+	 * If it's an index-only scan, then we will not need to fetch any heap
+	 * pages for which the visibility map shows all tuples are visible.
+	 * Unfortunately, we have no stats as to how much of the heap is
+	 * all-visible, and that's likely to be a rather unstable number anyway.
+	 * We use an arbitrary constant visibility_fraction to estimate this.
 	 *----------
 	 */
 	if (outer_rel != NULL && outer_rel->rows > 1)
@@ -333,6 +345,8 @@ cost_index(IndexPath *path, PlannerInfo *root,
 											(double) index->pages,
 											root);
 
+		pages_fetched = ceil(pages_fetched * visibility_fraction);
+
 		max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
 
 		/*
@@ -352,6 +366,8 @@ cost_index(IndexPath *path, PlannerInfo *root,
 											(double) index->pages,
 											root);
 
+		pages_fetched = ceil(pages_fetched * visibility_fraction);
+
 		min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
 	}
 	else
@@ -365,11 +381,16 @@ cost_index(IndexPath *path, PlannerInfo *root,
 											(double) index->pages,
 											root);
 
+		pages_fetched = ceil(pages_fetched * visibility_fraction);
+
 		/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
 		max_IO_cost = pages_fetched * spc_random_page_cost;
 
 		/* min_IO_cost is for the perfectly correlated case (csquared=1) */
 		pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
+
+		pages_fetched = ceil(pages_fetched * visibility_fraction);
+
 		min_IO_cost = spc_random_page_cost;
 		if (pages_fetched > 1)
 			min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost;
author	Tom Lane <tgl@sss.pgh.pa.us>	2011-10-07 20:13:02 -0400
committer	Tom Lane <tgl@sss.pgh.pa.us>	2011-10-07 20:14:13 -0400
commit	a2822fb9337a21f98ac4ce850bb4145acf47ca27 (patch)
tree	c239fe9a32ff0225e906711a76348cee1567f0d8 /src/backend/optimizer/path/costsize.c
parent	caa1054df8408b165e5f66ff25c87b6dd0a0a1e7 (diff)
download	postgresql-a2822fb9337a21f98ac4ce850bb4145acf47ca27.tar.gz postgresql-a2822fb9337a21f98ac4ce850bb4145acf47ca27.zip