diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-10-07 20:13:02 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-10-07 20:14:13 -0400 |
commit | a2822fb9337a21f98ac4ce850bb4145acf47ca27 (patch) | |
tree | c239fe9a32ff0225e906711a76348cee1567f0d8 /src/backend/optimizer/path/costsize.c | |
parent | caa1054df8408b165e5f66ff25c87b6dd0a0a1e7 (diff) | |
download | postgresql-a2822fb9337a21f98ac4ce850bb4145acf47ca27.tar.gz postgresql-a2822fb9337a21f98ac4ce850bb4145acf47ca27.zip |
Support index-only scans using the visibility map to avoid heap fetches.
When a btree index contains all columns required by the query, and the
visibility map shows that all tuples on a target heap page are
visible-to-all, we don't need to fetch that heap page. This patch depends
on the previous patches that made the visibility map reliable.
There's a fair amount left to do here, notably trying to figure out a less
chintzy way of estimating the cost of an index-only scan, but the core
functionality seems ready to commit.
Robert Haas and Ibrar Ahmed, with some previous work by Heikki Linnakangas.
Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 7812a8628fc..e480797ca8e 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -110,6 +110,7 @@ Cost disable_cost = 1.0e10; bool enable_seqscan = true; bool enable_indexscan = true; +bool enable_indexonlyscan = true; bool enable_bitmapscan = true; bool enable_tidscan = true; bool enable_sort = true; @@ -119,6 +120,9 @@ bool enable_material = true; bool enable_mergejoin = true; bool enable_hashjoin = true; +/* Possibly this should become a GUC too */ +static double visibility_fraction = 0.9; + typedef struct { PlannerInfo *root; @@ -210,6 +214,7 @@ cost_seqscan(Path *path, PlannerInfo *root, * 'index' is the index to be used * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics) * 'indexOrderBys' is the list of ORDER BY operators for amcanorderbyop indexes + * 'indexonly' is true if it's an index-only scan * 'outer_rel' is the outer relation when we are considering using the index * scan as the inside of a nestloop join (hence, some of the indexQuals * are join clauses, and we should expect repeated scans of the index); @@ -232,6 +237,7 @@ cost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index, List *indexQuals, List *indexOrderBys, + bool indexonly, RelOptInfo *outer_rel) { RelOptInfo *baserel = index->rel; @@ -314,6 +320,12 @@ cost_index(IndexPath *path, PlannerInfo *root, * For partially-correlated indexes, we ought to charge somewhere between * these two estimates. We currently interpolate linearly between the * estimates based on the correlation squared (XXX is that appropriate?). + * + * If it's an index-only scan, then we will not need to fetch any heap + * pages for which the visibility map shows all tuples are visible. + * Unfortunately, we have no stats as to how much of the heap is + * all-visible, and that's likely to be a rather unstable number anyway. + * We use an arbitrary constant visibility_fraction to estimate this. *---------- */ if (outer_rel != NULL && outer_rel->rows > 1) @@ -333,6 +345,8 @@ cost_index(IndexPath *path, PlannerInfo *root, (double) index->pages, root); + pages_fetched = ceil(pages_fetched * visibility_fraction); + max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans; /* @@ -352,6 +366,8 @@ cost_index(IndexPath *path, PlannerInfo *root, (double) index->pages, root); + pages_fetched = ceil(pages_fetched * visibility_fraction); + min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans; } else @@ -365,11 +381,16 @@ cost_index(IndexPath *path, PlannerInfo *root, (double) index->pages, root); + pages_fetched = ceil(pages_fetched * visibility_fraction); + /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ max_IO_cost = pages_fetched * spc_random_page_cost; /* min_IO_cost is for the perfectly correlated case (csquared=1) */ pages_fetched = ceil(indexSelectivity * (double) baserel->pages); + + pages_fetched = ceil(pages_fetched * visibility_fraction); + min_IO_cost = spc_random_page_cost; if (pages_fetched > 1) min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost; |