aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/bloom/blcost.c4
-rw-r--r--contrib/bloom/bloom.h2
-rw-r--r--contrib/bloom/blutils.c1
-rw-r--r--doc/src/sgml/indexam.sgml2
-rw-r--r--src/backend/access/brin/brin.c1
-rw-r--r--src/backend/access/gin/ginutil.c1
-rw-r--r--src/backend/access/gist/gist.c1
-rw-r--r--src/backend/access/hash/hash.c1
-rw-r--r--src/backend/access/nbtree/nbtree.c1
-rw-r--r--src/backend/access/spgist/spgutils.c1
-rw-r--r--src/backend/executor/execParallel.c12
-rw-r--r--src/backend/executor/nodeIndexscan.c153
-rw-r--r--src/backend/optimizer/path/allpaths.c4
-rw-r--r--src/backend/optimizer/path/costsize.c53
-rw-r--r--src/backend/optimizer/path/indxpath.c67
-rw-r--r--src/backend/optimizer/plan/planner.c2
-rw-r--r--src/backend/optimizer/util/pathnode.c19
-rw-r--r--src/backend/optimizer/util/plancat.c1
-rw-r--r--src/backend/utils/adt/selfuncs.c24
-rw-r--r--src/include/access/amapi.h5
-rw-r--r--src/include/executor/nodeIndexscan.h4
-rw-r--r--src/include/nodes/execnodes.h2
-rw-r--r--src/include/nodes/relation.h1
-rw-r--r--src/include/optimizer/cost.h2
-rw-r--r--src/include/optimizer/pathnode.h3
-rw-r--r--src/include/optimizer/paths.h2
-rw-r--r--src/include/utils/index_selfuncs.h18
-rw-r--r--src/test/regress/expected/select_parallel.out23
-rw-r--r--src/test/regress/sql/select_parallel.sql11
29 files changed, 366 insertions, 55 deletions
diff --git a/contrib/bloom/blcost.c b/contrib/bloom/blcost.c
index 98a2228edf6..ba39f627fd2 100644
--- a/contrib/bloom/blcost.c
+++ b/contrib/bloom/blcost.c
@@ -24,7 +24,8 @@
void
blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost,
- Selectivity *indexSelectivity, double *indexCorrelation)
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
IndexOptInfo *index = path->indexinfo;
List *qinfos;
@@ -45,4 +46,5 @@ blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
}
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index 39d8d05c5d4..0cfe49aad82 100644
--- a/contrib/bloom/bloom.h
+++ b/contrib/bloom/bloom.h
@@ -208,6 +208,6 @@ extern bytea *bloptions(Datum reloptions, bool validate);
extern void blcostestimate(PlannerInfo *root, IndexPath *path,
double loop_count, Cost *indexStartupCost,
Cost *indexTotalCost, Selectivity *indexSelectivity,
- double *indexCorrelation);
+ double *indexCorrelation, double *indexPages);
#endif
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index 858798db85c..f2eda67e0ae 100644
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -119,6 +119,7 @@ blhandler(PG_FUNCTION_ARGS)
amroutine->amstorage = false;
amroutine->amclusterable = false;
amroutine->ampredlocks = false;
+ amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid;
amroutine->ambuild = blbuild;
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index 9afd7f64179..401b11598eb 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -110,6 +110,8 @@ typedef struct IndexAmRoutine
bool amclusterable;
/* does AM handle predicate locks? */
bool ampredlocks;
+ /* does AM support parallel scan? */
+ bool amcanparallel;
/* type of data stored in index, or InvalidOid if variable */
Oid amkeytype;
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 4ff046b4b01..b22563bf7c4 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -93,6 +93,7 @@ brinhandler(PG_FUNCTION_ARGS)
amroutine->amstorage = true;
amroutine->amclusterable = false;
amroutine->ampredlocks = false;
+ amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid;
amroutine->ambuild = brinbuild;
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index a98d4fc3973..d03d59da6a7 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -50,6 +50,7 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->amstorage = true;
amroutine->amclusterable = false;
amroutine->ampredlocks = false;
+ amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid;
amroutine->ambuild = ginbuild;
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 96ead531ea3..6593771361c 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -71,6 +71,7 @@ gisthandler(PG_FUNCTION_ARGS)
amroutine->amstorage = true;
amroutine->amclusterable = true;
amroutine->ampredlocks = false;
+ amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid;
amroutine->ambuild = gistbuild;
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index bca77a80c3b..24510e78f5f 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -67,6 +67,7 @@ hashhandler(PG_FUNCTION_ARGS)
amroutine->amstorage = false;
amroutine->amclusterable = false;
amroutine->ampredlocks = false;
+ amroutine->amcanparallel = false;
amroutine->amkeytype = INT4OID;
amroutine->ambuild = hashbuild;
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index cbc575d5cf2..775f2ff1f8c 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -140,6 +140,7 @@ bthandler(PG_FUNCTION_ARGS)
amroutine->amstorage = false;
amroutine->amclusterable = true;
amroutine->ampredlocks = true;
+ amroutine->amcanparallel = true;
amroutine->amkeytype = InvalidOid;
amroutine->ambuild = btbuild;
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 78846bec666..e57ac49c6b4 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -49,6 +49,7 @@ spghandler(PG_FUNCTION_ARGS)
amroutine->amstorage = false;
amroutine->amclusterable = false;
amroutine->ampredlocks = false;
+ amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid;
amroutine->ambuild = spgbuild;
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 784dbaf590e..98d4f1eca76 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -28,6 +28,7 @@
#include "executor/nodeCustom.h"
#include "executor/nodeForeignscan.h"
#include "executor/nodeSeqscan.h"
+#include "executor/nodeIndexscan.h"
#include "executor/tqueue.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/planmain.h"
@@ -197,6 +198,10 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
ExecSeqScanEstimate((SeqScanState *) planstate,
e->pcxt);
break;
+ case T_IndexScanState:
+ ExecIndexScanEstimate((IndexScanState *) planstate,
+ e->pcxt);
+ break;
case T_ForeignScanState:
ExecForeignScanEstimate((ForeignScanState *) planstate,
e->pcxt);
@@ -249,6 +254,10 @@ ExecParallelInitializeDSM(PlanState *planstate,
ExecSeqScanInitializeDSM((SeqScanState *) planstate,
d->pcxt);
break;
+ case T_IndexScanState:
+ ExecIndexScanInitializeDSM((IndexScanState *) planstate,
+ d->pcxt);
+ break;
case T_ForeignScanState:
ExecForeignScanInitializeDSM((ForeignScanState *) planstate,
d->pcxt);
@@ -725,6 +734,9 @@ ExecParallelInitializeWorker(PlanState *planstate, shm_toc *toc)
case T_SeqScanState:
ExecSeqScanInitializeWorker((SeqScanState *) planstate, toc);
break;
+ case T_IndexScanState:
+ ExecIndexScanInitializeWorker((IndexScanState *) planstate, toc);
+ break;
case T_ForeignScanState:
ExecForeignScanInitializeWorker((ForeignScanState *) planstate,
toc);
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 5734550d2c7..0a9dfdbaf30 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -22,6 +22,9 @@
* ExecEndIndexScan releases all storage.
* ExecIndexMarkPos marks scan position.
* ExecIndexRestrPos restores scan position.
+ * ExecIndexScanEstimate estimates DSM space needed for parallel index scan
+ * ExecIndexScanInitializeDSM initialize DSM for parallel indexscan
+ * ExecIndexScanInitializeWorker attach to DSM info in parallel worker
*/
#include "postgres.h"
@@ -514,6 +517,18 @@ ExecIndexScan(IndexScanState *node)
void
ExecReScanIndexScan(IndexScanState *node)
{
+ bool reset_parallel_scan = true;
+
+ /*
+ * If we are here to just update the scan keys, then don't reset parallel
+ * scan. We don't want each of the participating process in the parallel
+ * scan to update the shared parallel scan state at the start of the scan.
+ * It is quite possible that one of the participants has already begun
+ * scanning the index when another has yet to start it.
+ */
+ if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
+ reset_parallel_scan = false;
+
/*
* If we are doing runtime key calculations (ie, any of the index key
* values weren't simple Consts), compute the new key values. But first,
@@ -539,10 +554,21 @@ ExecReScanIndexScan(IndexScanState *node)
reorderqueue_pop(node);
}
- /* reset index scan */
- index_rescan(node->iss_ScanDesc,
- node->iss_ScanKeys, node->iss_NumScanKeys,
- node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+ /*
+ * Reset (parallel) index scan. For parallel-aware nodes, the scan
+ * descriptor is initialized during actual execution of node and we can
+ * reach here before that (ex. during execution of nest loop join). So,
+ * avoid updating the scan descriptor at that time.
+ */
+ if (node->iss_ScanDesc)
+ {
+ index_rescan(node->iss_ScanDesc,
+ node->iss_ScanKeys, node->iss_NumScanKeys,
+ node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+
+ if (reset_parallel_scan && node->iss_ScanDesc->parallel_scan)
+ index_parallelrescan(node->iss_ScanDesc);
+ }
node->iss_ReachedEnd = false;
ExecScanReScan(&node->ss);
@@ -1013,22 +1039,29 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
}
/*
- * Initialize scan descriptor.
+ * for parallel-aware node, we initialize the scan descriptor after
+ * initializing the shared memory for parallel execution.
*/
- indexstate->iss_ScanDesc = index_beginscan(currentRelation,
- indexstate->iss_RelationDesc,
- estate->es_snapshot,
- indexstate->iss_NumScanKeys,
+ if (!node->scan.plan.parallel_aware)
+ {
+ /*
+ * Initialize scan descriptor.
+ */
+ indexstate->iss_ScanDesc = index_beginscan(currentRelation,
+ indexstate->iss_RelationDesc,
+ estate->es_snapshot,
+ indexstate->iss_NumScanKeys,
indexstate->iss_NumOrderByKeys);
- /*
- * If no run-time keys to calculate, go ahead and pass the scankeys to the
- * index AM.
- */
- if (indexstate->iss_NumRuntimeKeys == 0)
- index_rescan(indexstate->iss_ScanDesc,
- indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
+ /*
+ * If no run-time keys to calculate, go ahead and pass the scankeys to
+ * the index AM.
+ */
+ if (indexstate->iss_NumRuntimeKeys == 0)
+ index_rescan(indexstate->iss_ScanDesc,
+ indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys);
+ }
/*
* all done.
@@ -1590,3 +1623,91 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
else if (n_array_keys != 0)
elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed");
}
+
+/* ----------------------------------------------------------------
+ * Parallel Scan Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ * ExecIndexScanEstimate
+ *
+ * estimates the space required to serialize indexscan node.
+ * ----------------------------------------------------------------
+ */
+void
+ExecIndexScanEstimate(IndexScanState *node,
+ ParallelContext *pcxt)
+{
+ EState *estate = node->ss.ps.state;
+
+ node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc,
+ estate->es_snapshot);
+ shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen);
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/* ----------------------------------------------------------------
+ * ExecIndexScanInitializeDSM
+ *
+ * Set up a parallel index scan descriptor.
+ * ----------------------------------------------------------------
+ */
+void
+ExecIndexScanInitializeDSM(IndexScanState *node,
+ ParallelContext *pcxt)
+{
+ EState *estate = node->ss.ps.state;
+ ParallelIndexScanDesc piscan;
+
+ piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen);
+ index_parallelscan_initialize(node->ss.ss_currentRelation,
+ node->iss_RelationDesc,
+ estate->es_snapshot,
+ piscan);
+ shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
+ node->iss_ScanDesc =
+ index_beginscan_parallel(node->ss.ss_currentRelation,
+ node->iss_RelationDesc,
+ node->iss_NumScanKeys,
+ node->iss_NumOrderByKeys,
+ piscan);
+
+ /*
+ * If no run-time keys to calculate, go ahead and pass the scankeys to the
+ * index AM.
+ */
+ if (node->iss_NumRuntimeKeys == 0)
+ index_rescan(node->iss_ScanDesc,
+ node->iss_ScanKeys, node->iss_NumScanKeys,
+ node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+}
+
+/* ----------------------------------------------------------------
+ * ExecIndexScanInitializeWorker
+ *
+ * Copy relevant information from TOC into planstate.
+ * ----------------------------------------------------------------
+ */
+void
+ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc)
+{
+ ParallelIndexScanDesc piscan;
+
+ piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+ node->iss_ScanDesc =
+ index_beginscan_parallel(node->ss.ss_currentRelation,
+ node->iss_RelationDesc,
+ node->iss_NumScanKeys,
+ node->iss_NumOrderByKeys,
+ piscan);
+
+ /*
+ * If no run-time keys to calculate, go ahead and pass the scankeys to the
+ * index AM.
+ */
+ if (node->iss_NumRuntimeKeys == 0)
+ index_rescan(node->iss_ScanDesc,
+ node->iss_ScanKeys, node->iss_NumScanKeys,
+ node->iss_OrderByKeys, node->iss_NumOrderByKeys);
+}
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 85505c57d36..eeacf815e35 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -127,8 +127,6 @@ static void subquery_push_qual(Query *subquery,
static void recurse_push_qual(Node *setOp, Query *topquery,
RangeTblEntry *rte, Index rti, Node *qual);
static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
-static int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
- BlockNumber index_pages);
/*
@@ -2885,7 +2883,7 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
* "heap_pages" is the number of pages from the table that we expect to scan.
* "index_pages" is the number of pages from the index that we expect to scan.
*/
-static int
+int
compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
BlockNumber index_pages)
{
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a43daa744c7..d01630f8dba 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -391,7 +391,8 @@ cost_gather(GatherPath *path, PlannerInfo *root,
* we have to fetch from the table, so they don't reduce the scan cost.
*/
void
-cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
+cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
+ bool partial_path)
{
IndexOptInfo *index = path->indexinfo;
RelOptInfo *baserel = index->rel;
@@ -400,6 +401,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
List *qpquals;
Cost startup_cost = 0;
Cost run_cost = 0;
+ Cost cpu_run_cost = 0;
Cost indexStartupCost;
Cost indexTotalCost;
Selectivity indexSelectivity;
@@ -413,6 +415,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
Cost cpu_per_tuple;
double tuples_fetched;
double pages_fetched;
+ double rand_heap_pages;
+ double index_pages;
/* Should only be applied to base relations */
Assert(IsA(baserel, RelOptInfo) &&
@@ -459,7 +463,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
amcostestimate = (amcostestimate_function) index->amcostestimate;
amcostestimate(root, path, loop_count,
&indexStartupCost, &indexTotalCost,
- &indexSelectivity, &indexCorrelation);
+ &indexSelectivity, &indexCorrelation,
+ &index_pages);
/*
* Save amcostestimate's results for possible use in bitmap scan planning.
@@ -526,6 +531,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
if (indexonly)
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+ rand_heap_pages = pages_fetched;
+
max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
/*
@@ -564,6 +571,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
if (indexonly)
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+ rand_heap_pages = pages_fetched;
+
/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
max_IO_cost = pages_fetched * spc_random_page_cost;
@@ -583,6 +592,29 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
min_IO_cost = 0;
}
+ if (partial_path)
+ {
+ /*
+ * Estimate the number of parallel workers required to scan index. Use
+ * the number of heap pages computed considering heap fetches won't be
+ * sequential as for parallel scans the pages are accessed in random
+ * order.
+ */
+ path->path.parallel_workers = compute_parallel_worker(baserel,
+ (BlockNumber) rand_heap_pages,
+ (BlockNumber) index_pages);
+
+ /*
+ * Fall out if workers can't be assigned for parallel scan, because in
+ * such a case this path will be rejected. So there is no benefit in
+ * doing extra computation.
+ */
+ if (path->path.parallel_workers <= 0)
+ return;
+
+ path->path.parallel_aware = true;
+ }
+
/*
* Now interpolate based on estimated index order correlation to get total
* disk I/O cost for main table accesses.
@@ -602,11 +634,24 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
startup_cost += qpqual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
- run_cost += cpu_per_tuple * tuples_fetched;
+ cpu_run_cost += cpu_per_tuple * tuples_fetched;
/* tlist eval costs are paid per output row, not per tuple scanned */
startup_cost += path->path.pathtarget->cost.startup;
- run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
+ cpu_run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
+
+ /* Adjust costing for parallelism, if used. */
+ if (path->path.parallel_workers > 0)
+ {
+ double parallel_divisor = get_parallel_divisor(&path->path);
+
+ path->path.rows = clamp_row_est(path->path.rows / parallel_divisor);
+
+ /* The CPU cost is divided among all the workers. */
+ cpu_run_cost /= parallel_divisor;
+ }
+
+ run_cost += cpu_run_cost;
path->path.startup_cost = startup_cost;
path->path.total_cost = startup_cost + run_cost;
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 52834689881..56eccafd7b0 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -813,7 +813,7 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel,
/*
* build_index_paths
* Given an index and a set of index clauses for it, construct zero
- * or more IndexPaths.
+ * or more IndexPaths. It also constructs zero or more partial IndexPaths.
*
* We return a list of paths because (1) this routine checks some cases
* that should cause us to not generate any IndexPath, and (2) in some
@@ -1042,8 +1042,41 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
NoMovementScanDirection,
index_only_scan,
outer_relids,
- loop_count);
+ loop_count,
+ false);
result = lappend(result, ipath);
+
+ /*
+ * If appropriate, consider parallel index scan. We don't allow
+ * parallel index scan for bitmap or index only scans.
+ */
+ if (index->amcanparallel && !index_only_scan &&
+ rel->consider_parallel && outer_relids == NULL &&
+ scantype != ST_BITMAPSCAN)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ clause_columns,
+ orderbyclauses,
+ orderbyclausecols,
+ useful_pathkeys,
+ index_is_ordered ?
+ ForwardScanDirection :
+ NoMovementScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count,
+ true);
+
+ /*
+ * if, after costing the path, we find that it's not worth
+ * using parallel workers, just free it.
+ */
+ if (ipath->path.parallel_workers > 0)
+ add_partial_path(rel, (Path *) ipath);
+ else
+ pfree(ipath);
+ }
}
/*
@@ -1066,8 +1099,36 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
BackwardScanDirection,
index_only_scan,
outer_relids,
- loop_count);
+ loop_count,
+ false);
result = lappend(result, ipath);
+
+ /* If appropriate, consider parallel index scan */
+ if (index->amcanparallel && !index_only_scan &&
+ rel->consider_parallel && outer_relids == NULL &&
+ scantype != ST_BITMAPSCAN)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ clause_columns,
+ NIL,
+ NIL,
+ useful_pathkeys,
+ BackwardScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count,
+ true);
+
+ /*
+ * if, after costing the path, we find that it's not worth
+ * using parallel workers, just free it.
+ */
+ if (ipath->path.parallel_workers > 0)
+ add_partial_path(rel, (Path *) ipath);
+ else
+ pfree(ipath);
+ }
}
}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index abb4f12cea1..3d33d469713 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -5333,7 +5333,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
indexScanPath = create_index_path(root, indexInfo,
NIL, NIL, NIL, NIL, NIL,
ForwardScanDirection, false,
- NULL, 1.0);
+ NULL, 1.0, false);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index f440875ceb1..324829690d2 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -744,10 +744,9 @@ add_path_precheck(RelOptInfo *parent_rel,
* As with add_path, we pfree paths that are found to be dominated by
* another partial path; this requires that there be no other references to
* such paths yet. Hence, GatherPaths must not be created for a rel until
- * we're done creating all partial paths for it. We do not currently build
- * partial indexscan paths, so there is no need for an exception for
- * IndexPaths here; for safety, we instead Assert that a path to be freed
- * isn't an IndexPath.
+ * we're done creating all partial paths for it. Unlike add_path, we don't
+ * take an exception for IndexPaths as partial index paths won't be
+ * referenced by partial BitmapHeapPaths.
*/
void
add_partial_path(RelOptInfo *parent_rel, Path *new_path)
@@ -826,8 +825,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
{
parent_rel->partial_pathlist =
list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev);
- /* we should not see IndexPaths here, so always safe to delete */
- Assert(!IsA(old_path, IndexPath));
pfree(old_path);
/* p1_prev does not advance */
}
@@ -860,8 +857,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
}
else
{
- /* we should not see IndexPaths here, so always safe to delete */
- Assert(!IsA(new_path, IndexPath));
/* Reject and recycle the new path */
pfree(new_path);
}
@@ -1005,6 +1000,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer
* 'required_outer' is the set of outer relids for a parameterized path.
* 'loop_count' is the number of repetitions of the indexscan to factor into
* estimates of caching behavior.
+ * 'partial_path' is true if constructing a parallel index scan path.
*
* Returns the new path node.
*/
@@ -1019,7 +1015,8 @@ create_index_path(PlannerInfo *root,
ScanDirection indexscandir,
bool indexonly,
Relids required_outer,
- double loop_count)
+ double loop_count,
+ bool partial_path)
{
IndexPath *pathnode = makeNode(IndexPath);
RelOptInfo *rel = index->rel;
@@ -1049,7 +1046,7 @@ create_index_path(PlannerInfo *root,
pathnode->indexorderbycols = indexorderbycols;
pathnode->indexscandir = indexscandir;
- cost_index(pathnode, root, loop_count);
+ cost_index(pathnode, root, loop_count, partial_path);
return pathnode;
}
@@ -3247,7 +3244,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
memcpy(newpath, ipath, sizeof(IndexPath));
newpath->path.param_info =
get_baserel_parampathinfo(root, rel, required_outer);
- cost_index(newpath, root, loop_count);
+ cost_index(newpath, root, loop_count, false);
return (Path *) newpath;
}
case T_BitmapHeapScan:
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 7836e6b3f85..4ed27054a11 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -241,6 +241,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
info->amoptionalkey = amroutine->amoptionalkey;
info->amsearcharray = amroutine->amsearcharray;
info->amsearchnulls = amroutine->amsearchnulls;
+ info->amcanparallel = amroutine->amcanparallel;
info->amhasgettuple = (amroutine->amgettuple != NULL);
info->amhasgetbitmap = (amroutine->amgetbitmap != NULL);
info->amcostestimate = amroutine->amcostestimate;
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index fa32e9eabe1..d14f0f97a8a 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6471,7 +6471,8 @@ add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
void
btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost,
- Selectivity *indexSelectivity, double *indexCorrelation)
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
IndexOptInfo *index = path->indexinfo;
List *qinfos;
@@ -6761,12 +6762,14 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
}
void
hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost,
- Selectivity *indexSelectivity, double *indexCorrelation)
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
List *qinfos;
GenericCosts costs;
@@ -6807,12 +6810,14 @@ hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
}
void
gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost,
- Selectivity *indexSelectivity, double *indexCorrelation)
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
IndexOptInfo *index = path->indexinfo;
List *qinfos;
@@ -6866,12 +6871,14 @@ gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
}
void
spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost,
- Selectivity *indexSelectivity, double *indexCorrelation)
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
IndexOptInfo *index = path->indexinfo;
List *qinfos;
@@ -6925,6 +6932,7 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
}
@@ -7222,7 +7230,8 @@ gincost_scalararrayopexpr(PlannerInfo *root,
void
gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost,
- Selectivity *indexSelectivity, double *indexCorrelation)
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals;
@@ -7537,6 +7546,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+ *indexPages = dataPagesFetched;
}
/*
@@ -7545,7 +7555,8 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
void
brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost,
- Selectivity *indexSelectivity, double *indexCorrelation)
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals;
@@ -7597,6 +7608,7 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+ *indexPages = index->pages;
/* XXX what about pages_per_range? */
}
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index b0730bfefae..f919cf8b873 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -95,7 +95,8 @@ typedef void (*amcostestimate_function) (struct PlannerInfo *root,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
- double *indexCorrelation);
+ double *indexCorrelation,
+ double *indexPages);
/* parse index reloptions */
typedef bytea *(*amoptions_function) (Datum reloptions,
@@ -188,6 +189,8 @@ typedef struct IndexAmRoutine
bool amclusterable;
/* does AM handle predicate locks? */
bool ampredlocks;
+ /* does AM support parallel scan? */
+ bool amcanparallel;
/* type of data stored in index, or InvalidOid if variable */
Oid amkeytype;
diff --git a/src/include/executor/nodeIndexscan.h b/src/include/executor/nodeIndexscan.h
index 46d6f45e837..ea3f3a5cc4d 100644
--- a/src/include/executor/nodeIndexscan.h
+++ b/src/include/executor/nodeIndexscan.h
@@ -14,6 +14,7 @@
#ifndef NODEINDEXSCAN_H
#define NODEINDEXSCAN_H
+#include "access/parallel.h"
#include "nodes/execnodes.h"
extern IndexScanState *ExecInitIndexScan(IndexScan *node, EState *estate, int eflags);
@@ -22,6 +23,9 @@ extern void ExecEndIndexScan(IndexScanState *node);
extern void ExecIndexMarkPos(IndexScanState *node);
extern void ExecIndexRestrPos(IndexScanState *node);
extern void ExecReScanIndexScan(IndexScanState *node);
+extern void ExecIndexScanEstimate(IndexScanState *node, ParallelContext *pcxt);
+extern void ExecIndexScanInitializeDSM(IndexScanState *node, ParallelContext *pcxt);
+extern void ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc);
/*
* These routines are exported to share code with nodeIndexonlyscan.c and
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 42c6c58ff9c..9f41babf353 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1363,6 +1363,7 @@ typedef struct
* SortSupport for reordering ORDER BY exprs
* OrderByTypByVals is the datatype of order by expression pass-by-value?
* OrderByTypLens typlens of the datatypes of order by expressions
+ * pscan_len size of parallel index scan descriptor
* ----------------
*/
typedef struct IndexScanState
@@ -1389,6 +1390,7 @@ typedef struct IndexScanState
SortSupport iss_SortSupport;
bool *iss_OrderByTypByVals;
int16 *iss_OrderByTypLens;
+ Size iss_PscanLen;
} IndexScanState;
/* ----------------
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 643be54d405..f7ac6f600fe 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -629,6 +629,7 @@ typedef struct IndexOptInfo
bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */
bool amhasgettuple; /* does AM have amgettuple interface? */
bool amhasgetbitmap; /* does AM have amgetbitmap interface? */
+ bool amcanparallel; /* does AM support parallel scan? */
/* Rather than include amapi.h here, we declare amcostestimate like this */
void (*amcostestimate) (); /* AM's cost estimator */
} IndexOptInfo;
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 0e68264a41f..72200fa5310 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -76,7 +76,7 @@ extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info);
extern void cost_index(IndexPath *path, PlannerInfo *root,
- double loop_count);
+ double loop_count, bool partial_path);
extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info,
Path *bitmapqual, double loop_count);
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 7b413176219..53cad247dc4 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -47,7 +47,8 @@ extern IndexPath *create_index_path(PlannerInfo *root,
ScanDirection indexscandir,
bool indexonly,
Relids required_outer,
- double loop_count);
+ double loop_count,
+ bool partial_path);
extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 81e7a4274da..ebda308c416 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -54,6 +54,8 @@ extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
List *initial_rels);
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
+extern int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
+ BlockNumber index_pages);
#ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
diff --git a/src/include/utils/index_selfuncs.h b/src/include/utils/index_selfuncs.h
index d3172420f99..17d165ca651 100644
--- a/src/include/utils/index_selfuncs.h
+++ b/src/include/utils/index_selfuncs.h
@@ -28,41 +28,47 @@ extern void brincostestimate(struct PlannerInfo *root,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
- double *indexCorrelation);
+ double *indexCorrelation,
+ double *indexPages);
extern void btcostestimate(struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
- double *indexCorrelation);
+ double *indexCorrelation,
+ double *indexPages);
extern void hashcostestimate(struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
- double *indexCorrelation);
+ double *indexCorrelation,
+ double *indexPages);
extern void gistcostestimate(struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
- double *indexCorrelation);
+ double *indexCorrelation,
+ double *indexPages);
extern void spgcostestimate(struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
- double *indexCorrelation);
+ double *indexCorrelation,
+ double *indexPages);
extern void gincostestimate(struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
- double *indexCorrelation);
+ double *indexCorrelation,
+ double *indexPages);
#endif /* INDEX_SELFUNCS_H */
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 3692d4f1b81..48fb80e90c8 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -125,6 +125,29 @@ select count(*) from tenk1 where (two, four) not in
(1 row)
alter table tenk2 reset (parallel_workers);
+-- test parallel index scans.
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+explain (costs off)
+ select count((unique1)) from tenk1 where hundred > 1;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Finalize Aggregate
+ -> Gather
+ Workers Planned: 4
+ -> Partial Aggregate
+ -> Parallel Index Scan using tenk1_hundred on tenk1
+ Index Cond: (hundred > 1)
+(6 rows)
+
+select count((unique1)) from tenk1 where hundred > 1;
+ count
+-------
+ 9800
+(1 row)
+
+reset enable_seqscan;
+reset enable_bitmapscan;
set force_parallel_mode=1;
explain (costs off)
select stringu1::int2 from tenk1 where unique1 = 1;
diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql
index f4f9dd5ab67..f5bc4d18733 100644
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@@ -48,6 +48,17 @@ select count(*) from tenk1 where (two, four) not in
(select hundred, thousand from tenk2 where thousand > 100);
alter table tenk2 reset (parallel_workers);
+-- test parallel index scans.
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+
+explain (costs off)
+ select count((unique1)) from tenk1 where hundred > 1;
+select count((unique1)) from tenk1 where hundred > 1;
+
+reset enable_seqscan;
+reset enable_bitmapscan;
+
set force_parallel_mode=1;
explain (costs off)