Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
src/backend/optimizer/path/costsize.c | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 877827dcb52..c3daacd3ea6 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -129,6 +129,7 @@ bool enable_hashjoin = true;
bool enable_gathermerge = true;
bool enable_partition_wise_join = false;
bool enable_parallel_append = true;
+bool enable_parallel_hash = true;
typedef struct
{
@@ -3130,16 +3131,19 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
JoinType jointype,
List *hashclauses,
Path *outer_path, Path *inner_path,
- JoinPathExtraData *extra)
+ JoinPathExtraData *extra,
+ bool parallel_hash)
{
Cost startup_cost = 0;
Cost run_cost = 0;
double outer_path_rows = outer_path->rows;
double inner_path_rows = inner_path->rows;
+ double inner_path_rows_total = inner_path_rows;
int num_hashclauses = list_length(hashclauses);
int numbuckets;
int numbatches;
int num_skew_mcvs;
+ size_t space_allowed; /* set by ExecChooseHashTableSize(); unused here */
/* cost of source data */
startup_cost += outer_path->startup_cost;
@@ -3161,6 +3165,15 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
/*
+ * If this is a parallel hash build, then the value we have for
+ * inner_path_rows_total currently refers only to the rows returned by
+ * each participant. For shared hash table size estimation we need the
+ * total number, so we must undo the division.
+ */
+ if (parallel_hash)
+ inner_path_rows_total *= get_parallel_divisor(inner_path);
+
+ /*
* Get hash table size that executor would use for inner relation.
*
* XXX for the moment, always assume that skew optimization will be
@@ -3170,9 +3183,12 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
* XXX at some point it might be interesting to try to account for skew
* optimization in the cost estimate, but for now, we don't.
*/
- ExecChooseHashTableSize(inner_path_rows,
+ ExecChooseHashTableSize(inner_path_rows_total,
inner_path->pathtarget->width,
true, /* useskew */
+ parallel_hash, /* try_combined_work_mem */
+ outer_path->parallel_workers,
+ &space_allowed,
&numbuckets,
&numbatches,
&num_skew_mcvs);
@@ -3204,6 +3220,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
workspace->run_cost = run_cost;
workspace->numbuckets = numbuckets;
workspace->numbatches = numbatches;
+ workspace->inner_rows_total = inner_path_rows_total;
}
/*
@@ -3226,6 +3243,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
Path *inner_path = path->jpath.innerjoinpath;
double outer_path_rows = outer_path->rows;
double inner_path_rows = inner_path->rows;
+ double inner_path_rows_total = workspace->inner_rows_total;
List *hashclauses = path->path_hashclauses;
Cost startup_cost = workspace->startup_cost;
Cost run_cost = workspace->run_cost;
@@ -3266,6 +3284,9 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
/* mark the path with estimated # of batches */
path->num_batches = numbatches;
+ /* store the total number of tuples (sum of partial row estimates) */
+ path->inner_rows_total = inner_path_rows_total;
+
/* and compute the number of "virtual" buckets in the whole join */
virtualbuckets = (double) numbuckets * (double) numbatches;
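
For reference, the arithmetic being undone in initial_cost_hashjoin(): a
partial path's row estimate is the whole-relation estimate divided by a
"parallel divisor" (the worker count plus a diminishing leader
contribution), so sizing a shared hash table must multiply that divisor
back in. The standalone sketch below models the round trip; the 0.3
leader-contribution constant follows get_parallel_divisor() in
costsize.c of this era, but treat it as an illustrative model, not the
planner's exact code.

#include <stdio.h>

/*
 * Simplified model of get_parallel_divisor() from costsize.c: the
 * divisor is the worker count plus a leader contribution that shrinks
 * as workers are added (1.0 - 0.3 * workers, floored at zero).
 */
static double
parallel_divisor(int parallel_workers)
{
	double divisor = parallel_workers;
	double leader_contribution = 1.0 - 0.3 * parallel_workers;

	if (leader_contribution > 0.0)
		divisor += leader_contribution;
	return divisor;
}

int
main(void)
{
	double inner_rows_total = 1000000.0; /* whole-relation estimate */
	int workers = 2;

	/* Partial paths carry per-participant row counts... */
	double per_participant = inner_rows_total / parallel_divisor(workers);

	/* ...so shared hash table sizing multiplies the divisor back in. */
	double recovered = per_participant * parallel_divisor(workers);

	printf("per participant: %.0f, recovered total: %.0f\n",
	       per_participant, recovered);
	return 0;
}

With two workers the divisor is 2.4, so each participant is estimated to
return roughly 416667 rows, and multiplying back recovers the
1000000-row total that ExecChooseHashTableSize() then uses to pick
bucket and batch counts for the shared table.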