aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path/costsize.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us>, 2009-03-21 00:04:40 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us>, 2009-03-21 00:04:40 +0000
commit: 596efd27edce20bba706f50de99a0f15bcc2a567 (patch)
tree: 63c07c3c310e1b072f0a29a79220c81254dba3d8 /src/backend/optimizer/path/costsize.c
parent: 249d936bed069877923f0369bd2ce51a6f8f925e (diff)
download: postgresql-596efd27edce20bba706f50de99a0f15bcc2a567.tar.gz
          postgresql-596efd27edce20bba706f50de99a0f15bcc2a567.zip
Optimize multi-batch hash joins when the outer relation has a nonuniform distribution, by creating a special fast path for the (first few) most common values of the outer relation. Tuples having hashvalues matching the MCVs are effectively forced to be in the first batch, so that we never write them out to the batch temp files.
Bryce Cutt and Ramon Lawrence, with some editorialization by me.
Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r-- src/backend/optimizer/path/costsize.c | 18
1 file changed, 15 insertions, 3 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 07ddf43c8d6..b07a2599bbe 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.204 2009/02/06 23:43:23 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.205 2009/03/21 00:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1821,6 +1821,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
int num_hashclauses = list_length(hashclauses);
int numbuckets;
int numbatches;
+ int num_skew_mcvs;
double virtualbuckets;
Selectivity innerbucketsize;
ListCell *hcl;
@@ -1862,11 +1863,22 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
* inner_path_rows;
run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
- /* Get hash table size that executor would use for inner relation */
+ /*
+ * Get hash table size that executor would use for inner relation.
+ *
+ * XXX for the moment, always assume that skew optimization will be
+ * performed. As long as SKEW_WORK_MEM_PERCENT is small, it's not worth
+ * trying to determine that for sure.
+ *
+ * XXX at some point it might be interesting to try to account for skew
+ * optimization in the cost estimate, but for now, we don't.
+ */
ExecChooseHashTableSize(inner_path_rows,
inner_path->parent->width,
+ true, /* useskew */
&numbuckets,
- &numbatches);
+ &numbatches,
+ &num_skew_mcvs);
virtualbuckets = (double) numbuckets *(double) numbatches;
/*