Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r--   src/backend/optimizer/path/costsize.c | 70 ++++++++++++++++++++++++++-
1 file changed, 69 insertions(+), 1 deletion(-)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b5a0033721f..8cf694b61dc 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -77,6 +77,7 @@
 #include "access/htup_details.h"
 #include "access/tsmapi.h"
 #include "executor/executor.h"
+#include "executor/nodeAgg.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -128,6 +129,8 @@ bool		enable_bitmapscan = true;
 bool		enable_tidscan = true;
 bool		enable_sort = true;
 bool		enable_hashagg = true;
+bool		enable_hashagg_disk = true;
+bool		enable_groupingsets_hash_disk = false;
 bool		enable_nestloop = true;
 bool		enable_material = true;
 bool		enable_mergejoin = true;
@@ -2153,7 +2156,7 @@ cost_agg(Path *path, PlannerInfo *root,
 		 int numGroupCols, double numGroups,
 		 List *quals,
 		 Cost input_startup_cost, Cost input_total_cost,
-		 double input_tuples)
+		 double input_tuples, double input_width)
 {
 	double		output_tuples;
 	Cost		startup_cost;
@@ -2228,15 +2231,80 @@ cost_agg(Path *path, PlannerInfo *root,
 		startup_cost += disable_cost;
 		startup_cost += aggcosts->transCost.startup;
 		startup_cost += aggcosts->transCost.per_tuple * input_tuples;
+		/* cost of computing hash value */
 		startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
 		startup_cost += aggcosts->finalCost.startup;
+
 		total_cost = startup_cost;
 		total_cost += aggcosts->finalCost.per_tuple * numGroups;
+		/* cost of retrieving from hash table */
 		total_cost += cpu_tuple_cost * numGroups;
 		output_tuples = numGroups;
 	}
 
 	/*
+	 * Add the disk costs of hash aggregation that spills to disk.
+	 *
+	 * Groups that go into the hash table stay in memory until finalized,
+	 * so spilling and reprocessing tuples doesn't incur additional
+	 * invocations of transCost or finalCost. Furthermore, the computed
+	 * hash value is stored with the spilled tuples, so we don't incur
+	 * extra invocations of the hash function.
+	 *
+	 * Hash Agg begins returning tuples after the first batch is
+	 * complete. Accrue writes (spilled tuples) to startup_cost and to
+	 * total_cost; accrue reads only to total_cost.
+	 */
+	if (aggstrategy == AGG_HASHED || aggstrategy == AGG_MIXED)
+	{
+		double		pages_written = 0.0;
+		double		pages_read = 0.0;
+		double		hashentrysize;
+		double		nbatches;
+		Size		mem_limit;
+		uint64		ngroups_limit;
+		int			num_partitions;
+
+
+		/*
+		 * Estimate number of batches based on the computed limits. If less
+		 * than or equal to one, all groups are expected to fit in memory;
+		 * otherwise we expect to spill.
+		 */
+		hashentrysize = hash_agg_entry_size(
+			aggcosts->numAggs, input_width, aggcosts->transitionSpace);
+		hash_agg_set_limits(hashentrysize, numGroups, 0, &mem_limit,
+							&ngroups_limit, &num_partitions);
+
+		nbatches = Max( (numGroups * hashentrysize) / mem_limit,
+						numGroups / ngroups_limit );
+
+		/*
+		 * Estimate number of pages read and written. For each level of
+		 * recursion, a tuple must be written and then later read.
+		 */
+		if (nbatches > 1.0)
+		{
+			double		depth;
+			double		pages;
+
+			pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
+
+			/*
+			 * The number of partitions can change at different levels of
+			 * recursion; but for the purposes of this calculation assume it
+			 * stays constant.
+			 */
+			depth = ceil( log(nbatches - 1) / log(num_partitions) );
+			pages_written = pages_read = pages * depth;
+		}
+
+		startup_cost += pages_written * random_page_cost;
+		total_cost += pages_written * random_page_cost;
+		total_cost += pages_read * seq_page_cost;
+	}
+
+	/*
 	 * If there are quals (HAVING quals), account for their cost and
 	 * selectivity.
 	 */
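To make the cost arithmetic concrete, here is a standalone sketch (not part of the patch, and not PostgreSQL source) that reproduces only the batch/depth/page math above. Everything else is an assumption: hash_agg_entry_size() and hash_agg_set_limits() are replaced by simplified stand-ins, relation_byte_size() is approximated as tuples * width, and the inputs (10M tuples of width 32, 1M groups, a 4MB memory limit, a partition fanout of 32) are made-up example values.

/*
 * Sketch of the spill-cost arithmetic added to cost_agg().
 * BLCKSZ, the page-cost GUC defaults, the per-entry overhead, the
 * memory limit, and the fanout are all assumed example values; the
 * real code derives them via hash_agg_entry_size(),
 * hash_agg_set_limits(), and relation_byte_size().
 */
#include <math.h>
#include <stdio.h>

#define BLCKSZ		8192	/* PostgreSQL default block size */
#define Max(a, b)	((a) > (b) ? (a) : (b))

static const double seq_page_cost = 1.0;	/* default GUC settings */
static const double random_page_cost = 4.0;

int
main(void)
{
	double		input_tuples = 10000000.0;	/* assumed input size */
	double		input_width = 32.0;			/* assumed tuple width */
	double		numGroups = 1000000.0;		/* assumed group count */

	/* stand-in for hash_agg_entry_size(): width plus assumed overhead */
	double		hashentrysize = input_width + 64.0;

	/* stand-ins for hash_agg_set_limits(): 4MB limit, fanout of 32 */
	double		mem_limit = 4.0 * 1024 * 1024;
	double		ngroups_limit = mem_limit / hashentrysize;
	int			num_partitions = 32;

	double		pages_written = 0.0;
	double		pages_read = 0.0;
	double		depth = 0.0;

	/* same batch estimate as the patch: whichever limit binds first */
	double		nbatches = Max((numGroups * hashentrysize) / mem_limit,
							   numGroups / ngroups_limit);

	if (nbatches > 1.0)
	{
		/* total input pages; simplified relation_byte_size() / BLCKSZ */
		double		pages = (input_tuples * input_width) / BLCKSZ;

		/* each recursion level rewrites and then rereads the input */
		depth = ceil(log(nbatches - 1) / log(num_partitions));
		pages_written = pages_read = pages * depth;
	}

	/*
	 * As in the patch: writes are charged at random_page_cost and count
	 * toward startup as well as total cost; reads are charged at
	 * seq_page_cost and affect only the total cost.
	 */
	printf("nbatches=%.1f depth=%.0f\n", nbatches, depth);
	printf("startup disk cost: %.0f\n", pages_written * random_page_cost);
	printf("total disk cost:   %.0f\n",
		   pages_written * random_page_cost + pages_read * seq_page_cost);
	return 0;
}

With these example inputs, nbatches comes out to roughly 23, which a fanout of 32 resolves in a single level of recursion (depth = 1), so the whole input is written and read back once. The asymmetric charging presumably reflects that spill writes are scattered across many partition files while each batch is later read back sequentially, and charging writes to startup_cost models the fact that the first batch must be fully spilled before Hash Agg returns its first tuple.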