diff options
Diffstat (limited to 'src/backend/optimizer/path/pathkeys.c')
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 581 |
1 files changed, 0 insertions, 581 deletions
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 1d21215f853..86a35cdef17 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -17,24 +17,17 @@ */ #include "postgres.h" -#include <float.h> - -#include "miscadmin.h" #include "access/stratnum.h" #include "catalog/pg_opfamily.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" -#include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" #include "utils/lsyscache.h" -#include "utils/selfuncs.h" -/* Consider reordering of GROUP BY keys? */ -bool enable_group_by_reordering = true; static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, @@ -342,527 +335,6 @@ pathkeys_contained_in(List *keys1, List *keys2) } /* - * group_keys_reorder_by_pathkeys - * Reorder GROUP BY keys to match pathkeys of input path. - * - * Function returns new lists (pathkeys and clauses), original GROUP BY lists - * stay untouched. - * - * Returns the number of GROUP BY keys with a matching pathkey. - */ -int -group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, - List **group_clauses) -{ - List *new_group_pathkeys = NIL, - *new_group_clauses = NIL; - ListCell *lc; - int n; - - if (pathkeys == NIL || *group_pathkeys == NIL) - return 0; - - /* - * Walk the pathkeys (determining ordering of the input path) and see if - * there's a matching GROUP BY key. If we find one, we append it to the - * list, and do the same for the clauses. - * - * Once we find the first pathkey without a matching GROUP BY key, the - * rest of the pathkeys are useless and can't be used to evaluate the - * grouping, so we abort the loop and ignore the remaining pathkeys. - * - * XXX Pathkeys are built in a way to allow simply comparing pointers. - */ - foreach(lc, pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(lc); - SortGroupClause *sgc; - - /* abort on first mismatch */ - if (!list_member_ptr(*group_pathkeys, pathkey)) - break; - - new_group_pathkeys = lappend(new_group_pathkeys, pathkey); - - sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, - *group_clauses); - - new_group_clauses = lappend(new_group_clauses, sgc); - } - - /* remember the number of pathkeys with a matching GROUP BY key */ - n = list_length(new_group_pathkeys); - - /* append the remaining group pathkeys (will be treated as not sorted) */ - *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys, - *group_pathkeys); - *group_clauses = list_concat_unique_ptr(new_group_clauses, - *group_clauses); - - return n; -} - -/* - * Used to generate all permutations of a pathkey list. - */ -typedef struct PathkeyMutatorState -{ - List *elemsList; - ListCell **elemCells; - void **elems; - int *positions; - int mutatorNColumns; - int count; -} PathkeyMutatorState; - - -/* - * PathkeyMutatorInit - * Initialize state of the permutation generator. - * - * We want to generate permutations of elements in the "elems" list. We may want - * to skip some number of elements at the beginning (when treating as presorted) - * or at the end (we only permute a limited number of group keys). - * - * The list is decomposed into elements, and we also keep pointers to individual - * cells. This allows us to build the permuted list quickly and cheaply, without - * creating any copies. - */ -static void -PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end) -{ - int i; - int n = end - start; - ListCell *lc; - - memset(state, 0, sizeof(*state)); - - state->mutatorNColumns = n; - - state->elemsList = list_copy(elems); - - state->elems = palloc(sizeof(void *) * n); - state->elemCells = palloc(sizeof(ListCell *) * n); - state->positions = palloc(sizeof(int) * n); - - i = 0; - for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start)) - { - state->elemCells[i] = lc; - state->elems[i] = lfirst(lc); - state->positions[i] = i + 1; - i++; - - if (i >= n) - break; - } -} - -/* Swap two elements of an array. */ -static void -PathkeyMutatorSwap(int *a, int i, int j) -{ - int s = a[i]; - - a[i] = a[j]; - a[j] = s; -} - -/* - * Generate the next permutation of elements. - */ -static bool -PathkeyMutatorNextSet(int *a, int n) -{ - int j, - k, - l, - r; - - j = n - 2; - - while (j >= 0 && a[j] >= a[j + 1]) - j--; - - if (j < 0) - return false; - - k = n - 1; - - while (k >= 0 && a[j] >= a[k]) - k--; - - PathkeyMutatorSwap(a, j, k); - - l = j + 1; - r = n - 1; - - while (l < r) - PathkeyMutatorSwap(a, l++, r--); - - return true; -} - -/* - * PathkeyMutatorNext - * Generate the next permutation of list of elements. - * - * Returns the next permutation (as a list of elements) or NIL if there are no - * more permutations. - */ -static List * -PathkeyMutatorNext(PathkeyMutatorState *state) -{ - int i; - - state->count++; - - /* first permutation is original list */ - if (state->count == 1) - return state->elemsList; - - /* when there are no more permutations, return NIL */ - if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns)) - { - pfree(state->elems); - pfree(state->elemCells); - pfree(state->positions); - - list_free(state->elemsList); - - return NIL; - } - - /* update the list cells to point to the right elements */ - for (i = 0; i < state->mutatorNColumns; i++) - lfirst(state->elemCells[i]) = - (void *) state->elems[state->positions[i] - 1]; - - return state->elemsList; -} - -/* - * Cost of comparing pathkeys. - */ -typedef struct PathkeySortCost -{ - Cost cost; - PathKey *pathkey; -} PathkeySortCost; - -static int -pathkey_sort_cost_comparator(const void *_a, const void *_b) -{ - const PathkeySortCost *a = (PathkeySortCost *) _a; - const PathkeySortCost *b = (PathkeySortCost *) _b; - - if (a->cost < b->cost) - return -1; - else if (a->cost == b->cost) - return 0; - return 1; -} - -/* - * get_cheapest_group_keys_order - * Reorders the group pathkeys / clauses to minimize the comparison cost. - * - * Given the list of pathkeys in '*group_pathkeys', we try to arrange these - * in an order that minimizes the sort costs that will be incurred by the - * GROUP BY. The costs mainly depend on the cost of the sort comparator - * function(s) and the number of distinct values in each column of the GROUP - * BY clause (*group_clauses). Sorting on subsequent columns is only required - * for tiebreak situations where two values sort equally. - * - * In case the input is partially sorted, only the remaining pathkeys are - * considered. 'n_preordered' denotes how many of the leading *group_pathkeys - * the input is presorted by. - * - * Returns true and sets *group_pathkeys and *group_clauses to the newly - * ordered versions of the lists that were passed in via these parameters. - * If no reordering was deemed necessary then we return false, in which case - * the *group_pathkeys and *group_clauses lists are left untouched. The - * original *group_pathkeys and *group_clauses parameter values are never - * destructively modified in place. - */ -static bool -get_cheapest_group_keys_order(PlannerInfo *root, double nrows, - List **group_pathkeys, List **group_clauses, - int n_preordered) -{ - List *new_group_pathkeys = NIL, - *new_group_clauses = NIL, - *var_group_pathkeys; - - ListCell *cell; - PathkeyMutatorState mstate; - double cheapest_sort_cost = DBL_MAX; - - int nFreeKeys; - int nToPermute; - - /* If there are less than 2 unsorted pathkeys, we're done. */ - if (list_length(*group_pathkeys) - n_preordered < 2) - return false; - - /* - * We could exhaustively cost all possible orderings of the pathkeys, but - * for a large number of pathkeys it might be prohibitively expensive. So - * we try to apply simple cheap heuristics first - we sort the pathkeys by - * sort cost (as if the pathkey was sorted independently) and then check - * only the four cheapest pathkeys. The remaining pathkeys are kept - * ordered by cost. - * - * XXX This is a very simple heuristics, but likely to work fine for most - * cases (because the number of GROUP BY clauses tends to be lower than - * 4). But it ignores how the number of distinct values in each pathkey - * affects the following steps. It might be better to use "more expensive" - * pathkey first if it has many distinct values, because it then limits - * the number of comparisons for the remaining pathkeys. But evaluating - * that is likely quite the expensive. - */ - nFreeKeys = list_length(*group_pathkeys) - n_preordered; - nToPermute = 4; - if (nFreeKeys > nToPermute) - { - PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys); - PathkeySortCost *cost = costs; - - /* - * Estimate cost for sorting individual pathkeys skipping the - * pre-ordered pathkeys. - */ - for_each_from(cell, *group_pathkeys, n_preordered) - { - PathKey *pathkey = (PathKey *) lfirst(cell); - List *to_cost = list_make1(pathkey); - - cost->pathkey = pathkey; - cost->cost = cost_sort_estimate(root, to_cost, 0, nrows); - cost++; - - list_free(to_cost); - } - - /* sort the pathkeys by sort cost in ascending order */ - qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator); - - /* - * Rebuild the list of pathkeys - first the preordered ones, then the - * rest ordered by cost. - */ - new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered); - - for (int i = 0; i < nFreeKeys; i++) - new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey); - - pfree(costs); - } - else - { - /* Copy the list, so that we can free the new list by list_free. */ - new_group_pathkeys = list_copy(*group_pathkeys); - nToPermute = nFreeKeys; - } - - Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys)); - - /* - * Generate pathkey lists with permutations of the first nToPermute - * pathkeys. - * - * XXX We simply calculate sort cost for each individual pathkey list, but - * there's room for two dynamic programming optimizations here. Firstly, - * we may pass the current "best" cost to cost_sort_estimate so that it - * can "abort" if the estimated pathkeys list exceeds it. Secondly, it - * could pass the return information about the position when it exceeded - * the cost, and we could skip all permutations with the same prefix. - * - * Imagine we've already found ordering with cost C1, and we're evaluating - * another ordering - cost_sort_estimate() calculates cost by adding the - * pathkeys one by one (more or less), and the cost only grows. If at any - * point it exceeds C1, it can't possibly be "better" so we can discard - * it. But we also know that we can discard all ordering with the same - * prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b) - * then the same thing will happen for any ordering with this prefix. - */ - PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute); - - while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL) - { - Cost cost; - - cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows); - - if (cost < cheapest_sort_cost) - { - list_free(new_group_pathkeys); - new_group_pathkeys = list_copy(var_group_pathkeys); - cheapest_sort_cost = cost; - } - } - - /* Reorder the group clauses according to the reordered pathkeys. */ - foreach(cell, new_group_pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(cell); - - new_group_clauses = lappend(new_group_clauses, - get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, - *group_clauses)); - } - - /* Just append the rest GROUP BY clauses */ - new_group_clauses = list_concat_unique_ptr(new_group_clauses, - *group_clauses); - - *group_pathkeys = new_group_pathkeys; - *group_clauses = new_group_clauses; - - return true; -} - -/* - * get_useful_group_keys_orderings - * Determine which orderings of GROUP BY keys are potentially interesting. - * - * Returns list of PathKeyInfo items, each representing an interesting ordering - * of GROUP BY keys. Each item stores pathkeys and clauses in matching order. - * - * The function considers (and keeps) multiple group by orderings: - * - * - the original ordering, as specified by the GROUP BY clause - * - * - GROUP BY keys reordered to minimize the sort cost - * - * - GROUP BY keys reordered to match path ordering (as much as possible), with - * the tail reordered to minimize the sort cost - * - * - GROUP BY keys to match target ORDER BY clause (as much as possible), with - * the tail reordered to minimize the sort cost - * - * There are other potentially interesting orderings (e.g. it might be best to - * match the first ORDER BY key, order the remaining keys differently and then - * rely on the incremental sort to fix this), but we ignore those for now. To - * make this work we'd have to pretty much generate all possible permutations. - */ -List * -get_useful_group_keys_orderings(PlannerInfo *root, double nrows, - List *path_pathkeys, - List *group_pathkeys, List *group_clauses) -{ - Query *parse = root->parse; - List *infos = NIL; - PathKeyInfo *info; - int n_preordered = 0; - - List *pathkeys = group_pathkeys; - List *clauses = group_clauses; - - /* always return at least the original pathkeys/clauses */ - info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - - /* - * Should we try generating alternative orderings of the group keys? If - * not, we produce only the order specified in the query, i.e. the - * optimization is effectively disabled. - */ - if (!enable_group_by_reordering) - return infos; - - /* for grouping sets we can't do any reordering */ - if (parse->groupingSets) - return infos; - - /* - * Try reordering pathkeys to minimize the sort cost, ignoring both the - * target ordering (ORDER BY) and ordering of the input path. - */ - if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, - n_preordered)) - { - info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - } - - /* - * If the path is sorted in some way, try reordering the group keys to - * match as much of the ordering as possible - we get this sort for free - * (mostly). - * - * We must not do this when there are no grouping sets, because those use - * more complex logic to decide the ordering. - * - * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's - * more a complement, because it allows benefiting from incremental sort - * as much as possible. - * - * XXX This does nothing if (n_preordered == 0). We shouldn't create the - * info in this case. - */ - if (path_pathkeys) - { - n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys, - &pathkeys, - &clauses); - - /* reorder the tail to minimize sort cost */ - get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, - n_preordered); - - /* - * reorder the tail to minimize sort cost - * - * XXX Ignore the return value - there may be nothing to reorder, in - * which case get_cheapest_group_keys_order returns false. But we - * still want to keep the keys reordered to path_pathkeys. - */ - info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - } - - /* - * Try reordering pathkeys to minimize the sort cost (this time consider - * the ORDER BY clause, but only if set debug_group_by_match_order_by). - */ - if (root->sort_pathkeys) - { - n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys, - &pathkeys, - &clauses); - - /* - * reorder the tail to minimize sort cost - * - * XXX Ignore the return value - there may be nothing to reorder, in - * which case get_cheapest_group_keys_order returns false. But we - * still want to keep the keys reordered to sort_pathkeys. - */ - get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, - n_preordered); - - /* keep the group keys reordered to match ordering of input path */ - info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - } - - return infos; -} - -/* * pathkeys_count_contained_in * Same as pathkeys_contained_in, but also sets length of longest * common prefix of keys1 and keys2. @@ -2391,54 +1863,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) } /* - * pathkeys_useful_for_grouping - * Count the number of pathkeys that are useful for grouping (instead of - * explicit sort) - * - * Group pathkeys could be reordered to benefit from the ordering. The - * ordering may not be "complete" and may require incremental sort, but that's - * fine. So we simply count prefix pathkeys with a matching group key, and - * stop once we find the first pathkey without a match. - * - * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b) - * pathkeys are useful for grouping, and we might do incremental sort to get - * path ordered by (a,b,e). - * - * This logic is necessary to retain paths with ordering not matching grouping - * keys directly, without the reordering. - * - * Returns the length of pathkey prefix with matching group keys. - */ -static int -pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) -{ - ListCell *key; - int n = 0; - - /* no special ordering requested for grouping */ - if (root->group_pathkeys == NIL) - return 0; - - /* unordered path */ - if (pathkeys == NIL) - return 0; - - /* walk the pathkeys and search for matching group key */ - foreach(key, pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(key); - - /* no matching group key, we're done */ - if (!list_member_ptr(root->group_pathkeys, pathkey)) - break; - - n++; - } - - return n; -} - -/* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. */ @@ -2454,9 +1878,6 @@ truncate_useless_pathkeys(PlannerInfo *root, nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; - nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); - if (nuseful2 > nuseful) - nuseful = nuseful2; /* * Note: not safe to modify input list destructively, but we can avoid @@ -2490,8 +1911,6 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) { if (rel->joininfo != NIL || rel->has_eclass_joins) return true; /* might be able to use pathkeys for merging */ - if (root->group_pathkeys != NIL) - return true; /* might be able to use pathkeys for grouping */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ return false; /* definitely useless */ |