aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path/pathkeys.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/path/pathkeys.c')
-rw-r--r--src/backend/optimizer/path/pathkeys.c581
1 files changed, 0 insertions, 581 deletions
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 1d21215f853..86a35cdef17 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,24 +17,17 @@
*/
#include "postgres.h"
-#include <float.h>
-
-#include "miscadmin.h"
#include "access/stratnum.h"
#include "catalog/pg_opfamily.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/plannodes.h"
-#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "partitioning/partbounds.h"
#include "utils/lsyscache.h"
-#include "utils/selfuncs.h"
-/* Consider reordering of GROUP BY keys? */
-bool enable_group_by_reordering = true;
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -342,527 +335,6 @@ pathkeys_contained_in(List *keys1, List *keys2)
}
/*
- * group_keys_reorder_by_pathkeys
- * Reorder GROUP BY keys to match pathkeys of input path.
- *
- * Function returns new lists (pathkeys and clauses), original GROUP BY lists
- * stay untouched.
- *
- * Returns the number of GROUP BY keys with a matching pathkey.
- */
-int
-group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
- List **group_clauses)
-{
- List *new_group_pathkeys = NIL,
- *new_group_clauses = NIL;
- ListCell *lc;
- int n;
-
- if (pathkeys == NIL || *group_pathkeys == NIL)
- return 0;
-
- /*
- * Walk the pathkeys (determining ordering of the input path) and see if
- * there's a matching GROUP BY key. If we find one, we append it to the
- * list, and do the same for the clauses.
- *
- * Once we find the first pathkey without a matching GROUP BY key, the
- * rest of the pathkeys are useless and can't be used to evaluate the
- * grouping, so we abort the loop and ignore the remaining pathkeys.
- *
- * XXX Pathkeys are built in a way to allow simply comparing pointers.
- */
- foreach(lc, pathkeys)
- {
- PathKey *pathkey = (PathKey *) lfirst(lc);
- SortGroupClause *sgc;
-
- /* abort on first mismatch */
- if (!list_member_ptr(*group_pathkeys, pathkey))
- break;
-
- new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
-
- sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
- *group_clauses);
-
- new_group_clauses = lappend(new_group_clauses, sgc);
- }
-
- /* remember the number of pathkeys with a matching GROUP BY key */
- n = list_length(new_group_pathkeys);
-
- /* append the remaining group pathkeys (will be treated as not sorted) */
- *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
- *group_pathkeys);
- *group_clauses = list_concat_unique_ptr(new_group_clauses,
- *group_clauses);
-
- return n;
-}
-
-/*
- * Used to generate all permutations of a pathkey list.
- */
-typedef struct PathkeyMutatorState
-{
- List *elemsList;
- ListCell **elemCells;
- void **elems;
- int *positions;
- int mutatorNColumns;
- int count;
-} PathkeyMutatorState;
-
-
-/*
- * PathkeyMutatorInit
- * Initialize state of the permutation generator.
- *
- * We want to generate permutations of elements in the "elems" list. We may want
- * to skip some number of elements at the beginning (when treating as presorted)
- * or at the end (we only permute a limited number of group keys).
- *
- * The list is decomposed into elements, and we also keep pointers to individual
- * cells. This allows us to build the permuted list quickly and cheaply, without
- * creating any copies.
- */
-static void
-PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
-{
- int i;
- int n = end - start;
- ListCell *lc;
-
- memset(state, 0, sizeof(*state));
-
- state->mutatorNColumns = n;
-
- state->elemsList = list_copy(elems);
-
- state->elems = palloc(sizeof(void *) * n);
- state->elemCells = palloc(sizeof(ListCell *) * n);
- state->positions = palloc(sizeof(int) * n);
-
- i = 0;
- for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
- {
- state->elemCells[i] = lc;
- state->elems[i] = lfirst(lc);
- state->positions[i] = i + 1;
- i++;
-
- if (i >= n)
- break;
- }
-}
-
-/* Swap two elements of an array. */
-static void
-PathkeyMutatorSwap(int *a, int i, int j)
-{
- int s = a[i];
-
- a[i] = a[j];
- a[j] = s;
-}
-
-/*
- * Generate the next permutation of elements.
- */
-static bool
-PathkeyMutatorNextSet(int *a, int n)
-{
- int j,
- k,
- l,
- r;
-
- j = n - 2;
-
- while (j >= 0 && a[j] >= a[j + 1])
- j--;
-
- if (j < 0)
- return false;
-
- k = n - 1;
-
- while (k >= 0 && a[j] >= a[k])
- k--;
-
- PathkeyMutatorSwap(a, j, k);
-
- l = j + 1;
- r = n - 1;
-
- while (l < r)
- PathkeyMutatorSwap(a, l++, r--);
-
- return true;
-}
-
-/*
- * PathkeyMutatorNext
- * Generate the next permutation of list of elements.
- *
- * Returns the next permutation (as a list of elements) or NIL if there are no
- * more permutations.
- */
-static List *
-PathkeyMutatorNext(PathkeyMutatorState *state)
-{
- int i;
-
- state->count++;
-
- /* first permutation is original list */
- if (state->count == 1)
- return state->elemsList;
-
- /* when there are no more permutations, return NIL */
- if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
- {
- pfree(state->elems);
- pfree(state->elemCells);
- pfree(state->positions);
-
- list_free(state->elemsList);
-
- return NIL;
- }
-
- /* update the list cells to point to the right elements */
- for (i = 0; i < state->mutatorNColumns; i++)
- lfirst(state->elemCells[i]) =
- (void *) state->elems[state->positions[i] - 1];
-
- return state->elemsList;
-}
-
-/*
- * Cost of comparing pathkeys.
- */
-typedef struct PathkeySortCost
-{
- Cost cost;
- PathKey *pathkey;
-} PathkeySortCost;
-
-static int
-pathkey_sort_cost_comparator(const void *_a, const void *_b)
-{
- const PathkeySortCost *a = (PathkeySortCost *) _a;
- const PathkeySortCost *b = (PathkeySortCost *) _b;
-
- if (a->cost < b->cost)
- return -1;
- else if (a->cost == b->cost)
- return 0;
- return 1;
-}
-
-/*
- * get_cheapest_group_keys_order
- * Reorders the group pathkeys / clauses to minimize the comparison cost.
- *
- * Given the list of pathkeys in '*group_pathkeys', we try to arrange these
- * in an order that minimizes the sort costs that will be incurred by the
- * GROUP BY. The costs mainly depend on the cost of the sort comparator
- * function(s) and the number of distinct values in each column of the GROUP
- * BY clause (*group_clauses). Sorting on subsequent columns is only required
- * for tiebreak situations where two values sort equally.
- *
- * In case the input is partially sorted, only the remaining pathkeys are
- * considered. 'n_preordered' denotes how many of the leading *group_pathkeys
- * the input is presorted by.
- *
- * Returns true and sets *group_pathkeys and *group_clauses to the newly
- * ordered versions of the lists that were passed in via these parameters.
- * If no reordering was deemed necessary then we return false, in which case
- * the *group_pathkeys and *group_clauses lists are left untouched. The
- * original *group_pathkeys and *group_clauses parameter values are never
- * destructively modified in place.
- */
-static bool
-get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
- List **group_pathkeys, List **group_clauses,
- int n_preordered)
-{
- List *new_group_pathkeys = NIL,
- *new_group_clauses = NIL,
- *var_group_pathkeys;
-
- ListCell *cell;
- PathkeyMutatorState mstate;
- double cheapest_sort_cost = DBL_MAX;
-
- int nFreeKeys;
- int nToPermute;
-
- /* If there are less than 2 unsorted pathkeys, we're done. */
- if (list_length(*group_pathkeys) - n_preordered < 2)
- return false;
-
- /*
- * We could exhaustively cost all possible orderings of the pathkeys, but
- * for a large number of pathkeys it might be prohibitively expensive. So
- * we try to apply simple cheap heuristics first - we sort the pathkeys by
- * sort cost (as if the pathkey was sorted independently) and then check
- * only the four cheapest pathkeys. The remaining pathkeys are kept
- * ordered by cost.
- *
- * XXX This is a very simple heuristics, but likely to work fine for most
- * cases (because the number of GROUP BY clauses tends to be lower than
- * 4). But it ignores how the number of distinct values in each pathkey
- * affects the following steps. It might be better to use "more expensive"
- * pathkey first if it has many distinct values, because it then limits
- * the number of comparisons for the remaining pathkeys. But evaluating
- * that is likely quite the expensive.
- */
- nFreeKeys = list_length(*group_pathkeys) - n_preordered;
- nToPermute = 4;
- if (nFreeKeys > nToPermute)
- {
- PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
- PathkeySortCost *cost = costs;
-
- /*
- * Estimate cost for sorting individual pathkeys skipping the
- * pre-ordered pathkeys.
- */
- for_each_from(cell, *group_pathkeys, n_preordered)
- {
- PathKey *pathkey = (PathKey *) lfirst(cell);
- List *to_cost = list_make1(pathkey);
-
- cost->pathkey = pathkey;
- cost->cost = cost_sort_estimate(root, to_cost, 0, nrows);
- cost++;
-
- list_free(to_cost);
- }
-
- /* sort the pathkeys by sort cost in ascending order */
- qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
-
- /*
- * Rebuild the list of pathkeys - first the preordered ones, then the
- * rest ordered by cost.
- */
- new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered);
-
- for (int i = 0; i < nFreeKeys; i++)
- new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
-
- pfree(costs);
- }
- else
- {
- /* Copy the list, so that we can free the new list by list_free. */
- new_group_pathkeys = list_copy(*group_pathkeys);
- nToPermute = nFreeKeys;
- }
-
- Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
-
- /*
- * Generate pathkey lists with permutations of the first nToPermute
- * pathkeys.
- *
- * XXX We simply calculate sort cost for each individual pathkey list, but
- * there's room for two dynamic programming optimizations here. Firstly,
- * we may pass the current "best" cost to cost_sort_estimate so that it
- * can "abort" if the estimated pathkeys list exceeds it. Secondly, it
- * could pass the return information about the position when it exceeded
- * the cost, and we could skip all permutations with the same prefix.
- *
- * Imagine we've already found ordering with cost C1, and we're evaluating
- * another ordering - cost_sort_estimate() calculates cost by adding the
- * pathkeys one by one (more or less), and the cost only grows. If at any
- * point it exceeds C1, it can't possibly be "better" so we can discard
- * it. But we also know that we can discard all ordering with the same
- * prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b)
- * then the same thing will happen for any ordering with this prefix.
- */
- PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
-
- while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
- {
- Cost cost;
-
- cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
-
- if (cost < cheapest_sort_cost)
- {
- list_free(new_group_pathkeys);
- new_group_pathkeys = list_copy(var_group_pathkeys);
- cheapest_sort_cost = cost;
- }
- }
-
- /* Reorder the group clauses according to the reordered pathkeys. */
- foreach(cell, new_group_pathkeys)
- {
- PathKey *pathkey = (PathKey *) lfirst(cell);
-
- new_group_clauses = lappend(new_group_clauses,
- get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
- *group_clauses));
- }
-
- /* Just append the rest GROUP BY clauses */
- new_group_clauses = list_concat_unique_ptr(new_group_clauses,
- *group_clauses);
-
- *group_pathkeys = new_group_pathkeys;
- *group_clauses = new_group_clauses;
-
- return true;
-}
-
-/*
- * get_useful_group_keys_orderings
- * Determine which orderings of GROUP BY keys are potentially interesting.
- *
- * Returns list of PathKeyInfo items, each representing an interesting ordering
- * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
- *
- * The function considers (and keeps) multiple group by orderings:
- *
- * - the original ordering, as specified by the GROUP BY clause
- *
- * - GROUP BY keys reordered to minimize the sort cost
- *
- * - GROUP BY keys reordered to match path ordering (as much as possible), with
- * the tail reordered to minimize the sort cost
- *
- * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
- * the tail reordered to minimize the sort cost
- *
- * There are other potentially interesting orderings (e.g. it might be best to
- * match the first ORDER BY key, order the remaining keys differently and then
- * rely on the incremental sort to fix this), but we ignore those for now. To
- * make this work we'd have to pretty much generate all possible permutations.
- */
-List *
-get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
- List *path_pathkeys,
- List *group_pathkeys, List *group_clauses)
-{
- Query *parse = root->parse;
- List *infos = NIL;
- PathKeyInfo *info;
- int n_preordered = 0;
-
- List *pathkeys = group_pathkeys;
- List *clauses = group_clauses;
-
- /* always return at least the original pathkeys/clauses */
- info = makeNode(PathKeyInfo);
- info->pathkeys = pathkeys;
- info->clauses = clauses;
-
- infos = lappend(infos, info);
-
- /*
- * Should we try generating alternative orderings of the group keys? If
- * not, we produce only the order specified in the query, i.e. the
- * optimization is effectively disabled.
- */
- if (!enable_group_by_reordering)
- return infos;
-
- /* for grouping sets we can't do any reordering */
- if (parse->groupingSets)
- return infos;
-
- /*
- * Try reordering pathkeys to minimize the sort cost, ignoring both the
- * target ordering (ORDER BY) and ordering of the input path.
- */
- if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
- n_preordered))
- {
- info = makeNode(PathKeyInfo);
- info->pathkeys = pathkeys;
- info->clauses = clauses;
-
- infos = lappend(infos, info);
- }
-
- /*
- * If the path is sorted in some way, try reordering the group keys to
- * match as much of the ordering as possible - we get this sort for free
- * (mostly).
- *
- * We must not do this when there are no grouping sets, because those use
- * more complex logic to decide the ordering.
- *
- * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
- * more a complement, because it allows benefiting from incremental sort
- * as much as possible.
- *
- * XXX This does nothing if (n_preordered == 0). We shouldn't create the
- * info in this case.
- */
- if (path_pathkeys)
- {
- n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
- &pathkeys,
- &clauses);
-
- /* reorder the tail to minimize sort cost */
- get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
- n_preordered);
-
- /*
- * reorder the tail to minimize sort cost
- *
- * XXX Ignore the return value - there may be nothing to reorder, in
- * which case get_cheapest_group_keys_order returns false. But we
- * still want to keep the keys reordered to path_pathkeys.
- */
- info = makeNode(PathKeyInfo);
- info->pathkeys = pathkeys;
- info->clauses = clauses;
-
- infos = lappend(infos, info);
- }
-
- /*
- * Try reordering pathkeys to minimize the sort cost (this time consider
- * the ORDER BY clause, but only if set debug_group_by_match_order_by).
- */
- if (root->sort_pathkeys)
- {
- n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
- &pathkeys,
- &clauses);
-
- /*
- * reorder the tail to minimize sort cost
- *
- * XXX Ignore the return value - there may be nothing to reorder, in
- * which case get_cheapest_group_keys_order returns false. But we
- * still want to keep the keys reordered to sort_pathkeys.
- */
- get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
- n_preordered);
-
- /* keep the group keys reordered to match ordering of input path */
- info = makeNode(PathKeyInfo);
- info->pathkeys = pathkeys;
- info->clauses = clauses;
-
- infos = lappend(infos, info);
- }
-
- return infos;
-}
-
-/*
* pathkeys_count_contained_in
* Same as pathkeys_contained_in, but also sets length of longest
* common prefix of keys1 and keys2.
@@ -2391,54 +1863,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
}
/*
- * pathkeys_useful_for_grouping
- * Count the number of pathkeys that are useful for grouping (instead of
- * explicit sort)
- *
- * Group pathkeys could be reordered to benefit from the ordering. The
- * ordering may not be "complete" and may require incremental sort, but that's
- * fine. So we simply count prefix pathkeys with a matching group key, and
- * stop once we find the first pathkey without a match.
- *
- * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
- * pathkeys are useful for grouping, and we might do incremental sort to get
- * path ordered by (a,b,e).
- *
- * This logic is necessary to retain paths with ordering not matching grouping
- * keys directly, without the reordering.
- *
- * Returns the length of pathkey prefix with matching group keys.
- */
-static int
-pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
-{
- ListCell *key;
- int n = 0;
-
- /* no special ordering requested for grouping */
- if (root->group_pathkeys == NIL)
- return 0;
-
- /* unordered path */
- if (pathkeys == NIL)
- return 0;
-
- /* walk the pathkeys and search for matching group key */
- foreach(key, pathkeys)
- {
- PathKey *pathkey = (PathKey *) lfirst(key);
-
- /* no matching group key, we're done */
- if (!list_member_ptr(root->group_pathkeys, pathkey))
- break;
-
- n++;
- }
-
- return n;
-}
-
-/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
*/
@@ -2454,9 +1878,6 @@ truncate_useless_pathkeys(PlannerInfo *root,
nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
- nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
- if (nuseful2 > nuseful)
- nuseful = nuseful2;
/*
* Note: not safe to modify input list destructively, but we can avoid
@@ -2490,8 +1911,6 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
{
if (rel->joininfo != NIL || rel->has_eclass_joins)
return true; /* might be able to use pathkeys for merging */
- if (root->group_pathkeys != NIL)
- return true; /* might be able to use pathkeys for grouping */
if (root->query_pathkeys != NIL)
return true; /* might be able to use them for ordering */
return false; /* definitely useless */