aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path/pathkeys.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/path/pathkeys.c')
-rw-r--r--src/backend/optimizer/path/pathkeys.c569
1 files changed, 569 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 86a35cdef17..75fe03fd04b 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,17 +17,22 @@
*/
#include "postgres.h"
+#include "miscadmin.h"
#include "access/stratnum.h"
#include "catalog/pg_opfamily.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/plannodes.h"
+#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "partitioning/partbounds.h"
#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
+/* Consider reordering of GROUP BY keys? */
+bool enable_group_by_reordering = true;
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -335,6 +340,517 @@ pathkeys_contained_in(List *keys1, List *keys2)
}
/*
+ * group_keys_reorder_by_pathkeys
+ * Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+ List **group_clauses)
+{
+ List *new_group_pathkeys= NIL,
+ *new_group_clauses = NIL;
+ ListCell *lc;
+ int n;
+
+ if (pathkeys == NIL || *group_pathkeys == NIL)
+ return 0;
+
+ /*
+ * Walk the pathkeys (determining ordering of the input path) and see if
+ * there's a matching GROUP BY key. If we find one, we append it to the
+ * list, and do the same for the clauses.
+ *
+ * Once we find the first pathkey without a matching GROUP BY key, the rest
+ * of the pathkeys are useless and can't be used to evaluate the grouping,
+ * so we abort the loop and ignore the remaining pathkeys.
+ *
+ * XXX Pathkeys are built in a way to allow simply comparing pointers.
+ */
+ foreach(lc, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(lc);
+ SortGroupClause *sgc;
+
+ /* abort on first mismatch */
+ if (!list_member_ptr(*group_pathkeys, pathkey))
+ break;
+
+ new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+ sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+ *group_clauses);
+
+ new_group_clauses = lappend(new_group_clauses, sgc);
+ }
+
+ /* remember the number of pathkeys with a matching GROUP BY key */
+ n = list_length(new_group_pathkeys);
+
+ /* append the remaining group pathkeys (will be treated as not sorted) */
+ *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+ *group_pathkeys);
+ *group_clauses = list_concat_unique_ptr(new_group_clauses,
+ *group_clauses);
+
+ return n;
+}
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState {
+ List *elemsList;
+ ListCell **elemCells;
+ void **elems;
+ int *positions;
+ int mutatorNColumns;
+ int count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ * Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (when treating as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+ int i;
+ int n = end - start;
+ ListCell *lc;
+
+ memset(state, 0, sizeof(*state));
+
+ state->mutatorNColumns = n;
+
+ state->elemsList = list_copy(elems);
+
+ state->elems = palloc(sizeof(void*) * n);
+ state->elemCells = palloc(sizeof(ListCell*) * n);
+ state->positions = palloc(sizeof(int) * n);
+
+ i = 0;
+ for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+ {
+ state->elemCells[i] = lc;
+ state->elems[i] = lfirst(lc);
+ state->positions[i] = i + 1;
+ i++;
+
+ if (i >= n)
+ break;
+ }
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+ int s = a[i];
+
+ a[i] = a[j];
+ a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+ int j, k, l, r;
+
+ j = n - 2;
+
+ while (j >= 0 && a[j] >= a[j + 1])
+ j--;
+
+ if (j < 0)
+ return false;
+
+ k = n - 1;
+
+ while (k >= 0 && a[j] >= a[k])
+ k--;
+
+ PathkeyMutatorSwap(a, j, k);
+
+ l = j + 1;
+ r = n - 1;
+
+ while (l < r)
+ PathkeyMutatorSwap(a, l++, r--);
+
+ return true;
+}
+
+/*
+ * PathkeyMutatorNext
+ * Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+ int i;
+
+ state->count++;
+
+ /* first permutation is original list */
+ if (state->count == 1)
+ return state->elemsList;
+
+ /* when there are no more permutations, return NIL */
+ if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+ {
+ pfree(state->elems);
+ pfree(state->elemCells);
+ pfree(state->positions);
+
+ list_free(state->elemsList);
+
+ return NIL;
+ }
+
+ /* update the list cells to point to the right elements */
+ for(i = 0; i < state->mutatorNColumns; i++)
+ lfirst(state->elemCells[i]) =
+ (void *) state->elems[ state->positions[i] - 1 ];
+
+ return state->elemsList;
+}
+
+/*
+ * Cost of comparing pathkeys.
+ */
+typedef struct PathkeySortCost
+{
+ Cost cost;
+ PathKey *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+ const PathkeySortCost *a = (PathkeySortCost *) _a;
+ const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+ if (a->cost < b->cost)
+ return -1;
+ else if (a->cost == b->cost)
+ return 0;
+ return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ * Reorders the group pathkeys/clauses to minimize the comparison cost.
+ *
+ * Given a list of pathkeys, we try to reorder them in a way that minimizes
+ * the CPU cost of sorting. This depends mainly on the cost of comparator
+ * function (the pathkeys may use different data types) and the number of
+ * distinct values in each column (which affects the number of comparator
+ * calls for the following pathkeys).
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.
+ *
+ * Returns true if the keys/clauses have been reordered (or might have been),
+ * and a new list is returned through an argument. The list is a new copy
+ * and may be freed using list_free.
+ *
+ * Returns false if no reordering was possible.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+ List **group_pathkeys, List **group_clauses,
+ int n_preordered)
+{
+ List *new_group_pathkeys = NIL,
+ *new_group_clauses = NIL,
+ *var_group_pathkeys;
+
+ ListCell *cell;
+ PathkeyMutatorState mstate;
+ double cheapest_sort_cost = -1.0;
+
+ int nFreeKeys;
+ int nToPermute;
+
+ /* If there are less than 2 unsorted pathkeys, we're done. */
+ if (list_length(*group_pathkeys) - n_preordered < 2)
+ return false;
+
+ /*
+ * We could exhaustively cost all possible orderings of the pathkeys, but for
+ * a large number of pathkeys it might be prohibitively expensive. So we try
+ * to apply simple cheap heuristics first - we sort the pathkeys by sort cost
+ * (as if the pathkey was sorted independently) and then check only the four
+ * cheapest pathkeys. The remaining pathkeys are kept ordered by cost.
+ *
+ * XXX This is a very simple heuristics, but likely to work fine for most
+ * cases (because the number of GROUP BY clauses tends to be lower than 4).
+ * But it ignores how the number of distinct values in each pathkey affects
+ * the following steps. It might be better to use "more expensive" pathkey
+ * first if it has many distinct values, because it then limits the number
+ * of comparisons for the remaining pathkeys. But evaluating that is likely
+ * quite the expensive.
+ */
+ nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+ nToPermute = 4;
+ if (nFreeKeys > nToPermute)
+ {
+ int i;
+ PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+
+ /* skip the pre-ordered pathkeys */
+ cell = list_nth_cell(*group_pathkeys, n_preordered);
+
+ /* estimate cost for sorting individual pathkeys */
+ for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+ {
+ List *to_cost = list_make1(lfirst(cell));
+
+ Assert(i < nFreeKeys);
+
+ costs[i].pathkey = lfirst(cell);
+ costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+ pfree(to_cost);
+ }
+
+ /* sort the pathkeys by sort cost in ascending order */
+ qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+ /*
+ * Rebuild the list of pathkeys - first the preordered ones, then the
+ * rest ordered by cost.
+ */
+ new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+ for (i = 0; i < nFreeKeys; i++)
+ new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+ pfree(costs);
+ }
+ else
+ {
+ /* Copy the list, so that we can free the new list by list_free. */
+ new_group_pathkeys = list_copy(*group_pathkeys);
+ nToPermute = nFreeKeys;
+ }
+
+ Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+ /*
+ * Generate pathkey lists with permutations of the first nToPermute pathkeys.
+ *
+ * XXX We simply calculate sort cost for each individual pathkey list, but
+ * there's room for two dynamic programming optimizations here. Firstly, we
+ * may pass the current "best" cost to cost_sort_estimate so that it can
+ * "abort" if the estimated pathkeys list exceeds it. Secondly, it could pass
+ * the return information about the position when it exceeded the cost, and
+ * we could skip all permutations with the same prefix.
+ *
+ * Imagine we've already found ordering with cost C1, and we're evaluating
+ * another ordering - cost_sort_estimate() calculates cost by adding the
+ * pathkeys one by one (more or less), and the cost only grows. If at any
+ * point it exceeds C1, it can't possibly be "better" so we can discard it.
+ * But we also know that we can discard all ordering with the same prefix,
+ * because if we're estimating (a,b,c,d) and we exceed C1 at (a,b) then the
+ * same thing will happen for any ordering with this prefix.
+ */
+ PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+ while((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+ {
+ Cost cost;
+
+ cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+ if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+ {
+ list_free(new_group_pathkeys);
+ new_group_pathkeys = list_copy(var_group_pathkeys);
+ cheapest_sort_cost = cost;
+ }
+ }
+
+ /* Reorder the group clauses according to the reordered pathkeys. */
+ foreach(cell, new_group_pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(cell);
+
+ new_group_clauses = lappend(new_group_clauses,
+ get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+ *group_clauses));
+ }
+
+ /* Just append the rest GROUP BY clauses */
+ new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+ *group_clauses);
+
+ *group_pathkeys = new_group_pathkeys;
+ *group_clauses = new_group_clauses;
+
+ return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ * Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple group by orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ * the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
+ * the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on the incremental sort to fix this), but we ignore those for now. To
+ * make this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+ List *path_pathkeys,
+ List *group_pathkeys, List *group_clauses)
+{
+ Query *parse = root->parse;
+ List *infos = NIL;
+ PathKeyInfo *info;
+ int n_preordered = 0;
+
+ List *pathkeys = group_pathkeys;
+ List *clauses = group_clauses;
+
+ /* always return at least the original pathkeys/clauses */
+ info = makeNode(PathKeyInfo);
+ info->pathkeys = pathkeys;
+ info->clauses = clauses;
+
+ infos = lappend(infos, info);
+
+ /*
+ * Should we try generating alternative orderings of the group keys? If not,
+ * we produce only the order specified in the query, i.e. the optimization
+ * is effectively disabled.
+ */
+ if (!enable_group_by_reordering)
+ return infos;
+
+ /* for grouping sets we can't do any reordering */
+ if (parse->groupingSets)
+ return infos;
+
+ /*
+ * Try reordering pathkeys to minimize the sort cost, ignoring both the
+ * target ordering (ORDER BY) and ordering of the input path.
+ */
+ if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+ n_preordered))
+ {
+ info = makeNode(PathKeyInfo);
+ info->pathkeys = pathkeys;
+ info->clauses = clauses;
+
+ infos = lappend(infos, info);
+ }
+
+ /*
+ * If the path is sorted in some way, try reordering the group keys to match
+ * as much of the ordering as possible - we get this sort for free (mostly).
+ *
+ * We must not do this when there are no grouping sets, because those use
+ * more complex logic to decide the ordering.
+ *
+ * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+ * more a complement, because it allows benefiting from incremental sort
+ * as much as possible.
+ *
+ * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+ * info in this case.
+ */
+ if (path_pathkeys)
+ {
+ n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+ &pathkeys,
+ &clauses);
+
+ /* reorder the tail to minimize sort cost */
+ get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+ n_preordered);
+
+ /*
+ * reorder the tail to minimize sort cost
+ *
+ * XXX Ignore the return value - there may be nothing to reorder, in
+ * which case get_cheapest_group_keys_order returns false. But we
+ * still want to keep the keys reordered to path_pathkeys.
+ */
+ info = makeNode(PathKeyInfo);
+ info->pathkeys = pathkeys;
+ info->clauses = clauses;
+
+ infos = lappend(infos, info);
+ }
+
+ /*
+ * Try reordering pathkeys to minimize the sort cost (this time consider
+ * the ORDER BY clause, but only if set debug_group_by_match_order_by).
+ */
+ if (root->sort_pathkeys)
+ {
+ n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+ &pathkeys,
+ &clauses);
+
+ /*
+ * reorder the tail to minimize sort cost
+ *
+ * XXX Ignore the return value - there may be nothing to reorder, in
+ * which case get_cheapest_group_keys_order returns false. But we
+ * still want to keep the keys reordered to sort_pathkeys.
+ */
+ get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+ n_preordered);
+
+ /* keep the group keys reordered to match ordering of input path */
+ info = makeNode(PathKeyInfo);
+ info->pathkeys = pathkeys;
+ info->clauses = clauses;
+
+ infos = lappend(infos, info);
+ }
+
+ return infos;
+}
+
+/*
* pathkeys_count_contained_in
* Same as pathkeys_contained_in, but also sets length of longest
* common prefix of keys1 and keys2.
@@ -1863,6 +2379,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
}
/*
+ * pathkeys_useful_for_grouping
+ * Count the number of pathkeys that are useful for grouping (instead of
+ * explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the odering. The ordering
+ * may not be "complete" and may require incremental sort, but that's fine. So
+ * we simply count prefix pathkeys with a matching group key, and stop once we
+ * find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths with ordeding not matching grouping
+ * keys directly, without the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+ ListCell *key;
+ int n = 0;
+
+ /* no special ordering requested for grouping */
+ if (root->group_pathkeys == NIL)
+ return 0;
+
+ /* unordered path */
+ if (pathkeys == NIL)
+ return 0;
+
+ /* walk the pathkeys and search for matching group key */
+ foreach(key, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(key);
+
+ /* no matching group key, we're done */
+ if (!list_member_ptr(root->group_pathkeys, pathkey))
+ break;
+
+ n++;
+ }
+
+ return n;
+}
+
+/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
*/
@@ -1878,6 +2442,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
+ nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
+ if (nuseful2 > nuseful)
+ nuseful = nuseful2;
/*
* Note: not safe to modify input list destructively, but we can avoid
@@ -1911,6 +2478,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
{
if (rel->joininfo != NIL || rel->has_eclass_joins)
return true; /* might be able to use pathkeys for merging */
+ if (root->group_pathkeys != NIL)
+ return true; /* might be able to use pathkeys for grouping */
if (root->query_pathkeys != NIL)
return true; /* might be able to use them for ordering */
return false; /* definitely useless */