diff options
Diffstat (limited to 'src/backend/optimizer/path/pathkeys.c')
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ca94a31f71e..82ff31273b9 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -22,12 +22,15 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" #include "utils/lsyscache.h" +/* Consider reordering of GROUP BY keys? */ +bool enable_group_by_reordering = true; static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, @@ -351,6 +354,202 @@ pathkeys_contained_in(List *keys1, List *keys2) } /* + * group_keys_reorder_by_pathkeys + * Reorder GROUP BY keys to match the input pathkeys. + * + * Function returns new lists (pathkeys and clauses), original GROUP BY lists + * stay untouched. + * + * Returns the number of GROUP BY keys with a matching pathkey. + */ +static int +group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, + List **group_clauses, + int num_groupby_pathkeys) +{ + List *new_group_pathkeys = NIL, + *new_group_clauses = NIL; + ListCell *lc; + int n; + + if (pathkeys == NIL || *group_pathkeys == NIL) + return 0; + + /* + * Walk the pathkeys (determining ordering of the input path) and see if + * there's a matching GROUP BY key. If we find one, we append it to the + * list, and do the same for the clauses. + * + * Once we find the first pathkey without a matching GROUP BY key, the + * rest of the pathkeys are useless and can't be used to evaluate the + * grouping, so we abort the loop and ignore the remaining pathkeys. + */ + foreach(lc, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + SortGroupClause *sgc; + + /* + * Pathkeys are built in a way that allows simply comparing pointers. + * Give up if we can't find the matching pointer. Also give up if + * there is no sortclause reference for some reason. + */ + if (foreach_current_index(lc) >= num_groupby_pathkeys || + !list_member_ptr(*group_pathkeys, pathkey) || + pathkey->pk_eclass->ec_sortref == 0) + break; + + /* + * Since 1349d27 pathkey coming from underlying node can be in the + * root->group_pathkeys but not in the processed_groupClause. So, we + * should be careful here. + */ + sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref, + *group_clauses); + if (!sgc) + /* The grouping clause does not cover this pathkey */ + break; + + /* + * Sort group clause should have an ordering operator as long as there + * is an associated pathkey. + */ + Assert(OidIsValid(sgc->sortop)); + + new_group_pathkeys = lappend(new_group_pathkeys, pathkey); + new_group_clauses = lappend(new_group_clauses, sgc); + } + + /* remember the number of pathkeys with a matching GROUP BY key */ + n = list_length(new_group_pathkeys); + + /* append the remaining group pathkeys (will be treated as not sorted) */ + *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys, + *group_pathkeys); + *group_clauses = list_concat_unique_ptr(new_group_clauses, + *group_clauses); + + return n; +} + +/* + * pathkeys_are_duplicate + * Check if give pathkeys are already contained the list of + * PathKeyInfo's. + */ +static bool +pathkeys_are_duplicate(List *infos, List *pathkeys) +{ + ListCell *lc; + + foreach(lc, infos) + { + PathKeyInfo *info = lfirst_node(PathKeyInfo, lc); + + if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL) + return true; + } + return false; +} + +/* + * get_useful_group_keys_orderings + * Determine which orderings of GROUP BY keys are potentially interesting. + * + * Returns a list of PathKeyInfo items, each representing an interesting + * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the + * matching order. + * + * The function considers (and keeps) multiple GROUP BY orderings: + * + * - the original ordering, as specified by the GROUP BY clause, + * - GROUP BY keys reordered to match 'path' ordering (as much as possible), + * - GROUP BY keys to match target ORDER BY clause (as much as possible). + */ +List * +get_useful_group_keys_orderings(PlannerInfo *root, Path *path) +{ + Query *parse = root->parse; + List *infos = NIL; + PathKeyInfo *info; + + List *pathkeys = root->group_pathkeys; + List *clauses = root->processed_groupClause; + + /* always return at least the original pathkeys/clauses */ + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + infos = lappend(infos, info); + + /* + * Should we try generating alternative orderings of the group keys? If + * not, we produce only the order specified in the query, i.e. the + * optimization is effectively disabled. + */ + if (!enable_group_by_reordering) + return infos; + + /* + * Grouping sets have own and more complex logic to decide the ordering. + */ + if (parse->groupingSets) + return infos; + + /* + * If the path is sorted in some way, try reordering the group keys to + * match the path as much of the ordering as possible. Then thanks to + * incremental sort we would get this sort as cheap as possible. + */ + if (path->pathkeys && + !pathkeys_contained_in(path->pathkeys, root->group_pathkeys)) + { + int n; + + n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses, + root->num_groupby_pathkeys); + + if (n > 0 && + (enable_incremental_sort || n == root->num_groupby_pathkeys) && + !pathkeys_are_duplicate(infos, pathkeys)) + { + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + } + + /* + * Try reordering pathkeys to minimize the sort cost (this time consider + * the ORDER BY clause). + */ + if (root->sort_pathkeys && + !pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys)) + { + int n; + + n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys, + &clauses, + root->num_groupby_pathkeys); + + if (n > 0 && + (enable_incremental_sort || n == list_length(root->sort_pathkeys)) && + !pathkeys_are_duplicate(infos, pathkeys)) + { + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + } + + return infos; +} + +/* * pathkeys_count_contained_in * Same as pathkeys_contained_in, but also sets length of longest * common prefix of keys1 and keys2. @@ -1940,6 +2139,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) } /* + * pathkeys_useful_for_grouping + * Count the number of pathkeys that are useful for grouping (instead of + * explicit sort) + * + * Group pathkeys could be reordered to benefit from the ordering. The + * ordering may not be "complete" and may require incremental sort, but that's + * fine. So we simply count prefix pathkeys with a matching group key, and + * stop once we find the first pathkey without a match. + * + * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b) + * pathkeys are useful for grouping, and we might do incremental sort to get + * path ordered by (a,b,e). + * + * This logic is necessary to retain paths with ordering not matching grouping + * keys directly, without the reordering. + * + * Returns the length of pathkey prefix with matching group keys. + */ +static int +pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) +{ + ListCell *key; + int n = 0; + + /* no special ordering requested for grouping */ + if (root->group_pathkeys == NIL) + return 0; + + /* unordered path */ + if (pathkeys == NIL) + return 0; + + /* walk the pathkeys and search for matching group key */ + foreach(key, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(key); + + /* no matching group key, we're done */ + if (!list_member_ptr(root->group_pathkeys, pathkey)) + break; + + n++; + } + + return n; +} + +/* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. */ @@ -1955,6 +2202,9 @@ truncate_useless_pathkeys(PlannerInfo *root, nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; + nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; /* * Note: not safe to modify input list destructively, but we can avoid @@ -1988,6 +2238,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) { if (rel->joininfo != NIL || rel->has_eclass_joins) return true; /* might be able to use pathkeys for merging */ + if (root->group_pathkeys != NIL) + return true; /* might be able to use pathkeys for grouping */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ return false; /* definitely useless */ |