diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2008-12-28 18:54:01 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2008-12-28 18:54:01 +0000 |
commit | 95b07bc7f5010233f52f9d11da74e2e5b653b0a7 (patch) | |
tree | 48f5858bf4eca1bfb316ef02bb959ca85f568e0a /src/backend/optimizer/plan | |
parent | 38e9348282e9d078487147ba8a85aebec54e3a08 (diff) | |
download | postgresql-95b07bc7f5010233f52f9d11da74e2e5b653b0a7.tar.gz postgresql-95b07bc7f5010233f52f9d11da74e2e5b653b0a7.zip |
Support window functions a la SQL:2008.
Hitoshi Harada, with some kibitzing from Heikki and Tom.
Diffstat (limited to 'src/backend/optimizer/plan')
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 57 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planagg.c | 10 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planmain.c | 21 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 457 | ||||
-rw-r--r-- | src/backend/optimizer/plan/setrefs.c | 8 | ||||
-rw-r--r-- | src/backend/optimizer/plan/subselect.c | 16 |
6 files changed, 507 insertions, 62 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index f5d4f41c032..b53b5e1470e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.252 2008/11/20 19:52:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.253 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3237,8 +3237,8 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, * anything for Aggref nodes; this is okay since they are really * comparable to Vars. * - * See notes in grouping_planner about why this routine and make_group are - * the only ones in this file that worry about tlist eval cost. + * See notes in grouping_planner about why only make_agg, make_windowagg + * and make_group worry about tlist eval cost. */ if (qual) { @@ -3260,6 +3260,53 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, return node; } +WindowAgg * +make_windowagg(PlannerInfo *root, List *tlist, int numWindowFuncs, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + Plan *lefttree) +{ + WindowAgg *node = makeNode(WindowAgg); + Plan *plan = &node->plan; + Path windowagg_path; /* dummy for result of cost_windowagg */ + QualCost qual_cost; + + node->partNumCols = partNumCols; + node->partColIdx = partColIdx; + node->partOperators = partOperators; + node->ordNumCols = ordNumCols; + node->ordColIdx = ordColIdx; + node->ordOperators = ordOperators; + + copy_plan_costsize(plan, lefttree); /* only care about copying size */ + cost_windowagg(&windowagg_path, root, + numWindowFuncs, partNumCols, ordNumCols, + lefttree->startup_cost, + lefttree->total_cost, + lefttree->plan_rows); + plan->startup_cost = windowagg_path.startup_cost; + plan->total_cost = windowagg_path.total_cost; + + /* + * We also need to account for the cost of evaluation of the tlist. + * + * See notes in grouping_planner about why only make_agg, make_windowagg + * and make_group worry about tlist eval cost. + */ + cost_qual_eval(&qual_cost, tlist, root); + plan->startup_cost += qual_cost.startup; + plan->total_cost += qual_cost.startup; + plan->total_cost += qual_cost.per_tuple * plan->plan_rows; + + plan->targetlist = tlist; + plan->lefttree = lefttree; + plan->righttree = NULL; + /* WindowAgg nodes never have a qual clause */ + plan->qual = NIL; + + return node; +} + Group * make_group(PlannerInfo *root, List *tlist, @@ -3300,8 +3347,8 @@ make_group(PlannerInfo *root, * lower plan level and will only be copied by the Group node. Worth * fixing? * - * See notes in grouping_planner about why this routine and make_agg are - * the only ones in this file that worry about tlist eval cost. + * See notes in grouping_planner about why only make_agg, make_windowagg + * and make_group worry about tlist eval cost. */ if (qual) { diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index 8a6b2ad0345..f0f17d5f950 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.43 2008/08/25 22:42:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.44 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -95,11 +95,11 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path) /* * Reject unoptimizable cases. * - * We don't handle GROUP BY, because our current implementations of - * grouping require looking at all the rows anyway, and so there's not - * much point in optimizing MIN/MAX. + * We don't handle GROUP BY or windowing, because our current + * implementations of grouping require looking at all the rows anyway, + * and so there's not much point in optimizing MIN/MAX. */ - if (parse->groupClause) + if (parse->groupClause || parse->hasWindowFuncs) return NULL; /* diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 0a1d1d1559f..a8ea043a697 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.112 2008/10/22 20:17:51 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.113 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -67,9 +67,9 @@ * PlannerInfo field and not a passed parameter is that the low-level routines * in indxpath.c need to see it.) * - * Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys, - * and sort_pathkeys, which like query_pathkeys need to be canonicalized once - * the info is available. + * Note: the PlannerInfo node also includes group_pathkeys, window_pathkeys, + * distinct_pathkeys, and sort_pathkeys, which like query_pathkeys need to be + * canonicalized once the info is available. * * tuple_fraction is interpreted as follows: * 0: expect all tuples to be retrieved (normal case) @@ -121,6 +121,8 @@ query_planner(PlannerInfo *root, List *tlist, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); + root->window_pathkeys = canonicalize_pathkeys(root, + root->window_pathkeys); root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys); root->sort_pathkeys = canonicalize_pathkeys(root, @@ -228,11 +230,12 @@ query_planner(PlannerInfo *root, List *tlist, /* * We have completed merging equivalence sets, so it's now possible to * convert the requested query_pathkeys to canonical form. Also - * canonicalize the groupClause, distinctClause and sortClause pathkeys - * for use later. + * canonicalize the groupClause, windowClause, distinctClause and + * sortClause pathkeys for use later. */ root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); + root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys); root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys); root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys); @@ -287,10 +290,12 @@ query_planner(PlannerInfo *root, List *tlist, * If both GROUP BY and ORDER BY are specified, we will need two * levels of sort --- and, therefore, certainly need to read all the * tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if - * we have both DISTINCT and GROUP BY. + * we have both DISTINCT and GROUP BY, or if we have a window + * specification not compatible with the GROUP BY. */ if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) || - !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys)) + !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) || + !pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys)) tuple_fraction = 0.0; } else if (parse->hasAggs || root->hasHavingQual) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 7f91309032a..b4b578d5973 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.247 2008/12/18 18:20:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.248 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -82,6 +82,18 @@ static void locate_grouping_columns(PlannerInfo *root, List *sub_tlist, AttrNumber *groupColIdx); static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist); +static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists); +static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist, bool canonicalize); +static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist, + int numSortCols, AttrNumber *sortColIdx, + int *partNumCols, + AttrNumber **partColIdx, + Oid **partOperators, + int *ordNumCols, + AttrNumber **ordColIdx, + Oid **ordOperators); /***************************************************************************** @@ -852,6 +864,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) AggClauseCounts agg_counts; int numGroupCols; bool use_hashed_grouping = false; + WindowFuncLists *wflists = NULL; + List *activeWindows = NIL; MemSet(&agg_counts, 0, sizeof(AggClauseCounts)); @@ -867,6 +881,22 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) tlist = preprocess_targetlist(root, tlist); /* + * Locate any window functions in the tlist. (We don't need to look + * anywhere else, since expressions used in ORDER BY will be in there + * too.) Note that they could all have been eliminated by constant + * folding, in which case we don't need to do any more work. + */ + if (parse->hasWindowFuncs) + { + wflists = find_window_functions((Node *) tlist, + list_length(parse->windowClause)); + if (wflists->numWindowFuncs > 0) + activeWindows = select_active_windows(root, wflists); + else + parse->hasWindowFuncs = false; + } + + /* * Generate appropriate target list for subplan; may be different from * tlist if grouping or aggregation is needed. */ @@ -890,6 +920,19 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) else root->group_pathkeys = NIL; + /* We consider only the first (bottom) window in pathkeys logic */ + if (activeWindows != NIL) + { + WindowClause *wc = (WindowClause *) linitial(activeWindows); + + root->window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist, + false); + } + else + root->window_pathkeys = NIL; + if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) root->distinct_pathkeys = @@ -927,11 +970,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * Figure out whether we want a sorted result from query_planner. * * If we have a sortable GROUP BY clause, then we want a result sorted - * properly for grouping. Otherwise, if there's a sortable DISTINCT - * clause that's more rigorous than the ORDER BY clause, we try to - * produce output that's sufficiently well sorted for the DISTINCT. - * Otherwise, if there is an ORDER BY clause, we want to sort by the - * ORDER BY clause. + * properly for grouping. Otherwise, if we have window functions to + * evaluate, we try to sort for the first window. Otherwise, if + * there's a sortable DISTINCT clause that's more rigorous than the + * ORDER BY clause, we try to produce output that's sufficiently well + * sorted for the DISTINCT. Otherwise, if there is an ORDER BY + * clause, we want to sort by the ORDER BY clause. * * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a * superset of GROUP BY, it would be tempting to request sort by ORDER @@ -942,6 +986,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ if (root->group_pathkeys) root->query_pathkeys = root->group_pathkeys; + else if (root->window_pathkeys) + root->query_pathkeys = root->window_pathkeys; else if (list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys)) root->query_pathkeys = root->distinct_pathkeys; @@ -1092,10 +1138,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * * Below this point, any tlist eval cost for added-on nodes * should be accounted for as we create those nodes. - * Presently, of the node types we can add on, only Agg and - * Group project new tlists (the rest just copy their input - * tuples) --- so make_agg() and make_group() are responsible - * for computing the added cost. + * Presently, of the node types we can add on, only Agg, + * WindowAgg, and Group project new tlists (the rest just copy + * their input tuples) --- so make_agg(), make_windowagg() and + * make_group() are responsible for computing the added cost. */ cost_qual_eval(&tlist_cost, sub_tlist, root); result_plan->startup_cost += tlist_cost.startup; @@ -1225,6 +1271,142 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) NULL); } } /* end of non-minmax-aggregate case */ + + /* + * Since each window function could require a different sort order, + * we stack up a WindowAgg node for each window, with sort steps + * between them as needed. + */ + if (activeWindows) + { + List *window_tlist; + ListCell *l; + + /* + * If the top-level plan node is one that cannot do expression + * evaluation, we must insert a Result node to project the + * desired tlist. (In some cases this might not really be + * required, but it's not worth trying to avoid it.) Note that + * on second and subsequent passes through the following loop, + * the top-level node will be a WindowAgg which we know can + * project; so we only need to check once. + */ + if (!is_projection_capable_plan(result_plan)) + { + result_plan = (Plan *) make_result(root, + NIL, + NULL, + result_plan); + } + + /* + * The "base" targetlist for all steps of the windowing process + * is a flat tlist of all Vars and Aggs needed in the result. + * (In some cases we wouldn't need to propagate all of these + * all the way to the top, since they might only be needed as + * inputs to WindowFuncs. It's probably not worth trying to + * optimize that though.) As we climb up the stack, we add + * outputs for the WindowFuncs computed at each level. Also, + * each input tlist has to present all the columns needed to + * sort the data for the next WindowAgg step. That's handled + * internally by make_sort_from_pathkeys, but we need the + * copyObject steps here to ensure that each plan node has + * a separately modifiable tlist. + */ + window_tlist = flatten_tlist(tlist); + if (parse->hasAggs) + window_tlist = add_to_flat_tlist(window_tlist, + pull_agg_clause((Node *) tlist)); + result_plan->targetlist = (List *) copyObject(window_tlist); + + foreach(l, activeWindows) + { + WindowClause *wc = (WindowClause *) lfirst(l); + List *window_pathkeys; + int partNumCols; + AttrNumber *partColIdx; + Oid *partOperators; + int ordNumCols; + AttrNumber *ordColIdx; + Oid *ordOperators; + + window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist, + true); + + /* + * This is a bit tricky: we build a sort node even if we don't + * really have to sort. Even when no explicit sort is needed, + * we need to have suitable resjunk items added to the input + * plan's tlist for any partitioning or ordering columns that + * aren't plain Vars. Furthermore, this way we can use + * existing infrastructure to identify which input columns are + * the interesting ones. + */ + if (window_pathkeys) + { + Sort *sort_plan; + + sort_plan = make_sort_from_pathkeys(root, + result_plan, + window_pathkeys, + -1.0); + if (!pathkeys_contained_in(window_pathkeys, + current_pathkeys)) + { + /* we do indeed need to sort */ + result_plan = (Plan *) sort_plan; + current_pathkeys = window_pathkeys; + } + /* In either case, extract the per-column information */ + get_column_info_for_window(root, wc, tlist, + sort_plan->numCols, + sort_plan->sortColIdx, + &partNumCols, + &partColIdx, + &partOperators, + &ordNumCols, + &ordColIdx, + &ordOperators); + } + else + { + /* empty window specification, nothing to sort */ + partNumCols = 0; + partColIdx = NULL; + partOperators = NULL; + ordNumCols = 0; + ordColIdx = NULL; + ordOperators = NULL; + } + + if (lnext(l)) + { + /* Add the current WindowFuncs to the running tlist */ + window_tlist = add_to_flat_tlist(window_tlist, + wflists->windowFuncs[wc->winref]); + } + else + { + /* Install the original tlist in the topmost WindowAgg */ + window_tlist = tlist; + } + + /* ... and make the WindowAgg plan node */ + result_plan = (Plan *) + make_windowagg(root, + (List *) copyObject(window_tlist), + list_length(wflists->windowFuncs[wc->winref]), + partNumCols, + partColIdx, + partOperators, + ordNumCols, + ordColIdx, + ordOperators, + result_plan); + } + } } /* end of if (setOperations) */ /* @@ -2030,7 +2212,8 @@ make_subplanTargetList(PlannerInfo *root, * If we're not grouping or aggregating, there's nothing to do here; * query_planner should receive the unmodified target list. */ - if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual) + if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual && + !parse->hasWindowFuncs) { *need_tlist_eval = true; return tlist; @@ -2039,7 +2222,9 @@ make_subplanTargetList(PlannerInfo *root, /* * Otherwise, start with a "flattened" tlist (having just the vars * mentioned in the targetlist and HAVING qual --- but not upper-level - * Vars; they will be replaced by Params later on). + * Vars; they will be replaced by Params later on). Note this includes + * vars used in resjunk items, so we are covering the needs of ORDER BY + * and window specifications. */ sub_tlist = flatten_tlist(tlist); extravars = pull_var_clause(parse->havingQual, true); @@ -2066,7 +2251,7 @@ make_subplanTargetList(PlannerInfo *root, { SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); - TargetEntry *te = NULL; + TargetEntry *te; /* * Find or make a matching sub_tlist entry. If the groupexpr @@ -2074,20 +2259,10 @@ make_subplanTargetList(PlannerInfo *root, * won't make multiple groupClause entries for the same TLE.) */ if (groupexpr && IsA(groupexpr, Var)) - { - ListCell *sl; - - foreach(sl, sub_tlist) - { - TargetEntry *lte = (TargetEntry *) lfirst(sl); + te = tlist_member(groupexpr, sub_tlist); + else + te = NULL; - if (equal(groupexpr, lte->expr)) - { - te = lte; - break; - } - } - } if (!te) { te = makeTargetEntry((Expr *) groupexpr, @@ -2112,7 +2287,7 @@ make_subplanTargetList(PlannerInfo *root, * * This is only needed if we don't use the sub_tlist chosen by * make_subplanTargetList. We have to forget the column indexes found - * by that routine and re-locate the grouping vars in the real sub_tlist. + * by that routine and re-locate the grouping exprs in the real sub_tlist. */ static void locate_grouping_columns(PlannerInfo *root, @@ -2137,18 +2312,10 @@ locate_grouping_columns(PlannerInfo *root, { SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); - TargetEntry *te = NULL; - ListCell *sl; + TargetEntry *te = tlist_member(groupexpr, sub_tlist); - foreach(sl, sub_tlist) - { - te = (TargetEntry *) lfirst(sl); - if (equal(groupexpr, te->expr)) - break; - } - if (!sl) + if (!te) elog(ERROR, "failed to locate grouping columns"); - groupColIdx[keyno++] = te->resno; } } @@ -2190,3 +2357,219 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist) elog(ERROR, "resjunk output columns are not implemented"); return new_tlist; } + +/* + * select_active_windows + * Create a list of the "active" window clauses (ie, those referenced + * by non-deleted WindowFuncs) in the order they are to be executed. + */ +static List * +select_active_windows(PlannerInfo *root, WindowFuncLists *wflists) +{ + List *result; + List *actives; + ListCell *lc; + + /* First, make a list of the active windows */ + actives = NIL; + foreach(lc, root->parse->windowClause) + { + WindowClause *wc = (WindowClause *) lfirst(lc); + + /* It's only active if wflists shows some related WindowFuncs */ + Assert(wc->winref <= wflists->maxWinRef); + if (wflists->windowFuncs[wc->winref] != NIL) + actives = lappend(actives, wc); + } + + /* + * Now, ensure that windows with identical partitioning/ordering clauses + * are adjacent in the list. This is required by the SQL standard, which + * says that only one sort is to be used for such windows, even if they + * are otherwise distinct (eg, different names or framing clauses). + * + * There is room to be much smarter here, for example detecting whether + * one window's sort keys are a prefix of another's (so that sorting + * for the latter would do for the former), or putting windows first + * that match a sort order available for the underlying query. For the + * moment we are content with meeting the spec. + */ + result = NIL; + while (actives != NIL) + { + WindowClause *wc = (WindowClause *) linitial(actives); + ListCell *prev; + ListCell *next; + + /* Move wc from actives to result */ + actives = list_delete_first(actives); + result = lappend(result, wc); + + /* Now move any matching windows from actives to result */ + prev = NULL; + for (lc = list_head(actives); lc; lc = next) + { + WindowClause *wc2 = (WindowClause *) lfirst(lc); + + next = lnext(lc); + if (equal(wc->partitionClause, wc2->partitionClause) && + equal(wc->orderClause, wc2->orderClause)) + { + actives = list_delete_cell(actives, lc, prev); + result = lappend(result, wc2); + } + else + prev = lc; + } + } + + return result; +} + +/* + * make_pathkeys_for_window + * Create a pathkeys list describing the required input ordering + * for the given WindowClause. + * + * The required ordering is first the PARTITION keys, then the ORDER keys. + * In the future we might try to implement windowing using hashing, in which + * case the ordering could be relaxed, but for now we always sort. + */ +static List * +make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist, bool canonicalize) +{ + List *window_pathkeys; + List *window_sortclauses; + + /* Throw error if can't sort */ + if (!grouping_is_sortable(wc->partitionClause)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement window PARTITION BY"), + errdetail("Window partitioning columns must be of sortable datatypes."))); + if (!grouping_is_sortable(wc->orderClause)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement window ORDER BY"), + errdetail("Window ordering columns must be of sortable datatypes."))); + + /* Okay, make the combined pathkeys */ + window_sortclauses = list_concat(list_copy(wc->partitionClause), + list_copy(wc->orderClause)); + window_pathkeys = make_pathkeys_for_sortclauses(root, + window_sortclauses, + tlist, + canonicalize); + list_free(window_sortclauses); + return window_pathkeys; +} + +/*---------- + * get_column_info_for_window + * Get the partitioning/ordering column numbers and equality operators + * for a WindowAgg node. + * + * This depends on the behavior of make_pathkeys_for_window()! + * + * We are given the target WindowClause and an array of the input column + * numbers associated with the resulting pathkeys. In the easy case, there + * are the same number of pathkey columns as partitioning + ordering columns + * and we just have to copy some data around. However, it's possible that + * some of the original partitioning + ordering columns were eliminated as + * redundant during the transformation to pathkeys. (This can happen even + * though the parser gets rid of obvious duplicates. A typical scenario is a + * window specification "PARTITION BY x ORDER BY y" coupled with a clause + * "WHERE x = y" that causes the two sort columns to be recognized as + * redundant.) In that unusual case, we have to work a lot harder to + * determine which keys are significant. + * + * The method used here is a bit brute-force: add the sort columns to a list + * one at a time and note when the resulting pathkey list gets longer. But + * it's a sufficiently uncommon case that a faster way doesn't seem worth + * the amount of code refactoring that'd be needed. + *---------- + */ +static void +get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist, + int numSortCols, AttrNumber *sortColIdx, + int *partNumCols, + AttrNumber **partColIdx, + Oid **partOperators, + int *ordNumCols, + AttrNumber **ordColIdx, + Oid **ordOperators) +{ + int numPart = list_length(wc->partitionClause); + int numOrder = list_length(wc->orderClause); + + if (numSortCols == numPart + numOrder) + { + /* easy case */ + *partNumCols = numPart; + *partColIdx = sortColIdx; + *partOperators = extract_grouping_ops(wc->partitionClause); + *ordNumCols = numOrder; + *ordColIdx = sortColIdx + numPart; + *ordOperators = extract_grouping_ops(wc->orderClause); + } + else + { + List *sortclauses; + List *pathkeys; + int scidx; + ListCell *lc; + + /* first, allocate what's certainly enough space for the arrays */ + *partNumCols = 0; + *partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber)); + *partOperators = (Oid *) palloc(numPart * sizeof(Oid)); + *ordNumCols = 0; + *ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber)); + *ordOperators = (Oid *) palloc(numOrder * sizeof(Oid)); + sortclauses = NIL; + pathkeys = NIL; + scidx = 0; + foreach(lc, wc->partitionClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + List *new_pathkeys; + + sortclauses = lappend(sortclauses, sgc); + new_pathkeys = make_pathkeys_for_sortclauses(root, + sortclauses, + tlist, + true); + if (list_length(new_pathkeys) > list_length(pathkeys)) + { + /* this sort clause is actually significant */ + *partColIdx[*partNumCols] = sortColIdx[scidx++]; + *partOperators[*partNumCols] = sgc->eqop; + (*partNumCols)++; + pathkeys = new_pathkeys; + } + } + foreach(lc, wc->orderClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + List *new_pathkeys; + + sortclauses = lappend(sortclauses, sgc); + new_pathkeys = make_pathkeys_for_sortclauses(root, + sortclauses, + tlist, + true); + if (list_length(new_pathkeys) > list_length(pathkeys)) + { + /* this sort clause is actually significant */ + *ordColIdx[*ordNumCols] = sortColIdx[scidx++]; + *ordOperators[*ordNumCols] = sgc->eqop; + (*ordNumCols)++; + pathkeys = new_pathkeys; + } + } + /* complain if we didn't eat exactly the right number of sort cols */ + if (scidx != numSortCols) + elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators"); + } +} diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 9bec109f6f5..83447082f5b 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.146 2008/10/21 20:42:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.147 2008/12/28 18:53:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -415,6 +415,7 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset) } break; case T_Agg: + case T_WindowAgg: case T_Group: set_upper_references(glob, plan, rtoffset); break; @@ -679,6 +680,11 @@ fix_expr_common(PlannerGlobal *glob, Node *node) record_plan_function_dependency(glob, ((Aggref *) node)->aggfnoid); } + else if (IsA(node, WindowFunc)) + { + record_plan_function_dependency(glob, + ((WindowFunc *) node)->winfnoid); + } else if (IsA(node, FuncExpr)) { record_plan_function_dependency(glob, diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index c999fb6419c..a38f8c09ae7 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.143 2008/12/08 00:16:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.144 2008/12/28 18:53:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1243,6 +1243,7 @@ simplify_EXISTS_query(Query *query) query->intoClause || query->setOperations || query->hasAggs || + query->hasWindowFuncs || query->havingQual || query->limitOffset || query->limitCount || @@ -1258,13 +1259,14 @@ simplify_EXISTS_query(Query *query) /* * Otherwise, we can throw away the targetlist, as well as any GROUP, - * DISTINCT, and ORDER BY clauses; none of those clauses will change - * a nonzero-rows result to zero rows or vice versa. (Furthermore, + * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will + * change a nonzero-rows result to zero rows or vice versa. (Furthermore, * since our parsetree representation of these clauses depends on the * targetlist, we'd better throw them away if we drop the targetlist.) */ query->targetList = NIL; query->groupClause = NIL; + query->windowClause = NIL; query->distinctClause = NIL; query->sortClause = NIL; query->hasDistinctOn = false; @@ -1321,8 +1323,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, * The rest of the sub-select must not refer to any Vars of the parent * query. (Vars of higher levels should be okay, though.) * - * Note: we need not check for Aggs separately because we know the - * sub-select is as yet unoptimized; any uplevel Agg must therefore + * Note: we need not check for Aggrefs separately because we know the + * sub-select is as yet unoptimized; any uplevel Aggref must therefore * contain an uplevel Var reference. This is not the case below ... */ if (contain_vars_of_level((Node *) subselect, 1)) @@ -1432,7 +1434,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, /* * And there can't be any child Vars in the stuff we intend to pull up. * (Note: we'd need to check for child Aggs too, except we know the - * child has no aggs at all because of simplify_EXISTS_query's check.) + * child has no aggs at all because of simplify_EXISTS_query's check. + * The same goes for window functions.) */ if (contain_vars_of_level((Node *) leftargs, 0)) return NULL; @@ -1955,6 +1958,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params) case T_RecursiveUnion: case T_Hash: case T_Agg: + case T_WindowAgg: case T_SeqScan: case T_Material: case T_Sort: |