Support window functions a la SQL:2008.

Hitoshi Harada, with some kibitzing from Heikki and Tom.
author: Tom Lane <tgl@sss.pgh.pa.us> 2008-12-28 18:54:01 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2008-12-28 18:54:01 +0000
commit: 95b07bc7f5010233f52f9d11da74e2e5b653b0a7 (patch)
tree: 48f5858bf4eca1bfb316ef02bb959ca85f568e0a /src/backend/optimizer/plan
parent: 38e9348282e9d078487147ba8a85aebec54e3a08 (diff)
download: postgresql-95b07bc7f5010233f52f9d11da74e2e5b653b0a7.tar.gz
postgresql-95b07bc7f5010233f52f9d11da74e2e5b653b0a7.zip
6 files changed, 507 insertions, 62 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index f5d4f41c032..b53b5e1470e 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.252 2008/11/20 19:52:54 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.253 2008/12/28 18:53:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3237,8 +3237,8 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
 	 * anything for Aggref nodes; this is okay since they are really
 	 * comparable to Vars.
 	 *
-	 * See notes in grouping_planner about why this routine and make_group are
-	 * the only ones in this file that worry about tlist eval cost.
+	 * See notes in grouping_planner about why only make_agg, make_windowagg
+	 * and make_group worry about tlist eval cost.
 	 */
 	if (qual)
 	{
@@ -3260,6 +3260,53 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
 	return node;
 }
 
+WindowAgg *
+make_windowagg(PlannerInfo *root, List *tlist, int numWindowFuncs,
+			   int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
+			   int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+			   Plan *lefttree)
+{
+	WindowAgg  *node = makeNode(WindowAgg);
+	Plan	   *plan = &node->plan;
+	Path		windowagg_path;		/* dummy for result of cost_windowagg */
+	QualCost	qual_cost;
+
+	node->partNumCols = partNumCols;
+	node->partColIdx = partColIdx;
+	node->partOperators = partOperators;
+	node->ordNumCols = ordNumCols;
+	node->ordColIdx = ordColIdx;
+	node->ordOperators = ordOperators;
+
+	copy_plan_costsize(plan, lefttree);	/* only care about copying size */
+	cost_windowagg(&windowagg_path, root,
+				   numWindowFuncs, partNumCols, ordNumCols,
+				   lefttree->startup_cost,
+				   lefttree->total_cost,
+				   lefttree->plan_rows);
+	plan->startup_cost = windowagg_path.startup_cost;
+	plan->total_cost = windowagg_path.total_cost;
+
+	/*
+	 * We also need to account for the cost of evaluation of the tlist.
+	 *
+	 * See notes in grouping_planner about why only make_agg, make_windowagg
+	 * and make_group worry about tlist eval cost.
+	 */
+	cost_qual_eval(&qual_cost, tlist, root);
+	plan->startup_cost += qual_cost.startup;
+	plan->total_cost += qual_cost.startup;
+	plan->total_cost += qual_cost.per_tuple * plan->plan_rows;
+
+	plan->targetlist = tlist;
+	plan->lefttree = lefttree;
+	plan->righttree = NULL;
+	/* WindowAgg nodes never have a qual clause */
+	plan->qual = NIL;
+
+	return node;
+}
+
 Group *
 make_group(PlannerInfo *root,
 		   List *tlist,
@@ -3300,8 +3347,8 @@ make_group(PlannerInfo *root,
 	 * lower plan level and will only be copied by the Group node. Worth
 	 * fixing?
 	 *
-	 * See notes in grouping_planner about why this routine and make_agg are
-	 * the only ones in this file that worry about tlist eval cost.
+	 * See notes in grouping_planner about why only make_agg, make_windowagg
+	 * and make_group worry about tlist eval cost.
 	 */
 	if (qual)
 	{
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index 8a6b2ad0345..f0f17d5f950 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.43 2008/08/25 22:42:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.44 2008/12/28 18:53:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -95,11 +95,11 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
 	/*
 	 * Reject unoptimizable cases.
 	 *
-	 * We don't handle GROUP BY, because our current implementations of
-	 * grouping require looking at all the rows anyway, and so there's not
-	 * much point in optimizing MIN/MAX.
+	 * We don't handle GROUP BY or windowing, because our current
+	 * implementations of grouping require looking at all the rows anyway,
+	 * and so there's not much point in optimizing MIN/MAX.
 	 */
-	if (parse->groupClause)
+	if (parse->groupClause || parse->hasWindowFuncs)
 		return NULL;
 
 	/*
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 0a1d1d1559f..a8ea043a697 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.112 2008/10/22 20:17:51 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.113 2008/12/28 18:53:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,9 +67,9 @@
  * PlannerInfo field and not a passed parameter is that the low-level routines
  * in indxpath.c need to see it.)
  *
- * Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys,
- * and sort_pathkeys, which like query_pathkeys need to be canonicalized once
- * the info is available.
+ * Note: the PlannerInfo node also includes group_pathkeys, window_pathkeys,
+ * distinct_pathkeys, and sort_pathkeys, which like query_pathkeys need to be
+ * canonicalized once the info is available.
  *
  * tuple_fraction is interpreted as follows:
  *	  0: expect all tuples to be retrieved (normal case)
@@ -121,6 +121,8 @@ query_planner(PlannerInfo *root, List *tlist,
 													 root->query_pathkeys);
 		root->group_pathkeys = canonicalize_pathkeys(root,
 													 root->group_pathkeys);
+		root->window_pathkeys = canonicalize_pathkeys(root,
+													  root->window_pathkeys);
 		root->distinct_pathkeys = canonicalize_pathkeys(root,
 													root->distinct_pathkeys);
 		root->sort_pathkeys = canonicalize_pathkeys(root,
@@ -228,11 +230,12 @@ query_planner(PlannerInfo *root, List *tlist,
 	/*
 	 * We have completed merging equivalence sets, so it's now possible to
 	 * convert the requested query_pathkeys to canonical form.	Also
-	 * canonicalize the groupClause, distinctClause and sortClause pathkeys
-	 * for use later.
+	 * canonicalize the groupClause, windowClause, distinctClause and
+	 * sortClause pathkeys for use later.
 	 */
 	root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
 	root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
+	root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys);
 	root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
 	root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
 
@@ -287,10 +290,12 @@ query_planner(PlannerInfo *root, List *tlist,
 		 * If both GROUP BY and ORDER BY are specified, we will need two
 		 * levels of sort --- and, therefore, certainly need to read all the
 		 * tuples --- unless ORDER BY is a subset of GROUP BY.  Likewise if
-		 * we have both DISTINCT and GROUP BY.
+		 * we have both DISTINCT and GROUP BY, or if we have a window
+		 * specification not compatible with the GROUP BY.
 		 */
 		if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
-			!pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys))
+			!pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) ||
+			!pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys))
 			tuple_fraction = 0.0;
 	}
 	else if (parse->hasAggs || root->hasHavingQual)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 7f91309032a..b4b578d5973 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.247 2008/12/18 18:20:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.248 2008/12/28 18:53:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -82,6 +82,18 @@ static void locate_grouping_columns(PlannerInfo *root,
 						List *sub_tlist,
 						AttrNumber *groupColIdx);
 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
+static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
+static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
+									  List *tlist, bool canonicalize);
+static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc,
+									   List *tlist,
+									   int numSortCols, AttrNumber *sortColIdx,
+									   int *partNumCols,
+									   AttrNumber **partColIdx,
+									   Oid **partOperators,
+									   int *ordNumCols,
+									   AttrNumber **ordColIdx,
+									   Oid **ordOperators);
 
 
 /*****************************************************************************
@@ -852,6 +864,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		AggClauseCounts agg_counts;
 		int			numGroupCols;
 		bool		use_hashed_grouping = false;
+		WindowFuncLists *wflists = NULL;
+		List	   *activeWindows = NIL;
 
 		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
 
@@ -867,6 +881,22 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		tlist = preprocess_targetlist(root, tlist);
 
 		/*
+		 * Locate any window functions in the tlist.  (We don't need to look
+		 * anywhere else, since expressions used in ORDER BY will be in there
+		 * too.)  Note that they could all have been eliminated by constant
+		 * folding, in which case we don't need to do any more work.
+		 */
+		if (parse->hasWindowFuncs)
+		{
+			wflists = find_window_functions((Node *) tlist,
+											list_length(parse->windowClause));
+			if (wflists->numWindowFuncs > 0)
+				activeWindows = select_active_windows(root, wflists);
+			else
+				parse->hasWindowFuncs = false;
+		}
+
+		/*
 		 * Generate appropriate target list for subplan; may be different from
 		 * tlist if grouping or aggregation is needed.
 		 */
@@ -890,6 +920,19 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		else
 			root->group_pathkeys = NIL;
 
+		/* We consider only the first (bottom) window in pathkeys logic */
+		if (activeWindows != NIL)
+		{
+			WindowClause *wc = (WindowClause *) linitial(activeWindows);
+
+			root->window_pathkeys = make_pathkeys_for_window(root,
+															 wc,
+															 tlist,
+															 false);
+		}
+		else
+			root->window_pathkeys = NIL;
+
 		if (parse->distinctClause &&
 			grouping_is_sortable(parse->distinctClause))
 			root->distinct_pathkeys =
@@ -927,11 +970,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		 * Figure out whether we want a sorted result from query_planner.
 		 *
 		 * If we have a sortable GROUP BY clause, then we want a result sorted
-		 * properly for grouping.  Otherwise, if there's a sortable DISTINCT
-		 * clause that's more rigorous than the ORDER BY clause, we try to
-		 * produce output that's sufficiently well sorted for the DISTINCT.
-		 * Otherwise, if there is an ORDER BY clause, we want to sort by the
-		 * ORDER BY clause.
+		 * properly for grouping.  Otherwise, if we have window functions to
+		 * evaluate, we try to sort for the first window.  Otherwise, if
+		 * there's a sortable DISTINCT clause that's more rigorous than the
+		 * ORDER BY clause, we try to produce output that's sufficiently well
+		 * sorted for the DISTINCT.  Otherwise, if there is an ORDER BY
+		 * clause, we want to sort by the ORDER BY clause.
 		 *
 		 * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
 		 * superset of GROUP BY, it would be tempting to request sort by ORDER
@@ -942,6 +986,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		 */
 		if (root->group_pathkeys)
 			root->query_pathkeys = root->group_pathkeys;
+		else if (root->window_pathkeys)
+			root->query_pathkeys = root->window_pathkeys;
 		else if (list_length(root->distinct_pathkeys) >
 				 list_length(root->sort_pathkeys))
 			root->query_pathkeys = root->distinct_pathkeys;
@@ -1092,10 +1138,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 				 *
 				 * Below this point, any tlist eval cost for added-on nodes
 				 * should be accounted for as we create those nodes.
-				 * Presently, of the node types we can add on, only Agg and
-				 * Group project new tlists (the rest just copy their input
-				 * tuples) --- so make_agg() and make_group() are responsible
-				 * for computing the added cost.
+				 * Presently, of the node types we can add on, only Agg,
+				 * WindowAgg, and Group project new tlists (the rest just copy
+				 * their input tuples) --- so make_agg(), make_windowagg() and
+				 * make_group() are responsible for computing the added cost.
 				 */
 				cost_qual_eval(&tlist_cost, sub_tlist, root);
 				result_plan->startup_cost += tlist_cost.startup;
@@ -1225,6 +1271,142 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 												   NULL);
 			}
 		}						/* end of non-minmax-aggregate case */
+
+		/*
+		 * Since each window function could require a different sort order,
+		 * we stack up a WindowAgg node for each window, with sort steps
+		 * between them as needed.
+		 */
+		if (activeWindows)
+		{
+			List	   *window_tlist;
+			ListCell   *l;
+
+			/*
+			 * If the top-level plan node is one that cannot do expression
+			 * evaluation, we must insert a Result node to project the
+			 * desired tlist.  (In some cases this might not really be
+			 * required, but it's not worth trying to avoid it.)  Note that
+			 * on second and subsequent passes through the following loop,
+			 * the top-level node will be a WindowAgg which we know can
+			 * project; so we only need to check once.
+			 */
+			if (!is_projection_capable_plan(result_plan))
+			{
+				result_plan = (Plan *) make_result(root,
+												   NIL,
+												   NULL,
+												   result_plan);
+			}
+
+			/*
+			 * The "base" targetlist for all steps of the windowing process
+			 * is a flat tlist of all Vars and Aggs needed in the result.
+			 * (In some cases we wouldn't need to propagate all of these
+			 * all the way to the top, since they might only be needed as
+			 * inputs to WindowFuncs.  It's probably not worth trying to
+			 * optimize that though.)  As we climb up the stack, we add
+			 * outputs for the WindowFuncs computed at each level.  Also,
+			 * each input tlist has to present all the columns needed to
+			 * sort the data for the next WindowAgg step.  That's handled
+			 * internally by make_sort_from_pathkeys, but we need the
+			 * copyObject steps here to ensure that each plan node has
+			 * a separately modifiable tlist.
+			 */
+			window_tlist = flatten_tlist(tlist);
+			if (parse->hasAggs)
+				window_tlist = add_to_flat_tlist(window_tlist,
+											pull_agg_clause((Node *) tlist));
+			result_plan->targetlist = (List *) copyObject(window_tlist);
+
+			foreach(l, activeWindows)
+			{
+				WindowClause *wc = (WindowClause *) lfirst(l);
+				List	   *window_pathkeys;
+				int			partNumCols;
+				AttrNumber *partColIdx;
+				Oid		   *partOperators;
+				int			ordNumCols;
+				AttrNumber *ordColIdx;
+				Oid		   *ordOperators;
+
+				window_pathkeys = make_pathkeys_for_window(root,
+														   wc,
+														   tlist,
+														   true);
+
+				/*
+				 * This is a bit tricky: we build a sort node even if we don't
+				 * really have to sort.  Even when no explicit sort is needed,
+				 * we need to have suitable resjunk items added to the input
+				 * plan's tlist for any partitioning or ordering columns that
+				 * aren't plain Vars.  Furthermore, this way we can use
+				 * existing infrastructure to identify which input columns are
+				 * the interesting ones.
+				 */
+				if (window_pathkeys)
+				{
+					Sort	   *sort_plan;
+
+					sort_plan = make_sort_from_pathkeys(root,
+														result_plan,
+														window_pathkeys,
+														-1.0);
+					if (!pathkeys_contained_in(window_pathkeys,
+											   current_pathkeys))
+					{
+						/* we do indeed need to sort */
+						result_plan = (Plan *) sort_plan;
+						current_pathkeys = window_pathkeys;
+					}
+					/* In either case, extract the per-column information */
+					get_column_info_for_window(root, wc, tlist,
+											   sort_plan->numCols,
+											   sort_plan->sortColIdx,
+											   &partNumCols,
+											   &partColIdx,
+											   &partOperators,
+											   &ordNumCols,
+											   &ordColIdx,
+											   &ordOperators);
+				}
+				else
+				{
+					/* empty window specification, nothing to sort */
+					partNumCols = 0;
+					partColIdx = NULL;
+					partOperators = NULL;
+					ordNumCols = 0;
+					ordColIdx = NULL;
+					ordOperators = NULL;
+				}
+
+				if (lnext(l))
+				{
+					/* Add the current WindowFuncs to the running tlist */
+					window_tlist = add_to_flat_tlist(window_tlist,
+											wflists->windowFuncs[wc->winref]);
+				}
+				else
+				{
+					/* Install the original tlist in the topmost WindowAgg */
+					window_tlist = tlist;
+				}
+
+				/* ... and make the WindowAgg plan node */
+				result_plan = (Plan *)
+					make_windowagg(root,
+								   (List *) copyObject(window_tlist),
+								   list_length(wflists->windowFuncs[wc->winref]),
+								   partNumCols,
+								   partColIdx,
+								   partOperators,
+								   ordNumCols,
+								   ordColIdx,
+								   ordOperators,
+								   result_plan);
+			}
+		}
 	}							/* end of if (setOperations) */
 
 	/*
@@ -2030,7 +2212,8 @@ make_subplanTargetList(PlannerInfo *root,
 	 * If we're not grouping or aggregating, there's nothing to do here;
 	 * query_planner should receive the unmodified target list.
 	 */
-	if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual)
+	if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual &&
+		!parse->hasWindowFuncs)
 	{
 		*need_tlist_eval = true;
 		return tlist;
@@ -2039,7 +2222,9 @@ make_subplanTargetList(PlannerInfo *root,
 	/*
 	 * Otherwise, start with a "flattened" tlist (having just the vars
 	 * mentioned in the targetlist and HAVING qual --- but not upper-level
-	 * Vars; they will be replaced by Params later on).
+	 * Vars; they will be replaced by Params later on).  Note this includes
+	 * vars used in resjunk items, so we are covering the needs of ORDER BY
+	 * and window specifications.
 	 */
 	sub_tlist = flatten_tlist(tlist);
 	extravars = pull_var_clause(parse->havingQual, true);
@@ -2066,7 +2251,7 @@ make_subplanTargetList(PlannerInfo *root,
 		{
 			SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
 			Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
-			TargetEntry *te = NULL;
+			TargetEntry *te;
 
 			/*
 			 * Find or make a matching sub_tlist entry.  If the groupexpr
@@ -2074,20 +2259,10 @@ make_subplanTargetList(PlannerInfo *root,
 			 * won't make multiple groupClause entries for the same TLE.)
 			 */
 			if (groupexpr && IsA(groupexpr, Var))
-			{
-				ListCell   *sl;
-
-				foreach(sl, sub_tlist)
-				{
-					TargetEntry *lte = (TargetEntry *) lfirst(sl);
+				te = tlist_member(groupexpr, sub_tlist);
+			else
+				te = NULL;
 
-					if (equal(groupexpr, lte->expr))
-					{
-						te = lte;
-						break;
-					}
-				}
-			}
 			if (!te)
 			{
 				te = makeTargetEntry((Expr *) groupexpr,
@@ -2112,7 +2287,7 @@ make_subplanTargetList(PlannerInfo *root,
  *
  * This is only needed if we don't use the sub_tlist chosen by
  * make_subplanTargetList.	We have to forget the column indexes found
- * by that routine and re-locate the grouping vars in the real sub_tlist.
+ * by that routine and re-locate the grouping exprs in the real sub_tlist.
  */
 static void
 locate_grouping_columns(PlannerInfo *root,
@@ -2137,18 +2312,10 @@ locate_grouping_columns(PlannerInfo *root,
 	{
 		SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
 		Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
-		TargetEntry *te = NULL;
-		ListCell   *sl;
+		TargetEntry *te = tlist_member(groupexpr, sub_tlist);
 
-		foreach(sl, sub_tlist)
-		{
-			te = (TargetEntry *) lfirst(sl);
-			if (equal(groupexpr, te->expr))
-				break;
-		}
-		if (!sl)
+		if (!te)
 			elog(ERROR, "failed to locate grouping columns");
-
 		groupColIdx[keyno++] = te->resno;
 	}
 }
@@ -2190,3 +2357,219 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
 		elog(ERROR, "resjunk output columns are not implemented");
 	return new_tlist;
 }
+
+/*
+ * select_active_windows
+ *		Create a list of the "active" window clauses (ie, those referenced
+ *		by non-deleted WindowFuncs) in the order they are to be executed.
+ */
+static List *
+select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
+{
+	List	   *result;
+	List	   *actives;
+	ListCell   *lc;
+
+	/* First, make a list of the active windows */
+	actives = NIL;
+	foreach(lc, root->parse->windowClause)
+	{
+		WindowClause *wc = (WindowClause *) lfirst(lc);
+
+		/* It's only active if wflists shows some related WindowFuncs */
+		Assert(wc->winref <= wflists->maxWinRef);
+		if (wflists->windowFuncs[wc->winref] != NIL)
+			actives = lappend(actives, wc);
+	}
+
+	/*
+	 * Now, ensure that windows with identical partitioning/ordering clauses
+	 * are adjacent in the list.  This is required by the SQL standard, which
+	 * says that only one sort is to be used for such windows, even if they
+	 * are otherwise distinct (eg, different names or framing clauses).
+	 *
+	 * There is room to be much smarter here, for example detecting whether
+	 * one window's sort keys are a prefix of another's (so that sorting
+	 * for the latter would do for the former), or putting windows first
+	 * that match a sort order available for the underlying query.  For the
+	 * moment we are content with meeting the spec.
+	 */
+	result = NIL;
+	while (actives != NIL)
+	{
+		WindowClause *wc = (WindowClause *) linitial(actives);
+		ListCell   *prev;
+		ListCell   *next;
+
+		/* Move wc from actives to result */
+		actives = list_delete_first(actives);
+		result = lappend(result, wc);
+
+		/* Now move any matching windows from actives to result */
+		prev = NULL;
+		for (lc = list_head(actives); lc; lc = next)
+		{
+			WindowClause *wc2 = (WindowClause *) lfirst(lc);
+
+			next = lnext(lc);
+			if (equal(wc->partitionClause, wc2->partitionClause) &&
+				equal(wc->orderClause, wc2->orderClause))
+			{
+				actives = list_delete_cell(actives, lc, prev);
+				result = lappend(result, wc2);
+			}
+			else
+				prev = lc;
+		}
+	}
+
+	return result;
+}
+
+/*
+ * make_pathkeys_for_window
+ *		Create a pathkeys list describing the required input ordering
+ *		for the given WindowClause.
+ *
+ * The required ordering is first the PARTITION keys, then the ORDER keys.
+ * In the future we might try to implement windowing using hashing, in which
+ * case the ordering could be relaxed, but for now we always sort.
+ */
+static List *
+make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
+						 List *tlist, bool canonicalize)
+{
+	List	   *window_pathkeys;
+	List	   *window_sortclauses;
+
+	/* Throw error if can't sort */
+	if (!grouping_is_sortable(wc->partitionClause))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("could not implement window PARTITION BY"),
+				 errdetail("Window partitioning columns must be of sortable datatypes.")));
+	if (!grouping_is_sortable(wc->orderClause))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("could not implement window ORDER BY"),
+				 errdetail("Window ordering columns must be of sortable datatypes.")));
+
+	/* Okay, make the combined pathkeys */
+	window_sortclauses = list_concat(list_copy(wc->partitionClause),
+									 list_copy(wc->orderClause));
+	window_pathkeys = make_pathkeys_for_sortclauses(root,
+													window_sortclauses,
+													tlist,
+													canonicalize);
+	list_free(window_sortclauses);
+	return window_pathkeys;
+}
+
+/*----------
+ * get_column_info_for_window
+ *		Get the partitioning/ordering column numbers and equality operators
+ *		for a WindowAgg node.
+ *
+ * This depends on the behavior of make_pathkeys_for_window()!
+ *
+ * We are given the target WindowClause and an array of the input column
+ * numbers associated with the resulting pathkeys.  In the easy case, there
+ * are the same number of pathkey columns as partitioning + ordering columns
+ * and we just have to copy some data around.  However, it's possible that
+ * some of the original partitioning + ordering columns were eliminated as
+ * redundant during the transformation to pathkeys.  (This can happen even
+ * though the parser gets rid of obvious duplicates.  A typical scenario is a
+ * window specification "PARTITION BY x ORDER BY y" coupled with a clause
+ * "WHERE x = y" that causes the two sort columns to be recognized as
+ * redundant.)  In that unusual case, we have to work a lot harder to
+ * determine which keys are significant.
+ *
+ * The method used here is a bit brute-force: add the sort columns to a list
+ * one at a time and note when the resulting pathkey list gets longer.  But
+ * it's a sufficiently uncommon case that a faster way doesn't seem worth
+ * the amount of code refactoring that'd be needed.
+ *----------
+ */
+static void
+get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist,
+						   int numSortCols, AttrNumber *sortColIdx,
+						   int *partNumCols,
+						   AttrNumber **partColIdx,
+						   Oid **partOperators,
+						   int *ordNumCols,
+						   AttrNumber **ordColIdx,
+						   Oid **ordOperators)
+{
+	int			numPart = list_length(wc->partitionClause);
+	int			numOrder = list_length(wc->orderClause);
+
+	if (numSortCols == numPart + numOrder)
+	{
+		/* easy case */
+		*partNumCols = numPart;
+		*partColIdx = sortColIdx;
+		*partOperators = extract_grouping_ops(wc->partitionClause);
+		*ordNumCols = numOrder;
+		*ordColIdx = sortColIdx + numPart;
+		*ordOperators = extract_grouping_ops(wc->orderClause);
+	}
+	else
+	{
+		List	   *sortclauses;
+		List	   *pathkeys;
+		int			scidx;
+		ListCell   *lc;
+
+		/* first, allocate what's certainly enough space for the arrays */
+		*partNumCols = 0;
+		*partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber));
+		*partOperators = (Oid *) palloc(numPart * sizeof(Oid));
+		*ordNumCols = 0;
+		*ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber));
+		*ordOperators = (Oid *) palloc(numOrder * sizeof(Oid));
+		sortclauses = NIL;
+		pathkeys = NIL;
+		scidx = 0;
+		foreach(lc, wc->partitionClause)
+		{
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+			List	   *new_pathkeys;
+
+			sortclauses = lappend(sortclauses, sgc);
+			new_pathkeys = make_pathkeys_for_sortclauses(root,
+														 sortclauses,
+														 tlist,
+														 true);
+			if (list_length(new_pathkeys) > list_length(pathkeys))
+			{
+				/* this sort clause is actually significant */
+				*partColIdx[*partNumCols] = sortColIdx[scidx++];
+				*partOperators[*partNumCols] = sgc->eqop;
+				(*partNumCols)++;
+				pathkeys = new_pathkeys;
+			}
+		}
+		foreach(lc, wc->orderClause)
+		{
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+			List	   *new_pathkeys;
+
+			sortclauses = lappend(sortclauses, sgc);
+			new_pathkeys = make_pathkeys_for_sortclauses(root,
+														 sortclauses,
+														 tlist,
+														 true);
+			if (list_length(new_pathkeys) > list_length(pathkeys))
+			{
+				/* this sort clause is actually significant */
+				*ordColIdx[*ordNumCols] = sortColIdx[scidx++];
+				*ordOperators[*ordNumCols] = sgc->eqop;
+				(*ordNumCols)++;
+				pathkeys = new_pathkeys;
+			}
+		}
+		/* complain if we didn't eat exactly the right number of sort cols */
+		if (scidx != numSortCols)
+			elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators");
+	}
+}
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 9bec109f6f5..83447082f5b 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.146 2008/10/21 20:42:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.147 2008/12/28 18:53:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -415,6 +415,7 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset)
 			}
 			break;
 		case T_Agg:
+		case T_WindowAgg:
 		case T_Group:
 			set_upper_references(glob, plan, rtoffset);
 			break;
@@ -679,6 +680,11 @@ fix_expr_common(PlannerGlobal *glob, Node *node)
 		record_plan_function_dependency(glob,
 										((Aggref *) node)->aggfnoid);
 	}
+	else if (IsA(node, WindowFunc))
+	{
+		record_plan_function_dependency(glob,
+										((WindowFunc *) node)->winfnoid);
+	}
 	else if (IsA(node, FuncExpr))
 	{
 		record_plan_function_dependency(glob,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index c999fb6419c..a38f8c09ae7 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.143 2008/12/08 00:16:09 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.144 2008/12/28 18:53:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1243,6 +1243,7 @@ simplify_EXISTS_query(Query *query)
 		query->intoClause ||
 		query->setOperations ||
 		query->hasAggs ||
+		query->hasWindowFuncs ||
 		query->havingQual ||
 		query->limitOffset ||
 		query->limitCount ||
@@ -1258,13 +1259,14 @@ simplify_EXISTS_query(Query *query)
 
 	/*
 	 * Otherwise, we can throw away the targetlist, as well as any GROUP,
-	 * DISTINCT, and ORDER BY clauses; none of those clauses will change
-	 * a nonzero-rows result to zero rows or vice versa.  (Furthermore,
+	 * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will
+	 * change a nonzero-rows result to zero rows or vice versa.  (Furthermore,
 	 * since our parsetree representation of these clauses depends on the
 	 * targetlist, we'd better throw them away if we drop the targetlist.)
 	 */
 	query->targetList = NIL;
 	query->groupClause = NIL;
+	query->windowClause = NIL;
 	query->distinctClause = NIL;
 	query->sortClause = NIL;
 	query->hasDistinctOn = false;
@@ -1321,8 +1323,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
 	 * The rest of the sub-select must not refer to any Vars of the parent
 	 * query.  (Vars of higher levels should be okay, though.)
 	 *
-	 * Note: we need not check for Aggs separately because we know the
-	 * sub-select is as yet unoptimized; any uplevel Agg must therefore
+	 * Note: we need not check for Aggrefs separately because we know the
+	 * sub-select is as yet unoptimized; any uplevel Aggref must therefore
 	 * contain an uplevel Var reference.  This is not the case below ...
 	 */
 	if (contain_vars_of_level((Node *) subselect, 1))
@@ -1432,7 +1434,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
 	/*
 	 * And there can't be any child Vars in the stuff we intend to pull up.
 	 * (Note: we'd need to check for child Aggs too, except we know the
-	 * child has no aggs at all because of simplify_EXISTS_query's check.)
+	 * child has no aggs at all because of simplify_EXISTS_query's check.
+	 * The same goes for window functions.)
 	 */
 	if (contain_vars_of_level((Node *) leftargs, 0))
 		return NULL;
@@ -1955,6 +1958,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
 		case T_RecursiveUnion:
 		case T_Hash:
 		case T_Agg:
+		case T_WindowAgg:
 		case T_SeqScan:
 		case T_Material:
 		case T_Sort:
author	Tom Lane <tgl@sss.pgh.pa.us>	2008-12-28 18:54:01 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2008-12-28 18:54:01 +0000
commit	95b07bc7f5010233f52f9d11da74e2e5b653b0a7 (patch)
tree	48f5858bf4eca1bfb316ef02bb959ca85f568e0a /src/backend/optimizer/plan
parent	38e9348282e9d078487147ba8a85aebec54e3a08 (diff)
download	postgresql-95b07bc7f5010233f52f9d11da74e2e5b653b0a7.tar.gz postgresql-95b07bc7f5010233f52f9d11da74e2e5b653b0a7.zip