Add additional regression tests for select_active_windows

During the development of 728202b63, which was aimed at reducing the number of sorts required to evaluate multiple window functions with different WindowClause definitions, the code written sorted the WindowClauses in reverse tleSortGroupRef order. There appears to be no discussion in the thread which was opened to discuss the development of this patch and no comments mentioning the fact that having the WindowClauses in reverse tleSortGroupRef order makes it more likely that the final WindowClause to be evaluated will provide presorted input to the query's DISTINCT or ORDER BY clause. The reason for this is that the tleSortGroupRef indexes are assigned for the DISTINCT and ORDER BY clauses before they are for the WindowClauses PARTITION BY and ORDER BY clauses. Putting the WindowClause with the lowest tleSortGroupRef last means that it's more likely that no additional sorting is required for the query's DISTINCT or ORDER BY clause. All we're doing here is adding some tests and a comment to help ensure that remains true and that we don't accidentally forget to consider this again should we ever rewrite that code. Author: Ankit Kumar Pandey, David Rowley Discussion: https://postgr.es/m/CAApHDvq=g2=ny59f1bvwRVvupsgPHK-KjLPBsSL25fVuGZ4idQ@mail.gmail.com
author: David Rowley <drowley@postgresql.org> 2023-01-07 15:24:35 +1300
committer: David Rowley <drowley@postgresql.org> 2023-01-07 15:24:35 +1300
commit: a14a5832923e10ef14a74864c94358d5bc8720e4 (patch)
tree: 225436ed538626fa48f1ef7c432e137d864cc7b3 /src
parent: c6e1f62e2cee817cad58cccc1dd685e908678241 (diff)
download: postgresql-a14a5832923e10ef14a74864c94358d5bc8720e4.tar.gz
postgresql-a14a5832923e10ef14a74864c94358d5bc8720e4.zip
3 files changed, 156 insertions, 0 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index d6ba7589f3a..000d757bdd8 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -5672,6 +5672,14 @@ select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
  * Sort the windows by the required sorting clauses. First, compare the sort
  * clauses themselves. Second, if one window's clauses are a prefix of another
  * one's clauses, put the window with more sort clauses first.
+ *
+ * We purposefully sort by the highest tleSortGroupRef first.  Since
+ * tleSortGroupRefs are assigned for the query's DISTINCT and ORDER BY first
+ * and because here we sort the lowest tleSortGroupRefs last, if a
+ * WindowClause is sharing a tleSortGroupRef with the query's DISTINCT or
+ * ORDER BY clause, this makes it more likely that the final WindowAgg will
+ * provide presorted input for the query's DISTINCT or ORDER BY clause, thus
+ * reducing the total number of sorts required for the query.
  */
 static int
 common_prefix_cmp(const void *a, const void *b)
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 5505e9a2dac..90e89fb5b68 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -3885,6 +3885,104 @@ WHERE depname = 'sales';
                            Filter: ((depname)::text = 'sales'::text)
 (7 rows)
 
+-- Ensure that the evaluation order of the WindowAggs results in the WindowAgg
+-- with the same sort order that's required by the ORDER BY is evaluated last.
+EXPLAIN (COSTS OFF)
+SELECT empno,
+       enroll_date,
+       depname,
+       sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+       min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+                     QUERY PLAN                     
+----------------------------------------------------
+ WindowAgg
+   ->  Incremental Sort
+         Sort Key: depname, empno
+         Presorted Key: depname
+         ->  WindowAgg
+               ->  Sort
+                     Sort Key: depname, enroll_date
+                     ->  Seq Scan on empsalary
+(8 rows)
+
+-- As above, but with an adjusted ORDER BY to ensure the above plan didn't
+-- perform only 2 sorts by accident.
+EXPLAIN (COSTS OFF)
+SELECT empno,
+       enroll_date,
+       depname,
+       sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+       min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
+FROM empsalary
+ORDER BY depname, enroll_date;
+                  QUERY PLAN                   
+-----------------------------------------------
+ WindowAgg
+   ->  Incremental Sort
+         Sort Key: depname, enroll_date
+         Presorted Key: depname
+         ->  WindowAgg
+               ->  Sort
+                     Sort Key: depname, empno
+                     ->  Seq Scan on empsalary
+(8 rows)
+
+SET enable_hashagg TO off;
+-- Ensure we don't get a sort for both DISTINCT and ORDER BY.  We expect the
+-- sort for the DISTINCT to provide presorted input for the ORDER BY.
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT
+       empno,
+       enroll_date,
+       depname,
+       sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+       min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
+FROM empsalary
+ORDER BY depname, enroll_date;
+                                          QUERY PLAN                                           
+-----------------------------------------------------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: depname, enroll_date, empno, (sum(salary) OVER (?)), (min(salary) OVER (?))
+         ->  WindowAgg
+               ->  Incremental Sort
+                     Sort Key: depname, enroll_date
+                     Presorted Key: depname
+                     ->  WindowAgg
+                           ->  Sort
+                                 Sort Key: depname, empno
+                                 ->  Seq Scan on empsalary
+(11 rows)
+
+-- As above but adjust the ORDER BY clause to help ensure the plan with the
+-- minimum amount of sorting wasn't a fluke.
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT
+       empno,
+       enroll_date,
+       depname,
+       sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+       min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+                                          QUERY PLAN                                           
+-----------------------------------------------------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: depname, empno, enroll_date, (sum(salary) OVER (?)), (min(salary) OVER (?))
+         ->  WindowAgg
+               ->  Incremental Sort
+                     Sort Key: depname, empno
+                     Presorted Key: depname
+                     ->  WindowAgg
+                           ->  Sort
+                                 Sort Key: depname, enroll_date
+                                 ->  Seq Scan on empsalary
+(11 rows)
+
+RESET enable_hashagg;
 -- Test Sort node reordering
 EXPLAIN (COSTS OFF)
 SELECT
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql
index 10f450fee47..b7bd0a83da4 100644
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -1276,6 +1276,56 @@ SELECT * FROM
    FROM empsalary) emp
 WHERE depname = 'sales';
 
+-- Ensure that the evaluation order of the WindowAggs results in the WindowAgg
+-- with the same sort order that's required by the ORDER BY is evaluated last.
+EXPLAIN (COSTS OFF)
+SELECT empno,
+       enroll_date,
+       depname,
+       sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+       min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+
+-- As above, but with an adjusted ORDER BY to ensure the above plan didn't
+-- perform only 2 sorts by accident.
+EXPLAIN (COSTS OFF)
+SELECT empno,
+       enroll_date,
+       depname,
+       sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+       min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
+FROM empsalary
+ORDER BY depname, enroll_date;
+
+SET enable_hashagg TO off;
+
+-- Ensure we don't get a sort for both DISTINCT and ORDER BY.  We expect the
+-- sort for the DISTINCT to provide presorted input for the ORDER BY.
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT
+       empno,
+       enroll_date,
+       depname,
+       sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+       min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
+FROM empsalary
+ORDER BY depname, enroll_date;
+
+-- As above but adjust the ORDER BY clause to help ensure the plan with the
+-- minimum amount of sorting wasn't a fluke.
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT
+       empno,
+       enroll_date,
+       depname,
+       sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+       min(salary) OVER (PARTITION BY depname order by enroll_date) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+
+RESET enable_hashagg;
+
 -- Test Sort node reordering
 EXPLAIN (COSTS OFF)
 SELECT
author	David Rowley <drowley@postgresql.org>	2023-01-07 15:24:35 +1300
committer	David Rowley <drowley@postgresql.org>	2023-01-07 15:24:35 +1300
commit	a14a5832923e10ef14a74864c94358d5bc8720e4 (patch)
tree	225436ed538626fa48f1ef7c432e137d864cc7b3 /src
parent	c6e1f62e2cee817cad58cccc1dd685e908678241 (diff)
download	postgresql-a14a5832923e10ef14a74864c94358d5bc8720e4.tar.gz postgresql-a14a5832923e10ef14a74864c94358d5bc8720e4.zip