From 4907ba304c346051a6535e67c043779755a78e84 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 21 Jan 2025 14:43:21 -0500
Subject: Doc: simplify the tutorial's window-function examples.

For the purposes of this discussion, row_number() is just as good
as rank(), and its behavior is easier to understand and describe.
So let's switch the examples to using row_number().

Along the way to checking the results given in the tutorial,
I found it helpful to extract the empsalary table we use in the
regression tests, which is evidently the same data that was used
to make these results.  So I shoved that into advanced.source
to improve the coverage of that file a little.  (There's still
several pages of the tutorial that are not included in it,
but at least now 3.5 Window Functions is covered.)

Suggested-by: "David G. Johnston" <david.g.johnston@gmail.com>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/173737973383.1070.1832752929070067441@wrigleys.postgresql.org
---
 doc/src/sgml/advanced.sgml | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

(limited to 'doc/src')
diff --git a/doc/src/sgml/advanced.sgml b/doc/src/sgml/advanced.sgml
index 755c9f14850..e15a3323dfb 100644
--- a/doc/src/sgml/advanced.sgml
+++ b/doc/src/sgml/advanced.sgml
@@ -389,30 +389,32 @@ SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM emps
 
 <programlisting>
 SELECT depname, empno, salary,
-       rank() OVER (PARTITION BY depname ORDER BY salary DESC)
+       row_number() OVER (PARTITION BY depname ORDER BY salary DESC)
 FROM empsalary;
 </programlisting>
 
 <screen>
-  depname  | empno | salary | rank
------------+-------+--------+------
- develop   |     8 |   6000 |    1
- develop   |    10 |   5200 |    2
- develop   |    11 |   5200 |    2
- develop   |     9 |   4500 |    4
- develop   |     7 |   4200 |    5
- personnel |     2 |   3900 |    1
- personnel |     5 |   3500 |    2
- sales     |     1 |   5000 |    1
- sales     |     4 |   4800 |    2
- sales     |     3 |   4800 |    2
+  depname  | empno | salary | row_number
+-----------+-------+--------+------------
+ develop   |     8 |   6000 |          1
+ develop   |    10 |   5200 |          2
+ develop   |    11 |   5200 |          3
+ develop   |     9 |   4500 |          4
+ develop   |     7 |   4200 |          5
+ personnel |     2 |   3900 |          1
+ personnel |     5 |   3500 |          2
+ sales     |     1 |   5000 |          1
+ sales     |     4 |   4800 |          2
+ sales     |     3 |   4800 |          3
 (10 rows)
 </screen>
 
-    As shown here, the <function>rank</function> function produces a numerical rank
-    for each distinct <literal>ORDER BY</literal> value in the current row's
-    partition, using the order defined by the <literal>ORDER BY</literal> clause.
-    <function>rank</function> needs no explicit parameter, because its behavior
+    As shown here, the <function>row_number</function> window function
+    assigns sequential numbers to the rows within each partition,
+    in the order defined by the <literal>ORDER BY</literal> clause
+    (with tied rows numbered in an unspecified order).
+    <function>row_number</function> needs no explicit parameter,
+    because its behavior
     is entirely determined by the <literal>OVER</literal> clause.
    </para>
 
@@ -527,14 +529,15 @@ SELECT salary, sum(salary) OVER (ORDER BY salary) FROM empsalary;
 SELECT depname, empno, salary, enroll_date
 FROM
   (SELECT depname, empno, salary, enroll_date,
-          rank() OVER (PARTITION BY depname ORDER BY salary DESC, empno) AS pos
+     row_number() OVER (PARTITION BY depname ORDER BY salary DESC, empno) AS pos
      FROM empsalary
   ) AS ss
 WHERE pos &lt; 3;
 </programlisting>
 
     The above query only shows the rows from the inner query having
-    <literal>rank</literal> less than 3.
+    <literal>row_number</literal> less than 3 (that is, the first
+    two rows for each department).
    </para>
 
    <para>
-- 
cgit v1.2.3