1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
|
--
-- SELECT_DISTINCT_ON
--
SELECT DISTINCT ON (string4) string4, two, ten
FROM onek
ORDER BY string4 using <, two using >, ten using <;
string4 | two | ten
---------+-----+-----
AAAAxx | 1 | 1
HHHHxx | 1 | 1
OOOOxx | 1 | 1
VVVVxx | 1 | 1
(4 rows)
-- this will fail due to conflict of ordering requirements
SELECT DISTINCT ON (string4, ten) string4, two, ten
FROM onek
ORDER BY string4 using <, two using <, ten using <;
ERROR: SELECT DISTINCT ON expressions must match initial ORDER BY expressions
LINE 1: SELECT DISTINCT ON (string4, ten) string4, two, ten
^
SELECT DISTINCT ON (string4, ten) string4, ten, two
FROM onek
ORDER BY string4 using <, ten using >, two using <;
string4 | ten | two
---------+-----+-----
AAAAxx | 9 | 1
AAAAxx | 8 | 0
AAAAxx | 7 | 1
AAAAxx | 6 | 0
AAAAxx | 5 | 1
AAAAxx | 4 | 0
AAAAxx | 3 | 1
AAAAxx | 2 | 0
AAAAxx | 1 | 1
AAAAxx | 0 | 0
HHHHxx | 9 | 1
HHHHxx | 8 | 0
HHHHxx | 7 | 1
HHHHxx | 6 | 0
HHHHxx | 5 | 1
HHHHxx | 4 | 0
HHHHxx | 3 | 1
HHHHxx | 2 | 0
HHHHxx | 1 | 1
HHHHxx | 0 | 0
OOOOxx | 9 | 1
OOOOxx | 8 | 0
OOOOxx | 7 | 1
OOOOxx | 6 | 0
OOOOxx | 5 | 1
OOOOxx | 4 | 0
OOOOxx | 3 | 1
OOOOxx | 2 | 0
OOOOxx | 1 | 1
OOOOxx | 0 | 0
VVVVxx | 9 | 1
VVVVxx | 8 | 0
VVVVxx | 7 | 1
VVVVxx | 6 | 0
VVVVxx | 5 | 1
VVVVxx | 4 | 0
VVVVxx | 3 | 1
VVVVxx | 2 | 0
VVVVxx | 1 | 1
VVVVxx | 0 | 0
(40 rows)
-- bug #5049: early 8.4.x chokes on volatile DISTINCT ON clauses
select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2;
r | f1
---+-------------
0 | -2147483647
(1 row)
--
-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
-- all of the distinct_pathkeys have been marked as redundant
--
-- Ensure we also get a LIMIT plan with DISTINCT ON
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (four) four,two
FROM tenk1 WHERE four = 0 ORDER BY 1;
QUERY PLAN
----------------------------
Limit
-> Seq Scan on tenk1
Filter: (four = 0)
(3 rows)
-- and check the result of the above query is correct
SELECT DISTINCT ON (four) four,two
FROM tenk1 WHERE four = 0 ORDER BY 1;
four | two
------+-----
0 | 0
(1 row)
-- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (four) four,two
FROM tenk1 WHERE four = 0 ORDER BY 1,2;
QUERY PLAN
----------------------------------
Limit
-> Sort
Sort Key: two
-> Seq Scan on tenk1
Filter: (four = 0)
(5 rows)
-- Same again but use a column that is indexed so that we get an index scan
-- then a limit
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (four) four,hundred
FROM tenk1 WHERE four = 0 ORDER BY 1,2;
QUERY PLAN
-----------------------------------------------
Limit
-> Index Scan using tenk1_hundred on tenk1
Filter: (four = 0)
(3 rows)
--
-- Test the planner's ability to reorder the distinctClause Pathkeys to match
-- the input path's ordering
--
CREATE TABLE distinct_on_tbl (x int, y int, z int);
INSERT INTO distinct_on_tbl SELECT i%10, i%10, i%10 FROM generate_series(1, 1000) AS i;
CREATE INDEX distinct_on_tbl_x_y_idx ON distinct_on_tbl (x, y);
ANALYZE distinct_on_tbl;
-- Produce results with sorting.
SET enable_hashagg TO OFF;
-- Ensure we avoid the need to re-sort by reordering the distinctClause
-- Pathkeys to match the ordering of the input path
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (y, x) x, y FROM distinct_on_tbl;
QUERY PLAN
------------------------------------------------------------------------
Unique
-> Index Only Scan using distinct_on_tbl_x_y_idx on distinct_on_tbl
(2 rows)
SELECT DISTINCT ON (y, x) x, y FROM distinct_on_tbl;
x | y
---+---
0 | 0
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
(10 rows)
-- Ensure we leverage incremental-sort by reordering the distinctClause
-- Pathkeys to partially match the ordering of the input path
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_on_tbl ORDER BY x) s;
QUERY PLAN
------------------------------------------------------------------------------------
Unique
-> Incremental Sort
Sort Key: s.x, s.y
Presorted Key: s.x
-> Subquery Scan on s
-> Index Only Scan using distinct_on_tbl_x_y_idx on distinct_on_tbl
(6 rows)
SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_on_tbl ORDER BY x) s;
x | y
---+---
0 | 0
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
(10 rows)
-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
-- input path even if there is ORDER BY clause
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (y, x) x, y FROM distinct_on_tbl ORDER BY y;
QUERY PLAN
------------------------------------------------------------------------------
Sort
Sort Key: y
-> Unique
-> Index Only Scan using distinct_on_tbl_x_y_idx on distinct_on_tbl
(4 rows)
SELECT DISTINCT ON (y, x) x, y FROM distinct_on_tbl ORDER BY y;
x | y
---+---
0 | 0
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
(10 rows)
-- Ensure the resulting pathkey list matches the initial distinctClause Pathkeys
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_on_tbl order by x, z, y) s ORDER BY y, x, z;
QUERY PLAN
---------------------------------------------------------------------------------------------
Sort
Sort Key: s.y, s.x, s.z
-> Unique
-> Incremental Sort
Sort Key: s.x, s.y, s.z
Presorted Key: s.x
-> Subquery Scan on s
-> Sort
Sort Key: distinct_on_tbl.x, distinct_on_tbl.z, distinct_on_tbl.y
-> Seq Scan on distinct_on_tbl
(10 rows)
SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_on_tbl order by x, z, y) s ORDER BY y, x, z;
x | y
---+---
0 | 0
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
(10 rows)
RESET enable_hashagg;
DROP TABLE distinct_on_tbl;
|