diff options
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 77 | ||||
-rw-r--r-- | src/test/regress/expected/select_distinct.out | 54 | ||||
-rw-r--r-- | src/test/regress/expected/select_distinct_on.out | 50 | ||||
-rw-r--r-- | src/test/regress/sql/select_distinct.sql | 26 | ||||
-rw-r--r-- | src/test/regress/sql/select_distinct_on.sql | 25 |
5 files changed, 221 insertions, 11 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 78a81745348..493a3af0fa3 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4780,11 +4780,46 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) { - add_path(distinct_rel, (Path *) - create_upper_unique_path(root, distinct_rel, - path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + /* + * distinct_pathkeys may have become empty if all of the + * pathkeys were determined to be redundant. If all of the + * pathkeys are redundant then each DISTINCT target must only + * allow a single value, therefore all resulting tuples must + * be identical (or at least indistinguishable by an equality + * check). We can uniquify these tuples simply by just taking + * the first tuple. All we do here is add a path to do "LIMIT + * 1" atop of 'path'. When doing a DISTINCT ON we may still + * have a non-NIL sort_pathkeys list, so we must still only do + * this with paths which are correctly sorted by + * sort_pathkeys. + */ + if (root->distinct_pathkeys == NIL) + { + Node *limitCount; + + limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, + sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); + + /* + * If the query already has a LIMIT clause, then we could + * end up with a duplicate LimitPath in the final plan. + * That does not seem worth troubling over too much. + */ + add_path(distinct_rel, (Path *) + create_limit_path(root, distinct_rel, path, NULL, + limitCount, LIMIT_OPTION_COUNT, + 0, 1)); + } + else + { + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } } @@ -4805,13 +4840,33 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, path = (Path *) create_sort_path(root, distinct_rel, path, needed_pathkeys, - -1.0); + root->distinct_pathkeys == NIL ? + 1.0 : -1.0); - add_path(distinct_rel, (Path *) - create_upper_unique_path(root, distinct_rel, - path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + /* + * As above, use a LimitPath instead of a UniquePath when all of the + * distinct_pathkeys are redundant and we're only going to get a + * series of tuples all with the same values anyway. + */ + if (root->distinct_pathkeys == NIL) + { + Node *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, + sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); + + add_path(distinct_rel, (Path *) + create_limit_path(root, distinct_rel, path, NULL, + limitCount, LIMIT_OPTION_COUNT, 0, 1)); + } + else + { + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } /* diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 748419cee05..6ce889d87c1 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -280,6 +280,60 @@ RESET min_parallel_table_scan_size; RESET parallel_setup_cost; RESET parallel_tuple_cost; -- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + QUERY PLAN +---------------------------- + Limit + -> Seq Scan on tenk1 + Filter: (four = 0) +(3 rows) + +-- Ensure the above gives us the correct result +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + four +------ + 0 +(1 row) + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + QUERY PLAN +--------------------------------------------- + Limit + -> Seq Scan on tenk1 + Filter: ((two <> 0) AND (four = 0)) +(3 rows) + +-- Ensure no rows are returned +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + four +------ +(0 rows) + +-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants +EXPLAIN (COSTS OFF) +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + QUERY PLAN +---------------------------- + Limit + -> Seq Scan on tenk1 + Filter: (four = 0) +(3 rows) + +-- Ensure we only get 1 row +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + four | ?column? | ?column? | ?column? +------+----------+----------+---------- + 0 | 1 | 2 | 3 +(1 row) + +-- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. -- diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out index 3d989909912..b2978c1114a 100644 --- a/src/test/regress/expected/select_distinct_on.out +++ b/src/test/regress/expected/select_distinct_on.out @@ -73,3 +73,53 @@ select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2; 0 | -2147483647 (1 row) +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- +-- Ensure we also get a LIMIT plan with DISTINCT ON +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1; + QUERY PLAN +---------------------------------- + Result + -> Limit + -> Seq Scan on tenk1 + Filter: (four = 0) +(4 rows) + +-- and check the result of the above query is correct +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1; + four | two +------+----- + 0 | 0 +(1 row) + +-- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1,2; + QUERY PLAN +---------------------------------- + Limit + -> Sort + Sort Key: two + -> Seq Scan on tenk1 + Filter: (four = 0) +(5 rows) + +-- Same again but use a column that is indexed so that we get an index scan +-- then a limit +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,hundred + FROM tenk1 WHERE four = 0 ORDER BY 1,2; + QUERY PLAN +----------------------------------------------------- + Result + -> Limit + -> Index Scan using tenk1_hundred on tenk1 + Filter: (four = 0) +(4 rows) + diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index f27ff714f8d..34020adad1d 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -147,6 +147,32 @@ RESET parallel_setup_cost; RESET parallel_tuple_cost; -- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + +-- Ensure the above gives us the correct result +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + +-- Ensure no rows are returned +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + +-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants +EXPLAIN (COSTS OFF) +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + +-- Ensure we only get 1 row +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + +-- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. -- diff --git a/src/test/regress/sql/select_distinct_on.sql b/src/test/regress/sql/select_distinct_on.sql index 0920bd64b96..261e6140feb 100644 --- a/src/test/regress/sql/select_distinct_on.sql +++ b/src/test/regress/sql/select_distinct_on.sql @@ -17,3 +17,28 @@ SELECT DISTINCT ON (string4, ten) string4, ten, two -- bug #5049: early 8.4.x chokes on volatile DISTINCT ON clauses select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2; + +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- + +-- Ensure we also get a LIMIT plan with DISTINCT ON +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1; + +-- and check the result of the above query is correct +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1; + +-- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1,2; + +-- Same again but use a column that is indexed so that we get an index scan +-- then a limit +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,hundred + FROM tenk1 WHERE four = 0 ORDER BY 1,2; |