5 files changed, 221 insertions, 11 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 78a81745348..493a3af0fa3 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4780,11 +4780,46 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 
 			if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
 			{
-				add_path(distinct_rel, (Path *)
-						 create_upper_unique_path(root, distinct_rel,
-												  path,
-												  list_length(root->distinct_pathkeys),
-												  numDistinctRows));
+				/*
+				 * distinct_pathkeys may have become empty if all of the
+				 * pathkeys were determined to be redundant.  If all of the
+				 * pathkeys are redundant then each DISTINCT target must only
+				 * allow a single value, therefore all resulting tuples must
+				 * be identical (or at least indistinguishable by an equality
+				 * check).  We can uniquify these tuples simply by just taking
+				 * the first tuple.  All we do here is add a path to do "LIMIT
+				 * 1" atop of 'path'.  When doing a DISTINCT ON we may still
+				 * have a non-NIL sort_pathkeys list, so we must still only do
+				 * this with paths which are correctly sorted by
+				 * sort_pathkeys.
+				 */
+				if (root->distinct_pathkeys == NIL)
+				{
+					Node	   *limitCount;
+
+					limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+													sizeof(int64),
+													Int64GetDatum(1), false,
+													FLOAT8PASSBYVAL);
+
+					/*
+					 * If the query already has a LIMIT clause, then we could
+					 * end up with a duplicate LimitPath in the final plan.
+					 * That does not seem worth troubling over too much.
+					 */
+					add_path(distinct_rel, (Path *)
+							 create_limit_path(root, distinct_rel, path, NULL,
+											   limitCount, LIMIT_OPTION_COUNT,
+											   0, 1));
+				}
+				else
+				{
+					add_path(distinct_rel, (Path *)
+							 create_upper_unique_path(root, distinct_rel,
+													  path,
+													  list_length(root->distinct_pathkeys),
+													  numDistinctRows));
+				}
 			}
 		}
 
@@ -4805,13 +4840,33 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 			path = (Path *) create_sort_path(root, distinct_rel,
 											 path,
 											 needed_pathkeys,
-											 -1.0);
+											 root->distinct_pathkeys == NIL ?
+											 1.0 : -1.0);
 
-		add_path(distinct_rel, (Path *)
-				 create_upper_unique_path(root, distinct_rel,
-										  path,
-										  list_length(root->distinct_pathkeys),
-										  numDistinctRows));
+		/*
+		 * As above, use a LimitPath instead of a UniquePath when all of the
+		 * distinct_pathkeys are redundant and we're only going to get a
+		 * series of tuples all with the same values anyway.
+		 */
+		if (root->distinct_pathkeys == NIL)
+		{
+			Node	   *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+														sizeof(int64),
+														Int64GetDatum(1), false,
+														FLOAT8PASSBYVAL);
+
+			add_path(distinct_rel, (Path *)
+					 create_limit_path(root, distinct_rel, path, NULL,
+									   limitCount, LIMIT_OPTION_COUNT, 0, 1));
+		}
+		else
+		{
+			add_path(distinct_rel, (Path *)
+					 create_upper_unique_path(root, distinct_rel,
+											  path,
+											  list_length(root->distinct_pathkeys),
+											  numDistinctRows));
+		}
 	}
 
 	/*
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 748419cee05..6ce889d87c1 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -280,6 +280,60 @@ RESET min_parallel_table_scan_size;
 RESET parallel_setup_cost;
 RESET parallel_tuple_cost;
 --
+-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
+-- all of the distinct_pathkeys have been marked as redundant
+--
+-- Ensure we get a plan with a Limit 1
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 0;
+         QUERY PLAN         
+----------------------------
+ Limit
+   ->  Seq Scan on tenk1
+         Filter: (four = 0)
+(3 rows)
+
+-- Ensure the above gives us the correct result
+SELECT DISTINCT four FROM tenk1 WHERE four = 0;
+ four 
+------
+    0
+(1 row)
+
+-- Ensure we get a plan with a Limit 1
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
+                 QUERY PLAN                  
+---------------------------------------------
+ Limit
+   ->  Seq Scan on tenk1
+         Filter: ((two <> 0) AND (four = 0))
+(3 rows)
+
+-- Ensure no rows are returned
+SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
+ four 
+------
+(0 rows)
+
+-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
+         QUERY PLAN         
+----------------------------
+ Limit
+   ->  Seq Scan on tenk1
+         Filter: (four = 0)
+(3 rows)
+
+-- Ensure we only get 1 row
+SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
+ four | ?column? | ?column? | ?column? 
+------+----------+----------+----------
+    0 |        1 |        2 |        3
+(1 row)
+
+--
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.
 --
diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out
index 3d989909912..b2978c1114a 100644
--- a/src/test/regress/expected/select_distinct_on.out
+++ b/src/test/regress/expected/select_distinct_on.out
@@ -73,3 +73,53 @@ select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2;
  0 | -2147483647
 (1 row)
 
+--
+-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
+-- all of the distinct_pathkeys have been marked as redundant
+--
+-- Ensure we also get a LIMIT plan with DISTINCT ON
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,two
+   FROM tenk1 WHERE four = 0 ORDER BY 1;
+            QUERY PLAN            
+----------------------------------
+ Result
+   ->  Limit
+         ->  Seq Scan on tenk1
+               Filter: (four = 0)
+(4 rows)
+
+-- and check the result of the above query is correct
+SELECT DISTINCT ON (four) four,two
+   FROM tenk1 WHERE four = 0 ORDER BY 1;
+ four | two 
+------+-----
+    0 |   0
+(1 row)
+
+-- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,two
+   FROM tenk1 WHERE four = 0 ORDER BY 1,2;
+            QUERY PLAN            
+----------------------------------
+ Limit
+   ->  Sort
+         Sort Key: two
+         ->  Seq Scan on tenk1
+               Filter: (four = 0)
+(5 rows)
+
+-- Same again but use a column that is indexed so that we get an index scan
+-- then a limit
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,hundred
+   FROM tenk1 WHERE four = 0 ORDER BY 1,2;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Result
+   ->  Limit
+         ->  Index Scan using tenk1_hundred on tenk1
+               Filter: (four = 0)
+(4 rows)
+
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index f27ff714f8d..34020adad1d 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -147,6 +147,32 @@ RESET parallel_setup_cost;
 RESET parallel_tuple_cost;
 
 --
+-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
+-- all of the distinct_pathkeys have been marked as redundant
+--
+
+-- Ensure we get a plan with a Limit 1
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 0;
+
+-- Ensure the above gives us the correct result
+SELECT DISTINCT four FROM tenk1 WHERE four = 0;
+
+-- Ensure we get a plan with a Limit 1
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
+
+-- Ensure no rows are returned
+SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
+
+-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
+
+-- Ensure we only get 1 row
+SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
+
+--
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.
 --
diff --git a/src/test/regress/sql/select_distinct_on.sql b/src/test/regress/sql/select_distinct_on.sql
index 0920bd64b96..261e6140feb 100644
--- a/src/test/regress/sql/select_distinct_on.sql
+++ b/src/test/regress/sql/select_distinct_on.sql
@@ -17,3 +17,28 @@ SELECT DISTINCT ON (string4, ten) string4, ten, two
 
 -- bug #5049: early 8.4.x chokes on volatile DISTINCT ON clauses
 select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2;
+
+--
+-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
+-- all of the distinct_pathkeys have been marked as redundant
+--
+
+-- Ensure we also get a LIMIT plan with DISTINCT ON
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,two
+   FROM tenk1 WHERE four = 0 ORDER BY 1;
+
+-- and check the result of the above query is correct
+SELECT DISTINCT ON (four) four,two
+   FROM tenk1 WHERE four = 0 ORDER BY 1;
+
+-- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,two
+   FROM tenk1 WHERE four = 0 ORDER BY 1,2;
+
+-- Same again but use a column that is indexed so that we get an index scan
+-- then a limit
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four,hundred
+   FROM tenk1 WHERE four = 0 ORDER BY 1,2;