-rw-r--r--  src/backend/optimizer/plan/initsplan.c    | 120
-rw-r--r--  src/backend/optimizer/util/plancat.c      |   1
-rw-r--r--  src/include/nodes/pathnodes.h             |   2
-rw-r--r--  src/test/regress/expected/aggregates.out  |  67
-rw-r--r--  src/test/regress/sql/aggregates.sql       |  32
5 files changed, 200 insertions(+), 22 deletions(-)
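
The behavioural change is easiest to see from SQL. A minimal, illustrative sketch (the table and index names are not part of the patch, and the plan change assumes a server built with this commit, since unpatched servers only consider the primary key):

    create temp table widgets (sku text not null, name text not null, qty int);
    create unique index widgets_sku_uidx on widgets (sku);

    -- Previously only a primary-key constraint let the planner drop columns
    -- from the GROUP BY; with this patch the unique index on the NOT NULL
    -- column sku is enough, so the planner can group by sku alone.
    explain (costs off)
    select sku, name, sum(qty) from widgets group by sku, name;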
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index c1c4f9f8b9d..5f3908be519 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -400,17 +400,13 @@ add_vars_to_attr_needed(PlannerInfo *root, List *vars,
*
* Since some other DBMSes do not allow references to ungrouped columns, it's
* not unusual to find all columns listed in GROUP BY even though listing the
- * primary-key columns would be sufficient. Deleting such excess columns
- * avoids redundant sorting work, so it's worth doing.
+ * primary-key columns, or columns of a unique constraint would be sufficient.
+ * Deleting such excess columns avoids redundant sorting or hashing work, so
+ * it's worth doing.
*
* Relcache invalidations will ensure that cached plans become invalidated
- * when the underlying index of the pkey constraint is dropped.
- *
- * Currently, we only make use of pkey constraints for this, however, we may
- * wish to take this further in the future and also use unique constraints
- * which have NOT NULL columns. In that case, plan invalidation will still
- * work since relations will receive a relcache invalidation when a NOT NULL
- * constraint is dropped.
+ * when the underlying supporting indexes are dropped or if a column's NOT
+ * NULL attribute is removed.
*/
void
remove_useless_groupby_columns(PlannerInfo *root)
@@ -418,6 +414,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
Query *parse = root->parse;
Bitmapset **groupbyattnos;
Bitmapset **surplusvars;
+ bool tryremove = false;
ListCell *lc;
int relid;
@@ -457,11 +454,25 @@ remove_useless_groupby_columns(PlannerInfo *root)
/* OK, remember we have this Var */
relid = var->varno;
Assert(relid <= list_length(parse->rtable));
+
+ /*
+ * If this isn't the first column for this relation then we now have
+ * multiple columns. That means there might be some that can be
+ * removed.
+ */
+ tryremove |= !bms_is_empty(groupbyattnos[relid]);
groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
var->varattno - FirstLowInvalidHeapAttributeNumber);
}
/*
+ * No Vars or didn't find multiple Vars for any relation in the GROUP BY?
+ * If so, nothing can be removed, so don't waste more effort trying.
+ */
+ if (!tryremove)
+ return;
+
+ /*
* Consider each relation and see if it is possible to remove some of its
* Vars from GROUP BY. For simplicity and speed, we do the actual removal
* in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
@@ -472,9 +483,10 @@ remove_useless_groupby_columns(PlannerInfo *root)
foreach(lc, parse->rtable)
{
RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
+ RelOptInfo *rel;
Bitmapset *relattnos;
- Bitmapset *pkattnos;
- Oid constraintOid;
+ Bitmapset *best_keycolumns = NULL;
+ int32 best_nkeycolumns = PG_INT32_MAX;
relid++;
@@ -495,19 +507,83 @@ remove_useless_groupby_columns(PlannerInfo *root)
if (bms_membership(relattnos) != BMS_MULTIPLE)
continue;
- /*
- * Can't remove any columns for this rel if there is no suitable
- * (i.e., nondeferrable) primary key constraint.
- */
- pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
- if (pkattnos == NULL)
- continue;
+ rel = root->simple_rel_array[relid];
/*
- * If the primary key is a proper subset of relattnos then we have
- * some items in the GROUP BY that can be removed.
+ * Now check each index for this relation to see if there are any with
+ * columns which are a proper subset of the grouping columns for this
+ * relation.
*/
- if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
+ foreach_node(IndexOptInfo, index, rel->indexlist)
+ {
+ Bitmapset *ind_attnos;
+ bool nulls_check_ok;
+
+ /*
+ * Skip any non-unique and deferrable indexes. Predicate indexes
+ * have not been checked yet, so we must skip those too as the
+ * predOK check that's done later might fail.
+ */
+ if (!index->unique || !index->immediate || index->indpred != NIL)
+ continue;
+
+ /* For simplicity, we currently don't support expression indexes */
+ if (index->indexprs != NIL)
+ continue;
+
+ ind_attnos = NULL;
+ nulls_check_ok = true;
+ for (int i = 0; i < index->nkeycolumns; i++)
+ {
+ /*
+ * We must insist that the index columns are all defined NOT
+ * NULL otherwise duplicate NULLs could exist. However, we
+ * can relax this check when the index is defined with NULLS
+ * NOT DISTINCT as there can only be 1 NULL row, therefore
+ * functional dependency on the unique columns is maintained,
+ * despite the NULL.
+ */
+ if (!index->nullsnotdistinct &&
+ !bms_is_member(index->indexkeys[i],
+ rel->notnullattnums))
+ {
+ nulls_check_ok = false;
+ break;
+ }
+
+ ind_attnos =
+ bms_add_member(ind_attnos,
+ index->indexkeys[i] -
+ FirstLowInvalidHeapAttributeNumber);
+ }
+
+ if (!nulls_check_ok)
+ continue;
+
+ /*
+ * Skip any indexes where the indexed columns aren't a proper
+ * subset of the GROUP BY.
+ */
+ if (bms_subset_compare(ind_attnos, relattnos) != BMS_SUBSET1)
+ continue;
+
+ /*
+ * Record the attribute numbers from the index with the fewest
+ * columns. This allows the largest number of columns to be
+ * removed from the GROUP BY clause. In the future, we may wish
+ * to consider using the narrowest set of columns and looking at
+ * pg_statistic.stawidth as it might be better to use an index
+ * with, say two INT4s, rather than, say, one long varlena column.
+ */
+ if (index->nkeycolumns < best_nkeycolumns)
+ {
+ best_keycolumns = ind_attnos;
+ best_nkeycolumns = index->nkeycolumns;
+ }
+ }
+
+ /* Did we find a suitable index? */
+ if (!bms_is_empty(best_keycolumns))
{
/*
* To easily remember whether we've found anything to do, we don't
@@ -518,7 +594,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
(list_length(parse->rtable) + 1));
/* Remember the attnos of the removable columns */
- surplusvars[relid] = bms_difference(relattnos, pkattnos);
+ surplusvars[relid] = bms_difference(relattnos, best_keycolumns);
}
}
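
The NULLS NOT DISTINCT relaxation in the null check above rests on the fact that such an index admits at most one NULL key, so the indexed column still functionally determines the row even without a NOT NULL constraint. A hedged sketch of that property (the demo table is hypothetical and not part of the patch or its tests):

    create temp table nnd_demo (id int, payload text);
    create unique index nnd_demo_id_uidx on nnd_demo (id) nulls not distinct;

    insert into nnd_demo values (1, 'one');          -- ok
    insert into nnd_demo values (null, 'null #1');   -- ok: the one allowed NULL key
    insert into nnd_demo values (null, 'null #2');   -- fails: duplicate key value

    -- Because at most one NULL row can exist, grouping by (id, payload) can
    -- safely be reduced to grouping by id alone under this patch.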
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 37b0ca2e439..153390f2dc9 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -457,6 +457,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
info->indrestrictinfo = NIL; /* set later, in indxpath.c */
info->predOK = false; /* set later, in indxpath.c */
info->unique = index->indisunique;
+ info->nullsnotdistinct = index->indnullsnotdistinct;
info->immediate = index->indimmediate;
info->hypothetical = false;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index add0f9e45fc..0759e00e96d 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1187,6 +1187,8 @@ struct IndexOptInfo
bool predOK;
/* true if a unique index */
bool unique;
+ /* true if the index was defined with NULLS NOT DISTINCT */
+ bool nullsnotdistinct;
/* is uniqueness enforced immediately? */
bool immediate;
/* true if index doesn't really exist */
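
get_relation_info() fills the new nullsnotdistinct field, like the neighbouring flags, straight from pg_index. For reference, the catalog properties the new check depends on can be inspected with a query along these lines (t3 follows the regression tests; the column aliases are illustrative, and indnullsnotdistinct requires PostgreSQL 15 or later):

    select indexrelid::regclass              as index_name,
           indisunique                       as is_unique,
           indimmediate                      as is_immediate,
           indnullsnotdistinct               as nulls_not_distinct,
           pg_get_expr(indpred, indrelid)    as partial_predicate
    from pg_index
    where indrelid = 't3'::regclass;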
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1e682565d13..f2fb66388cc 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1448,6 +1448,73 @@ explain (costs off) select * from p_t1 group by a,b,c,d;
-> Seq Scan on p_t1_2
(5 rows)
+create unique index t3_c_uidx on t3(c);
+-- Ensure we don't remove any columns from the GROUP BY for a unique
+-- index on a NULLable column.
+explain (costs off) select b,c from t3 group by b,c;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: b, c
+ -> Seq Scan on t3
+(3 rows)
+
+-- Make the column NOT NULL and ensure we remove the redundant column
+alter table t3 alter column c set not null;
+explain (costs off) select b,c from t3 group by b,c;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: c
+ -> Seq Scan on t3
+(3 rows)
+
+-- When there are multiple supporting unique indexes and the GROUP BY contains
+-- columns to cover all of those, ensure we pick the index with the least
+-- number of columns so that we can remove more columns from the GROUP BY.
+explain (costs off) select a,b,c from t3 group by a,b,c;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: c
+ -> Seq Scan on t3
+(3 rows)
+
+-- As above but try ordering the columns differently to ensure we get the
+-- same result.
+explain (costs off) select a,b,c from t3 group by c,a,b;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: c
+ -> Seq Scan on t3
+(3 rows)
+
+-- Ensure we don't use a partial index as proof of functional dependency
+drop index t3_c_uidx;
+create index t3_c_uidx on t3 (c) where c > 0;
+explain (costs off) select b,c from t3 group by b,c;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: b, c
+ -> Seq Scan on t3
+(3 rows)
+
+-- A unique index defined as NULLS NOT DISTINCT does not need a supporting NOT
+-- NULL constraint on the indexed columns. Ensure the redundant columns are
+-- removed from the GROUP BY for such a table.
+drop index t3_c_uidx;
+alter table t3 alter column c drop not null;
+create unique index t3_c_uidx on t3(c) nulls not distinct;
+explain (costs off) select b,c from t3 group by b,c;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: c
+ -> Seq Scan on t3
+(3 rows)
+
drop table t1 cascade;
NOTICE: drop cascades to table t1c
drop table t2;
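
The tests above cover NULLable columns, partial indexes, and NULLS NOT DISTINCT. Two further cases that the new code deliberately skips, deferrable unique constraints (immediate is false) and expression indexes (indexprs != NIL), are not exercised there. A hedged sketch of what they look like, using a hypothetical table rather than t3:

    create temp table grp_demo (id int not null, label text not null);

    -- A deferrable unique constraint is not enforced immediately, so the
    -- planner cannot rely on it; both columns stay in the GROUP BY.
    alter table grp_demo
      add constraint grp_demo_id_key unique (id) deferrable initially deferred;
    explain (costs off) select id, label from grp_demo group by id, label;

    -- An expression index is skipped too, even though (id + 0) is unique
    -- and id is NOT NULL, so again no GROUP BY columns are removed.
    create unique index grp_demo_expr_uidx on grp_demo ((id + 0));
    explain (costs off) select id, label from grp_demo group by id, label;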
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 4885daffe63..77168bcc744 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -507,6 +507,38 @@ create temp table p_t1_2 partition of p_t1 for values in(2);
-- Ensure we can remove non-PK columns for partitioned tables.
explain (costs off) select * from p_t1 group by a,b,c,d;
+create unique index t3_c_uidx on t3(c);
+
+-- Ensure we don't remove any columns from the GROUP BY for a unique
+-- index on a NULLable column.
+explain (costs off) select b,c from t3 group by b,c;
+
+-- Make the column NOT NULL and ensure we remove the redundant column
+alter table t3 alter column c set not null;
+explain (costs off) select b,c from t3 group by b,c;
+
+-- When there are multiple supporting unique indexes and the GROUP BY contains
+-- columns to cover all of those, ensure we pick the index with the least
+-- number of columns so that we can remove more columns from the GROUP BY.
+explain (costs off) select a,b,c from t3 group by a,b,c;
+
+-- As above but try ordering the columns differently to ensure we get the
+-- same result.
+explain (costs off) select a,b,c from t3 group by c,a,b;
+
+-- Ensure we don't use a partial index as proof of functional dependency
+drop index t3_c_uidx;
+create index t3_c_uidx on t3 (c) where c > 0;
+explain (costs off) select b,c from t3 group by b,c;
+
+-- A unique index defined as NULLS NOT DISTINCT does not need a supporting NOT
+-- NULL constraint on the indexed columns. Ensure the redundant columns are
+-- removed from the GROUP BY for such a table.
+drop index t3_c_uidx;
+alter table t3 alter column c drop not null;
+create unique index t3_c_uidx on t3(c) nulls not distinct;
+explain (costs off) select b,c from t3 group by b,c;
+
drop table t1 cascade;
drop table t2;
drop table t3;