diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 01804b085b3..8112509e872 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -80,6 +80,18 @@ typedef struct JoinTreeItem * lateral references */ } JoinTreeItem; +/* + * Compatibility info for one GROUP BY item, precomputed for use by + * remove_useless_groupby_columns() when matching unique-index columns against + * GROUP BY items. + */ +typedef struct GroupByColInfo +{ + AttrNumber attno; /* var->varattno */ + List *eq_opfamilies; /* mergejoin opfamilies of sgc->eqop */ + Oid coll; /* var->varcollid */ +} GroupByColInfo; + static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex); @@ -413,6 +425,7 @@ remove_useless_groupby_columns(PlannerInfo *root) { Query *parse = root->parse; Bitmapset **groupbyattnos; + List **groupbycols; Bitmapset **surplusvars; bool tryremove = false; ListCell *lc; @@ -429,15 +442,21 @@ remove_useless_groupby_columns(PlannerInfo *root) /* * Scan the GROUP BY clause to find GROUP BY items that are simple Vars. * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k - * that are GROUP BY items. + * that are GROUP BY items, and groupbycols[k] with a parallel list of + * GroupByColInfo records. We need the latter so that, when checking a + * unique index against this rel's GROUP BY items, we can verify that the + * index's notion of equality agrees with at least one GROUP BY item per + * index column. */ groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) * (list_length(parse->rtable) + 1)); + groupbycols = palloc0_array(List *, list_length(parse->rtable) + 1); foreach(lc, root->processed_groupClause) { SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList); Var *var = (Var *) tle->expr; + GroupByColInfo *info; /* * Ignore non-Vars and Vars from other query levels. @@ -463,6 +482,12 @@ remove_useless_groupby_columns(PlannerInfo *root) tryremove |= !bms_is_empty(groupbyattnos[relid]); groupbyattnos[relid] = bms_add_member(groupbyattnos[relid], var->varattno - FirstLowInvalidHeapAttributeNumber); + + info = palloc(sizeof(GroupByColInfo)); + info->attno = var->varattno; + info->eq_opfamilies = get_mergejoin_opfamilies(sgc->eqop); + info->coll = var->varcollid; + groupbycols[relid] = lappend(groupbycols[relid], info); } /* @@ -517,7 +542,7 @@ remove_useless_groupby_columns(PlannerInfo *root) foreach_node(IndexOptInfo, index, rel->indexlist) { Bitmapset *ind_attnos; - bool nulls_check_ok; + bool index_check_ok; /* * Skip any non-unique and deferrable indexes. Predicate indexes @@ -532,9 +557,14 @@ remove_useless_groupby_columns(PlannerInfo *root) continue; ind_attnos = NULL; - nulls_check_ok = true; + index_check_ok = true; for (int i = 0; i < index->nkeycolumns; i++) { + AttrNumber indkey_attno = index->indexkeys[i]; + Oid indkey_opfamily = index->opfamily[i]; + Oid indkey_coll = index->indexcollations[i]; + ListCell *lc2; + /* * We must insist that the index columns are all defined NOT * NULL otherwise duplicate NULLs could exist. However, we @@ -544,20 +574,41 @@ remove_useless_groupby_columns(PlannerInfo *root) * despite the NULL. */ if (!index->nullsnotdistinct && - !bms_is_member(index->indexkeys[i], - rel->notnullattnums)) + !bms_is_member(indkey_attno, rel->notnullattnums)) { - nulls_check_ok = false; + index_check_ok = false; + break; + } + + /* + * The index proves uniqueness only under its own opfamily and + * collation. Require some GROUP BY item on this column to + * use a compatible eqop and collation, the same check + * relation_has_unique_index_for() applies to join clauses. + */ + foreach(lc2, groupbycols[relid]) + { + GroupByColInfo *info = (GroupByColInfo *) lfirst(lc2); + + if (info->attno != indkey_attno) + continue; + if (list_member_oid(info->eq_opfamilies, indkey_opfamily) && + collations_agree_on_equality(indkey_coll, info->coll)) + break; + } + if (lc2 == NULL) + { + index_check_ok = false; break; } ind_attnos = bms_add_member(ind_attnos, - index->indexkeys[i] - + indkey_attno - FirstLowInvalidHeapAttributeNumber); } - if (!nulls_check_ok) + if (!index_check_ok) continue; /* diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 6e0c86d79fc..cdb4ecf288f 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1515,6 +1515,37 @@ explain (costs off) select y,z from t2 group by y,z; -> Seq Scan on t2 (3 rows) +-- A unique index proves uniqueness only under its own opfamily. When the +-- GROUP BY's eqop comes from a different opfamily with looser equality, +-- rows the index regards as distinct can collapse into one GROUP BY group, +-- so the index is not usable for removing redundant columns. +create type t_rec as (x numeric); +create temp table t_opf (a t_rec not null, b text); +create unique index on t_opf (a record_image_ops); +-- (1.0) and (1.00) are bytewise distinct but logically equal as records; +-- the index admits both, but GROUP BY a (default record_ops) would merge +-- them, so b must be retained as a grouping key. +insert into t_opf values (row(1.0)::t_rec, 'X'), (row(1.00)::t_rec, 'Y'); +explain (costs off) +select a, b from t_opf group by a, b order by b; + QUERY PLAN +------------------------------- + Group + Group Key: b, a + -> Sort + Sort Key: b, a + -> Seq Scan on t_opf +(5 rows) + +select a, b from t_opf group by a, b order by b; + a | b +--------+--- + (1.0) | X + (1.00) | Y +(2 rows) + +drop table t_opf; +drop type t_rec; drop table t1 cascade; NOTICE: drop cascades to table t1c drop table t2; diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 783dce6b887..66093040965 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -3245,6 +3245,38 @@ DROP TABLE pagg_tab6; RESET enable_partitionwise_aggregate; RESET max_parallel_workers_per_gather; RESET enable_incremental_sort; +-- +-- A unique index can prove functional dependency for GROUP BY column +-- removal only if its per-column collation agrees on equality with +-- the GROUP BY column's collation. An index built under a different +-- (deterministic) collation would otherwise let remove_useless_groupby_columns +-- drop other columns whose values still differ within a nondeterministic +-- group. +-- +CREATE TABLE groupby_collation_t (a text COLLATE case_insensitive NOT NULL, b text); +INSERT INTO groupby_collation_t VALUES ('foo', 'X'), ('FOO', 'Y'); +CREATE UNIQUE INDEX ON groupby_collation_t (a COLLATE "C"); +-- Column b must NOT be dropped: under case_insensitive on a, 'foo' and +-- 'FOO' would merge, but they have distinct b values. +EXPLAIN (COSTS OFF) +SELECT a, b FROM groupby_collation_t GROUP BY a, b ORDER BY a, b; + QUERY PLAN +------------------------------------------------- + Group + Group Key: a, b + -> Sort + Sort Key: a COLLATE case_insensitive, b + -> Seq Scan on groupby_collation_t +(5 rows) + +SELECT a, b FROM groupby_collation_t GROUP BY a, b ORDER BY a, b; + a | b +-----+--- + foo | X + FOO | Y +(2 rows) + +DROP TABLE groupby_collation_t; -- virtual generated columns CREATE TABLE t5 ( a int, diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index c383890bc85..b751783e833 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -539,6 +539,23 @@ alter table t2 alter column z drop not null; create unique index t2_z_uidx on t2(z) nulls not distinct; explain (costs off) select y,z from t2 group by y,z; +-- A unique index proves uniqueness only under its own opfamily. When the +-- GROUP BY's eqop comes from a different opfamily with looser equality, +-- rows the index regards as distinct can collapse into one GROUP BY group, +-- so the index is not usable for removing redundant columns. +create type t_rec as (x numeric); +create temp table t_opf (a t_rec not null, b text); +create unique index on t_opf (a record_image_ops); +-- (1.0) and (1.00) are bytewise distinct but logically equal as records; +-- the index admits both, but GROUP BY a (default record_ops) would merge +-- them, so b must be retained as a grouping key. +insert into t_opf values (row(1.0)::t_rec, 'X'), (row(1.00)::t_rec, 'Y'); +explain (costs off) +select a, b from t_opf group by a, b order by b; +select a, b from t_opf group by a, b order by b; +drop table t_opf; +drop type t_rec; + drop table t1 cascade; drop table t2; drop table t3; diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 724df44d9ae..90ea90d64e0 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -1170,6 +1170,26 @@ RESET enable_partitionwise_aggregate; RESET max_parallel_workers_per_gather; RESET enable_incremental_sort; +-- +-- A unique index can prove functional dependency for GROUP BY column +-- removal only if its per-column collation agrees on equality with +-- the GROUP BY column's collation. An index built under a different +-- (deterministic) collation would otherwise let remove_useless_groupby_columns +-- drop other columns whose values still differ within a nondeterministic +-- group. +-- +CREATE TABLE groupby_collation_t (a text COLLATE case_insensitive NOT NULL, b text); +INSERT INTO groupby_collation_t VALUES ('foo', 'X'), ('FOO', 'Y'); +CREATE UNIQUE INDEX ON groupby_collation_t (a COLLATE "C"); + +-- Column b must NOT be dropped: under case_insensitive on a, 'foo' and +-- 'FOO' would merge, but they have distinct b values. +EXPLAIN (COSTS OFF) +SELECT a, b FROM groupby_collation_t GROUP BY a, b ORDER BY a, b; +SELECT a, b FROM groupby_collation_t GROUP BY a, b ORDER BY a, b; + +DROP TABLE groupby_collation_t; + -- virtual generated columns CREATE TABLE t5 ( a int, diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index c2fef51cd7b..8cd74c4e5b6 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1103,6 +1103,7 @@ GrantRoleStmt GrantStmt GrantTargetType Group +GroupByColInfo GroupByOrdering GroupClause GroupPath