Fix var_is_nonnullable() to handle invalid NOT NULL constraints

The NOTNULL_SOURCE_SYSCACHE code path in var_is_nonnullable() used
get_attnotnull() to check pg_attribute.attnotnull, which is true for
both valid and invalid (NOT VALID) NOT NULL constraints.  An invalid
constraint does not guarantee the absence of NULLs, so this could lead
to incorrect results.  For example, query_outputs_are_not_nullable()
could wrongly conclude that a subquery's output is non-nullable,
causing NOT IN to be incorrectly converted to an anti-join.

Fix by checking the attnullability field in the relation's tuple
descriptor instead, which correctly distinguishes valid from invalid
constraints, consistent with what the NOTNULL_SOURCE_HASHTABLE code
path already does.

While at it, rename NOTNULL_SOURCE_SYSCACHE to NOTNULL_SOURCE_CATALOG
to reflect that this code path no longer uses a syscache lookup, and
remove the now-unused get_attnotnull() function.

Author: Richard Guo <guofenglinux@gmail.com>
Reviewed-by: SATYANARAYANA NARLAPURAM <satyanarlapuram@gmail.com>
Discussion: https://postgr.es/m/CAMbWs48ALW=mR0ydQ62dGS-Q+3D7WdDSh=EWDezcKp19xi=TUA@mail.gmail.com
This commit is contained in:
Richard Guo 2026-04-15 09:38:56 +09:00
parent 1f108fc02e
commit 363af93bdd
6 changed files with 61 additions and 37 deletions

View file

@ -20,6 +20,7 @@
#include "postgres.h"
#include "access/htup_details.h"
#include "access/table.h"
#include "catalog/pg_class.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_language.h"
@ -59,6 +60,7 @@
#include "utils/jsonpath.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
@ -2134,7 +2136,7 @@ query_outputs_are_not_nullable(Query *query)
* parse tree, we need to look up the not-null constraints from the
* system catalogs.
*/
if (expr_is_nonnullable(&subroot, expr, NOTNULL_SOURCE_SYSCACHE))
if (expr_is_nonnullable(&subroot, expr, NOTNULL_SOURCE_CATALOG))
continue;
if (IsA(expr, Var))
@ -4696,13 +4698,19 @@ var_is_nonnullable(PlannerInfo *root, Var *var, NotNullSource source)
return bms_is_member(var->varattno, notnullattnums);
}
case NOTNULL_SOURCE_SYSCACHE:
case NOTNULL_SOURCE_CATALOG:
{
/*
* We look up the "attnotnull" field in the attribute
* relation.
* We check the attnullability field in the tuple descriptor.
* This is necessary rather than checking the attnotnull field
* from the attribute relation, because attnotnull is also set
* for invalid (NOT VALID) NOT NULL constraints, which do not
* guarantee the absence of NULLs.
*/
RangeTblEntry *rte;
Relation rel;
CompactAttribute *attr;
bool result;
rte = planner_rt_fetch(var->varno, root);
@ -4723,7 +4731,14 @@ var_is_nonnullable(PlannerInfo *root, Var *var, NotNullSource source)
rte->relkind != RELKIND_PARTITIONED_TABLE)
return false;
return get_attnotnull(rte->relid, var->varattno);
/* We need not lock the relation since it was already locked */
rel = table_open(rte->relid, NoLock);
attr = TupleDescCompactAttr(RelationGetDescr(rel),
var->varattno - 1);
result = (attr->attnullability == ATTNULLABLE_VALID);
table_close(rel, NoLock);
return result;
}
default:
elog(ERROR, "unrecognized NotNullSource: %d",
@ -4746,9 +4761,9 @@ var_is_nonnullable(PlannerInfo *root, Var *var, NotNullSource source)
* - NOTNULL_SOURCE_HASHTABLE: Used when RelOptInfos are not yet available,
* but we have already collected relation-level not-null constraints into the
* global hash table.
* - NOTNULL_SOURCE_SYSCACHE: Used for raw parse trees where neither
* RelOptInfos nor the hash table are available. In this case, we have to
* look up the 'attnotnull' field directly in the system catalogs.
* - NOTNULL_SOURCE_CATALOG: Used for raw parse trees where neither
* RelOptInfos nor the hash table are available. In this case, we check the
* column's attnullability in the tuple descriptor.
*
* For now, we support only a limited set of expression types. Support for
* additional node types can be added in the future.

View file

@ -1114,33 +1114,6 @@ get_attoptions(Oid relid, int16 attnum)
return result;
}
/*
* get_attnotnull
*
* Given the relation id and the attribute number,
* return the "attnotnull" field from the attribute relation.
*
* The pg_attribute entry is fetched through the ATTNUM syscache, keyed by
* (relid, attnum).  If no entry is found, false is returned.
*
* Note that attnotnull is set for both valid and invalid (NOT VALID)
* NOT NULL constraints, so a true result does not by itself guarantee
* that the column contains no NULLs.
*/
bool
get_attnotnull(Oid relid, AttrNumber attnum)
{
HeapTuple tp;
bool result = false;
tp = SearchSysCache2(ATTNUM,
ObjectIdGetDatum(relid),
Int16GetDatum(attnum));
/* A failed lookup leaves result at its default of false. */
if (HeapTupleIsValid(tp))
{
Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp);
/* Copy the flag out before the tuple is released below. */
result = att_tup->attnotnull;
ReleaseSysCache(tp);
}
return result;
}
/* ---------- PG_CAST CACHE ---------- */
/*

View file

@ -135,7 +135,7 @@ typedef enum
{
NOTNULL_SOURCE_RELOPT, /* Use RelOptInfo */
NOTNULL_SOURCE_HASHTABLE, /* Use Global Hash Table */
NOTNULL_SOURCE_SYSCACHE, /* Use System Catalog */
NOTNULL_SOURCE_CATALOG, /* Use System Catalog */
} NotNullSource;
extern bool contain_mutable_functions(Node *clause);

View file

@ -99,7 +99,6 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum);
extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum,
Oid *typid, int32 *typmod, Oid *collid);
extern Datum get_attoptions(Oid relid, int16 attnum);
extern bool get_attnotnull(Oid relid, AttrNumber attnum);
extern Oid get_cast_oid(Oid sourcetypeid, Oid targettypeid, bool missing_ok);
extern char *get_collation_name(Oid colloid);
extern bool get_collation_isdeterministic(Oid colloid);

View file

@ -3761,4 +3761,27 @@ WHERE NOT id ?= ANY (SELECT id FROM not_null_tab);
-> Seq Scan on not_null_tab not_null_tab_1
(5 rows)
-- No ANTI JOIN: the inner side has an unvalidated NOT NULL constraint, so
-- the column might contain NULLs.
CREATE TEMP TABLE notnull_notvalid_tab (id int);
INSERT INTO notnull_notvalid_tab VALUES (NULL);
ALTER TABLE notnull_notvalid_tab ADD CONSTRAINT nn NOT NULL id NOT VALID;
EXPLAIN (COSTS OFF)
SELECT * FROM not_null_tab
WHERE id NOT IN (SELECT id FROM notnull_notvalid_tab);
QUERY PLAN
----------------------------------------------------------
Seq Scan on not_null_tab
Filter: (NOT (ANY (id = (hashed SubPlan any_1).col1)))
SubPlan any_1
-> Seq Scan on notnull_notvalid_tab
(4 rows)
-- NOT IN with NULL on inner side should return no rows
SELECT * FROM not_null_tab
WHERE id NOT IN (SELECT id FROM notnull_notvalid_tab);
id | val
----+-----
(0 rows)
ROLLBACK;

View file

@ -1632,4 +1632,18 @@ EXPLAIN (COSTS OFF)
SELECT * FROM not_null_tab
WHERE NOT id ?= ANY (SELECT id FROM not_null_tab);
-- No ANTI JOIN: the inner side has an unvalidated NOT NULL constraint, so
-- the column might contain NULLs.
CREATE TEMP TABLE notnull_notvalid_tab (id int);
INSERT INTO notnull_notvalid_tab VALUES (NULL);
ALTER TABLE notnull_notvalid_tab ADD CONSTRAINT nn NOT NULL id NOT VALID;
EXPLAIN (COSTS OFF)
SELECT * FROM not_null_tab
WHERE id NOT IN (SELECT id FROM notnull_notvalid_tab);
-- NOT IN with NULL on inner side should return no rows
SELECT * FROM not_null_tab
WHERE id NOT IN (SELECT id FROM notnull_notvalid_tab);
ROLLBACK;