Fix FOR PORTION OF for inheritance children

ExecForPortionOfLeftovers() assumed that any result relation with
ri_RootResultRelInfo should reinsert temporal leftovers through the
root relation.  That is correct for partitioned tables, where tuple
routing is needed, but it is wrong for plain inheritance.

When UPDATE/DELETE FOR PORTION OF is run on an inheritance parent and
a child row is split, the leftover rows must be inserted back into the
child relation.  Reinserting through the parent can lose child-only
columns and place the leftover rows in the wrong relation.

Fix this by distinguishing partitioned-table routing from plain
inheritance.  For partitioned tables, keep using the root leftover
slot and insert through the root relation.  For plain inheritance
children, use a leftover slot matching the child relation and insert
directly into the child.  Also keep translating the application-time
column attno for child relations, so multiple-inheritance cases with
different attribute numbers are handled correctly.

Added an ExecInitForPortionOf function to set up the ForPortionOfState
for each child table, which keeps most of these decisions localized
instead of spread out through ExecForPortionOfLeftovers.  Incidentally
clarified a comment about the rangetype stored in ForPortionOfState.

Add regression tests for UPDATE and DELETE FOR PORTION OF on
inheritance children, including a multiple-inheritance case where the
range column has a different attnum in the parent and child.

Author: jian he <jian.universality@gmail.com>
Co-authored-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Paul A. Jungwirth <pj@illuminatedcomputing.com>
Discussion: https://www.postgresql.org/message-id/flat/4245F94D-84F1-4E05-BF81-C458A6CF9901%40gmail.com
This commit is contained in:
Peter Eisentraut 2026-06-08 19:18:58 +02:00
parent 06e94eccfd
commit 7d13b03a2e
4 changed files with 353 additions and 51 deletions

View file

@ -199,6 +199,8 @@ static TupleTableSlot *ExecMergeNotMatched(ModifyTableContext *context,
static void ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate);
static void fireBSTriggers(ModifyTableState *node);
static void fireASTriggers(ModifyTableState *node);
static void ExecInitForPortionOf(ModifyTableState *mtstate, EState *estate,
ResultRelInfo *resultRelInfo);
/*
@ -1410,7 +1412,6 @@ ExecForPortionOfLeftovers(ModifyTableContext *context,
ModifyTableState *mtstate = context->mtstate;
ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
ForPortionOfExpr *forPortionOf = (ForPortionOfExpr *) node->forPortionOf;
AttrNumber rangeAttno;
Datum oldRange;
TypeCacheEntry *typcache;
ForPortionOfState *fpoState;
@ -1425,37 +1426,13 @@ ExecForPortionOfLeftovers(ModifyTableContext *context,
ReturnSetInfo rsi;
bool didInit = false;
bool shouldFree = false;
ResultRelInfo *rootRelInfo = mtstate->rootResultRelInfo;
bool partitionRouting =
rootRelInfo &&
rootRelInfo->ri_RelationDesc->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
LOCAL_FCINFO(fcinfo, 2);
if (!resultRelInfo->ri_forPortionOf)
{
/*
* If we don't have a ForPortionOfState yet, we must be a partition
* child being hit for the first time. Make a copy from the root, with
* our own TupleTableSlot. We do this lazily so that we don't pay the
* price of unused partitions.
*/
ForPortionOfState *leafState = makeNode(ForPortionOfState);
if (!mtstate->rootResultRelInfo)
elog(ERROR, "no root relation but ri_forPortionOf is uninitialized");
fpoState = mtstate->rootResultRelInfo->ri_forPortionOf;
Assert(fpoState);
leafState->fp_rangeName = fpoState->fp_rangeName;
leafState->fp_rangeType = fpoState->fp_rangeType;
leafState->fp_rangeAttno = fpoState->fp_rangeAttno;
leafState->fp_targetRange = fpoState->fp_targetRange;
leafState->fp_Leftover = fpoState->fp_Leftover;
/* Each partition needs a slot matching its tuple descriptor */
leafState->fp_Existing =
table_slot_create(resultRelInfo->ri_RelationDesc,
&mtstate->ps.state->es_tupleTable);
resultRelInfo->ri_forPortionOf = leafState;
}
fpoState = resultRelInfo->ri_forPortionOf;
oldtupleSlot = fpoState->fp_Existing;
leftoverSlot = fpoState->fp_Leftover;
@ -1476,21 +1453,12 @@ ExecForPortionOfLeftovers(ModifyTableContext *context,
if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc, tupleid, SnapshotAny, oldtupleSlot))
elog(ERROR, "failed to fetch tuple for FOR PORTION OF");
/*
* Get the old range of the record being updated/deleted. Must read with
* the attno of the leaf partition being updated.
*/
rangeAttno = forPortionOf->rangeVar->varattno;
if (resultRelInfo->ri_RootResultRelInfo)
map = ExecGetChildToRootMap(resultRelInfo);
if (map != NULL)
rangeAttno = map->attrMap->attnums[rangeAttno - 1];
slot_getallattrs(oldtupleSlot);
if (oldtupleSlot->tts_isnull[rangeAttno - 1])
/* Get the old range of the record being updated/deleted. */
if (oldtupleSlot->tts_isnull[fpoState->fp_rangeAttno - 1])
elog(ERROR, "found a NULL range in a temporal table");
oldRange = oldtupleSlot->tts_values[rangeAttno - 1];
oldRange = oldtupleSlot->tts_values[fpoState->fp_rangeAttno - 1];
/*
* Get the range's type cache entry. This is worth caching for the whole
@ -1528,12 +1496,19 @@ ExecForPortionOfLeftovers(ModifyTableContext *context,
fcinfo->args[1].isnull = false;
/*
* If there are partitions, we must insert into the root table, so we get
* tuple routing. We already set up leftoverSlot with the root tuple
* descriptor.
* For partitioned tables, we must read leftovers with the tuple
* descriptor of the child table, but insert into the root table to enable
* tuple routing. So leftoverSlot is configured with the root's tuple
* descriptor. But for traditional table inheritance, we don't need tuple
* routing and just insert directly into the child table to preserve
* child-specific columns. In that case, leftoverSlot uses the child's
* (resultRelInfo) tuple descriptor.
*/
if (resultRelInfo->ri_RootResultRelInfo)
if (partitionRouting)
{
map = ExecGetChildToRootMap(resultRelInfo);
resultRelInfo = resultRelInfo->ri_RootResultRelInfo;
}
/*
* Insert a leftover for each value returned by the without_portion helper
@ -1575,9 +1550,9 @@ ExecForPortionOfLeftovers(ModifyTableContext *context,
{
/*
* Make a copy of the pre-UPDATE row. Then we'll overwrite the
* range column below. Convert oldtuple to the base table's format
* if necessary. We need to insert temporal leftovers through the
* root partition so they get routed correctly.
* range column below. Only partitioned targets need conversion to
* the root table's format, because they reinsert through the root
* relation for tuple routing.
*/
if (map != NULL)
{
@ -1614,8 +1589,8 @@ ExecForPortionOfLeftovers(ModifyTableContext *context,
ExecForceStoreHeapTuple(oldtuple, leftoverSlot, false);
}
leftoverSlot->tts_values[forPortionOf->rangeVar->varattno - 1] = leftover;
leftoverSlot->tts_isnull[forPortionOf->rangeVar->varattno - 1] = false;
leftoverSlot->tts_values[resultRelInfo->ri_forPortionOf->fp_rangeAttno - 1] = leftover;
leftoverSlot->tts_isnull[resultRelInfo->ri_forPortionOf->fp_rangeAttno - 1] = false;
ExecMaterializeSlot(leftoverSlot);
/*
@ -4790,6 +4765,16 @@ ExecModifyTable(PlanState *pstate)
false, true);
}
/*
* If we don't have a ForPortionOfState yet, we must be a partition or
* inheritance child being hit for the first time. Make a copy from
* the root, with our own TupleTableSlot. We do this lazily so that we
* don't pay the price of unused partitions.
*/
if (((ModifyTable *) context.mtstate->ps.plan)->forPortionOf &&
!resultRelInfo->ri_forPortionOf)
ExecInitForPortionOf(context.mtstate, estate, resultRelInfo);
/*
* If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do
* here is compute the RETURNING expressions.
@ -5873,3 +5858,76 @@ ExecReScanModifyTable(ModifyTableState *node)
*/
elog(ERROR, "ExecReScanModifyTable is not implemented");
}
/* ----------------------------------------------------------------
* ExecInitForPortionOf
*
* Initializes resultRelInfo->ri_forPortionOf for child tables.
*
* Partitions share the root leftover slot, since they must insert via
* the root relation to get tuple routing. Plain inheritance children
* must keep their own leftover slot and insert back into the child, or
* else child-only column values and physical placement would be lost.
* ----------------------------------------------------------------
*/
static void
ExecInitForPortionOf(ModifyTableState *mtstate, EState *estate,
ResultRelInfo *resultRelInfo)
{
MemoryContext oldcxt;
ForPortionOfState *leafState;
ResultRelInfo *rootRelInfo = mtstate->rootResultRelInfo;
ForPortionOfState *fpoState;
TupleConversionMap *map;
if (!rootRelInfo)
elog(ERROR, "no root relation but ri_forPortionOf is uninitialized");
fpoState = rootRelInfo->ri_forPortionOf;
Assert(fpoState);
/* Things built here have to last for the query duration. */
oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
leafState = makeNode(ForPortionOfState);
leafState->fp_rangeName = fpoState->fp_rangeName;
leafState->fp_rangeType = fpoState->fp_rangeType;
leafState->fp_targetRange = fpoState->fp_targetRange;
map = ExecGetChildToRootMap(resultRelInfo);
/*
* fp_rangeAttno must match the tuple layout used for reading the old
* range value. The query uses the target relation's attno, so translate
* it to the child attno when the child has a different column layout.
*/
if (map)
leafState->fp_rangeAttno = map->attrMap->attnums[fpoState->fp_rangeAttno - 1];
else
leafState->fp_rangeAttno = fpoState->fp_rangeAttno;
/*
* For partitioned tables we must read the leftovers using the child
* table's tuple descriptor, but then insert them into the root table
* (using its tuple descriptor) so we get tuple routing.
*
* For traditional table inheritance, we read and insert directly into
* this resultRelInfo; no tuple routing via the parent is required.
*/
if (rootRelInfo->ri_RelationDesc->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
leafState->fp_Leftover = fpoState->fp_Leftover;
else
leafState->fp_Leftover =
ExecInitExtraTupleSlot(mtstate->ps.state,
RelationGetDescr(resultRelInfo->ri_RelationDesc),
&TTSOpsVirtual);
/* Each child relation needs a slot matching its tuple descriptor. */
leafState->fp_Existing =
table_slot_create(resultRelInfo->ri_RelationDesc,
&mtstate->ps.state->es_tupleTable);
resultRelInfo->ri_forPortionOf = leafState;
MemoryContextSwitchTo(oldcxt);
}

View file

@ -477,7 +477,8 @@ typedef struct ForPortionOfState
NodeTag type;
char *fp_rangeName; /* the column named in FOR PORTION OF */
Oid fp_rangeType; /* the type of the FOR PORTION OF expression */
Oid fp_rangeType; /* the base type (not domain) of the FOR
* PORTION OF expression */
int fp_rangeAttno; /* the attno of the range column */
Datum fp_targetRange; /* the range/multirange from FOR PORTION OF */
TypeCacheEntry *fp_leftoverstypcache; /* type cache entry of the range */

View file

@ -2208,6 +2208,164 @@ SELECT * FROM fpo_rule ORDER BY f1;
(2 rows)
DROP TABLE fpo_rule;
-- UPDATE/DELETE FOR PORTION OF with table inheritance
-- Leftover rows must stay in the child table, preserving child-specific columns.
CREATE TABLE fpo_inh_parent (
id int4range,
valid_at daterange,
name text
);
CREATE TABLE fpo_inh_child (
description text
) INHERITS (fpo_inh_parent);
-- Update targets the parent; the matching row lives in the child.
INSERT INTO fpo_inh_child (id, valid_at, name, description) VALUES
('[1,2)', '[2018-01-01,2019-01-01)', 'one', 'initial');
UPDATE fpo_inh_parent FOR PORTION OF valid_at FROM '2018-04-01' TO '2018-10-01'
SET name = 'one^1';
-- All three rows should be in the child, with description preserved.
SELECT tableoid::regclass, * FROM fpo_inh_parent ORDER BY valid_at;
tableoid | id | valid_at | name
---------------+-------+-------------------------+-------
fpo_inh_child | [1,2) | [2018-01-01,2018-04-01) | one
fpo_inh_child | [1,2) | [2018-04-01,2018-10-01) | one^1
fpo_inh_child | [1,2) | [2018-10-01,2019-01-01) | one
(3 rows)
SELECT * FROM fpo_inh_child ORDER BY valid_at;
id | valid_at | name | description
-------+-------------------------+-------+-------------
[1,2) | [2018-01-01,2018-04-01) | one | initial
[1,2) | [2018-04-01,2018-10-01) | one^1 | initial
[1,2) | [2018-10-01,2019-01-01) | one | initial
(3 rows)
-- No rows should have leaked into the parent.
SELECT * FROM ONLY fpo_inh_parent ORDER BY valid_at;
id | valid_at | name
----+----------+------
(0 rows)
-- Same test for DELETE instead of UPDATE:
TRUNCATE fpo_inh_child, fpo_inh_parent;
INSERT INTO fpo_inh_child (id, valid_at, name, description) VALUES
('[1,2)', '[2018-01-01,2019-01-01)', 'one', 'initial');
DELETE FROM fpo_inh_parent FOR PORTION OF valid_at FROM '2018-04-01' TO '2018-10-01';
-- Both rows should be in the child, with description preserved.
SELECT tableoid::regclass, * FROM fpo_inh_parent ORDER BY valid_at;
tableoid | id | valid_at | name
---------------+-------+-------------------------+------
fpo_inh_child | [1,2) | [2018-01-01,2018-04-01) | one
fpo_inh_child | [1,2) | [2018-10-01,2019-01-01) | one
(2 rows)
SELECT * FROM fpo_inh_child ORDER BY valid_at;
id | valid_at | name | description
-------+-------------------------+------+-------------
[1,2) | [2018-01-01,2018-04-01) | one | initial
[1,2) | [2018-10-01,2019-01-01) | one | initial
(2 rows)
-- No rows should have leaked into the parent.
SELECT * FROM ONLY fpo_inh_parent ORDER BY valid_at;
id | valid_at | name
----+----------+------
(0 rows)
DROP TABLE fpo_inh_parent CASCADE;
NOTICE: drop cascades to table fpo_inh_child
-- UPDATE FOR PORTION OF with multiple inheritance
-- Leftover rows must stay in the child table, even if the range column's
-- attnum differs between the target parent and child.
CREATE TABLE temporal_parent (
id int,
valid_at daterange,
name text
);
CREATE TABLE other_parent (
prefix text,
note text
);
CREATE TABLE mi_child () INHERITS (other_parent, temporal_parent);
-- attnum of the range column is different in temporal_parent and mi_child
SELECT attnum, attname
FROM pg_attribute
WHERE attrelid = 'temporal_parent'::regclass
AND attnum > 0 AND NOT attisdropped
ORDER BY attnum;
attnum | attname
--------+----------
1 | id
2 | valid_at
3 | name
(3 rows)
SELECT attnum, attname
FROM pg_attribute
WHERE attrelid = 'mi_child'::regclass
AND attnum > 0 AND NOT attisdropped
ORDER BY attnum;
attnum | attname
--------+----------
1 | prefix
2 | note
3 | id
4 | valid_at
5 | name
(5 rows)
INSERT INTO mi_child (prefix, note, id, valid_at, name) VALUES
('pfx', 'memo', 1, daterange('2000-01-01', '2010-01-01'), 'old');
UPDATE temporal_parent FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01'
SET name = 'new'
WHERE id = 1;
SELECT tableoid::regclass, * FROM temporal_parent ORDER BY valid_at;
tableoid | id | valid_at | name
----------+----+-------------------------+------
mi_child | 1 | [2000-01-01,2001-01-01) | old
mi_child | 1 | [2001-01-01,2002-01-01) | new
mi_child | 1 | [2002-01-01,2010-01-01) | old
(3 rows)
SELECT * FROM mi_child ORDER BY valid_at;
prefix | note | id | valid_at | name
--------+------+----+-------------------------+------
pfx | memo | 1 | [2000-01-01,2001-01-01) | old
pfx | memo | 1 | [2001-01-01,2002-01-01) | new
pfx | memo | 1 | [2002-01-01,2010-01-01) | old
(3 rows)
SELECT * FROM ONLY temporal_parent ORDER BY valid_at;
id | valid_at | name
----+----------+------
(0 rows)
TRUNCATE mi_child, other_parent, temporal_parent;
INSERT INTO mi_child (prefix, note, id, valid_at, name) VALUES
('pfx', 'memo', 1, daterange('2000-01-01', '2010-01-01'), 'old');
DELETE FROM temporal_parent FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01'
WHERE id = 1;
SELECT tableoid::regclass, * FROM temporal_parent ORDER BY valid_at;
tableoid | id | valid_at | name
----------+----+-------------------------+------
mi_child | 1 | [2000-01-01,2001-01-01) | old
mi_child | 1 | [2002-01-01,2010-01-01) | old
(2 rows)
SELECT * FROM mi_child ORDER BY valid_at;
prefix | note | id | valid_at | name
--------+------+----+-------------------------+------
pfx | memo | 1 | [2000-01-01,2001-01-01) | old
pfx | memo | 1 | [2002-01-01,2010-01-01) | old
(2 rows)
SELECT * FROM ONLY temporal_parent ORDER BY valid_at;
id | valid_at | name
----+----------+------
(0 rows)
DROP TABLE temporal_parent CASCADE;
NOTICE: drop cascades to table mi_child
-- UPDATE FOR PORTION OF with generated columns
-- The generated column depends on the range column, so it must be
-- recomputed when FOR PORTION OF narrows the range.

View file

@ -1448,6 +1448,91 @@ SELECT * FROM fpo_rule ORDER BY f1;
DROP TABLE fpo_rule;
-- UPDATE/DELETE FOR PORTION OF with table inheritance
-- Leftover rows must stay in the child table, preserving child-specific columns.
CREATE TABLE fpo_inh_parent (
id int4range,
valid_at daterange,
name text
);
CREATE TABLE fpo_inh_child (
description text
) INHERITS (fpo_inh_parent);
-- Update targets the parent; the matching row lives in the child.
INSERT INTO fpo_inh_child (id, valid_at, name, description) VALUES
('[1,2)', '[2018-01-01,2019-01-01)', 'one', 'initial');
UPDATE fpo_inh_parent FOR PORTION OF valid_at FROM '2018-04-01' TO '2018-10-01'
SET name = 'one^1';
-- All three rows should be in the child, with description preserved.
SELECT tableoid::regclass, * FROM fpo_inh_parent ORDER BY valid_at;
SELECT * FROM fpo_inh_child ORDER BY valid_at;
-- No rows should have leaked into the parent.
SELECT * FROM ONLY fpo_inh_parent ORDER BY valid_at;
-- Same test for DELETE instead of UPDATE:
TRUNCATE fpo_inh_child, fpo_inh_parent;
INSERT INTO fpo_inh_child (id, valid_at, name, description) VALUES
('[1,2)', '[2018-01-01,2019-01-01)', 'one', 'initial');
DELETE FROM fpo_inh_parent FOR PORTION OF valid_at FROM '2018-04-01' TO '2018-10-01';
-- Both rows should be in the child, with description preserved.
SELECT tableoid::regclass, * FROM fpo_inh_parent ORDER BY valid_at;
SELECT * FROM fpo_inh_child ORDER BY valid_at;
-- No rows should have leaked into the parent.
SELECT * FROM ONLY fpo_inh_parent ORDER BY valid_at;
DROP TABLE fpo_inh_parent CASCADE;
-- UPDATE FOR PORTION OF with multiple inheritance
-- Leftover rows must stay in the child table, even if the range column's
-- attnum differs between the target parent and child.
CREATE TABLE temporal_parent (
id int,
valid_at daterange,
name text
);
CREATE TABLE other_parent (
prefix text,
note text
);
CREATE TABLE mi_child () INHERITS (other_parent, temporal_parent);
-- attnum of the range column is different in temporal_parent and mi_child
SELECT attnum, attname
FROM pg_attribute
WHERE attrelid = 'temporal_parent'::regclass
AND attnum > 0 AND NOT attisdropped
ORDER BY attnum;
SELECT attnum, attname
FROM pg_attribute
WHERE attrelid = 'mi_child'::regclass
AND attnum > 0 AND NOT attisdropped
ORDER BY attnum;
INSERT INTO mi_child (prefix, note, id, valid_at, name) VALUES
('pfx', 'memo', 1, daterange('2000-01-01', '2010-01-01'), 'old');
UPDATE temporal_parent FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01'
SET name = 'new'
WHERE id = 1;
SELECT tableoid::regclass, * FROM temporal_parent ORDER BY valid_at;
SELECT * FROM mi_child ORDER BY valid_at;
SELECT * FROM ONLY temporal_parent ORDER BY valid_at;
TRUNCATE mi_child, other_parent, temporal_parent;
INSERT INTO mi_child (prefix, note, id, valid_at, name) VALUES
('pfx', 'memo', 1, daterange('2000-01-01', '2010-01-01'), 'old');
DELETE FROM temporal_parent FOR PORTION OF valid_at FROM '2001-01-01' TO '2002-01-01'
WHERE id = 1;
SELECT tableoid::regclass, * FROM temporal_parent ORDER BY valid_at;
SELECT * FROM mi_child ORDER BY valid_at;
SELECT * FROM ONLY temporal_parent ORDER BY valid_at;
DROP TABLE temporal_parent CASCADE;
-- UPDATE FOR PORTION OF with generated columns
-- The generated column depends on the range column, so it must be
-- recomputed when FOR PORTION OF narrows the range.