diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 6596843a8d8..90e94fb8a5a 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -3893,6 +3893,8 @@ typedef struct AfterTriggersData /* per-subtransaction-level data: */ AfterTriggersTransData *trans_stack; /* array of structs shown below */ int maxtransdepth; /* allocated len of above array */ + + List *batch_callbacks; /* List of AfterTriggerCallbackItem */ } AfterTriggersData; struct AfterTriggersQueryData @@ -3929,6 +3931,13 @@ struct AfterTriggersTableData TupleTableSlot *storeslot; /* for converting to tuplestore's format */ }; +/* Entry in afterTriggers.batch_callbacks */ +typedef struct AfterTriggerCallbackItem +{ + AfterTriggerBatchCallback callback; + void *arg; +} AfterTriggerCallbackItem; + static AfterTriggersData afterTriggers; static void AfterTriggerExecute(EState *estate, @@ -3964,6 +3973,7 @@ static SetConstraintState SetConstraintStateAddItem(SetConstraintState state, Oid tgoid, bool tgisdeferred); static void cancel_prior_stmt_triggers(Oid relid, CmdType cmdType, int tgevent); +static void FireAfterTriggerBatchCallbacks(void); /* * Get the FDW tuplestore for the current trigger query level, creating it @@ -5089,6 +5099,7 @@ AfterTriggerBeginXact(void) */ afterTriggers.firing_counter = (CommandId) 1; /* mustn't be 0 */ afterTriggers.query_depth = -1; + afterTriggers.batch_callbacks = NIL; /* * Verify that there is no leftover state remaining. If these assertions @@ -5210,6 +5221,15 @@ AfterTriggerEndQuery(EState *estate) break; } + /* + * Fire batch callbacks before releasing query-level storage and before + * decrementing query_depth. Callbacks may do real work (index probes, + * error reporting) and rely on query_depth still reflecting the current + * batch level so that nested calls from SPI inside AFTER triggers are + * correctly suppressed by FireAfterTriggerBatchCallbacks's depth guard. + */ + FireAfterTriggerBatchCallbacks(); + /* Release query-level-local storage, including tuplestores if any */ AfterTriggerFreeQuery(&afterTriggers.query_stack[afterTriggers.query_depth]); @@ -5317,6 +5337,9 @@ AfterTriggerFireDeferred(void) break; /* all fired */ } + /* Flush any fast-path batches accumulated by the triggers just fired. */ + FireAfterTriggerBatchCallbacks(); + /* * We don't bother freeing the event list, since it will go away anyway * (and more efficiently than via pfree) in AfterTriggerEndXact. @@ -6059,6 +6082,11 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt) break; /* all fired */ } + /* + * Flush any fast-path batches accumulated by the triggers just fired. + */ + FireAfterTriggerBatchCallbacks(); + if (snapshot_set) PopActiveSnapshot(); } @@ -6755,3 +6783,80 @@ check_modified_virtual_generated(TupleDesc tupdesc, HeapTuple tuple) return tuple; } + +/* + * RegisterAfterTriggerBatchCallback + * Register a function to be called when the current trigger-firing + * batch completes. + * + * Must be called from within a trigger function's execution context + * (i.e., while afterTriggers state is active). + * + * The callback list is cleared after invocation, so the caller must + * re-register for each new batch if needed. + */ +void +RegisterAfterTriggerBatchCallback(AfterTriggerBatchCallback callback, + void *arg) +{ + AfterTriggerCallbackItem *item; + MemoryContext oldcxt; + + /* + * Allocate in TopTransactionContext so the item survives for the duration + * of the batch, which may span multiple trigger invocations. + * + * Must be called while afterTriggers is active (query_depth >= 0); + * callbacks registered outside a trigger-firing context would never fire. + */ + Assert(afterTriggers.query_depth >= 0); + oldcxt = MemoryContextSwitchTo(TopTransactionContext); + item = palloc(sizeof(AfterTriggerCallbackItem)); + item->callback = callback; + item->arg = arg; + afterTriggers.batch_callbacks = + lappend(afterTriggers.batch_callbacks, item); + MemoryContextSwitchTo(oldcxt); +} + +/* + * FireAfterTriggerBatchCallbacks + * Invoke and clear all registered batch callbacks. + * + * Only fires at the outermost query level (query_depth == 0) or from + * top-level operations (query_depth == -1, e.g. AfterTriggerFireDeferred + * at COMMIT). Nested queries from SPI inside AFTER triggers run at + * depth > 0 and must not tear down resources the outer batch still needs. + */ +static void +FireAfterTriggerBatchCallbacks(void) +{ + ListCell *lc; + + if (afterTriggers.query_depth > 0) + return; + + foreach(lc, afterTriggers.batch_callbacks) + { + AfterTriggerCallbackItem *item = lfirst(lc); + + item->callback(item->arg); + } + + list_free_deep(afterTriggers.batch_callbacks); + afterTriggers.batch_callbacks = NIL; +} + +/* + * AfterTriggerIsActive + * Returns true if we're inside the after-trigger framework where + * registered batch callbacks will actually be invoked. + * + * This is false during validateForeignKeyConstraint(), which calls + * RI trigger functions directly outside the after-trigger framework. + */ +bool +AfterTriggerIsActive(void) +{ + return afterTriggers.query_depth >= 0; +} diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 2de08da6539..84f9fecdb4c 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -23,6 +23,7 @@ #include "postgres.h" +#include "access/amapi.h" #include "access/genam.h" #include "access/htup_details.h" #include "access/skey.h" @@ -199,6 +200,55 @@ typedef struct RI_CompareHashEntry FmgrInfo cast_func_finfo; /* in case we must coerce input */ } RI_CompareHashEntry; +/* + * Maximum number of FK rows buffered before flushing. + * + * Larger batches amortize per-flush overhead and let the SK_SEARCHARRAY + * path walk more leaf pages in a single sorted traversal. But each + * buffered row is a materialized HeapTuple in flush_cxt, and the matched[] + * scan in ri_FastPathFlushArray() is O(batch_size) per index match. + * Benchmarking showed little difference between 16 and 64, with 256 + * consistently slower. 64 is a reasonable default. + */ +#define RI_FASTPATH_BATCH_SIZE 64 + +/* + * RI_FastPathEntry + * Per-constraint cache of resources needed by ri_FastPathBatchFlush(). + * + * One entry per constraint, keyed by pg_constraint OID. Created lazily + * by ri_FastPathGetEntry() on first use within a trigger-firing batch + * and torn down by ri_FastPathTeardown() at batch end. + * + * FK tuples are buffered in batch[] across trigger invocations and + * flushed when the buffer fills or the batch ends. + * + * RI_FastPathEntry is not subject to cache invalidation. The cached + * relations are held open with locks for the transaction duration, preventing + * relcache invalidation. The entry itself is torn down at batch end by + * ri_FastPathEndBatch(); on abort, ResourceOwner releases the cached + * relations and the XactCallback/SubXactCallback NULL the static cache pointer + * to prevent any subsequent access. + */ +typedef struct RI_FastPathEntry +{ + Oid conoid; /* hash key: pg_constraint OID */ + Oid fk_relid; /* for ri_FastPathEndBatch() */ + Relation pk_rel; + Relation idx_rel; + TupleTableSlot *pk_slot; + TupleTableSlot *fk_slot; + MemoryContext flush_cxt; /* short-lived context for per-flush work */ + + /* + * TODO: batch[] is HeapTuple[] because the AFTER trigger machinery + * currently passes tuples as HeapTuples. Once trigger infrastructure is + * slotified, this should use a slot array or whatever batched tuple + * storage abstraction exists at that point to be TAM-agnostic. + */ + HeapTuple batch[RI_FASTPATH_BATCH_SIZE]; + int batch_count; +} RI_FastPathEntry; /* * Local data @@ -208,6 +258,8 @@ static HTAB *ri_query_cache = NULL; static HTAB *ri_compare_cache = NULL; static dclist_head ri_constraint_cache_valid_list; +static HTAB *ri_fastpath_cache = NULL; +static bool ri_fastpath_callback_registered = false; /* * Local function prototypes @@ -258,6 +310,16 @@ static bool ri_PerformCheck(const RI_ConstraintInfo *riinfo, bool detectNewRows, int expect_OK); static void ri_FastPathCheck(const RI_ConstraintInfo *riinfo, Relation fk_rel, TupleTableSlot *newslot); +static void ri_FastPathBatchAdd(const RI_ConstraintInfo *riinfo, + Relation fk_rel, TupleTableSlot *newslot); +static void ri_FastPathBatchFlush(RI_FastPathEntry *fpentry, Relation fk_rel, + const RI_ConstraintInfo *riinfo); +static int ri_FastPathFlushArray(RI_FastPathEntry *fpentry, TupleTableSlot *fk_slot, + const RI_ConstraintInfo *riinfo, Relation fk_rel, + Snapshot snapshot, IndexScanDesc scandesc); +static int ri_FastPathFlushLoop(RI_FastPathEntry *fpentry, TupleTableSlot *fk_slot, + const RI_ConstraintInfo *riinfo, Relation fk_rel, + Snapshot snapshot, IndexScanDesc scandesc); static bool ri_FastPathProbeOne(Relation pk_rel, Relation idx_rel, IndexScanDesc scandesc, TupleTableSlot *slot, Snapshot snapshot, const RI_ConstraintInfo *riinfo, @@ -280,6 +342,10 @@ pg_noreturn static void ri_ReportViolation(const RI_ConstraintInfo *riinfo, Relation pk_rel, Relation fk_rel, TupleTableSlot *violatorslot, TupleDesc tupdesc, int queryno, bool is_restrict, bool partgone); +static RI_FastPathEntry *ri_FastPathGetEntry(const RI_ConstraintInfo *riinfo, + Relation fk_rel); +static void ri_FastPathEndBatch(void *arg); +static void ri_FastPathTeardown(void); /* @@ -390,12 +456,22 @@ RI_FKey_check(TriggerData *trigdata) * lock. This is semantically equivalent to the SPI path below but avoids * the per-row executor overhead. * - * ri_FastPathCheck() reports the violation itself (via ereport) if no - * matching PK row is found, so it only returns on success. + * ri_FastPathBatchAdd() and ri_FastPathCheck() report the violation + * themselves if no matching PK row is found, so they only return on + * success. */ if (ri_fastpath_is_applicable(riinfo)) { - ri_FastPathCheck(riinfo, fk_rel, newslot); + if (AfterTriggerIsActive()) + { + /* Batched path: buffer and probe in groups */ + ri_FastPathBatchAdd(riinfo, fk_rel, newslot); + } + else + { + /* ALTER TABLE validation: per-row, no cache */ + ri_FastPathCheck(riinfo, fk_rel, newslot); + } return PointerGetDatum(NULL); } @@ -2690,10 +2766,14 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo, /* * ri_FastPathCheck - * Perform FK existence check via direct index probe, bypassing SPI. + * Perform per row FK existence check via direct index probe, + * bypassing SPI. * * If no matching PK row exists, report the violation via ri_ReportViolation(), * otherwise, the function returns normally. + * + * Note: This is only used by the ALTER TABLE validation path. Other paths use + * ri_FastPathBatchAdd(). */ static void ri_FastPathCheck(const RI_ConstraintInfo *riinfo, @@ -2761,6 +2841,332 @@ ri_FastPathCheck(const RI_ConstraintInfo *riinfo, table_close(pk_rel, NoLock); } +/* + * ri_FastPathBatchAdd + * Buffer a FK row for batched probing. + * + * Adds the row to the batch buffer. When the buffer is full, flushes all + * buffered rows by probing the PK index. Any violation is reported + * immediately during the flush via ri_ReportViolation (which does not return). + * + * Uses the per-batch cache (RI_FastPathEntry) to avoid per-row relation + * open/close, slot creation, etc. + * + * The batch is also flushed at end of trigger-firing cycle via + * ri_FastPathEndBatch(). + */ +static void +ri_FastPathBatchAdd(const RI_ConstraintInfo *riinfo, + Relation fk_rel, TupleTableSlot *newslot) +{ + RI_FastPathEntry *fpentry = ri_FastPathGetEntry(riinfo, fk_rel); + MemoryContext oldcxt; + + oldcxt = MemoryContextSwitchTo(fpentry->flush_cxt); + fpentry->batch[fpentry->batch_count] = + ExecCopySlotHeapTuple(newslot); + fpentry->batch_count++; + MemoryContextSwitchTo(oldcxt); + + if (fpentry->batch_count >= RI_FASTPATH_BATCH_SIZE) + ri_FastPathBatchFlush(fpentry, fk_rel, riinfo); +} + +/* + * ri_FastPathBatchFlush + * Flush all buffered FK rows by probing the PK index. + * + * Dispatches to ri_FastPathFlushArray() for single-column FKs + * (using SK_SEARCHARRAY) or ri_FastPathFlushLoop() for multi-column + * FKs (per-row probing). Violations are reported immediately via + * ri_ReportViolation(), which does not return. + */ +static void +ri_FastPathBatchFlush(RI_FastPathEntry *fpentry, Relation fk_rel, + const RI_ConstraintInfo *riinfo) +{ + Relation pk_rel = fpentry->pk_rel; + Relation idx_rel = fpentry->idx_rel; + TupleTableSlot *fk_slot = fpentry->fk_slot; + Snapshot snapshot; + IndexScanDesc scandesc; + Oid saved_userid; + int saved_sec_context; + MemoryContext oldcxt; + int violation_index; + + if (fpentry->batch_count == 0) + return; + + /* + * CCI and security context switch are done once for the entire batch. + * Per-row CCI is unnecessary because by the time a flush runs, all AFTER + * triggers for the buffered rows have already fired (trigger invocations + * strictly alternate per row), so a single CCI advances past all their + * effects. Per-row security context switch is unnecessary because each + * row's probe runs entirely as the PK table owner, same as the SPI path + * -- the only difference is that the SPI path sets and restores the + * context per row whereas we do it once around the whole batch. + */ + CommandCounterIncrement(); + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + + /* + * build_index_scankeys() may palloc cast results for cross-type FKs. Use + * the entry's short-lived flush context so these don't accumulate across + * batches. + */ + oldcxt = MemoryContextSwitchTo(fpentry->flush_cxt); + + scandesc = index_beginscan(pk_rel, idx_rel, snapshot, NULL, + riinfo->nkeys, 0, SO_NONE); + + GetUserIdAndSecContext(&saved_userid, &saved_sec_context); + SetUserIdAndSecContext(RelationGetForm(pk_rel)->relowner, + saved_sec_context | + SECURITY_LOCAL_USERID_CHANGE | + SECURITY_NOFORCE_RLS); + + /* + * Check that the current user has permission to access pk_rel. Done here + * rather than at entry creation so that permission changes between + * flushes are respected, matching the per-row behavior of the SPI path, + * albeit checked once per flush rather than once per row, like in + * ri_FastPathCheck(). + */ + ri_CheckPermissions(pk_rel); + + if (riinfo->fpmeta == NULL) + { + /* Reload to ensure it's valid. */ + riinfo = ri_LoadConstraintInfo(riinfo->constraint_id); + ri_populate_fastpath_metadata((RI_ConstraintInfo *) riinfo, + fk_rel, idx_rel); + } + Assert(riinfo->fpmeta); + + /* Skip array overhead for single-row batches. */ + if (riinfo->nkeys == 1 && fpentry->batch_count > 1) + violation_index = ri_FastPathFlushArray(fpentry, fk_slot, riinfo, + fk_rel, snapshot, scandesc); + else + violation_index = ri_FastPathFlushLoop(fpentry, fk_slot, riinfo, + fk_rel, snapshot, scandesc); + + SetUserIdAndSecContext(saved_userid, saved_sec_context); + UnregisterSnapshot(snapshot); + index_endscan(scandesc); + + if (violation_index >= 0) + { + ExecStoreHeapTuple(fpentry->batch[violation_index], fk_slot, false); + ri_ReportViolation(riinfo, pk_rel, fk_rel, + fk_slot, NULL, + RI_PLAN_CHECK_LOOKUPPK, false, false); + } + + MemoryContextReset(fpentry->flush_cxt); + MemoryContextSwitchTo(oldcxt); + + /* Reset. */ + fpentry->batch_count = 0; +} + +/* + * ri_FastPathFlushLoop + * Multi-column fallback: probe the index once per buffered row. + * + * Used for composite foreign keys where SK_SEARCHARRAY does not + * apply, and also for single-row batches of single-column FKs where + * the array overhead is not worth it. + * + * Returns the index of the first violating row in the batch array, or -1 if + * all rows are valid. + */ +static int +ri_FastPathFlushLoop(RI_FastPathEntry *fpentry, TupleTableSlot *fk_slot, + const RI_ConstraintInfo *riinfo, Relation fk_rel, + Snapshot snapshot, IndexScanDesc scandesc) +{ + Relation pk_rel = fpentry->pk_rel; + Relation idx_rel = fpentry->idx_rel; + TupleTableSlot *pk_slot = fpentry->pk_slot; + Datum pk_vals[INDEX_MAX_KEYS]; + char pk_nulls[INDEX_MAX_KEYS]; + ScanKeyData skey[INDEX_MAX_KEYS]; + bool found = true; + + for (int i = 0; i < fpentry->batch_count; i++) + { + ExecStoreHeapTuple(fpentry->batch[i], fk_slot, false); + ri_ExtractValues(fk_rel, fk_slot, riinfo, false, pk_vals, pk_nulls); + build_index_scankeys(riinfo, idx_rel, pk_vals, pk_nulls, skey); + + found = ri_FastPathProbeOne(pk_rel, idx_rel, scandesc, pk_slot, + snapshot, riinfo, skey, riinfo->nkeys); + + /* Report first unmatched row */ + if (!found) + return i; + } + + /* All pass. */ + return -1; +} + +/* + * ri_FastPathFlushArray + * Single-column fast path using SK_SEARCHARRAY. + * + * Builds an array of FK values and does one index scan with + * SK_SEARCHARRAY. The index AM sorts and deduplicates the array + * internally, then walks matching leaf pages in order. Each + * matched PK tuple is locked and rechecked as before; a matched[] + * bitmap tracks which batch items were satisfied. + * + * Returns the index of the first violating row in the batch array, or -1 if + * all rows are valid. + */ +static int +ri_FastPathFlushArray(RI_FastPathEntry *fpentry, TupleTableSlot *fk_slot, + const RI_ConstraintInfo *riinfo, Relation fk_rel, + Snapshot snapshot, IndexScanDesc scandesc) +{ + FastPathMeta *fpmeta = riinfo->fpmeta; + Relation pk_rel = fpentry->pk_rel; + Relation idx_rel = fpentry->idx_rel; + TupleTableSlot *pk_slot = fpentry->pk_slot; + Datum search_vals[RI_FASTPATH_BATCH_SIZE]; + bool matched[RI_FASTPATH_BATCH_SIZE]; + int nvals = fpentry->batch_count; + Datum pk_vals[INDEX_MAX_KEYS]; + char pk_nulls[INDEX_MAX_KEYS]; + ScanKeyData skey[1]; + FmgrInfo *cast_func_finfo; + FmgrInfo *eq_opr_finfo; + Oid elem_type; + int16 elem_len; + bool elem_byval; + char elem_align; + ArrayType *arr; + + Assert(fpmeta); + + memset(matched, 0, nvals * sizeof(bool)); + + /* + * Extract FK values, casting to the operator's expected input type if + * needed (e.g. int8 FK -> int4 for int48eq). + */ + cast_func_finfo = &fpmeta->cast_func_finfo[0]; + eq_opr_finfo = &fpmeta->eq_opr_finfo[0]; + for (int i = 0; i < nvals; i++) + { + ExecStoreHeapTuple(fpentry->batch[i], fk_slot, false); + ri_ExtractValues(fk_rel, fk_slot, riinfo, false, pk_vals, pk_nulls); + + /* Cast if needed (e.g. int8 FK -> numeric PK) */ + if (OidIsValid(cast_func_finfo->fn_oid)) + search_vals[i] = FunctionCall3(cast_func_finfo, + pk_vals[0], + Int32GetDatum(-1), + BoolGetDatum(false)); + else + search_vals[i] = pk_vals[0]; + } + + /* + * Array element type must match the operator's right-hand input type, + * which is what the index comparison expects on the search side. + * ri_populate_fastpath_metadata() stores exactly this via + * get_op_opfamily_properties(), which returns the operator's right-hand + * type as the subtype for cross-type operators (e.g. int8 for int48eq) + * and the common type for same-type operators. + */ + elem_type = fpmeta->subtypes[0]; + Assert(OidIsValid(elem_type)); + get_typlenbyvalalign(elem_type, &elem_len, &elem_byval, &elem_align); + + arr = construct_array(search_vals, nvals, + elem_type, elem_len, elem_byval, elem_align); + + /* + * Build scan key with SK_SEARCHARRAY. The index AM code will internally + * sort and deduplicate, then walk leaf pages in order. + * + * PK indexes are always btree, which supports SK_SEARCHARRAY. + */ + Assert(idx_rel->rd_indam->amsearcharray); + ScanKeyEntryInitialize(&skey[0], + SK_SEARCHARRAY, + 1, /* attno */ + fpmeta->strats[0], + fpmeta->subtypes[0], + idx_rel->rd_indcollation[0], + fpmeta->regops[0], + PointerGetDatum(arr)); + + index_rescan(scandesc, skey, 1, NULL, 0); + + /* + * Walk all matches. The index AM returns them in index order. For each + * match, find which batch item(s) it satisfies. + */ + while (index_getnext_slot(scandesc, ForwardScanDirection, pk_slot)) + { + Datum found_val; + bool found_null; + bool concurrently_updated; + ScanKeyData recheck_skey[1]; + + if (!ri_LockPKTuple(pk_rel, pk_slot, snapshot, &concurrently_updated)) + continue; + + /* Extract the PK value from the matched and locked tuple */ + found_val = slot_getattr(pk_slot, riinfo->pk_attnums[0], &found_null); + Assert(!found_null); + + if (concurrently_updated) + { + /* + * Build a single-key scankey for recheck. We need the actual PK + * value that was found, not the FK search value. + */ + ScanKeyEntryInitialize(&recheck_skey[0], 0, 1, + fpmeta->strats[0], + fpmeta->subtypes[0], + idx_rel->rd_indcollation[0], + fpmeta->regops[0], + found_val); + if (!recheck_matched_pk_tuple(idx_rel, recheck_skey, pk_slot)) + continue; + } + + /* + * Linear scan to mark all batch items matching this PK value. + * O(batch_size) per match, O(batch_size^2) worst case -- fine for the + * current batch size of 64. + */ + for (int i = 0; i < nvals; i++) + { + if (!matched[i] && + DatumGetBool(FunctionCall2Coll(eq_opr_finfo, + idx_rel->rd_indcollation[0], + found_val, + search_vals[i]))) + matched[i] = true; + } + } + + /* Report first unmatched row */ + for (int i = 0; i < nvals; i++) + if (!matched[i]) + return i; + + /* All pass. */ + return -1; +} + /* * ri_FastPathProbeOne * Probe the PK index for one set of scan keys, lock the matching @@ -3687,3 +4093,196 @@ RI_FKey_trigger_type(Oid tgfoid) return RI_TRIGGER_NONE; } + +/* + * ri_FastPathEndBatch + * Flush remaining rows and tear down cached state. + * + * Registered as an AfterTriggerBatchCallback. Note: the flush can + * do real work (CCI, security context switch, index probes) and can + * throw ERROR on a constraint violation. If that happens, + * ri_FastPathTeardown never runs; ResourceOwner + XactCallback + * handle resource cleanup on the abort path. + */ +static void +ri_FastPathEndBatch(void *arg) +{ + HASH_SEQ_STATUS status; + RI_FastPathEntry *entry; + + if (ri_fastpath_cache == NULL) + return; + + /* Flush any partial batches -- can throw ERROR */ + hash_seq_init(&status, ri_fastpath_cache); + while ((entry = hash_seq_search(&status)) != NULL) + { + if (entry->batch_count > 0) + { + Relation fk_rel = table_open(entry->fk_relid, AccessShareLock); + const RI_ConstraintInfo *riinfo = ri_LoadConstraintInfo(entry->conoid); + + ri_FastPathBatchFlush(entry, fk_rel, riinfo); + table_close(fk_rel, NoLock); + } + } + + ri_FastPathTeardown(); +} + +/* + * ri_FastPathTeardown + * Tear down all cached fast-path state. + * + * Called from ri_FastPathEndBatch() after flushing any remaining rows. + */ +static void +ri_FastPathTeardown(void) +{ + HASH_SEQ_STATUS status; + RI_FastPathEntry *entry; + + if (ri_fastpath_cache == NULL) + return; + + hash_seq_init(&status, ri_fastpath_cache); + while ((entry = hash_seq_search(&status)) != NULL) + { + if (entry->idx_rel) + index_close(entry->idx_rel, NoLock); + if (entry->pk_rel) + table_close(entry->pk_rel, NoLock); + if (entry->pk_slot) + ExecDropSingleTupleTableSlot(entry->pk_slot); + if (entry->fk_slot) + ExecDropSingleTupleTableSlot(entry->fk_slot); + if (entry->flush_cxt) + MemoryContextDelete(entry->flush_cxt); + } + + hash_destroy(ri_fastpath_cache); + ri_fastpath_cache = NULL; + ri_fastpath_callback_registered = false; +} + +static bool ri_fastpath_xact_callback_registered = false; + +static void +ri_FastPathXactCallback(XactEvent event, void *arg) +{ + /* + * On abort, ResourceOwner already released relations; on commit, + * ri_FastPathTeardown already ran. Either way, just NULL the static + * pointers so they don't dangle into the next transaction. + */ + ri_fastpath_cache = NULL; + ri_fastpath_callback_registered = false; +} + +static void +ri_FastPathSubXactCallback(SubXactEvent event, SubTransactionId mySubid, + SubTransactionId parentSubid, void *arg) +{ + if (event == SUBXACT_EVENT_ABORT_SUB) + { + /* + * ResourceOwner already released relations. NULL the static pointers + * so the still-registered batch callback becomes a no-op for the rest + * of this transaction. + */ + ri_fastpath_cache = NULL; + ri_fastpath_callback_registered = false; + } +} + +/* + * ri_FastPathGetEntry + * Look up or create a per-batch cache entry for the given constraint. + * + * On first call for a constraint within a batch: opens pk_rel and the index, + * allocates slots for both FK row and the looked up PK row, and registers the + * cleanup callback. + * + * On subsequent calls: returns the existing entry. + */ +static RI_FastPathEntry * +ri_FastPathGetEntry(const RI_ConstraintInfo *riinfo, Relation fk_rel) +{ + RI_FastPathEntry *entry; + bool found; + + /* Create hash table on first use in this batch */ + if (ri_fastpath_cache == NULL) + { + HASHCTL ctl; + + if (!ri_fastpath_xact_callback_registered) + { + RegisterXactCallback(ri_FastPathXactCallback, NULL); + RegisterSubXactCallback(ri_FastPathSubXactCallback, NULL); + ri_fastpath_xact_callback_registered = true; + } + + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(RI_FastPathEntry); + ctl.hcxt = TopTransactionContext; + ri_fastpath_cache = hash_create("RI fast-path cache", + 16, + &ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + } + + entry = hash_search(ri_fastpath_cache, &riinfo->constraint_id, + HASH_ENTER, &found); + + if (!found) + { + MemoryContext oldcxt; + + /* + * Zero out non-key fields so ri_FastPathTeardown is safe if we error + * out during partial initialization below. + */ + memset(((char *) entry) + offsetof(RI_FastPathEntry, pk_rel), 0, + sizeof(RI_FastPathEntry) - offsetof(RI_FastPathEntry, pk_rel)); + + oldcxt = MemoryContextSwitchTo(TopTransactionContext); + + entry->fk_relid = RelationGetRelid(fk_rel); + + /* + * Open PK table and its unique index. + * + * RowShareLock on pk_rel matches what the SPI path's SELECT ... FOR + * KEY SHARE would acquire as a relation-level lock. AccessShareLock + * on the index is standard for index scans. + * + * We don't release these locks until end of transaction, matching SPI + * behavior. + */ + entry->pk_rel = table_open(riinfo->pk_relid, RowShareLock); + entry->idx_rel = index_open(riinfo->conindid, AccessShareLock); + entry->pk_slot = table_slot_create(entry->pk_rel, NULL); + + /* + * Must be TTSOpsHeapTuple because ExecStoreHeapTuple() is used to + * load entries from batch[] into this slot for value extraction. + */ + entry->fk_slot = MakeSingleTupleTableSlot(RelationGetDescr(fk_rel), + &TTSOpsHeapTuple); + + entry->flush_cxt = AllocSetContextCreate(TopTransactionContext, + "RI fast path flush temporary context", + ALLOCSET_SMALL_SIZES); + MemoryContextSwitchTo(oldcxt); + + /* Ensure cleanup at end of this trigger-firing batch */ + if (!ri_fastpath_callback_registered) + { + RegisterAfterTriggerBatchCallback(ri_FastPathEndBatch, NULL); + ri_fastpath_callback_registered = true; + } + } + + return entry; +} diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 27af5284406..1d9869973c0 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -289,4 +289,25 @@ extern void RI_PartitionRemove_Check(Trigger *trigger, Relation fk_rel, extern int RI_FKey_trigger_type(Oid tgfoid); +/* + * Callback type for end-of-trigger-batch callbacks. + * + * Currently used by ri_triggers.c to flush fast-path FK batches and + * clean up associated resources. + * + * Registered via RegisterAfterTriggerBatchCallback(). Invoked when + * the current trigger-firing batch completes: + * - AfterTriggerEndQuery() (immediate constraints) + * - AfterTriggerFireDeferred() (deferred constraints at COMMIT) + * - AfterTriggerSetState() (SET CONSTRAINTS IMMEDIATE) + * + * The callback list is cleared after each batch. Callers must + * re-register if they need to be called again in a subsequent batch. + */ +typedef void (*AfterTriggerBatchCallback) (void *arg); + +extern void RegisterAfterTriggerBatchCallback(AfterTriggerBatchCallback callback, + void *arg); +extern bool AfterTriggerIsActive(void); + #endif /* TRIGGER_H */ diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out index 6c607d36222..91295754bab 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out @@ -3557,3 +3557,129 @@ DETAIL: drop cascades to table fkpart13_t1 drop cascades to table fkpart13_t2 drop cascades to table fkpart13_t3 RESET search_path; +-- Tests foreign key check fast-path no-cache path. +CREATE TABLE fp_pk_alter (a int PRIMARY KEY); +INSERT INTO fp_pk_alter SELECT generate_series(1, 100); +CREATE TABLE fp_fk_alter (a int); +INSERT INTO fp_fk_alter SELECT generate_series(1, 100); +-- Validation path: should succeed +ALTER TABLE fp_fk_alter ADD FOREIGN KEY (a) REFERENCES fp_pk_alter; +INSERT INTO fp_fk_alter VALUES (101); -- should fail (constraint active) +ERROR: insert or update on table "fp_fk_alter" violates foreign key constraint "fp_fk_alter_a_fkey" +DETAIL: Key (a)=(101) is not present in table "fp_pk_alter". +DROP TABLE fp_fk_alter, fp_pk_alter; +-- Separate test: validation catches existing violation +CREATE TABLE fp_pk_alter2 (a int PRIMARY KEY); +INSERT INTO fp_pk_alter2 VALUES (1); +CREATE TABLE fp_fk_alter2 (a int); +INSERT INTO fp_fk_alter2 VALUES (1), (200); -- 200 has no PK match +ALTER TABLE fp_fk_alter2 ADD FOREIGN KEY (a) REFERENCES fp_pk_alter2; -- should fail +ERROR: insert or update on table "fp_fk_alter2" violates foreign key constraint "fp_fk_alter2_a_fkey" +DETAIL: Key (a)=(200) is not present in table "fp_pk_alter2". +DROP TABLE fp_fk_alter2, fp_pk_alter2; +-- Tests that the fast-path handles caching for multiple constraints +CREATE TABLE fp_pk1 (a int PRIMARY KEY); +CREATE TABLE fp_pk2 (b int PRIMARY KEY); +INSERT INTO fp_pk1 VALUES (1); +INSERT INTO fp_pk2 VALUES (1); +CREATE TABLE fp_multi_fk ( + a int REFERENCES fp_pk1, + b int REFERENCES fp_pk2 +); +INSERT INTO fp_multi_fk VALUES (1, 1); -- two constraints, one batch +INSERT INTO fp_multi_fk VALUES (1, 2); -- second constraint fails +ERROR: insert or update on table "fp_multi_fk" violates foreign key constraint "fp_multi_fk_b_fkey" +DETAIL: Key (b)=(2) is not present in table "fp_pk2". +DROP TABLE fp_multi_fk, fp_pk1, fp_pk2; +-- Test that fast-path cache handles deferred constraints and SET CONSTRAINTS IMMEDIATE +CREATE TABLE fp_pk_defer (a int PRIMARY KEY); +CREATE TABLE fp_fk_defer (a int REFERENCES fp_pk_defer DEFERRABLE INITIALLY DEFERRED); +INSERT INTO fp_pk_defer VALUES (1), (2); +BEGIN; +INSERT INTO fp_fk_defer VALUES (1); +INSERT INTO fp_fk_defer VALUES (2); +SET CONSTRAINTS ALL IMMEDIATE; -- fires batch callback here +INSERT INTO fp_fk_defer VALUES (3); -- should fail, also tests that cache was cleaned up +ERROR: insert or update on table "fp_fk_defer" violates foreign key constraint "fp_fk_defer_a_fkey" +DETAIL: Key (a)=(3) is not present in table "fp_pk_defer". +COMMIT; +DROP TABLE fp_pk_defer, fp_fk_defer; +-- Subtransaction abort: cached state must be invalidated on ROLLBACK TO +CREATE TABLE fp_pk_subxact (a int PRIMARY KEY); +CREATE TABLE fp_fk_subxact (a int REFERENCES fp_pk_subxact); +INSERT INTO fp_pk_subxact VALUES (1), (2); +BEGIN; +INSERT INTO fp_fk_subxact VALUES (1); +SAVEPOINT sp1; +INSERT INTO fp_fk_subxact VALUES (2); +ROLLBACK TO sp1; +INSERT INTO fp_fk_subxact VALUES (1); +COMMIT; +SELECT * FROM fp_fk_subxact; + a +--- + 1 + 1 +(2 rows) + +DROP TABLE fp_fk_subxact, fp_pk_subxact; +-- FK check must see PK rows inserted by earlier AFTER triggers +-- firing on the same statement +CREATE TABLE fp_pk_cci (a int PRIMARY KEY); +CREATE TABLE fp_fk_cci (a int REFERENCES fp_pk_cci); +CREATE FUNCTION fp_auto_pk() RETURNS trigger AS $$ +BEGIN + RAISE NOTICE 'fp_auto_pk called'; + INSERT INTO fp_pk_cci VALUES (NEW.a); + RETURN NEW; +END $$ LANGUAGE plpgsql; +-- Name sorts before the RI trigger, so fires first per row +CREATE TRIGGER "AAA_auto" AFTER INSERT ON fp_fk_cci + FOR EACH ROW EXECUTE FUNCTION fp_auto_pk(); +-- Should succeed: AAA_auto provisions the PK row before RI check +INSERT INTO fp_fk_cci VALUES (1), (2), (3); +NOTICE: fp_auto_pk called +NOTICE: fp_auto_pk called +NOTICE: fp_auto_pk called +DROP TABLE fp_fk_cci, fp_pk_cci; +DROP FUNCTION fp_auto_pk; +-- Multi-column FK: exercises batched per-row probing with composite keys +CREATE TABLE fp_pk_multi (a int, b int, PRIMARY KEY (a, b)); +INSERT INTO fp_pk_multi SELECT i, i FROM generate_series(1, 100) i; +CREATE TABLE fp_fk_multi (x int, a int, b int, + FOREIGN KEY (a, b) REFERENCES fp_pk_multi); +INSERT INTO fp_fk_multi SELECT i, i, i FROM generate_series(1, 100) i; +INSERT INTO fp_fk_multi VALUES (1, 999, 999); +ERROR: insert or update on table "fp_fk_multi" violates foreign key constraint "fp_fk_multi_a_b_fkey" +DETAIL: Key (a, b)=(999, 999) is not present in table "fp_pk_multi". +DROP TABLE fp_fk_multi, fp_pk_multi; +-- Deferred constraint: batch flushed at COMMIT, not at statement end +CREATE TABLE fp_pk_commit (a int PRIMARY KEY); +CREATE TABLE fp_fk_commit (a int REFERENCES fp_pk_commit + DEFERRABLE INITIALLY DEFERRED); +INSERT INTO fp_pk_commit VALUES (1); +BEGIN; +INSERT INTO fp_fk_commit VALUES (1); +INSERT INTO fp_fk_commit VALUES (1); +INSERT INTO fp_fk_commit VALUES (999); +COMMIT; +ERROR: insert or update on table "fp_fk_commit" violates foreign key constraint "fp_fk_commit_a_fkey" +DETAIL: Key (a)=(999) is not present in table "fp_pk_commit". +DROP TABLE fp_fk_commit, fp_pk_commit; +-- Cross-type FK with bulk insert: int8 FK referencing int4 PK, +-- values cast during array construction +CREATE TABLE fp_pk_cross (a int4 PRIMARY KEY); +INSERT INTO fp_pk_cross SELECT generate_series(1, 200); +CREATE TABLE fp_fk_cross (a int8 REFERENCES fp_pk_cross); +INSERT INTO fp_fk_cross SELECT generate_series(1, 200); +INSERT INTO fp_fk_cross VALUES (999); +ERROR: insert or update on table "fp_fk_cross" violates foreign key constraint "fp_fk_cross_a_fkey" +DETAIL: Key (a)=(999) is not present in table "fp_pk_cross". +DROP TABLE fp_fk_cross, fp_pk_cross; +-- Duplicate FK values: when using the batched SAOP path, every +-- row must be recognized as satisfied, not just the first match +CREATE TABLE fp_pk_dup (a int PRIMARY KEY); +INSERT INTO fp_pk_dup VALUES (1); +CREATE TABLE fp_fk_dup (a int REFERENCES fp_pk_dup); +INSERT INTO fp_fk_dup SELECT 1 FROM generate_series(1, 100); +DROP TABLE fp_fk_dup, fp_pk_dup; diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql index fcdd006c971..f646dd10401 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql @@ -2535,3 +2535,121 @@ WITH cte AS ( DROP SCHEMA fkpart13 CASCADE; RESET search_path; + +-- Tests foreign key check fast-path no-cache path. +CREATE TABLE fp_pk_alter (a int PRIMARY KEY); +INSERT INTO fp_pk_alter SELECT generate_series(1, 100); +CREATE TABLE fp_fk_alter (a int); +INSERT INTO fp_fk_alter SELECT generate_series(1, 100); +-- Validation path: should succeed +ALTER TABLE fp_fk_alter ADD FOREIGN KEY (a) REFERENCES fp_pk_alter; +INSERT INTO fp_fk_alter VALUES (101); -- should fail (constraint active) +DROP TABLE fp_fk_alter, fp_pk_alter; + +-- Separate test: validation catches existing violation +CREATE TABLE fp_pk_alter2 (a int PRIMARY KEY); +INSERT INTO fp_pk_alter2 VALUES (1); +CREATE TABLE fp_fk_alter2 (a int); +INSERT INTO fp_fk_alter2 VALUES (1), (200); -- 200 has no PK match +ALTER TABLE fp_fk_alter2 ADD FOREIGN KEY (a) REFERENCES fp_pk_alter2; -- should fail +DROP TABLE fp_fk_alter2, fp_pk_alter2; + +-- Tests that the fast-path handles caching for multiple constraints +CREATE TABLE fp_pk1 (a int PRIMARY KEY); +CREATE TABLE fp_pk2 (b int PRIMARY KEY); +INSERT INTO fp_pk1 VALUES (1); +INSERT INTO fp_pk2 VALUES (1); +CREATE TABLE fp_multi_fk ( + a int REFERENCES fp_pk1, + b int REFERENCES fp_pk2 +); +INSERT INTO fp_multi_fk VALUES (1, 1); -- two constraints, one batch +INSERT INTO fp_multi_fk VALUES (1, 2); -- second constraint fails +DROP TABLE fp_multi_fk, fp_pk1, fp_pk2; + +-- Test that fast-path cache handles deferred constraints and SET CONSTRAINTS IMMEDIATE +CREATE TABLE fp_pk_defer (a int PRIMARY KEY); +CREATE TABLE fp_fk_defer (a int REFERENCES fp_pk_defer DEFERRABLE INITIALLY DEFERRED); +INSERT INTO fp_pk_defer VALUES (1), (2); + +BEGIN; +INSERT INTO fp_fk_defer VALUES (1); +INSERT INTO fp_fk_defer VALUES (2); +SET CONSTRAINTS ALL IMMEDIATE; -- fires batch callback here +INSERT INTO fp_fk_defer VALUES (3); -- should fail, also tests that cache was cleaned up +COMMIT; +DROP TABLE fp_pk_defer, fp_fk_defer; + +-- Subtransaction abort: cached state must be invalidated on ROLLBACK TO +CREATE TABLE fp_pk_subxact (a int PRIMARY KEY); +CREATE TABLE fp_fk_subxact (a int REFERENCES fp_pk_subxact); +INSERT INTO fp_pk_subxact VALUES (1), (2); +BEGIN; +INSERT INTO fp_fk_subxact VALUES (1); +SAVEPOINT sp1; +INSERT INTO fp_fk_subxact VALUES (2); +ROLLBACK TO sp1; +INSERT INTO fp_fk_subxact VALUES (1); +COMMIT; +SELECT * FROM fp_fk_subxact; +DROP TABLE fp_fk_subxact, fp_pk_subxact; + +-- FK check must see PK rows inserted by earlier AFTER triggers +-- firing on the same statement +CREATE TABLE fp_pk_cci (a int PRIMARY KEY); +CREATE TABLE fp_fk_cci (a int REFERENCES fp_pk_cci); + +CREATE FUNCTION fp_auto_pk() RETURNS trigger AS $$ +BEGIN + RAISE NOTICE 'fp_auto_pk called'; + INSERT INTO fp_pk_cci VALUES (NEW.a); + RETURN NEW; +END $$ LANGUAGE plpgsql; + +-- Name sorts before the RI trigger, so fires first per row +CREATE TRIGGER "AAA_auto" AFTER INSERT ON fp_fk_cci + FOR EACH ROW EXECUTE FUNCTION fp_auto_pk(); + +-- Should succeed: AAA_auto provisions the PK row before RI check +INSERT INTO fp_fk_cci VALUES (1), (2), (3); + +DROP TABLE fp_fk_cci, fp_pk_cci; +DROP FUNCTION fp_auto_pk; + +-- Multi-column FK: exercises batched per-row probing with composite keys +CREATE TABLE fp_pk_multi (a int, b int, PRIMARY KEY (a, b)); +INSERT INTO fp_pk_multi SELECT i, i FROM generate_series(1, 100) i; +CREATE TABLE fp_fk_multi (x int, a int, b int, + FOREIGN KEY (a, b) REFERENCES fp_pk_multi); +INSERT INTO fp_fk_multi SELECT i, i, i FROM generate_series(1, 100) i; +INSERT INTO fp_fk_multi VALUES (1, 999, 999); +DROP TABLE fp_fk_multi, fp_pk_multi; + +-- Deferred constraint: batch flushed at COMMIT, not at statement end +CREATE TABLE fp_pk_commit (a int PRIMARY KEY); +CREATE TABLE fp_fk_commit (a int REFERENCES fp_pk_commit + DEFERRABLE INITIALLY DEFERRED); +INSERT INTO fp_pk_commit VALUES (1); +BEGIN; +INSERT INTO fp_fk_commit VALUES (1); +INSERT INTO fp_fk_commit VALUES (1); +INSERT INTO fp_fk_commit VALUES (999); +COMMIT; +DROP TABLE fp_fk_commit, fp_pk_commit; + +-- Cross-type FK with bulk insert: int8 FK referencing int4 PK, +-- values cast during array construction +CREATE TABLE fp_pk_cross (a int4 PRIMARY KEY); +INSERT INTO fp_pk_cross SELECT generate_series(1, 200); +CREATE TABLE fp_fk_cross (a int8 REFERENCES fp_pk_cross); +INSERT INTO fp_fk_cross SELECT generate_series(1, 200); +INSERT INTO fp_fk_cross VALUES (999); +DROP TABLE fp_fk_cross, fp_pk_cross; + +-- Duplicate FK values: when using the batched SAOP path, every +-- row must be recognized as satisfied, not just the first match +CREATE TABLE fp_pk_dup (a int PRIMARY KEY); +INSERT INTO fp_pk_dup VALUES (1); +CREATE TABLE fp_fk_dup (a int REFERENCES fp_pk_dup); +INSERT INTO fp_fk_dup SELECT 1 FROM generate_series(1, 100); +DROP TABLE fp_fk_dup, fp_pk_dup; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 5bc517602b1..91b1225da82 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -30,6 +30,8 @@ AddForeignUpdateTargets_function AddrInfo AffixNode AffixNodeData +AfterTriggerBatchCallback +AfterTriggerCallbackItem AfterTriggerEvent AfterTriggerEventChunk AfterTriggerEventData @@ -2488,6 +2490,7 @@ RIX RI_CompareHashEntry RI_CompareKey RI_ConstraintInfo +RI_FastPathEntry RI_QueryHashEntry RI_QueryKey RTEKind