From 2c16deee2f7d52d6567dcbad046f74a8e880ee52 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 8 Apr 2026 00:02:26 -0400 Subject: [PATCH] instrumentation: Allocate query level instrumentation in ExecutorStart Until now extensions that wanted to measure overall query execution could create QueryDesc->totaltime, which the core executor would then start and stop. That's a bit odd and composes badly, e.g. extensions always had to use INSTRUMENT_ALL, because otherwise another extension might not get what they need. Instead this introduces a new field, QueryDesc->query_instr_options, that extensions can use to indicate whether they need query level instrumentation populated, and with which instrumentation options. Extensions should take care to only add options they need, instead of replacing the options of others. The prior name of the field, totaltime, sounded like it would only measure time, but these days the instrumentation infrastructure can track more resources. The secondary benefit is that this will make it obvious to extensions that they may not create the Instrumentation struct themselves anymore (often extensions build only against a postgres build without assertions). Adjust pg_stat_statements and auto_explain to match, and lower the requested instrumentation level for auto_explain to INSTRUMENT_TIMER, since the summary instrumentation it needs is only runtime. The reason to push this now, rather in the PG 20 cycle, is that 5a79e78501f already required extensions using query level instrumentations to adjust their code, and it seemed undesirable to require them to do so again for 20. Author: Lukas Fittl Reviewed-by: Andres Freund Discussion: https://postgr.es/m/CAP53Pkyqsht+exJQYRsjhSWYKu+vFGHhPub7m6PmFD6Or0=p1g@mail.gmail.com --- contrib/auto_explain/auto_explain.c | 24 +++----------- .../pg_stat_statements/pg_stat_statements.c | 33 +++++++------------ src/backend/executor/execMain.c | 27 ++++++++++----- src/backend/tcop/pquery.c | 3 +- src/include/executor/execdesc.h | 6 ++-- 5 files changed, 40 insertions(+), 53 deletions(-) diff --git a/contrib/auto_explain/auto_explain.c b/contrib/auto_explain/auto_explain.c index 9114ced45a7..97ce498d0c1 100644 --- a/contrib/auto_explain/auto_explain.c +++ b/contrib/auto_explain/auto_explain.c @@ -346,6 +346,9 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags) if (auto_explain_enabled()) { + /* We're always interested in runtime */ + queryDesc->query_instr_options |= INSTRUMENT_TIMER; + /* Enable per-node instrumentation iff log_analyze is required. */ if (auto_explain_log_analyze && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) { @@ -366,23 +369,6 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags) prev_ExecutorStart(queryDesc, eflags); else standard_ExecutorStart(queryDesc, eflags); - - if (auto_explain_enabled()) - { - /* - * Set up to track total elapsed time in ExecutorRun. Make sure the - * space is allocated in the per-query context so it will go away at - * ExecutorEnd. - */ - if (queryDesc->totaltime == NULL) - { - MemoryContext oldcxt; - - oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); - queryDesc->totaltime = InstrAlloc(INSTRUMENT_ALL); - MemoryContextSwitchTo(oldcxt); - } - } } /* @@ -434,7 +420,7 @@ explain_ExecutorFinish(QueryDesc *queryDesc) static void explain_ExecutorEnd(QueryDesc *queryDesc) { - if (queryDesc->totaltime && auto_explain_enabled()) + if (queryDesc->query_instr && auto_explain_enabled()) { MemoryContext oldcxt; double msec; @@ -446,7 +432,7 @@ explain_ExecutorEnd(QueryDesc *queryDesc) oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); /* Log plan if duration is exceeded. */ - msec = INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total); + msec = INSTR_TIME_GET_MILLISEC(queryDesc->query_instr->total); if (msec >= auto_explain_log_min_duration) { ExplainState *es = NewExplainState(); diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index b5000bc14b7..95a5411a39d 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -993,11 +993,6 @@ pgss_planner(Query *parse, static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags) { - if (prev_ExecutorStart) - prev_ExecutorStart(queryDesc, eflags); - else - standard_ExecutorStart(queryDesc, eflags); - /* * If query has queryId zero, don't track it. This prevents double * counting of optimizable statements that are directly contained in @@ -1005,20 +1000,14 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags) */ if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0)) { - /* - * Set up to track total elapsed time in ExecutorRun. Make sure the - * space is allocated in the per-query context so it will go away at - * ExecutorEnd. - */ - if (queryDesc->totaltime == NULL) - { - MemoryContext oldcxt; - - oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); - queryDesc->totaltime = InstrAlloc(INSTRUMENT_ALL); - MemoryContextSwitchTo(oldcxt); - } + /* Request all summary instrumentation, i.e. timing, buffers and WAL */ + queryDesc->query_instr_options |= INSTRUMENT_ALL; } + + if (prev_ExecutorStart) + prev_ExecutorStart(queryDesc, eflags); + else + standard_ExecutorStart(queryDesc, eflags); } /* @@ -1071,7 +1060,7 @@ pgss_ExecutorEnd(QueryDesc *queryDesc) { int64 queryId = queryDesc->plannedstmt->queryId; - if (queryId != INT64CONST(0) && queryDesc->totaltime && + if (queryId != INT64CONST(0) && queryDesc->query_instr && pgss_enabled(nesting_level)) { pgss_store(queryDesc->sourceText, @@ -1079,10 +1068,10 @@ pgss_ExecutorEnd(QueryDesc *queryDesc) queryDesc->plannedstmt->stmt_location, queryDesc->plannedstmt->stmt_len, PGSS_EXEC, - INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total), + INSTR_TIME_GET_MILLISEC(queryDesc->query_instr->total), queryDesc->estate->es_total_processed, - &queryDesc->totaltime->bufusage, - &queryDesc->totaltime->walusage, + &queryDesc->query_instr->bufusage, + &queryDesc->query_instr->walusage, queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL, NULL, queryDesc->estate->es_parallel_workers_to_launch, diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index b0f636bf8b6..4b30f768680 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -250,6 +250,15 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) estate->es_instrument = queryDesc->instrument_options; estate->es_jit_flags = queryDesc->plannedstmt->jitFlags; + /* + * Set up query-level instrumentation if extensions have requested it via + * query_instr_options. Ensure an extension has not allocated query_instr + * itself. + */ + Assert(queryDesc->query_instr == NULL); + if (queryDesc->query_instr_options) + queryDesc->query_instr = InstrAlloc(queryDesc->query_instr_options); + /* * Set up an AFTER-trigger statement context, unless told not to, or * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called). @@ -332,8 +341,8 @@ standard_ExecutorRun(QueryDesc *queryDesc, oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); /* Allow instrumentation of Executor overall runtime */ - if (queryDesc->totaltime) - InstrStart(queryDesc->totaltime); + if (queryDesc->query_instr) + InstrStart(queryDesc->query_instr); /* * extract information from the query descriptor and the query feature. @@ -384,8 +393,8 @@ standard_ExecutorRun(QueryDesc *queryDesc, if (sendTuples) dest->rShutdown(dest); - if (queryDesc->totaltime) - InstrStop(queryDesc->totaltime); + if (queryDesc->query_instr) + InstrStop(queryDesc->query_instr); MemoryContextSwitchTo(oldcontext); } @@ -434,8 +443,8 @@ standard_ExecutorFinish(QueryDesc *queryDesc) oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); /* Allow instrumentation of Executor overall runtime */ - if (queryDesc->totaltime) - InstrStart(queryDesc->totaltime); + if (queryDesc->query_instr) + InstrStart(queryDesc->query_instr); /* Run ModifyTable nodes to completion */ ExecPostprocessPlan(estate); @@ -444,8 +453,8 @@ standard_ExecutorFinish(QueryDesc *queryDesc) if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS)) AfterTriggerEndQuery(estate); - if (queryDesc->totaltime) - InstrStop(queryDesc->totaltime); + if (queryDesc->query_instr) + InstrStop(queryDesc->query_instr); MemoryContextSwitchTo(oldcontext); @@ -524,7 +533,7 @@ standard_ExecutorEnd(QueryDesc *queryDesc) queryDesc->tupDesc = NULL; queryDesc->estate = NULL; queryDesc->planstate = NULL; - queryDesc->totaltime = NULL; + queryDesc->query_instr = NULL; } /* ---------------------------------------------------------------- diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index d8fc75d0bb9..ee731000820 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -86,12 +86,13 @@ CreateQueryDesc(PlannedStmt *plannedstmt, qd->params = params; /* parameter values passed into query */ qd->queryEnv = queryEnv; qd->instrument_options = instrument_options; /* instrumentation wanted? */ + qd->query_instr_options = 0; /* null these fields until set by ExecutorStart */ qd->tupDesc = NULL; qd->estate = NULL; qd->planstate = NULL; - qd->totaltime = NULL; + qd->query_instr = NULL; /* not yet executed */ qd->already_executed = false; diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index d3a57242844..37c2576e4bc 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -42,6 +42,8 @@ typedef struct QueryDesc ParamListInfo params; /* param values being passed in */ QueryEnvironment *queryEnv; /* query environment passed in */ int instrument_options; /* OR of InstrumentOption flags */ + int query_instr_options; /* OR of InstrumentOption flags for + * query_instr */ /* These fields are set by ExecutorStart */ TupleDesc tupDesc; /* descriptor for result tuples */ @@ -51,8 +53,8 @@ typedef struct QueryDesc /* This field is set by ExecutePlan */ bool already_executed; /* true if previously executed */ - /* This is always set NULL by the core system, but plugins can change it */ - struct Instrumentation *totaltime; /* total time spent in ExecutorRun */ + /* This field is allocated by ExecutorStart if needed */ + struct Instrumentation *query_instr; /* query level instrumentation */ } QueryDesc; /* in pquery.c */