instrumentation: Allocate query level instrumentation in ExecutorStart

Until now extensions that wanted to measure overall query execution could
create QueryDesc->totaltime, which the core executor would then start and
stop.  That's a bit odd and composes badly, e.g. extensions always had to use
INSTRUMENT_ALL, because otherwise another extension might not get what they
need.

Instead this introduces a new field, QueryDesc->query_instr_options, that
extensions can use to indicate whether they need query level instrumentation
populated, and with which instrumentation options. Extensions should take care
to only add options they need, instead of replacing the options of others.

The prior name of the field, totaltime, sounded like it would only measure
time, but these days the instrumentation infrastructure can track more
resources.  The secondary benefit is that this will make it obvious to
extensions that they may not create the Instrumentation struct themselves
anymore (often extensions build only against a postgres build without
assertions).

Adjust pg_stat_statements and auto_explain to match, and lower the
requested instrumentation level for auto_explain to INSTRUMENT_TIMER,
since the summary instrumentation it needs is only runtime.

The reason to push this now, rather in the PG 20 cycle, is that 5a79e78501
already required extensions using query level instrumentations to adjust their
code, and it seemed undesirable to require them to do so again for 20.

Author: Lukas Fittl <lukas@fittl.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAP53Pkyqsht+exJQYRsjhSWYKu+vFGHhPub7m6PmFD6Or0=p1g@mail.gmail.com
This commit is contained in:
Andres Freund 2026-04-08 00:02:26 -04:00
parent db93032a7c
commit 2c16deee2f
5 changed files with 40 additions and 53 deletions

View file

@ -346,6 +346,9 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
if (auto_explain_enabled())
{
/* We're always interested in runtime */
queryDesc->query_instr_options |= INSTRUMENT_TIMER;
/* Enable per-node instrumentation iff log_analyze is required. */
if (auto_explain_log_analyze && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
{
@ -366,23 +369,6 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
prev_ExecutorStart(queryDesc, eflags);
else
standard_ExecutorStart(queryDesc, eflags);
if (auto_explain_enabled())
{
/*
* Set up to track total elapsed time in ExecutorRun. Make sure the
* space is allocated in the per-query context so it will go away at
* ExecutorEnd.
*/
if (queryDesc->totaltime == NULL)
{
MemoryContext oldcxt;
oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
queryDesc->totaltime = InstrAlloc(INSTRUMENT_ALL);
MemoryContextSwitchTo(oldcxt);
}
}
}
/*
@ -434,7 +420,7 @@ explain_ExecutorFinish(QueryDesc *queryDesc)
static void
explain_ExecutorEnd(QueryDesc *queryDesc)
{
if (queryDesc->totaltime && auto_explain_enabled())
if (queryDesc->query_instr && auto_explain_enabled())
{
MemoryContext oldcxt;
double msec;
@ -446,7 +432,7 @@ explain_ExecutorEnd(QueryDesc *queryDesc)
oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
/* Log plan if duration is exceeded. */
msec = INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total);
msec = INSTR_TIME_GET_MILLISEC(queryDesc->query_instr->total);
if (msec >= auto_explain_log_min_duration)
{
ExplainState *es = NewExplainState();

View file

@ -993,11 +993,6 @@ pgss_planner(Query *parse,
static void
pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
if (prev_ExecutorStart)
prev_ExecutorStart(queryDesc, eflags);
else
standard_ExecutorStart(queryDesc, eflags);
/*
* If query has queryId zero, don't track it. This prevents double
* counting of optimizable statements that are directly contained in
@ -1005,20 +1000,14 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
*/
if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
{
/*
* Set up to track total elapsed time in ExecutorRun. Make sure the
* space is allocated in the per-query context so it will go away at
* ExecutorEnd.
*/
if (queryDesc->totaltime == NULL)
{
MemoryContext oldcxt;
oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
queryDesc->totaltime = InstrAlloc(INSTRUMENT_ALL);
MemoryContextSwitchTo(oldcxt);
}
/* Request all summary instrumentation, i.e. timing, buffers and WAL */
queryDesc->query_instr_options |= INSTRUMENT_ALL;
}
if (prev_ExecutorStart)
prev_ExecutorStart(queryDesc, eflags);
else
standard_ExecutorStart(queryDesc, eflags);
}
/*
@ -1071,7 +1060,7 @@ pgss_ExecutorEnd(QueryDesc *queryDesc)
{
int64 queryId = queryDesc->plannedstmt->queryId;
if (queryId != INT64CONST(0) && queryDesc->totaltime &&
if (queryId != INT64CONST(0) && queryDesc->query_instr &&
pgss_enabled(nesting_level))
{
pgss_store(queryDesc->sourceText,
@ -1079,10 +1068,10 @@ pgss_ExecutorEnd(QueryDesc *queryDesc)
queryDesc->plannedstmt->stmt_location,
queryDesc->plannedstmt->stmt_len,
PGSS_EXEC,
INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total),
INSTR_TIME_GET_MILLISEC(queryDesc->query_instr->total),
queryDesc->estate->es_total_processed,
&queryDesc->totaltime->bufusage,
&queryDesc->totaltime->walusage,
&queryDesc->query_instr->bufusage,
&queryDesc->query_instr->walusage,
queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
NULL,
queryDesc->estate->es_parallel_workers_to_launch,

View file

@ -250,6 +250,15 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
estate->es_instrument = queryDesc->instrument_options;
estate->es_jit_flags = queryDesc->plannedstmt->jitFlags;
/*
* Set up query-level instrumentation if extensions have requested it via
* query_instr_options. Ensure an extension has not allocated query_instr
* itself.
*/
Assert(queryDesc->query_instr == NULL);
if (queryDesc->query_instr_options)
queryDesc->query_instr = InstrAlloc(queryDesc->query_instr_options);
/*
* Set up an AFTER-trigger statement context, unless told not to, or
* unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
@ -332,8 +341,8 @@ standard_ExecutorRun(QueryDesc *queryDesc,
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
/* Allow instrumentation of Executor overall runtime */
if (queryDesc->totaltime)
InstrStart(queryDesc->totaltime);
if (queryDesc->query_instr)
InstrStart(queryDesc->query_instr);
/*
* extract information from the query descriptor and the query feature.
@ -384,8 +393,8 @@ standard_ExecutorRun(QueryDesc *queryDesc,
if (sendTuples)
dest->rShutdown(dest);
if (queryDesc->totaltime)
InstrStop(queryDesc->totaltime);
if (queryDesc->query_instr)
InstrStop(queryDesc->query_instr);
MemoryContextSwitchTo(oldcontext);
}
@ -434,8 +443,8 @@ standard_ExecutorFinish(QueryDesc *queryDesc)
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
/* Allow instrumentation of Executor overall runtime */
if (queryDesc->totaltime)
InstrStart(queryDesc->totaltime);
if (queryDesc->query_instr)
InstrStart(queryDesc->query_instr);
/* Run ModifyTable nodes to completion */
ExecPostprocessPlan(estate);
@ -444,8 +453,8 @@ standard_ExecutorFinish(QueryDesc *queryDesc)
if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
AfterTriggerEndQuery(estate);
if (queryDesc->totaltime)
InstrStop(queryDesc->totaltime);
if (queryDesc->query_instr)
InstrStop(queryDesc->query_instr);
MemoryContextSwitchTo(oldcontext);
@ -524,7 +533,7 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
queryDesc->tupDesc = NULL;
queryDesc->estate = NULL;
queryDesc->planstate = NULL;
queryDesc->totaltime = NULL;
queryDesc->query_instr = NULL;
}
/* ----------------------------------------------------------------

View file

@ -86,12 +86,13 @@ CreateQueryDesc(PlannedStmt *plannedstmt,
qd->params = params; /* parameter values passed into query */
qd->queryEnv = queryEnv;
qd->instrument_options = instrument_options; /* instrumentation wanted? */
qd->query_instr_options = 0;
/* null these fields until set by ExecutorStart */
qd->tupDesc = NULL;
qd->estate = NULL;
qd->planstate = NULL;
qd->totaltime = NULL;
qd->query_instr = NULL;
/* not yet executed */
qd->already_executed = false;

View file

@ -42,6 +42,8 @@ typedef struct QueryDesc
ParamListInfo params; /* param values being passed in */
QueryEnvironment *queryEnv; /* query environment passed in */
int instrument_options; /* OR of InstrumentOption flags */
int query_instr_options; /* OR of InstrumentOption flags for
* query_instr */
/* These fields are set by ExecutorStart */
TupleDesc tupDesc; /* descriptor for result tuples */
@ -51,8 +53,8 @@ typedef struct QueryDesc
/* This field is set by ExecutePlan */
bool already_executed; /* true if previously executed */
/* This is always set NULL by the core system, but plugins can change it */
struct Instrumentation *totaltime; /* total time spent in ExecutorRun */
/* This field is allocated by ExecutorStart if needed */
struct Instrumentation *query_instr; /* query level instrumentation */
} QueryDesc;
/* in pquery.c */