mirror of
https://github.com/postgres/postgres.git
synced 2026-04-15 05:56:48 -04:00
Previously, parallel index and index-only scans packed the parallel scan descriptor and shared instrumentation (for EXPLAIN ANALYZE) into a single DSM allocation. Since scans may be instrumented without being parallel-aware, and vice versa, using separate DSM chunks -- each with its own TOC key -- is cleaner. A future commit will extend this pattern to other scan node types. Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-by: Tomas Vondra <tomas@vondra.me> Discussion: https://postgr.es/m/flat/a177a6dd-240b-455a-8f25-aca0b1c08c6e%40vondra.me
448 lines
13 KiB
C
448 lines
13 KiB
C
/*-------------------------------------------------------------------------
 *
 * nodeBitmapIndexscan.c
 *	  Routines to support bitmapped index scans of relations
 *
 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeBitmapIndexscan.c
 *
 *-------------------------------------------------------------------------
 */
|
|
/*
 * INTERFACE ROUTINES
 *		MultiExecBitmapIndexScan	scans a relation using index.
 *		ExecInitBitmapIndexScan		creates and initializes state info.
 *		ExecReScanBitmapIndexScan	prepares to rescan the plan.
 *		ExecEndBitmapIndexScan		releases all storage.
 */
|
|
#include "postgres.h"
|
|
|
|
#include "access/genam.h"
|
|
#include "executor/executor.h"
|
|
#include "executor/instrument.h"
|
|
#include "executor/nodeBitmapIndexscan.h"
|
|
#include "executor/nodeIndexscan.h"
|
|
#include "miscadmin.h"
|
|
#include "nodes/tidbitmap.h"
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScan
|
|
*
|
|
* stub for pro forma compliance
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
static TupleTableSlot *
|
|
ExecBitmapIndexScan(PlanState *pstate)
|
|
{
|
|
elog(ERROR, "BitmapIndexScan node does not support ExecProcNode call convention");
|
|
return NULL;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* MultiExecBitmapIndexScan(node)
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
Node *
|
|
MultiExecBitmapIndexScan(BitmapIndexScanState *node)
|
|
{
|
|
TIDBitmap *tbm;
|
|
IndexScanDesc scandesc;
|
|
double nTuples = 0;
|
|
bool doscan;
|
|
|
|
/* must provide our own instrumentation support */
|
|
if (node->ss.ps.instrument)
|
|
InstrStartNode(node->ss.ps.instrument);
|
|
|
|
/*
|
|
* extract necessary information from index scan node
|
|
*/
|
|
scandesc = node->biss_ScanDesc;
|
|
|
|
/*
|
|
* If we have runtime keys and they've not already been set up, do it now.
|
|
* Array keys are also treated as runtime keys; note that if ExecReScan
|
|
* returns with biss_RuntimeKeysReady still false, then there is an empty
|
|
* array key so we should do nothing.
|
|
*/
|
|
if (!node->biss_RuntimeKeysReady &&
|
|
(node->biss_NumRuntimeKeys != 0 || node->biss_NumArrayKeys != 0))
|
|
{
|
|
ExecReScan((PlanState *) node);
|
|
doscan = node->biss_RuntimeKeysReady;
|
|
}
|
|
else
|
|
doscan = true;
|
|
|
|
/*
|
|
* Prepare the result bitmap. Normally we just create a new one to pass
|
|
* back; however, our parent node is allowed to store a pre-made one into
|
|
* node->biss_result, in which case we just OR our tuple IDs into the
|
|
* existing bitmap. (This saves needing explicit UNION steps.)
|
|
*/
|
|
if (node->biss_result)
|
|
{
|
|
tbm = node->biss_result;
|
|
node->biss_result = NULL; /* reset for next time */
|
|
}
|
|
else
|
|
{
|
|
/* XXX should we use less than work_mem for this? */
|
|
tbm = tbm_create(work_mem * (Size) 1024,
|
|
((BitmapIndexScan *) node->ss.ps.plan)->isshared ?
|
|
node->ss.ps.state->es_query_dsa : NULL);
|
|
}
|
|
|
|
/*
|
|
* Get TIDs from index and insert into bitmap
|
|
*/
|
|
while (doscan)
|
|
{
|
|
nTuples += (double) index_getbitmap(scandesc, tbm);
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys,
|
|
node->biss_NumArrayKeys);
|
|
if (doscan) /* reset index scan */
|
|
index_rescan(node->biss_ScanDesc,
|
|
node->biss_ScanKeys, node->biss_NumScanKeys,
|
|
NULL, 0);
|
|
}
|
|
|
|
/* must provide our own instrumentation support */
|
|
if (node->ss.ps.instrument)
|
|
InstrStopNode(node->ss.ps.instrument, nTuples);
|
|
|
|
return (Node *) tbm;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecReScanBitmapIndexScan(node)
|
|
*
|
|
* Recalculates the values of any scan keys whose value depends on
|
|
* information known at runtime, then rescans the indexed relation.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecReScanBitmapIndexScan(BitmapIndexScanState *node)
|
|
{
|
|
ExprContext *econtext = node->biss_RuntimeContext;
|
|
|
|
/*
|
|
* Reset the runtime-key context so we don't leak memory as each outer
|
|
* tuple is scanned. Note this assumes that we will recalculate *all*
|
|
* runtime keys on each call.
|
|
*/
|
|
if (econtext)
|
|
ResetExprContext(econtext);
|
|
|
|
/*
|
|
* If we are doing runtime key calculations (ie, any of the index key
|
|
* values weren't simple Consts), compute the new key values.
|
|
*
|
|
* Array keys are also treated as runtime keys; note that if we return
|
|
* with biss_RuntimeKeysReady still false, then there is an empty array
|
|
* key so no index scan is needed.
|
|
*/
|
|
if (node->biss_NumRuntimeKeys != 0)
|
|
ExecIndexEvalRuntimeKeys(econtext,
|
|
node->biss_RuntimeKeys,
|
|
node->biss_NumRuntimeKeys);
|
|
if (node->biss_NumArrayKeys != 0)
|
|
node->biss_RuntimeKeysReady =
|
|
ExecIndexEvalArrayKeys(econtext,
|
|
node->biss_ArrayKeys,
|
|
node->biss_NumArrayKeys);
|
|
else
|
|
node->biss_RuntimeKeysReady = true;
|
|
|
|
/* reset index scan */
|
|
if (node->biss_RuntimeKeysReady)
|
|
index_rescan(node->biss_ScanDesc,
|
|
node->biss_ScanKeys, node->biss_NumScanKeys,
|
|
NULL, 0);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecEndBitmapIndexScan
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecEndBitmapIndexScan(BitmapIndexScanState *node)
|
|
{
|
|
Relation indexRelationDesc;
|
|
IndexScanDesc indexScanDesc;
|
|
|
|
/*
|
|
* extract information from the node
|
|
*/
|
|
indexRelationDesc = node->biss_RelationDesc;
|
|
indexScanDesc = node->biss_ScanDesc;
|
|
|
|
/*
|
|
* When ending a parallel worker, copy the statistics gathered by the
|
|
* worker back into shared memory so that it can be picked up by the main
|
|
* process to report in EXPLAIN ANALYZE
|
|
*/
|
|
if (node->biss_SharedInfo != NULL && IsParallelWorker())
|
|
{
|
|
IndexScanInstrumentation *winstrument;
|
|
|
|
Assert(ParallelWorkerNumber < node->biss_SharedInfo->num_workers);
|
|
winstrument = &node->biss_SharedInfo->winstrument[ParallelWorkerNumber];
|
|
|
|
/*
|
|
* We have to accumulate the stats rather than performing a memcpy.
|
|
* When a Gather/GatherMerge node finishes it will perform planner
|
|
* shutdown on the workers. On rescan it will spin up new workers
|
|
* which will have a new BitmapIndexScanState and zeroed stats.
|
|
*/
|
|
winstrument->nsearches += node->biss_Instrument->nsearches;
|
|
}
|
|
|
|
/*
|
|
* close the index relation (no-op if we didn't open it)
|
|
*/
|
|
if (indexScanDesc)
|
|
index_endscan(indexScanDesc);
|
|
if (indexRelationDesc)
|
|
index_close(indexRelationDesc, NoLock);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecInitBitmapIndexScan
|
|
*
|
|
* Initializes the index scan's state information.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
BitmapIndexScanState *
|
|
ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
|
|
{
|
|
BitmapIndexScanState *indexstate;
|
|
LOCKMODE lockmode;
|
|
|
|
/* check for unsupported flags */
|
|
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
|
|
|
|
/*
|
|
* create state structure
|
|
*/
|
|
indexstate = makeNode(BitmapIndexScanState);
|
|
indexstate->ss.ps.plan = (Plan *) node;
|
|
indexstate->ss.ps.state = estate;
|
|
indexstate->ss.ps.ExecProcNode = ExecBitmapIndexScan;
|
|
|
|
/* normally we don't make the result bitmap till runtime */
|
|
indexstate->biss_result = NULL;
|
|
|
|
/*
|
|
* We do not open or lock the base relation here. We assume that an
|
|
* ancestor BitmapHeapScan node is holding AccessShareLock (or better) on
|
|
* the heap relation throughout the execution of the plan tree.
|
|
*/
|
|
|
|
indexstate->ss.ss_currentRelation = NULL;
|
|
indexstate->ss.ss_currentScanDesc = NULL;
|
|
|
|
/*
|
|
* Miscellaneous initialization
|
|
*
|
|
* We do not need a standard exprcontext for this node, though we may
|
|
* decide below to create a runtime-key exprcontext
|
|
*/
|
|
|
|
/*
|
|
* initialize child expressions
|
|
*
|
|
* We don't need to initialize targetlist or qual since neither are used.
|
|
*
|
|
* Note: we don't initialize all of the indexqual expression, only the
|
|
* sub-parts corresponding to runtime keys (see below).
|
|
*/
|
|
|
|
/*
|
|
* If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
|
|
* here. This allows an index-advisor plugin to EXPLAIN a plan containing
|
|
* references to nonexistent indexes.
|
|
*/
|
|
if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
|
|
return indexstate;
|
|
|
|
/* Set up instrumentation of bitmap index scans if requested */
|
|
if (estate->es_instrument)
|
|
indexstate->biss_Instrument = palloc0_object(IndexScanInstrumentation);
|
|
|
|
/* Open the index relation. */
|
|
lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
|
|
indexstate->biss_RelationDesc = index_open(node->indexid, lockmode);
|
|
|
|
/*
|
|
* Initialize index-specific scan state
|
|
*/
|
|
indexstate->biss_RuntimeKeysReady = false;
|
|
indexstate->biss_RuntimeKeys = NULL;
|
|
indexstate->biss_NumRuntimeKeys = 0;
|
|
|
|
/*
|
|
* build the index scan keys from the index qualification
|
|
*/
|
|
ExecIndexBuildScanKeys((PlanState *) indexstate,
|
|
indexstate->biss_RelationDesc,
|
|
node->indexqual,
|
|
false,
|
|
&indexstate->biss_ScanKeys,
|
|
&indexstate->biss_NumScanKeys,
|
|
&indexstate->biss_RuntimeKeys,
|
|
&indexstate->biss_NumRuntimeKeys,
|
|
&indexstate->biss_ArrayKeys,
|
|
&indexstate->biss_NumArrayKeys);
|
|
|
|
/*
|
|
* If we have runtime keys or array keys, we need an ExprContext to
|
|
* evaluate them. We could just create a "standard" plan node exprcontext,
|
|
* but to keep the code looking similar to nodeIndexscan.c, it seems
|
|
* better to stick with the approach of using a separate ExprContext.
|
|
*/
|
|
if (indexstate->biss_NumRuntimeKeys != 0 ||
|
|
indexstate->biss_NumArrayKeys != 0)
|
|
{
|
|
ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
|
|
|
|
ExecAssignExprContext(estate, &indexstate->ss.ps);
|
|
indexstate->biss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
|
|
indexstate->ss.ps.ps_ExprContext = stdecontext;
|
|
}
|
|
else
|
|
{
|
|
indexstate->biss_RuntimeContext = NULL;
|
|
}
|
|
|
|
/*
|
|
* Initialize scan descriptor.
|
|
*/
|
|
indexstate->biss_ScanDesc =
|
|
index_beginscan_bitmap(indexstate->biss_RelationDesc,
|
|
estate->es_snapshot,
|
|
indexstate->biss_Instrument,
|
|
indexstate->biss_NumScanKeys);
|
|
|
|
/*
|
|
* If no run-time keys to calculate, go ahead and pass the scankeys to the
|
|
* index AM.
|
|
*/
|
|
if (indexstate->biss_NumRuntimeKeys == 0 &&
|
|
indexstate->biss_NumArrayKeys == 0)
|
|
index_rescan(indexstate->biss_ScanDesc,
|
|
indexstate->biss_ScanKeys, indexstate->biss_NumScanKeys,
|
|
NULL, 0);
|
|
|
|
/*
|
|
* all done.
|
|
*/
|
|
return indexstate;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScanEstimate
|
|
*
|
|
* Compute the amount of space we'll need in the parallel
|
|
* query DSM, and inform pcxt->estimator about our needs.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecBitmapIndexScanEstimate(BitmapIndexScanState *node, ParallelContext *pcxt)
|
|
{
|
|
Size size;
|
|
|
|
/*
|
|
* Parallel bitmap index scans are not supported, but we still need to
|
|
* store the scan's instrumentation in DSM during parallel query
|
|
*/
|
|
if (!node->ss.ps.instrument || pcxt->nworkers == 0)
|
|
return;
|
|
|
|
size = offsetof(SharedIndexScanInstrumentation, winstrument) +
|
|
pcxt->nworkers * sizeof(IndexScanInstrumentation);
|
|
shm_toc_estimate_chunk(&pcxt->estimator, size);
|
|
shm_toc_estimate_keys(&pcxt->estimator, 1);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScanInitializeDSM
|
|
*
|
|
* Set up bitmap index scan shared instrumentation.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecBitmapIndexScanInitializeDSM(BitmapIndexScanState *node,
|
|
ParallelContext *pcxt)
|
|
{
|
|
Size size;
|
|
|
|
/* don't need this if not instrumenting or no workers */
|
|
if (!node->ss.ps.instrument || pcxt->nworkers == 0)
|
|
return;
|
|
|
|
size = offsetof(SharedIndexScanInstrumentation, winstrument) +
|
|
pcxt->nworkers * sizeof(IndexScanInstrumentation);
|
|
node->biss_SharedInfo =
|
|
(SharedIndexScanInstrumentation *) shm_toc_allocate(pcxt->toc,
|
|
size);
|
|
shm_toc_insert(pcxt->toc,
|
|
node->ss.ps.plan->plan_node_id +
|
|
PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
|
|
node->biss_SharedInfo);
|
|
|
|
/* Each per-worker area must start out as zeroes */
|
|
memset(node->biss_SharedInfo, 0, size);
|
|
node->biss_SharedInfo->num_workers = pcxt->nworkers;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScanInitializeWorker
|
|
*
|
|
* Copy relevant information from TOC into planstate.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecBitmapIndexScanInitializeWorker(BitmapIndexScanState *node,
|
|
ParallelWorkerContext *pwcxt)
|
|
{
|
|
/* don't need this if not instrumenting */
|
|
if (!node->ss.ps.instrument)
|
|
return;
|
|
|
|
node->biss_SharedInfo = (SharedIndexScanInstrumentation *)
|
|
shm_toc_lookup(pwcxt->toc,
|
|
node->ss.ps.plan->plan_node_id +
|
|
PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
|
|
false);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecBitmapIndexScanRetrieveInstrumentation
|
|
*
|
|
* Transfer bitmap index scan statistics from DSM to private memory.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecBitmapIndexScanRetrieveInstrumentation(BitmapIndexScanState *node)
|
|
{
|
|
SharedIndexScanInstrumentation *SharedInfo = node->biss_SharedInfo;
|
|
size_t size;
|
|
|
|
if (SharedInfo == NULL)
|
|
return;
|
|
|
|
/* Create a copy of SharedInfo in backend-local memory */
|
|
size = offsetof(SharedIndexScanInstrumentation, winstrument) +
|
|
SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
|
|
node->biss_SharedInfo = palloc(size);
|
|
memcpy(node->biss_SharedInfo, SharedInfo, size);
|
|
}
|