pgstattuple: Optimize btree and hash index functions with streaming read

This commit replaces the synchronous ReadBufferExtended() loops with the
streaming read routines, affecting pgstatindex() (for btree) and
pgstathashindex() (for hash indexes).

Under test conditions similar to 6c228755ad (some dm_delay and
debug_io_direct=data), this can yield noticeable runtime and I/O
improvements.

Author: Xuneng Zhou <xunengzhou@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Discussion: https://postgr.es/m/CABPTF7VrqfbcDXqGrdLQ2xaQ=K0RzExNuw6U_GGqzSJu32wfdQ@mail.gmail.com
This commit is contained in:
Michael Paquier 2026-03-13 10:48:45 +09:00
parent a0b6ef29a5
commit 213f0079b3

View file

@ -37,6 +37,7 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/read_stream.h"
#include "utils/rel.h"
#include "utils/varlena.h"
@ -217,6 +218,9 @@ pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
BlockNumber blkno;
BTIndexStat indexStat;
BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
BlockRangeReadStreamPrivate p;
ReadStream *stream;
BlockNumber startblk;
if (!IS_INDEX(rel) || !IS_BTREE(rel))
ereport(ERROR,
@ -273,11 +277,28 @@ pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
indexStat.fragments = 0;
/*
* Scan all blocks except the metapage
* Scan all blocks except the metapage (0th page) using streaming reads
*/
nblocks = RelationGetNumberOfBlocks(rel);
startblk = BTREE_METAPAGE + 1;
for (blkno = 1; blkno < nblocks; blkno++)
p.current_blocknum = startblk;
p.last_exclusive = nblocks;
/*
* It is safe to use batchmode as block_range_read_stream_cb takes no
* locks.
*/
stream = read_stream_begin_relation(READ_STREAM_FULL |
READ_STREAM_USE_BATCHING,
bstrategy,
rel,
MAIN_FORKNUM,
block_range_read_stream_cb,
&p,
0);
for (blkno = startblk; blkno < nblocks; blkno++)
{
Buffer buffer;
Page page;
@ -285,8 +306,7 @@ pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
CHECK_FOR_INTERRUPTS();
/* Read and lock buffer */
buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
buffer = read_stream_next_buffer(stream, NULL);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
@ -322,11 +342,12 @@ pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
else
indexStat.internal_pages++;
/* Unlock and release buffer */
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
read_stream_end(stream);
relation_close(rel, AccessShareLock);
/*----------------------------
@ -600,6 +621,9 @@ pgstathashindex(PG_FUNCTION_ARGS)
HashMetaPage metap;
float8 free_percent;
uint64 total_space;
BlockRangeReadStreamPrivate p;
ReadStream *stream;
BlockNumber startblk;
/*
* This uses relation_open() and not index_open(). The latter allows
@ -644,16 +668,33 @@ pgstathashindex(PG_FUNCTION_ARGS)
/* prepare access strategy for this index */
bstrategy = GetAccessStrategy(BAS_BULKREAD);
/* Start from blkno 1 as 0th block is metapage */
for (blkno = 1; blkno < nblocks; blkno++)
/* Scan all blocks except the metapage (0th page) using streaming reads */
startblk = HASH_METAPAGE + 1;
p.current_blocknum = startblk;
p.last_exclusive = nblocks;
/*
* It is safe to use batchmode as block_range_read_stream_cb takes no
* locks.
*/
stream = read_stream_begin_relation(READ_STREAM_FULL |
READ_STREAM_USE_BATCHING,
bstrategy,
rel,
MAIN_FORKNUM,
block_range_read_stream_cb,
&p,
0);
for (blkno = startblk; blkno < nblocks; blkno++)
{
Buffer buf;
Page page;
CHECK_FOR_INTERRUPTS();
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
bstrategy);
buf = read_stream_next_buffer(stream, NULL);
LockBuffer(buf, BUFFER_LOCK_SHARE);
page = BufferGetPage(buf);
@ -698,6 +739,9 @@ pgstathashindex(PG_FUNCTION_ARGS)
UnlockReleaseBuffer(buf);
}
Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
read_stream_end(stream);
/* Done accessing the index */
relation_close(rel, AccessShareLock);