pg_stat_statements: track per-statement CPU time for planning and execution.
commit 13f897fbc8 (parent aa082bed0b)
16 changed files with 891 additions and 226 deletions
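The commit adds two columns to the pg_stat_statements view, total_plan_cpu_time and total_exec_cpu_time, holding the user plus system CPU time (in milliseconds, sampled with getrusage) spent planning and executing each normalized statement. A minimal sketch of how the new counters might be read once the extension is updated to 1.13 — the column names come from this commit, while the aliases, ordering and LIMIT are only illustrative:

    -- Statements whose executions spend the most time off-CPU
    -- (wall-clock minus CPU time: I/O waits, lock waits, scheduling delay).
    SELECT left(query, 50) AS query,
           calls,
           round(total_exec_time::numeric, 2)                         AS wall_ms,
           round(total_exec_cpu_time::numeric, 2)                     AS cpu_ms,
           round((total_exec_time - total_exec_cpu_time)::numeric, 2) AS off_cpu_ms
      FROM pg_stat_statements
     ORDER BY total_exec_time - total_exec_cpu_time DESC
     LIMIT 10;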

contrib/pg_stat_statements/Makefile

@@ -19,26 +19,9 @@ PGFILEDESC = "pg_stat_statements - execution statistics of SQL statements"
LDFLAGS_SL += $(filter -lm, $(LIBS))

REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf

# Note: Test "cleanup" is kept second to last, removing the extension.
REGRESS = \
	select \
	dml \
	cursors \
	utility \
	level_tracking \
	planning \
	user_activity \
	wal \
	entry_timestamp \
	privileges \
	extended \
	parallel \
	plancache \
	squashing \
	cleanup \
	oldextversions

REGRESS = select dml cursors utility level_tracking planning \
	user_activity wal entry_timestamp privileges extended \
	parallel cpu_time cleanup oldextversions

# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1

contrib/pg_stat_statements/bench_cpu_time.c (new file, 163 lines)

@@ -0,0 +1,163 @@
/*
 * bench_cpu_time.c
 *
 * Benchmark to demonstrate the difference between wall clock time
 * and CPU time (via getrusage) under varying system load.
 *
 * Under no load: wall ~= cpu
 * Under CPU saturation (e.g. stress-ng): wall >> cpu (scheduling delay)
 * Under I/O saturation: wall >> cpu (I/O wait)
 *
 * Usage:
 *   cc -O2 -o bench_cpu_time bench_cpu_time.c -lm
 *   ./bench_cpu_time [iterations]
 *
 * Then compare with:
 *   stress-ng --cpu $(nproc) --timeout 30s &
 *   ./bench_cpu_time
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <fcntl.h>

#define DEFAULT_ITERATIONS 10
#define WORK_SIZE 5000000		/* number of math ops per iteration */

static double
wall_time_ms(struct timespec *start, struct timespec *end)
{
    return (end->tv_sec - start->tv_sec) * 1000.0
        + (end->tv_nsec - start->tv_nsec) / 1000000.0;
}

static double
rusage_cpu_ms(struct rusage *start, struct rusage *end)
{
    double user_ms = (end->ru_utime.tv_sec - start->ru_utime.tv_sec) * 1000.0
        + (end->ru_utime.tv_usec - start->ru_utime.tv_usec) / 1000.0;
    double sys_ms = (end->ru_stime.tv_sec - start->ru_stime.tv_sec) * 1000.0
        + (end->ru_stime.tv_usec - start->ru_stime.tv_usec) / 1000.0;

    return user_ms + sys_ms;
}

/*
 * Pure CPU-bound work: compute a bunch of sqrt/sin to keep the CPU busy.
 * volatile to prevent the compiler from optimizing it away.
 */
static volatile double sink;

static void
cpu_bound_work(void)
{
    double acc = 0.0;

    for (int i = 0; i < WORK_SIZE; i++)
        acc += sqrt((double) i) * sin((double) i);

    sink = acc;
}

/*
 * Mixed I/O + CPU work: write to a temp file between CPU bursts.
 */
static void
io_bound_work(void)
{
    char buf[4096];
    char path[] = "/tmp/bench_cpu_time_XXXXXX";
    int fd;
    double acc = 0.0;

    memset(buf, 'x', sizeof(buf));
    fd = mkstemp(path);
    if (fd < 0)
    {
        perror("mkstemp");
        return;
    }
    unlink(path);

    for (int i = 0; i < WORK_SIZE / 10; i++)
    {
        acc += sqrt((double) i);
        if (i % 1000 == 0)
        {
            write(fd, buf, sizeof(buf));
            fsync(fd);
        }
    }

    close(fd);
    sink = acc;
}

static void
run_benchmark(const char *label, void (*workfn)(void), int iterations)
{
    double total_wall = 0.0;
    double total_cpu = 0.0;

    printf("\n=== %s (%d iterations) ===\n", label, iterations);
    printf("%4s %10s %10s %10s %7s\n",
           "#", "wall(ms)", "cpu(ms)", "off-cpu(ms)", "cpu%");

    for (int i = 0; i < iterations; i++)
    {
        struct timespec ts_start, ts_end;
        struct rusage ru_start, ru_end;

        getrusage(RUSAGE_SELF, &ru_start);
        clock_gettime(CLOCK_MONOTONIC, &ts_start);

        workfn();

        clock_gettime(CLOCK_MONOTONIC, &ts_end);
        getrusage(RUSAGE_SELF, &ru_end);

        double wall = wall_time_ms(&ts_start, &ts_end);
        double cpu = rusage_cpu_ms(&ru_start, &ru_end);
        double off = wall - cpu;
        double pct = (wall > 0) ? (cpu / wall) * 100.0 : 0.0;

        printf("%4d %10.2f %10.2f %10.2f %6.1f%%\n",
               i + 1, wall, cpu, off, pct);

        total_wall += wall;
        total_cpu += cpu;
    }

    double avg_wall = total_wall / iterations;
    double avg_cpu = total_cpu / iterations;
    double avg_off = avg_wall - avg_cpu;
    double avg_pct = (avg_wall > 0) ? (avg_cpu / avg_wall) * 100.0 : 0.0;

    printf("---- ---------- ---------- ---------- -------\n");
    printf(" avg %10.2f %10.2f %10.2f %6.1f%%\n",
           avg_wall, avg_cpu, avg_off, avg_pct);
}

int
main(int argc, char *argv[])
{
    int iterations = DEFAULT_ITERATIONS;

    if (argc > 1)
        iterations = atoi(argv[1]);
    if (iterations < 1)
        iterations = 1;

    printf("PID: %d\n", getpid());
    printf("Work size: %d ops per iteration\n", WORK_SIZE);

    run_benchmark("CPU-bound work", cpu_bound_work, iterations);
    run_benchmark("I/O-bound work", io_bound_work, iterations);

    return 0;
}

contrib/pg_stat_statements/benchmark/run_benchmark.sh (new executable file, 142 lines)

@@ -0,0 +1,142 @@
#!/bin/bash
|
||||
#
|
||||
# run_benchmark.sh - Benchmark pg_stat_statements CPU time tracking
|
||||
#
|
||||
# Runs pgbench at various concurrency levels, with and without CPU saturation,
|
||||
# then queries pg_stat_statements to compare wall time vs CPU time.
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
export PATH="$HOME/pginstall/bin:$PATH"
|
||||
DB="bench"
|
||||
NCPU=$(nproc)
|
||||
DURATION=30 # seconds per pgbench run
|
||||
PGBENCH_SCALE=10 # matches -s 10 from setup
|
||||
RESULTS_FILE="$HOME/benchmark_results.txt"
|
||||
|
||||
# Concurrency levels to test
|
||||
CLIENTS=(1 4 8 $((NCPU * 2)) $((NCPU * 4)))
|
||||
|
||||
# Stress-ng intensity: 0 = no load, N = N CPU-burning workers
|
||||
STRESS_LEVELS=(0 $NCPU $((NCPU * 2)))
|
||||
|
||||
header() {
|
||||
echo ""
|
||||
echo "================================================================"
|
||||
echo " $1"
|
||||
echo "================================================================"
|
||||
}
|
||||
|
||||
run_one() {
|
||||
local clients=$1
|
||||
local stress=$2
|
||||
local label="clients=$clients, stress=$stress"
|
||||
local stress_pid=""
|
||||
|
||||
echo ""
|
||||
echo "--- $label ---"
|
||||
|
||||
# Reset stats
|
||||
psql "$DB" -qc "SELECT pg_stat_statements_reset();" > /dev/null
|
||||
|
||||
# Start CPU stress if requested
|
||||
if [ "$stress" -gt 0 ]; then
|
||||
stress-ng --cpu "$stress" --timeout $((DURATION + 10))s &>/dev/null &
|
||||
stress_pid=$!
|
||||
sleep 1
|
||||
fi
|
||||
|
||||
# Run pgbench
|
||||
pgbench -c "$clients" -j "$clients" -T "$DURATION" "$DB" 2>&1 | grep -E "^(tps|latency|number)"
|
||||
|
||||
# Stop stress
|
||||
if [ -n "$stress_pid" ]; then
|
||||
kill "$stress_pid" 2>/dev/null
|
||||
wait "$stress_pid" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Query pg_stat_statements for the pgbench queries
|
||||
psql "$DB" -P pager=off -c "
|
||||
SELECT
|
||||
left(query, 60) AS query,
|
||||
calls,
|
||||
round(total_exec_time::numeric, 2) AS wall_ms,
|
||||
round(total_exec_cpu_time::numeric, 2) AS cpu_ms,
|
||||
round((total_exec_time - total_exec_cpu_time)::numeric, 2) AS off_cpu_ms,
|
||||
CASE WHEN total_exec_time > 0
|
||||
THEN round((total_exec_cpu_time / total_exec_time * 100)::numeric, 1)
|
||||
ELSE 0
|
||||
END AS cpu_pct
|
||||
FROM pg_stat_statements
|
||||
WHERE query NOT LIKE '%pg_stat_statements%'
|
||||
AND query NOT LIKE '%BEGIN%'
|
||||
AND query NOT LIKE '%END%'
|
||||
AND calls > 10
|
||||
ORDER BY total_exec_time DESC
|
||||
LIMIT 8;
|
||||
"
|
||||
|
||||
# Summary line for the aggregate
|
||||
psql "$DB" -t -A -c "
|
||||
SELECT
|
||||
'$clients' AS clients,
|
||||
'$stress' AS stress_workers,
|
||||
round(sum(total_exec_time)::numeric, 2) AS total_wall_ms,
|
||||
round(sum(total_exec_cpu_time)::numeric, 2) AS total_cpu_ms,
|
||||
round(sum(total_exec_time - total_exec_cpu_time)::numeric, 2) AS total_off_cpu_ms,
|
||||
CASE WHEN sum(total_exec_time) > 0
|
||||
THEN round((sum(total_exec_cpu_time) / sum(total_exec_time) * 100)::numeric, 1)
|
||||
ELSE 0
|
||||
END AS cpu_pct
|
||||
FROM pg_stat_statements
|
||||
WHERE query NOT LIKE '%pg_stat_statements%'
|
||||
AND calls > 0;
|
||||
" >> "$RESULTS_FILE"
|
||||
}
|
||||
|
||||
# ============================================================
|
||||
# Main
|
||||
# ============================================================
|
||||
|
||||
echo "Benchmark: pg_stat_statements CPU time tracking"
|
||||
echo "Instance: $(uname -n) ($(nproc) vCPUs)"
|
||||
echo "Duration per run: ${DURATION}s"
|
||||
echo ""
|
||||
|
||||
# Initialize results file with CSV header
|
||||
echo "clients,stress_workers,total_wall_ms,total_cpu_ms,total_off_cpu_ms,cpu_pct" > "$RESULTS_FILE"
|
||||
|
||||
header "Verifying CPU time columns exist"
|
||||
psql "$DB" -c "
|
||||
SELECT total_plan_cpu_time, total_exec_cpu_time
|
||||
FROM pg_stat_statements
|
||||
LIMIT 1;
|
||||
"
|
||||
|
||||
for stress in "${STRESS_LEVELS[@]}"; do
|
||||
if [ "$stress" -eq 0 ]; then
|
||||
header "NO CPU STRESS"
|
||||
else
|
||||
header "CPU STRESS: $stress workers on $NCPU vCPUs"
|
||||
fi
|
||||
|
||||
for clients in "${CLIENTS[@]}"; do
|
||||
run_one "$clients" "$stress"
|
||||
done
|
||||
done
|
||||
|
||||
header "SUMMARY"
|
||||
echo ""
|
||||
echo "Instance: $(uname -n) ($(nproc) vCPUs)"
|
||||
echo ""
|
||||
echo "clients | stress | wall_ms | cpu_ms | off_cpu_ms | cpu%"
|
||||
echo "--------|--------|---------------|---------------|---------------|------"
|
||||
while IFS='|' read -r c s w cpu off pct; do
|
||||
printf "%-8s| %-7s| %13s | %13s | %13s | %s%%\n" "$c" "$s" "$w" "$cpu" "$off" "$pct"
|
||||
done < "$RESULTS_FILE"
|
||||
|
||||
echo ""
|
||||
echo "Full results saved to: $RESULTS_FILE"
|
||||
echo ""
|
||||
echo "Key insight: as stress workers increase, cpu% should decrease"
|
||||
echo "(more off-CPU time = scheduling delay from CPU contention)"
|
||||
contrib/pg_stat_statements/benchmark/setup.sh (new executable file, 70 lines)

@@ -0,0 +1,70 @@
#!/bin/bash
|
||||
#
|
||||
# setup.sh - Build PostgreSQL from source and initialize a pgbench database
|
||||
# with pg_stat_statements enabled.
|
||||
#
|
||||
# Run from the postgres source root: bash contrib/pg_stat_statements/benchmark/setup.sh
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
PGDATA="$HOME/pgdata"
|
||||
PGINSTALL="$HOME/pginstall"
|
||||
NCPU=$(nproc)
|
||||
|
||||
echo "=== Building PostgreSQL (using $NCPU cores) ==="
|
||||
|
||||
# Wait for user_data to finish installing deps
|
||||
while [ ! -f /tmp/setup_done ]; do
|
||||
echo "Waiting for build dependencies to be installed..."
|
||||
sleep 5
|
||||
done
|
||||
|
||||
./configure \
|
||||
--prefix="$PGINSTALL" \
|
||||
--without-icu \
|
||||
--with-openssl \
|
||||
2>&1 | tail -3
|
||||
|
||||
make -j"$NCPU" 2>&1 | tail -5
|
||||
make install 2>&1 | tail -3
|
||||
|
||||
echo "=== Building pg_stat_statements ==="
|
||||
make -C contrib/pg_stat_statements install 2>&1 | tail -3
|
||||
|
||||
# Also build pgbench
|
||||
make -C contrib/pgbench install 2>&1 | tail -3 || \
|
||||
make -C src/bin/pgbench install 2>&1 | tail -3
|
||||
|
||||
export PATH="$PGINSTALL/bin:$PATH"
|
||||
|
||||
echo "=== Initializing database ==="
|
||||
# Stop any existing server before wiping data
|
||||
pg_ctl -D "$PGDATA" stop 2>/dev/null || true
|
||||
rm -rf "$PGDATA"
|
||||
initdb -D "$PGDATA" 2>&1 | tail -3
|
||||
|
||||
# Configure pg_stat_statements
|
||||
cat >> "$PGDATA/postgresql.conf" <<EOF
|
||||
|
||||
# pg_stat_statements config
|
||||
shared_preload_libraries = 'pg_stat_statements'
|
||||
pg_stat_statements.max = 10000
|
||||
pg_stat_statements.track = top
|
||||
pg_stat_statements.track_utility = on
|
||||
pg_stat_statements.track_planning = on
|
||||
compute_query_id = on
|
||||
EOF
|
||||
|
||||
echo "=== Starting PostgreSQL ==="
|
||||
pg_ctl -D "$PGDATA" -l "$PGDATA/logfile" start
|
||||
sleep 2
|
||||
|
||||
# Create the extension and initialize pgbench
|
||||
createdb bench
|
||||
psql bench -c "CREATE EXTENSION pg_stat_statements;"
|
||||
pgbench -i -s 10 bench 2>&1 | tail -3
|
||||
|
||||
echo ""
|
||||
echo "=== Setup complete ==="
|
||||
echo "PostgreSQL is running with pg_stat_statements enabled."
|
||||
echo "Run: bash contrib/pg_stat_statements/benchmark/run_benchmark.sh"
|
||||
contrib/pg_stat_statements/benchmark/terraform/.gitignore (new file, 5 lines)

@@ -0,0 +1,5 @@
*.pem
*.tfstate
*.tfstate.backup
.terraform/
.terraform.lock.hcl

contrib/pg_stat_statements/benchmark/terraform/main.tf (new file, 128 lines)

@@ -0,0 +1,128 @@
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 5.0"
|
||||
}
|
||||
http = {
|
||||
source = "hashicorp/http"
|
||||
version = "~> 3.0"
|
||||
}
|
||||
tls = {
|
||||
source = "hashicorp/tls"
|
||||
version = "~> 4.0"
|
||||
}
|
||||
local = {
|
||||
source = "hashicorp/local"
|
||||
version = "~> 2.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
provider "aws" {
|
||||
region = var.region
|
||||
}
|
||||
|
||||
# Auto-detect the caller's public IP if allowed_ssh_cidr is "auto"
|
||||
data "http" "my_ip" {
|
||||
count = var.allowed_ssh_cidr == "auto" ? 1 : 0
|
||||
url = "https://checkip.amazonaws.com"
|
||||
}
|
||||
|
||||
locals {
|
||||
ssh_cidr = var.allowed_ssh_cidr == "auto" ? "${trimspace(data.http.my_ip[0].response_body)}/32" : var.allowed_ssh_cidr
|
||||
}
|
||||
|
||||
# Generate an SSH key pair
|
||||
resource "tls_private_key" "bench" {
|
||||
algorithm = "ED25519"
|
||||
}
|
||||
|
||||
resource "aws_key_pair" "bench" {
|
||||
key_name_prefix = "pgss-bench-"
|
||||
public_key = tls_private_key.bench.public_key_openssh
|
||||
}
|
||||
|
||||
# Write the private key to a local file
|
||||
resource "local_file" "ssh_key" {
|
||||
content = tls_private_key.bench.private_key_openssh
|
||||
filename = "${path.module}/pgss-bench-key.pem"
|
||||
file_permission = "0600"
|
||||
}
|
||||
|
||||
# Use the default VPC
|
||||
data "aws_vpc" "default" {
|
||||
default = true
|
||||
}
|
||||
|
||||
# Find the latest Amazon Linux 2023 AMI
|
||||
data "aws_ami" "al2023" {
|
||||
most_recent = true
|
||||
owners = ["amazon"]
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["al2023-ami-*-x86_64"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "virtualization-type"
|
||||
values = ["hvm"]
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_security_group" "pgss_bench" {
|
||||
name_prefix = "pgss-bench-"
|
||||
description = "SSH access for pg_stat_statements benchmark"
|
||||
vpc_id = data.aws_vpc.default.id
|
||||
|
||||
ingress {
|
||||
from_port = 22
|
||||
to_port = 22
|
||||
protocol = "tcp"
|
||||
cidr_blocks = [local.ssh_cidr]
|
||||
description = "SSH"
|
||||
}
|
||||
|
||||
egress {
|
||||
from_port = 0
|
||||
to_port = 0
|
||||
protocol = "-1"
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
description = "All outbound"
|
||||
}
|
||||
|
||||
tags = {
|
||||
Name = "pgss-cpu-time-benchmark"
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_instance" "bench" {
|
||||
ami = data.aws_ami.al2023.id
|
||||
instance_type = var.instance_type
|
||||
key_name = aws_key_pair.bench.key_name
|
||||
vpc_security_group_ids = [aws_security_group.pgss_bench.id]
|
||||
|
||||
root_block_device {
|
||||
volume_size = 30
|
||||
volume_type = "gp3"
|
||||
}
|
||||
|
||||
user_data = <<-USERDATA
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
# Install build dependencies for PostgreSQL
|
||||
dnf groupinstall -y "Development Tools"
|
||||
dnf install -y \
|
||||
readline-devel zlib-devel libxml2-devel openssl-devel \
|
||||
flex bison perl git rsync \
|
||||
stress-ng
|
||||
|
||||
echo "=== Build dependencies installed ===" > /tmp/setup_done
|
||||
USERDATA
|
||||
|
||||
tags = {
|
||||
Name = "pgss-cpu-time-benchmark"
|
||||
}
|
||||
}
|
||||
contrib/pg_stat_statements/benchmark/terraform/outputs.tf (new file, 26 lines)

@@ -0,0 +1,26 @@
output "instance_id" {
  value = aws_instance.bench.id
}

output "public_ip" {
  value = aws_instance.bench.public_ip
}

output "ssh_key_file" {
  description = "Path to the generated private key"
  value       = local_file.ssh_key.filename
}

output "ssh_command" {
  value = "ssh -i ${abspath(local_file.ssh_key.filename)} ec2-user@${aws_instance.bench.public_ip}"
}

output "rsync_command" {
  description = "Run this from the postgres repo root to push the source tree"
  value       = "rsync -avz -e 'ssh -i ${abspath(local_file.ssh_key.filename)}' --exclude='.git' --exclude='*.o' --exclude='*.dylib' . ec2-user@${aws_instance.bench.public_ip}:~/postgres/"
}

output "benchmark_command" {
  description = "Run this after SSH'ing in to start the benchmark"
  value       = "cd ~/postgres && bash contrib/pg_stat_statements/benchmark/setup.sh && bash contrib/pg_stat_statements/benchmark/run_benchmark.sh"
}

contrib/pg_stat_statements/benchmark/terraform/variables.tf (new file, 17 lines)

@@ -0,0 +1,17 @@
variable "instance_type" {
  description = "EC2 instance type"
  type        = string
  default     = "c5.2xlarge"
}

variable "region" {
  description = "AWS region"
  type        = string
  default     = "us-east-1"
}

variable "allowed_ssh_cidr" {
  description = "CIDR block allowed to SSH (e.g. your IP). Use 'auto' to detect."
  type        = string
  default     = "auto"
}

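The three Terraform files above stand up a throwaway EC2 instance for the benchmark. A rough driving sequence, assuming only the Terraform CLI and the outputs defined in outputs.tf above (the printed commands are meant to be copy-pasted, not scripted):

    cd contrib/pg_stat_statements/benchmark/terraform
    terraform init
    terraform apply                      # provisions the instance, key pair and security group
    terraform output rsync_command       # run the printed rsync from the postgres repo root
    terraform output ssh_command         # then SSH in ...
    terraform output benchmark_command   # ... and run this command on the instance
    terraform destroy                    # tear everything down when finished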
contrib/pg_stat_statements/expected/cpu_time.out (new file, 62 lines)

@@ -0,0 +1,62 @@
--
|
||||
-- Tests for CPU time tracking
|
||||
--
|
||||
SET pg_stat_statements.track_utility = FALSE;
|
||||
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
|
||||
t
|
||||
---
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- Run a CPU-intensive query
|
||||
SELECT count(*) FROM generate_series(1, 100000);
|
||||
count
|
||||
--------
|
||||
100000
|
||||
(1 row)
|
||||
|
||||
-- Verify CPU time columns are populated and sensible
|
||||
SELECT query,
|
||||
total_exec_cpu_time > 0 AS has_cpu_time,
|
||||
total_exec_cpu_time <= total_exec_time AS cpu_le_wall
|
||||
FROM pg_stat_statements
|
||||
WHERE query LIKE '%generate_series%'
|
||||
ORDER BY query COLLATE "C";
|
||||
query | has_cpu_time | cpu_le_wall
|
||||
--------------------------------------------------+--------------+-------------
|
||||
SELECT count(*) FROM generate_series($1, $2) | t | t
|
||||
(1 row)
|
||||
|
||||
-- Test planning CPU time
|
||||
SET pg_stat_statements.track_planning = TRUE;
|
||||
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
|
||||
t
|
||||
---
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM generate_series(1, 100000);
|
||||
count
|
||||
--------
|
||||
100000
|
||||
(1 row)
|
||||
|
||||
SELECT query,
|
||||
total_exec_cpu_time >= 0 AS has_exec_cpu,
|
||||
total_plan_cpu_time >= 0 AS has_plan_cpu
|
||||
FROM pg_stat_statements
|
||||
WHERE query LIKE '%generate_series%'
|
||||
ORDER BY query COLLATE "C";
|
||||
query | has_exec_cpu | has_plan_cpu
|
||||
--------------------------------------------------+--------------+--------------
|
||||
SELECT count(*) FROM generate_series($1, $2) | t | t
|
||||
(1 row)
|
||||
|
||||
-- Cleanup
|
||||
SET pg_stat_statements.track_planning = FALSE;
|
||||
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
|
||||
t
|
||||
---
|
||||
t
|
||||
(1 row)
|
||||
|
||||
|
|
@ -385,7 +385,6 @@ AlTER EXTENSION pg_stat_statements UPDATE TO '1.12';
|
|||
wal_records | bigint | | |
|
||||
wal_fpi | bigint | | |
|
||||
wal_bytes | numeric | | |
|
||||
wal_buffers_full | bigint | | |
|
||||
jit_functions | bigint | | |
|
||||
jit_generation_time | double precision | | |
|
||||
jit_inlining_count | bigint | | |
|
||||
|
|
@ -402,7 +401,7 @@ AlTER EXTENSION pg_stat_statements UPDATE TO '1.12';
|
|||
minmax_stats_since | timestamp with time zone | | |
|
||||
|
||||
SELECT count(*) > 0 AS has_data FROM pg_stat_statements;
|
||||
has_data
|
||||
has_data
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
|
@ -411,65 +410,64 @@ SELECT count(*) > 0 AS has_data FROM pg_stat_statements;
|
|||
AlTER EXTENSION pg_stat_statements UPDATE TO '1.13';
|
||||
\d pg_stat_statements
|
||||
View "public.pg_stat_statements"
|
||||
Column | Type | Collation | Nullable | Default
|
||||
Column | Type | Collation | Nullable | Default
|
||||
----------------------------+--------------------------+-----------+----------+---------
|
||||
userid | oid | | |
|
||||
dbid | oid | | |
|
||||
toplevel | boolean | | |
|
||||
queryid | bigint | | |
|
||||
query | text | | |
|
||||
plans | bigint | | |
|
||||
total_plan_time | double precision | | |
|
||||
min_plan_time | double precision | | |
|
||||
max_plan_time | double precision | | |
|
||||
mean_plan_time | double precision | | |
|
||||
stddev_plan_time | double precision | | |
|
||||
calls | bigint | | |
|
||||
total_exec_time | double precision | | |
|
||||
min_exec_time | double precision | | |
|
||||
max_exec_time | double precision | | |
|
||||
mean_exec_time | double precision | | |
|
||||
stddev_exec_time | double precision | | |
|
||||
rows | bigint | | |
|
||||
shared_blks_hit | bigint | | |
|
||||
shared_blks_read | bigint | | |
|
||||
shared_blks_dirtied | bigint | | |
|
||||
shared_blks_written | bigint | | |
|
||||
local_blks_hit | bigint | | |
|
||||
local_blks_read | bigint | | |
|
||||
local_blks_dirtied | bigint | | |
|
||||
local_blks_written | bigint | | |
|
||||
temp_blks_read | bigint | | |
|
||||
temp_blks_written | bigint | | |
|
||||
shared_blk_read_time | double precision | | |
|
||||
shared_blk_write_time | double precision | | |
|
||||
local_blk_read_time | double precision | | |
|
||||
local_blk_write_time | double precision | | |
|
||||
temp_blk_read_time | double precision | | |
|
||||
temp_blk_write_time | double precision | | |
|
||||
wal_records | bigint | | |
|
||||
wal_fpi | bigint | | |
|
||||
wal_bytes | numeric | | |
|
||||
wal_buffers_full | bigint | | |
|
||||
jit_functions | bigint | | |
|
||||
jit_generation_time | double precision | | |
|
||||
jit_inlining_count | bigint | | |
|
||||
jit_inlining_time | double precision | | |
|
||||
jit_optimization_count | bigint | | |
|
||||
jit_optimization_time | double precision | | |
|
||||
jit_emission_count | bigint | | |
|
||||
jit_emission_time | double precision | | |
|
||||
jit_deform_count | bigint | | |
|
||||
jit_deform_time | double precision | | |
|
||||
parallel_workers_to_launch | bigint | | |
|
||||
parallel_workers_launched | bigint | | |
|
||||
generic_plan_calls | bigint | | |
|
||||
custom_plan_calls | bigint | | |
|
||||
stats_since | timestamp with time zone | | |
|
||||
minmax_stats_since | timestamp with time zone | | |
|
||||
userid | oid | | |
|
||||
dbid | oid | | |
|
||||
toplevel | boolean | | |
|
||||
queryid | bigint | | |
|
||||
query | text | | |
|
||||
plans | bigint | | |
|
||||
total_plan_time | double precision | | |
|
||||
min_plan_time | double precision | | |
|
||||
max_plan_time | double precision | | |
|
||||
mean_plan_time | double precision | | |
|
||||
stddev_plan_time | double precision | | |
|
||||
calls | bigint | | |
|
||||
total_exec_time | double precision | | |
|
||||
min_exec_time | double precision | | |
|
||||
max_exec_time | double precision | | |
|
||||
mean_exec_time | double precision | | |
|
||||
stddev_exec_time | double precision | | |
|
||||
rows | bigint | | |
|
||||
shared_blks_hit | bigint | | |
|
||||
shared_blks_read | bigint | | |
|
||||
shared_blks_dirtied | bigint | | |
|
||||
shared_blks_written | bigint | | |
|
||||
local_blks_hit | bigint | | |
|
||||
local_blks_read | bigint | | |
|
||||
local_blks_dirtied | bigint | | |
|
||||
local_blks_written | bigint | | |
|
||||
temp_blks_read | bigint | | |
|
||||
temp_blks_written | bigint | | |
|
||||
shared_blk_read_time | double precision | | |
|
||||
shared_blk_write_time | double precision | | |
|
||||
local_blk_read_time | double precision | | |
|
||||
local_blk_write_time | double precision | | |
|
||||
temp_blk_read_time | double precision | | |
|
||||
temp_blk_write_time | double precision | | |
|
||||
wal_records | bigint | | |
|
||||
wal_fpi | bigint | | |
|
||||
wal_bytes | numeric | | |
|
||||
jit_functions | bigint | | |
|
||||
jit_generation_time | double precision | | |
|
||||
jit_inlining_count | bigint | | |
|
||||
jit_inlining_time | double precision | | |
|
||||
jit_optimization_count | bigint | | |
|
||||
jit_optimization_time | double precision | | |
|
||||
jit_emission_count | bigint | | |
|
||||
jit_emission_time | double precision | | |
|
||||
jit_deform_count | bigint | | |
|
||||
jit_deform_time | double precision | | |
|
||||
parallel_workers_to_launch | bigint | | |
|
||||
parallel_workers_launched | bigint | | |
|
||||
total_plan_cpu_time | double precision | | |
|
||||
total_exec_cpu_time | double precision | | |
|
||||
stats_since | timestamp with time zone | | |
|
||||
minmax_stats_since | timestamp with time zone | | |
|
||||
|
||||
SELECT count(*) > 0 AS has_data FROM pg_stat_statements;
|
||||
has_data
|
||||
has_data
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
|
|
|||
|
|
contrib/pg_stat_statements/meson.build

@@ -1,4 +1,4 @@
# Copyright (c) 2022-2026, PostgreSQL Global Development Group
# Copyright (c) 2022-2024, PostgreSQL Global Development Group

pg_stat_statements_sources = files(
  'pg_stat_statements.c',

@@ -37,7 +37,6 @@ install_data(
  kwargs: contrib_data_args,
)

# Note: Test "cleanup" is kept second to last, removing the extension.
tests += {
  'name': 'pg_stat_statements',
  'sd': meson.current_source_dir(),

@@ -56,8 +55,7 @@ tests += {
      'privileges',
      'extended',
      'parallel',
      'plancache',
      'squashing',
      'cpu_time',
      'cleanup',
      'oldextversions',
    ],

@@ -50,7 +50,6 @@ CREATE FUNCTION pg_stat_statements(IN showtext boolean,
    OUT wal_records int8,
    OUT wal_fpi int8,
    OUT wal_bytes numeric,
    OUT wal_buffers_full int8,
    OUT jit_functions int8,
    OUT jit_generation_time float8,
    OUT jit_inlining_count int8,

@@ -63,8 +62,8 @@ CREATE FUNCTION pg_stat_statements(IN showtext boolean,
    OUT jit_deform_time float8,
    OUT parallel_workers_to_launch int8,
    OUT parallel_workers_launched int8,
    OUT generic_plan_calls int8,
    OUT custom_plan_calls int8,
    OUT total_plan_cpu_time float8,
    OUT total_exec_cpu_time float8,
    OUT stats_since timestamp with time zone,
    OUT minmax_stats_since timestamp with time zone
)

@ -34,7 +34,7 @@
|
|||
* in the file to be read or written while holding only shared lock.
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2008-2026, PostgreSQL Global Development Group
|
||||
* Copyright (c) 2008-2024, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* contrib/pg_stat_statements/pg_stat_statements.c
|
||||
|
|
@ -47,7 +47,6 @@
|
|||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "access/htup_details.h"
|
||||
#include "access/parallel.h"
|
||||
#include "catalog/pg_authid.h"
|
||||
#include "common/int.h"
|
||||
|
|
@ -72,10 +71,7 @@
|
|||
#include "utils/memutils.h"
|
||||
#include "utils/timestamp.h"
|
||||
|
||||
PG_MODULE_MAGIC_EXT(
|
||||
.name = "pg_stat_statements",
|
||||
.version = PG_VERSION
|
||||
);
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
/* Location of permanent stats file (valid when database is shut down) */
|
||||
#define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
|
||||
|
|
@ -86,7 +82,7 @@ PG_MODULE_MAGIC_EXT(
|
|||
#define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
|
||||
|
||||
/* Magic number identifying the stats file format */
|
||||
static const uint32 PGSS_FILE_HEADER = 0x20250731;
|
||||
static const uint32 PGSS_FILE_HEADER = 0x20220408;
|
||||
|
||||
/* PostgreSQL major version number, changes in which invalidate all entries */
|
||||
static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
|
||||
|
|
@ -140,12 +136,13 @@ typedef enum pgssStoreKind
|
|||
* If you add a new key to this struct, make sure to teach pgss_store() to
|
||||
* zero the padding bytes. Otherwise, things will break, because pgss_hash is
|
||||
* created using HASH_BLOBS, and thus tag_hash is used to hash this.
|
||||
|
||||
*/
|
||||
typedef struct pgssHashKey
|
||||
{
|
||||
Oid userid; /* user OID */
|
||||
Oid dbid; /* database OID */
|
||||
int64 queryid; /* query identifier */
|
||||
uint64 queryid; /* query identifier */
|
||||
bool toplevel; /* query executed at top level */
|
||||
} pgssHashKey;
|
||||
|
||||
|
|
@ -191,7 +188,6 @@ typedef struct Counters
|
|||
int64 wal_records; /* # of WAL records generated */
|
||||
int64 wal_fpi; /* # of WAL full page images generated */
|
||||
uint64 wal_bytes; /* total amount of WAL generated in bytes */
|
||||
int64 wal_buffers_full; /* # of times the WAL buffers became full */
|
||||
int64 jit_functions; /* total number of JIT functions emitted */
|
||||
double jit_generation_time; /* total time to generate jit code */
|
||||
int64 jit_inlining_count; /* number of times inlining time has been
|
||||
|
|
@ -211,8 +207,7 @@ typedef struct Counters
|
|||
* to be launched */
|
||||
int64 parallel_workers_launched; /* # of parallel workers actually
|
||||
* launched */
|
||||
int64 generic_plan_calls; /* number of calls using a generic plan */
|
||||
int64 custom_plan_calls; /* number of calls using a custom plan */
|
||||
double cpu_time[PGSS_NUMKIND]; /* total CPU (user+sys) time, in msec */
|
||||
} Counters;
|
||||
|
||||
/*
|
||||
|
|
@ -263,7 +258,7 @@ typedef struct pgssSharedState
|
|||
/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
|
||||
static int nesting_level = 0;
|
||||
|
||||
/* Saved hook values */
|
||||
/* Saved hook values in case of unload */
|
||||
static shmem_request_hook_type prev_shmem_request_hook = NULL;
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
|
||||
static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
|
||||
|
|
@ -302,6 +297,7 @@ static bool pgss_track_planning = false; /* whether to track planning
|
|||
* duration */
|
||||
static bool pgss_save = true; /* whether to save stats across shutdown */
|
||||
|
||||
|
||||
#define pgss_enabled(level) \
|
||||
(!IsParallelWorker() && \
|
||||
(pgss_track == PGSS_TRACK_ALL || \
|
||||
|
|
@ -338,12 +334,11 @@ static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
|
|||
static PlannedStmt *pgss_planner(Query *parse,
|
||||
const char *query_string,
|
||||
int cursorOptions,
|
||||
ParamListInfo boundParams,
|
||||
ExplainState *es);
|
||||
ParamListInfo boundParams);
|
||||
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
|
||||
static void pgss_ExecutorRun(QueryDesc *queryDesc,
|
||||
ScanDirection direction,
|
||||
uint64 count);
|
||||
uint64 count, bool execute_once);
|
||||
static void pgss_ExecutorFinish(QueryDesc *queryDesc);
|
||||
static void pgss_ExecutorEnd(QueryDesc *queryDesc);
|
||||
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
|
||||
|
|
@ -351,17 +346,17 @@ static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
|
|||
ProcessUtilityContext context, ParamListInfo params,
|
||||
QueryEnvironment *queryEnv,
|
||||
DestReceiver *dest, QueryCompletion *qc);
|
||||
static void pgss_store(const char *query, int64 queryId,
|
||||
static void pgss_store(const char *query, uint64 queryId,
|
||||
int query_location, int query_len,
|
||||
pgssStoreKind kind,
|
||||
double total_time, uint64 rows,
|
||||
double total_time, double cpu_time,
|
||||
uint64 rows,
|
||||
const BufferUsage *bufusage,
|
||||
const WalUsage *walusage,
|
||||
const struct JitInstrumentation *jitusage,
|
||||
JumbleState *jstate,
|
||||
int parallel_workers_to_launch,
|
||||
int parallel_workers_launched,
|
||||
PlannedStmtOrigin planOrigin);
|
||||
int parallel_workers_launched);
|
||||
static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
|
||||
pgssVersion api_version,
|
||||
bool showtext);
|
||||
|
|
@ -376,7 +371,25 @@ static char *qtext_fetch(Size query_offset, int query_len,
|
|||
char *buffer, Size buffer_size);
|
||||
static bool need_gc_qtexts(void);
|
||||
static void gc_qtexts(void);
|
||||
static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
|
||||
|
||||
/*
|
||||
* Compute total CPU time (user + system) in milliseconds from two
|
||||
* getrusage snapshots. Returns the difference ru1 - ru0.
|
||||
*/
|
||||
static double
|
||||
pgss_cpu_time_diff_ms(const struct rusage *ru1, const struct rusage *ru0)
|
||||
{
|
||||
double user_ms,
|
||||
sys_ms;
|
||||
|
||||
user_ms = (double) (ru1->ru_utime.tv_sec - ru0->ru_utime.tv_sec) * 1000.0
|
||||
+ (double) (ru1->ru_utime.tv_usec - ru0->ru_utime.tv_usec) / 1000.0;
|
||||
sys_ms = (double) (ru1->ru_stime.tv_sec - ru0->ru_stime.tv_sec) * 1000.0
|
||||
+ (double) (ru1->ru_stime.tv_usec - ru0->ru_stime.tv_usec) / 1000.0;
|
||||
|
||||
return user_ms + sys_ms;
|
||||
}
|
||||
static TimestampTz entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only);
|
||||
static char *generate_normalized_query(JumbleState *jstate, const char *query,
|
||||
int query_loc, int *query_len_p);
|
||||
static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
|
||||
|
|
@ -858,7 +871,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
|
|||
{
|
||||
if (pgss_track_utility && IsA(query->utilityStmt, ExecuteStmt))
|
||||
{
|
||||
query->queryId = INT64CONST(0);
|
||||
query->queryId = UINT64CONST(0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -876,15 +889,14 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
|
|||
query->stmt_location,
|
||||
query->stmt_len,
|
||||
PGSS_INVALID,
|
||||
0,
|
||||
0,
|
||||
0, 0, /* total_time, cpu_time */
|
||||
0, /* rows */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
jstate,
|
||||
0,
|
||||
0,
|
||||
PLAN_STMT_UNKNOWN);
|
||||
0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -895,8 +907,7 @@ static PlannedStmt *
|
|||
pgss_planner(Query *parse,
|
||||
const char *query_string,
|
||||
int cursorOptions,
|
||||
ParamListInfo boundParams,
|
||||
ExplainState *es)
|
||||
ParamListInfo boundParams)
|
||||
{
|
||||
PlannedStmt *result;
|
||||
|
||||
|
|
@ -907,7 +918,7 @@ pgss_planner(Query *parse,
|
|||
*/
|
||||
if (pgss_enabled(nesting_level)
|
||||
&& pgss_track_planning && query_string
|
||||
&& parse->queryId != INT64CONST(0))
|
||||
&& parse->queryId != UINT64CONST(0))
|
||||
{
|
||||
instr_time start;
|
||||
instr_time duration;
|
||||
|
|
@ -915,6 +926,8 @@ pgss_planner(Query *parse,
|
|||
bufusage;
|
||||
WalUsage walusage_start,
|
||||
walusage;
|
||||
struct rusage rusage_start,
|
||||
rusage_end;
|
||||
|
||||
/* We need to track buffer usage as the planner can access them. */
|
||||
bufusage_start = pgBufferUsage;
|
||||
|
|
@ -924,6 +937,7 @@ pgss_planner(Query *parse,
|
|||
* (e.g. setting a hint bit with those being WAL-logged)
|
||||
*/
|
||||
walusage_start = pgWalUsage;
|
||||
getrusage(RUSAGE_SELF, &rusage_start);
|
||||
INSTR_TIME_SET_CURRENT(start);
|
||||
|
||||
nesting_level++;
|
||||
|
|
@ -931,10 +945,10 @@ pgss_planner(Query *parse,
|
|||
{
|
||||
if (prev_planner_hook)
|
||||
result = prev_planner_hook(parse, query_string, cursorOptions,
|
||||
boundParams, es);
|
||||
boundParams);
|
||||
else
|
||||
result = standard_planner(parse, query_string, cursorOptions,
|
||||
boundParams, es);
|
||||
boundParams);
|
||||
}
|
||||
PG_FINALLY();
|
||||
{
|
||||
|
|
@ -944,6 +958,7 @@ pgss_planner(Query *parse,
|
|||
|
||||
INSTR_TIME_SET_CURRENT(duration);
|
||||
INSTR_TIME_SUBTRACT(duration, start);
|
||||
getrusage(RUSAGE_SELF, &rusage_end);
|
||||
|
||||
/* calc differences of buffer counters. */
|
||||
memset(&bufusage, 0, sizeof(BufferUsage));
|
||||
|
|
@ -959,14 +974,14 @@ pgss_planner(Query *parse,
|
|||
parse->stmt_len,
|
||||
PGSS_PLAN,
|
||||
INSTR_TIME_GET_MILLISEC(duration),
|
||||
pgss_cpu_time_diff_ms(&rusage_end, &rusage_start),
|
||||
0,
|
||||
&bufusage,
|
||||
&walusage,
|
||||
NULL,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
result->planOrigin);
|
||||
0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -980,10 +995,10 @@ pgss_planner(Query *parse,
|
|||
{
|
||||
if (prev_planner_hook)
|
||||
result = prev_planner_hook(parse, query_string, cursorOptions,
|
||||
boundParams, es);
|
||||
boundParams);
|
||||
else
|
||||
result = standard_planner(parse, query_string, cursorOptions,
|
||||
boundParams, es);
|
||||
boundParams);
|
||||
}
|
||||
PG_FINALLY();
|
||||
{
|
||||
|
|
@ -1011,7 +1026,7 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
|
|||
* counting of optimizable statements that are directly contained in
|
||||
* utility statements.
|
||||
*/
|
||||
if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
|
||||
if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
|
||||
{
|
||||
/*
|
||||
* Set up to track total elapsed time in ExecutorRun. Make sure the
|
||||
|
|
@ -1033,15 +1048,16 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
|
|||
* ExecutorRun hook: all we need do is track nesting depth
|
||||
*/
|
||||
static void
|
||||
pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
|
||||
pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
|
||||
bool execute_once)
|
||||
{
|
||||
nesting_level++;
|
||||
PG_TRY();
|
||||
{
|
||||
if (prev_ExecutorRun)
|
||||
prev_ExecutorRun(queryDesc, direction, count);
|
||||
prev_ExecutorRun(queryDesc, direction, count, execute_once);
|
||||
else
|
||||
standard_ExecutorRun(queryDesc, direction, count);
|
||||
standard_ExecutorRun(queryDesc, direction, count, execute_once);
|
||||
}
|
||||
PG_FINALLY();
|
||||
{
|
||||
|
|
@ -1077,9 +1093,9 @@ pgss_ExecutorFinish(QueryDesc *queryDesc)
|
|||
static void
|
||||
pgss_ExecutorEnd(QueryDesc *queryDesc)
|
||||
{
|
||||
int64 queryId = queryDesc->plannedstmt->queryId;
|
||||
uint64 queryId = queryDesc->plannedstmt->queryId;
|
||||
|
||||
if (queryId != INT64CONST(0) && queryDesc->totaltime &&
|
||||
if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
|
||||
pgss_enabled(nesting_level))
|
||||
{
|
||||
/*
|
||||
|
|
@ -1093,15 +1109,15 @@ pgss_ExecutorEnd(QueryDesc *queryDesc)
|
|||
queryDesc->plannedstmt->stmt_location,
|
||||
queryDesc->plannedstmt->stmt_len,
|
||||
PGSS_EXEC,
|
||||
INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total),
|
||||
queryDesc->totaltime->total * 1000.0, /* convert to msec */
|
||||
queryDesc->totaltime->cpu_time * 1000.0, /* convert to msec */
|
||||
queryDesc->estate->es_total_processed,
|
||||
&queryDesc->totaltime->bufusage,
|
||||
&queryDesc->totaltime->walusage,
|
||||
queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
|
||||
NULL,
|
||||
queryDesc->estate->es_parallel_workers_to_launch,
|
||||
queryDesc->estate->es_parallel_workers_launched,
|
||||
queryDesc->plannedstmt->planOrigin);
|
||||
queryDesc->estate->es_parallel_workers_launched);
|
||||
}
|
||||
|
||||
if (prev_ExecutorEnd)
|
||||
|
|
@ -1121,7 +1137,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
|
|||
DestReceiver *dest, QueryCompletion *qc)
|
||||
{
|
||||
Node *parsetree = pstmt->utilityStmt;
|
||||
int64 saved_queryId = pstmt->queryId;
|
||||
uint64 saved_queryId = pstmt->queryId;
|
||||
int saved_stmt_location = pstmt->stmt_location;
|
||||
int saved_stmt_len = pstmt->stmt_len;
|
||||
bool enabled = pgss_track_utility && pgss_enabled(nesting_level);
|
||||
|
|
@ -1141,7 +1157,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
|
|||
* only.
|
||||
*/
|
||||
if (enabled)
|
||||
pstmt->queryId = INT64CONST(0);
|
||||
pstmt->queryId = UINT64CONST(0);
|
||||
|
||||
/*
|
||||
* If it's an EXECUTE statement, we don't track it and don't increment the
|
||||
|
|
@ -1167,9 +1183,12 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
|
|||
bufusage;
|
||||
WalUsage walusage_start,
|
||||
walusage;
|
||||
struct rusage rusage_start,
|
||||
rusage_end;
|
||||
|
||||
bufusage_start = pgBufferUsage;
|
||||
walusage_start = pgWalUsage;
|
||||
getrusage(RUSAGE_SELF, &rusage_start);
|
||||
INSTR_TIME_SET_CURRENT(start);
|
||||
|
||||
nesting_level++;
|
||||
|
|
@ -1202,6 +1221,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
|
|||
|
||||
INSTR_TIME_SET_CURRENT(duration);
|
||||
INSTR_TIME_SUBTRACT(duration, start);
|
||||
getrusage(RUSAGE_SELF, &rusage_end);
|
||||
|
||||
/*
|
||||
* Track the total number of rows retrieved or affected by the utility
|
||||
|
|
@ -1228,14 +1248,14 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
|
|||
saved_stmt_len,
|
||||
PGSS_EXEC,
|
||||
INSTR_TIME_GET_MILLISEC(duration),
|
||||
pgss_cpu_time_diff_ms(&rusage_end, &rusage_start),
|
||||
rows,
|
||||
&bufusage,
|
||||
&walusage,
|
||||
NULL,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
pstmt->planOrigin);
|
||||
0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -1289,17 +1309,17 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
|
|||
* for the arrays in the Counters field.
|
||||
*/
|
||||
static void
|
||||
pgss_store(const char *query, int64 queryId,
|
||||
pgss_store(const char *query, uint64 queryId,
|
||||
int query_location, int query_len,
|
||||
pgssStoreKind kind,
|
||||
double total_time, uint64 rows,
|
||||
double total_time, double cpu_time,
|
||||
uint64 rows,
|
||||
const BufferUsage *bufusage,
|
||||
const WalUsage *walusage,
|
||||
const struct JitInstrumentation *jitusage,
|
||||
JumbleState *jstate,
|
||||
int parallel_workers_to_launch,
|
||||
int parallel_workers_launched,
|
||||
PlannedStmtOrigin planOrigin)
|
||||
int parallel_workers_launched)
|
||||
{
|
||||
pgssHashKey key;
|
||||
pgssEntry *entry;
|
||||
|
|
@ -1316,7 +1336,7 @@ pgss_store(const char *query, int64 queryId,
|
|||
* Nothing to do if compute_query_id isn't enabled and no other module
|
||||
* computed a query identifier.
|
||||
*/
|
||||
if (queryId == INT64CONST(0))
|
||||
if (queryId == UINT64CONST(0))
|
||||
return;
|
||||
|
||||
/*
|
||||
|
|
@ -1421,6 +1441,7 @@ pgss_store(const char *query, int64 queryId,
|
|||
|
||||
entry->counters.calls[kind] += 1;
|
||||
entry->counters.total_time[kind] += total_time;
|
||||
entry->counters.cpu_time[kind] += cpu_time;
|
||||
|
||||
if (entry->counters.calls[kind] == 1)
|
||||
{
|
||||
|
|
@ -1480,7 +1501,6 @@ pgss_store(const char *query, int64 queryId,
|
|||
entry->counters.wal_records += walusage->wal_records;
|
||||
entry->counters.wal_fpi += walusage->wal_fpi;
|
||||
entry->counters.wal_bytes += walusage->wal_bytes;
|
||||
entry->counters.wal_buffers_full += walusage->wal_buffers_full;
|
||||
if (jitusage)
|
||||
{
|
||||
entry->counters.jit_functions += jitusage->created_functions;
|
||||
|
|
@ -1507,12 +1527,6 @@ pgss_store(const char *query, int64 queryId,
|
|||
entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
|
||||
entry->counters.parallel_workers_launched += parallel_workers_launched;
|
||||
|
||||
/* plan cache counters */
|
||||
if (planOrigin == PLAN_STMT_CACHE_GENERIC)
|
||||
entry->counters.generic_plan_calls++;
|
||||
else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
|
||||
entry->counters.custom_plan_calls++;
|
||||
|
||||
SpinLockRelease(&entry->mutex);
|
||||
}
|
||||
|
||||
|
|
@ -1532,11 +1546,11 @@ pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
|
|||
{
|
||||
Oid userid;
|
||||
Oid dbid;
|
||||
int64 queryid;
|
||||
uint64 queryid;
|
||||
|
||||
userid = PG_GETARG_OID(0);
|
||||
dbid = PG_GETARG_OID(1);
|
||||
queryid = PG_GETARG_INT64(2);
|
||||
queryid = (uint64) PG_GETARG_INT64(2);
|
||||
|
||||
entry_reset(userid, dbid, queryid, false);
|
||||
|
||||
|
|
@ -1548,12 +1562,12 @@ pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
|
|||
{
|
||||
Oid userid;
|
||||
Oid dbid;
|
||||
int64 queryid;
|
||||
uint64 queryid;
|
||||
bool minmax_only;
|
||||
|
||||
userid = PG_GETARG_OID(0);
|
||||
dbid = PG_GETARG_OID(1);
|
||||
queryid = PG_GETARG_INT64(2);
|
||||
queryid = (uint64) PG_GETARG_INT64(2);
|
||||
minmax_only = PG_GETARG_BOOL(3);
|
||||
|
||||
PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
|
||||
|
|
@ -1579,9 +1593,9 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS)
|
|||
#define PG_STAT_STATEMENTS_COLS_V1_9 33
|
||||
#define PG_STAT_STATEMENTS_COLS_V1_10 43
|
||||
#define PG_STAT_STATEMENTS_COLS_V1_11 49
|
||||
#define PG_STAT_STATEMENTS_COLS_V1_12 52
|
||||
#define PG_STAT_STATEMENTS_COLS_V1_13 54
|
||||
#define PG_STAT_STATEMENTS_COLS 54 /* maximum of above */
|
||||
#define PG_STAT_STATEMENTS_COLS_V1_12 51
|
||||
#define PG_STAT_STATEMENTS_COLS_V1_13 53
|
||||
#define PG_STAT_STATEMENTS_COLS 53 /* maximum of above */
|
||||
|
||||
/*
|
||||
* Retrieve statement statistics.
|
||||
|
|
@ -1992,10 +2006,6 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
|
|||
Int32GetDatum(-1));
|
||||
values[i++] = wal_bytes;
|
||||
}
|
||||
if (api_version >= PGSS_V1_12)
|
||||
{
|
||||
values[i++] = Int64GetDatumFast(tmp.wal_buffers_full);
|
||||
}
|
||||
if (api_version >= PGSS_V1_10)
|
||||
{
|
||||
values[i++] = Int64GetDatumFast(tmp.jit_functions);
|
||||
|
|
@ -2019,8 +2029,8 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
|
|||
}
|
||||
if (api_version >= PGSS_V1_13)
|
||||
{
|
||||
values[i++] = Int64GetDatumFast(tmp.generic_plan_calls);
|
||||
values[i++] = Int64GetDatumFast(tmp.custom_plan_calls);
|
||||
values[i++] = Float8GetDatumFast(tmp.cpu_time[PGSS_PLAN]);
|
||||
values[i++] = Float8GetDatumFast(tmp.cpu_time[PGSS_EXEC]);
|
||||
}
|
||||
if (api_version >= PGSS_V1_11)
|
||||
{
|
||||
|
|
@ -2710,13 +2720,13 @@ if (e) { \
|
|||
* Reset entries corresponding to parameters passed.
|
||||
*/
|
||||
static TimestampTz
|
||||
entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
|
||||
entry_reset(Oid userid, Oid dbid, uint64 queryid, bool minmax_only)
|
||||
{
|
||||
HASH_SEQ_STATUS hash_seq;
|
||||
pgssEntry *entry;
|
||||
FILE *qfile;
|
||||
int64 num_entries;
|
||||
int64 num_remove = 0;
|
||||
long num_entries;
|
||||
long num_remove = 0;
|
||||
pgssHashKey key;
|
||||
TimestampTz stats_reset;
|
||||
|
||||
|
|
@ -2730,7 +2740,7 @@ entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
|
|||
|
||||
stats_reset = GetCurrentTimestamp();
|
||||
|
||||
if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
|
||||
if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
|
||||
{
|
||||
/* If all the parameters are available, use the fast path. */
|
||||
memset(&key, 0, sizeof(pgssHashKey));
|
||||
|
|
@ -2753,7 +2763,7 @@ entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
|
|||
|
||||
SINGLE_ENTRY_RESET(entry);
|
||||
}
|
||||
else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
|
||||
else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
|
||||
{
|
||||
/* Reset entries corresponding to valid parameters. */
|
||||
hash_seq_init(&hash_seq, pgss_hash);
|
||||
|
|
@ -2849,13 +2859,13 @@ generate_normalized_query(JumbleState *jstate, const char *query,
|
|||
{
|
||||
char *norm_query;
|
||||
int query_len = *query_len_p;
|
||||
int norm_query_buflen, /* Space allowed for norm_query */
|
||||
int i,
|
||||
norm_query_buflen, /* Space allowed for norm_query */
|
||||
len_to_wrt, /* Length (in bytes) to write */
|
||||
quer_loc = 0, /* Source query byte location */
|
||||
n_quer_loc = 0, /* Normalized query byte location */
|
||||
last_off = 0, /* Offset from start for previous tok */
|
||||
last_tok_len = 0; /* Length (in bytes) of that tok */
|
||||
int num_constants_replaced = 0;
|
||||
|
||||
/*
|
||||
* Get constants' lengths (core system only gives us locations). Note
|
||||
|
|
@ -2875,23 +2885,12 @@ generate_normalized_query(JumbleState *jstate, const char *query,
|
|||
/* Allocate result buffer */
|
||||
norm_query = palloc(norm_query_buflen + 1);
|
||||
|
||||
for (int i = 0; i < jstate->clocations_count; i++)
|
||||
for (i = 0; i < jstate->clocations_count; i++)
|
||||
{
|
||||
int off, /* Offset from start for cur tok */
|
||||
tok_len; /* Length (in bytes) of that tok */
|
||||
|
||||
/*
|
||||
* If we have an external param at this location, but no lists are
|
||||
* being squashed across the query, then we skip here; this will make
|
||||
* us print the characters found in the original query that represent
|
||||
* the parameter in the next iteration (or after the loop is done),
|
||||
* which is a bit odd but seems to work okay in most cases.
|
||||
*/
|
||||
if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
|
||||
continue;
|
||||
|
||||
off = jstate->clocations[i].location;
|
||||
|
||||
/* Adjust recorded location if we're dealing with partial string */
|
||||
off -= query_loc;
|
||||
|
||||
|
|
@ -2903,21 +2902,15 @@ generate_normalized_query(JumbleState *jstate, const char *query,
|
|||
/* Copy next chunk (what precedes the next constant) */
|
||||
len_to_wrt = off - last_off;
|
||||
len_to_wrt -= last_tok_len;
|
||||
|
||||
Assert(len_to_wrt >= 0);
|
||||
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
|
||||
n_quer_loc += len_to_wrt;
|
||||
|
||||
/*
|
||||
* And insert a param symbol in place of the constant token; and, if
|
||||
* we have a squashable list, insert a placeholder comment starting
|
||||
* from the list's second value.
|
||||
*/
|
||||
n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
|
||||
num_constants_replaced + 1 + jstate->highest_extern_param_id,
|
||||
jstate->clocations[i].squashed ? " /*, ... */" : "");
|
||||
num_constants_replaced++;
|
||||
/* And insert a param symbol in place of the constant token */
|
||||
n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
|
||||
i + 1 + jstate->highest_extern_param_id);
|
||||
|
||||
/* move forward */
|
||||
quer_loc = off + tok_len;
|
||||
last_off = off;
|
||||
last_tok_len = tok_len;
|
||||
|
|
@ -2954,8 +2947,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
|
|||
* have originated from within the authoritative parser, this should not be
|
||||
* a problem.
|
||||
*
|
||||
* Multiple constants can have the same location. We reset lengths of those
|
||||
* past the first to -1 so that they can later be ignored.
|
||||
* Duplicate constant pointers are possible, and will have their lengths
|
||||
* marked as '-1', so that they are later ignored. (Actually, we assume the
|
||||
* lengths were initialized as -1 to start with, and don't change them here.)
|
||||
*
|
||||
* If query_loc > 0, then "query" has been advanced by that much compared to
|
||||
* the original string start, so we need to translate the provided locations
|
||||
|
|
@ -2975,6 +2969,8 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
|
|||
core_yy_extra_type yyextra;
|
||||
core_YYSTYPE yylval;
|
||||
YYLTYPE yylloc;
|
||||
int last_loc = -1;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Sort the records by location so that we can process them in order while
|
||||
|
|
@ -2991,30 +2987,24 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
|
|||
&ScanKeywords,
|
||||
ScanKeywordTokens);
|
||||
|
||||
/* we don't want to re-emit any escape string warnings */
|
||||
yyextra.escape_string_warning = false;
|
||||
|
||||
/* Search for each constant, in sequence */
|
||||
for (int i = 0; i < jstate->clocations_count; i++)
|
||||
for (i = 0; i < jstate->clocations_count; i++)
|
||||
{
|
||||
int loc;
|
||||
int loc = locs[i].location;
|
||||
int tok;
|
||||
|
||||
/* Ignore constants after the first one in the same location */
|
||||
if (i > 0 && locs[i].location == locs[i - 1].location)
|
||||
{
|
||||
locs[i].length = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (locs[i].squashed)
|
||||
continue; /* squashable list, ignore */
|
||||
|
||||
/* Adjust recorded location if we're dealing with partial string */
|
||||
loc = locs[i].location - query_loc;
|
||||
loc -= query_loc;
|
||||
|
||||
Assert(loc >= 0);
|
||||
|
||||
/*
|
||||
* We have a valid location for a constant that's not a dupe. Lex
|
||||
* tokens until we find the desired constant.
|
||||
*/
|
||||
if (loc <= last_loc)
|
||||
continue; /* Duplicate constant, ignore */
|
||||
|
||||
/* Lex tokens until we find the desired constant */
|
||||
for (;;)
|
||||
{
|
||||
tok = core_yylex(&yylval, &yylloc, yyscanner);
|
||||
|
|
@ -3060,6 +3050,8 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
|
|||
/* If we hit end-of-string, give up, leaving remaining lengths -1 */
|
||||
if (tok == 0)
|
||||
break;
|
||||
|
||||
last_loc = loc;
|
||||
}
|
||||
|
||||
scanner_finish(yyscanner);
|
||||
|
|
|
|||
contrib/pg_stat_statements/sql/cpu_time.sql (new file, 33 lines)

@@ -0,0 +1,33 @@
--
-- Tests for CPU time tracking
--
SET pg_stat_statements.track_utility = FALSE;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;

-- Run a CPU-intensive query
SELECT count(*) FROM generate_series(1, 100000);

-- Verify CPU time columns are populated and sensible
SELECT query,
       total_exec_cpu_time > 0 AS has_cpu_time,
       total_exec_cpu_time <= total_exec_time AS cpu_le_wall
  FROM pg_stat_statements
  WHERE query LIKE '%generate_series%'
  ORDER BY query COLLATE "C";

-- Test planning CPU time
SET pg_stat_statements.track_planning = TRUE;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;

SELECT count(*) FROM generate_series(1, 100000);

SELECT query,
       total_exec_cpu_time >= 0 AS has_exec_cpu,
       total_plan_cpu_time >= 0 AS has_plan_cpu
  FROM pg_stat_statements
  WHERE query LIKE '%generate_series%'
  ORDER BY query COLLATE "C";

-- Cleanup
SET pg_stat_statements.track_planning = FALSE;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;

@ -4,7 +4,7 @@
|
|||
* functions for instrumentation of plan execution
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2001-2026, PostgreSQL Global Development Group
|
||||
* Copyright (c) 2001-2024, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/backend/executor/instrument.c
|
||||
|
|
@ -13,6 +13,7 @@
|
|||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <sys/resource.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "executor/instrument.h"
|
||||
|
|
@ -25,6 +26,23 @@ static WalUsage save_pgWalUsage;
static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add);
static void WalUsageAdd(WalUsage *dst, WalUsage *add);

/*
 * Compute the CPU time delta (user + system) between two rusage snapshots,
 * in seconds.
 */
static inline double
rusage_cpu_time_diff(const struct rusage *ru_end, const struct rusage *ru_start)
{
    double      user_sec,
                sys_sec;

    user_sec = (double) (ru_end->ru_utime.tv_sec - ru_start->ru_utime.tv_sec)
        + (double) (ru_end->ru_utime.tv_usec - ru_start->ru_utime.tv_usec) / 1000000.0;
    sys_sec = (double) (ru_end->ru_stime.tv_sec - ru_start->ru_stime.tv_sec)
        + (double) (ru_end->ru_stime.tv_usec - ru_start->ru_stime.tv_usec) / 1000000.0;
    return user_sec + sys_sec;
}


/* Allocate new instrumentation structure(s) */
Instrumentation *

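To make the microsecond-to-second conversion above concrete, here is a minimal standalone sketch (illustration only, not part of the patch) that runs two hand-made rusage snapshots through the same user+system delta arithmetic; the helper name cpu_delta_seconds and the sample timeval values are invented for the example.

/* Standalone illustration of the rusage delta arithmetic (not patch code). */
#include <stdio.h>
#include <sys/resource.h>

/* Same computation as rusage_cpu_time_diff(): (user + system) delta in seconds. */
static double
cpu_delta_seconds(const struct rusage *end, const struct rusage *start)
{
    double user_sec = (double) (end->ru_utime.tv_sec - start->ru_utime.tv_sec)
        + (double) (end->ru_utime.tv_usec - start->ru_utime.tv_usec) / 1000000.0;
    double sys_sec = (double) (end->ru_stime.tv_sec - start->ru_stime.tv_sec)
        + (double) (end->ru_stime.tv_usec - start->ru_stime.tv_usec) / 1000000.0;

    return user_sec + sys_sec;
}

int
main(void)
{
    struct rusage start = {0};
    struct rusage end = {0};

    /* user time: 0.500000 s -> 1.250000 s, i.e. 0.75 s consumed */
    start.ru_utime.tv_sec = 0;
    start.ru_utime.tv_usec = 500000;
    end.ru_utime.tv_sec = 1;
    end.ru_utime.tv_usec = 250000;

    /* system time: 0 s -> 0.200000 s, i.e. 0.20 s consumed */
    end.ru_stime.tv_usec = 200000;

    /* Prints 0.950000 (0.75 s user + 0.20 s system). */
    printf("%f\n", cpu_delta_seconds(&end, &start));
    return 0;
}
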
@ -34,11 +52,12 @@ InstrAlloc(int n, int instrument_options, bool async_mode)
    /* initialize all fields to zeroes, then modify as needed */
    instr = palloc0(n * sizeof(Instrumentation));
    if (instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_TIMER | INSTRUMENT_WAL))
    if (instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_TIMER | INSTRUMENT_WAL | INSTRUMENT_CPUUSAGE))
    {
        bool        need_buffers = (instrument_options & INSTRUMENT_BUFFERS) != 0;
        bool        need_wal = (instrument_options & INSTRUMENT_WAL) != 0;
        bool        need_timer = (instrument_options & INSTRUMENT_TIMER) != 0;
        bool        need_cpuusage = (instrument_options & INSTRUMENT_CPUUSAGE) != 0;
        int         i;

        for (i = 0; i < n; i++)

@ -46,6 +65,7 @@ InstrAlloc(int n, int instrument_options, bool async_mode)
            instr[i].need_bufusage = need_buffers;
            instr[i].need_walusage = need_wal;
            instr[i].need_timer = need_timer;
            instr[i].need_cpuusage = need_cpuusage;
            instr[i].async_mode = async_mode;
        }
    }

@ -61,6 +81,7 @@ InstrInit(Instrumentation *instr, int instrument_options)
    instr->need_bufusage = (instrument_options & INSTRUMENT_BUFFERS) != 0;
    instr->need_walusage = (instrument_options & INSTRUMENT_WAL) != 0;
    instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0;
    instr->need_cpuusage = (instrument_options & INSTRUMENT_CPUUSAGE) != 0;
}

/* Entry to a plan node */

@ -77,6 +98,10 @@ InstrStartNode(Instrumentation *instr)
    if (instr->need_walusage)
        instr->walusage_start = pgWalUsage;

    /* save CPU usage at node entry, if needed */
    if (instr->need_cpuusage)
        getrusage(RUSAGE_SELF, &instr->rusage_start);
}

/* Exit from a plan node */

@ -89,6 +114,22 @@ InstrStopNode(Instrumentation *instr, double nTuples)
    /* count the returned tuples */
    instr->tuplecount += nTuples;

    /*
     * Capture CPU usage before stopping the wall clock, so the CPU
     * measurement window is strictly contained within the wall-clock window.
     * InstrStartNode captures rusage after the wall clock starts, so:
     * [wall_start ... cpu_start ... cpu_end ... wall_end]
     * This guarantees cpu_time <= total (wall) time.
     */
    if (instr->need_cpuusage)
    {
        struct rusage rusage_end;

        getrusage(RUSAGE_SELF, &rusage_end);
        instr->counter_cpu += rusage_cpu_time_diff(&rusage_end,
                                                   &instr->rusage_start);
    }

    /* let's update the time only if the timer was requested */
    if (instr->need_timer)
    {

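The containment argument in the comment above is easy to reproduce outside the executor. The following standalone sketch is illustrative only (the CLOCK_MONOTONIC choice, the busy loop, and all variable names are assumptions, not patch code); it takes the snapshots in the same order, so the reported CPU delta should not exceed the wall-clock delta, up to getrusage() rounding.

/* Standalone demonstration of the [wall_start, cpu_start, cpu_end, wall_end] bracketing. */
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>

static double
tv_seconds(struct timeval tv)
{
    return tv.tv_sec + tv.tv_usec / 1000000.0;
}

int
main(void)
{
    struct timespec wall_start, wall_end;
    struct rusage   cpu_start, cpu_end;
    volatile double sink = 0.0;

    clock_gettime(CLOCK_MONOTONIC, &wall_start);    /* wall clock starts first */
    getrusage(RUSAGE_SELF, &cpu_start);             /* ... then the CPU clock */

    for (long i = 0; i < 20 * 1000 * 1000; i++)     /* some CPU-bound work */
        sink += (double) i * 0.5;

    getrusage(RUSAGE_SELF, &cpu_end);               /* CPU clock stops first */
    clock_gettime(CLOCK_MONOTONIC, &wall_end);      /* ... then the wall clock */

    {
        double wall = (wall_end.tv_sec - wall_start.tv_sec)
            + (wall_end.tv_nsec - wall_start.tv_nsec) / 1e9;
        double cpu = (tv_seconds(cpu_end.ru_utime) - tv_seconds(cpu_start.ru_utime))
            + (tv_seconds(cpu_end.ru_stime) - tv_seconds(cpu_start.ru_stime));

        /* Because the CPU window is nested inside the wall window, cpu <= wall. */
        printf("wall=%.6f s  cpu=%.6f s\n", wall, cpu);
    }
    return 0;
}

Building it with "cc -O2 demo.c -o demo" should be enough on current Linux systems; very old glibc versions may additionally need -lrt for clock_gettime().
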
@ -114,7 +155,7 @@ InstrStopNode(Instrumentation *instr, double nTuples)
        if (!instr->running)
        {
            instr->running = true;
            instr->firsttuple = instr->counter;
            instr->firsttuple = INSTR_TIME_GET_DOUBLE(instr->counter);
        }
        else
        {

@ -123,7 +164,7 @@ InstrStopNode(Instrumentation *instr, double nTuples)
         * this might be the first tuple
         */
        if (instr->async_mode && save_tuplecount < 1.0)
            instr->firsttuple = instr->counter;
            instr->firsttuple = INSTR_TIME_GET_DOUBLE(instr->counter);
    }
}

@ -139,6 +180,8 @@ InstrUpdateTupleCount(Instrumentation *instr, double nTuples)
void
InstrEndLoop(Instrumentation *instr)
{
    double      totaltime;

    /* Skip if nothing has happened, or already shut down */
    if (!instr->running)
        return;

@ -147,8 +190,11 @@ InstrEndLoop(Instrumentation *instr)
        elog(ERROR, "InstrEndLoop called on running node");

    /* Accumulate per-cycle statistics into totals */
    INSTR_TIME_ADD(instr->startup, instr->firsttuple);
    INSTR_TIME_ADD(instr->total, instr->counter);
    totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);

    instr->startup += instr->firsttuple;
    instr->total += totaltime;
    instr->cpu_time += instr->counter_cpu;
    instr->ntuples += instr->tuplecount;
    instr->nloops += 1;

@ -156,7 +202,8 @@ InstrEndLoop(Instrumentation *instr)
    instr->running = false;
    INSTR_TIME_SET_ZERO(instr->starttime);
    INSTR_TIME_SET_ZERO(instr->counter);
    INSTR_TIME_SET_ZERO(instr->firsttuple);
    instr->counter_cpu = 0;
    instr->firsttuple = 0;
    instr->tuplecount = 0;
}

@ -169,15 +216,16 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add)
        dst->running = true;
        dst->firsttuple = add->firsttuple;
    }
    else if (dst->running && add->running &&
             INSTR_TIME_LT(dst->firsttuple, add->firsttuple))
    else if (dst->running && add->running && dst->firsttuple > add->firsttuple)
        dst->firsttuple = add->firsttuple;

    INSTR_TIME_ADD(dst->counter, add->counter);

    dst->tuplecount += add->tuplecount;
    INSTR_TIME_ADD(dst->startup, add->startup);
    INSTR_TIME_ADD(dst->total, add->total);
    dst->counter_cpu += add->counter_cpu;
    dst->startup += add->startup;
    dst->total += add->total;
    dst->cpu_time += add->cpu_time;
    dst->ntuples += add->ntuples;
    dst->ntuples2 += add->ntuples2;
    dst->nloops += add->nloops;

@ -277,8 +325,6 @@ WalUsageAdd(WalUsage *dst, WalUsage *add)
    dst->wal_bytes += add->wal_bytes;
    dst->wal_records += add->wal_records;
    dst->wal_fpi += add->wal_fpi;
    dst->wal_fpi_bytes += add->wal_fpi_bytes;
    dst->wal_buffers_full += add->wal_buffers_full;
}

void

@ -287,6 +333,4 @@ WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
    dst->wal_bytes += add->wal_bytes - sub->wal_bytes;
    dst->wal_records += add->wal_records - sub->wal_records;
    dst->wal_fpi += add->wal_fpi - sub->wal_fpi;
    dst->wal_fpi_bytes += add->wal_fpi_bytes - sub->wal_fpi_bytes;
    dst->wal_buffers_full += add->wal_buffers_full - sub->wal_buffers_full;
}

@ -4,7 +4,7 @@
 *  definitions for run-time statistics collection
 *
 *
 * Copyright (c) 2001-2026, PostgreSQL Global Development Group
 * Copyright (c) 2001-2024, PostgreSQL Global Development Group
 *
 * src/include/executor/instrument.h
 *

@ -13,6 +13,8 @@
#ifndef INSTRUMENT_H
#define INSTRUMENT_H

#include <sys/resource.h>

#include "portability/instr_time.h"

@ -53,8 +55,6 @@ typedef struct WalUsage
    int64       wal_records;        /* # of WAL records produced */
    int64       wal_fpi;            /* # of WAL full page images produced */
    uint64      wal_bytes;          /* size of WAL records produced */
    uint64      wal_fpi_bytes;      /* size of WAL full page images produced */
    int64       wal_buffers_full;   /* # of times the WAL buffers became full */
} WalUsage;

/* Flag bits included in InstrAlloc's instrument_options bitmask */

@ -64,6 +64,7 @@ typedef enum InstrumentOption
    INSTRUMENT_BUFFERS = 1 << 1,    /* needs buffer usage */
    INSTRUMENT_ROWS = 1 << 2,       /* needs row count */
    INSTRUMENT_WAL = 1 << 3,        /* needs WAL usage */
    INSTRUMENT_CPUUSAGE = 1 << 4,   /* needs CPU usage via getrusage() */
    INSTRUMENT_ALL = PG_INT32_MAX
} InstrumentOption;

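For context, a minimal sketch of how a hypothetical backend-side caller could request the new CPU accounting alongside the existing timer. It relies only on functions whose signatures appear in this diff (InstrAlloc, InstrStartNode, InstrStopNode, InstrEndLoop), assumes a backend (palloc/elog) context, and is not code from the commit; the function name run_node_with_cpu_tracking is invented.

/*
 * Illustrative only: a hypothetical caller combining the new
 * INSTRUMENT_CPUUSAGE flag with the existing timer instrumentation.
 */
#include "postgres.h"
#include "executor/instrument.h"

static void
run_node_with_cpu_tracking(void)
{
    Instrumentation *instr;

    /* Request wall-clock timing plus getrusage()-based CPU accounting. */
    instr = InstrAlloc(1, INSTRUMENT_TIMER | INSTRUMENT_CPUUSAGE, false);

    InstrStartNode(instr);
    /* ... execute one cycle of the plan node here ... */
    InstrStopNode(instr, 1 /* nTuples */ );
    InstrEndLoop(instr);

    /*
     * After InstrEndLoop, instr->total holds wall-clock seconds and
     * instr->cpu_time holds user+system CPU seconds; the bracketing in
     * InstrStartNode/InstrStopNode keeps cpu_time <= total.
     */
    elog(DEBUG1, "wall=%.6f s cpu=%.6f s", instr->total, instr->cpu_time);
}

Keeping the rusage snapshot per node mirrors the existing bufusage_start/walusage_start pattern: callers opt in via a flag bit rather than paying for getrusage() calls unconditionally.
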
@ -73,18 +74,22 @@ typedef struct Instrumentation
    bool        need_timer;     /* true if we need timer data */
    bool        need_bufusage;  /* true if we need buffer usage data */
    bool        need_walusage;  /* true if we need WAL usage data */
    bool        need_cpuusage;  /* true if we need CPU usage data */
    bool        async_mode;     /* true if node is in async mode */
    /* Info about current plan cycle: */
    bool        running;        /* true if we've completed first tuple */
    instr_time  starttime;      /* start time of current iteration of node */
    instr_time  counter;        /* accumulated runtime for this node */
    instr_time  firsttuple;     /* time for first tuple of this cycle */
    double      firsttuple;     /* time for first tuple of this cycle */
    double      tuplecount;     /* # of tuples emitted so far this cycle */
    BufferUsage bufusage_start; /* buffer usage at start */
    WalUsage    walusage_start; /* WAL usage at start */
    struct rusage rusage_start; /* CPU usage at start */
    double      counter_cpu;    /* per-cycle CPU time accumulator (seconds) */
    /* Accumulated statistics across all completed cycles: */
    instr_time  startup;        /* total startup time */
    instr_time  total;          /* total time */
    double      startup;        /* total startup time (in seconds) */
    double      total;          /* total time (in seconds) */
    double      cpu_time;       /* total CPU time: user + system (seconds) */
    double      ntuples;        /* total tuples produced */
    double      ntuples2;       /* secondary node-specific tuple counter */
    double      nloops;         /* # of run cycles for this node */