Merge pull request #9354 from mr-raj12/benchmark-crud-json-lines

benchmark crud: add --json-lines output option, fixes #9165
This commit is contained in:
TW 2026-02-17 10:13:43 +01:00 committed by GitHub
commit 786a27519f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 70 additions and 9 deletions

View file

@@ -1,6 +1,7 @@
import argparse
from contextlib import contextmanager
import functools
import json
import os
import tempfile
import time
@@ -116,14 +117,38 @@ class BenchmarkMixIn:
for msg, count, size, random in tests:
with test_files(args.path, count, size, random) as path:
dt_create, dt_update, dt_extract, dt_delete = measurement_run(args.location.canonical_path(), path)
total_size_MB = count * size / 1e06
file_size_formatted = format_file_size(size)
content = "random" if random else "all-zero"
fmt = "%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)"
print(fmt % ("C", msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
print(fmt % ("R", msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
print(fmt % ("U", msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
print(fmt % ("D", msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))
total_size = count * size
if args.json_lines:
for cmd_letter, cmd_name, dt in [
("C", "create1", dt_create),
("R", "extract", dt_extract),
("U", "create2", dt_update),
("D", "delete", dt_delete),
]:
print(
json.dumps(
{
"id": f"{cmd_letter}-{msg}",
"command": cmd_name,
"sample": msg,
"sample_count": count,
"sample_size": size,
"sample_random": random,
"time": dt,
"io": int(total_size / dt),
},
sort_keys=True,
)
)
else:
total_size_MB = total_size / 1e06
file_size_formatted = format_file_size(size)
content = "random" if random else "all-zero"
fmt = "%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)"
print(fmt % ("C", msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
print(fmt % ("R", msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
print(fmt % ("U", msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
print(fmt % ("D", msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))
def do_benchmark_cpu(self, args):
"""Benchmark CPU-bound operations."""
@@ -378,6 +403,7 @@ class BenchmarkMixIn:
subparser.set_defaults(func=self.do_benchmark_crud)
subparser.add_argument("path", metavar="PATH", help="path where to create benchmark input data")
subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines.")
bench_cpu_epilog = process_epilog(
"""

View file

@@ -7,7 +7,42 @@ from . import cmd, RK_ENCRYPTION
def test_benchmark_crud(archiver, monkeypatch):
    """Smoke-test the human-readable output of `benchmark crud`."""
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    monkeypatch.setenv("_BORG_BENCHMARK_CRUD_TEST", "YES")
    output = cmd(archiver, "benchmark", "crud", archiver.input_path)
    # Every sample (Z-TEST, R-TEST) must report all four CRUD operations.
    for op in "CRUD":
        for sample in ("Z-TEST", "R-TEST"):
            assert f"{op}-{sample}" in output
    assert "MB/s" in output
def test_benchmark_crud_json_lines(archiver, monkeypatch):
    """Check `benchmark crud --json-lines`: 8 JSON records with the expected schema."""
    # NOTE(review): relies on `import json` at the top of this test module,
    # which is not visible in this view — confirm it is present.
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    monkeypatch.setenv("_BORG_BENCHMARK_CRUD_TEST", "YES")
    output = cmd(archiver, "benchmark", "crud", "--json-lines", archiver.input_path)
    # The test harness merges stdout and stderr, so drop non-JSON chatter
    # (e.g. "Done. Run borg compact...") emitted by the inner commands.
    json_lines = [ln for ln in output.splitlines() if ln.strip().startswith("{")]
    # 2 test samples (Z-TEST, R-TEST) x 4 operations (C, R, U, D) = 8 lines
    assert len(json_lines) == 8
    records = [json.loads(ln) for ln in json_lines]
    # All eight id values must be present, each exactly once.
    expected_ids = {f"{op}-{sample}" for op in "CRUD" for sample in ("Z-TEST", "R-TEST")}
    assert {rec["id"] for rec in records} == expected_ids
    for rec in records:
        assert isinstance(rec["id"], str)
        assert rec["command"] in ("create1", "extract", "create2", "delete")
        assert isinstance(rec["sample"], str) and rec["sample"] in ("Z-TEST", "R-TEST")
        assert isinstance(rec["sample_count"], int) and rec["sample_count"] == 1
        assert isinstance(rec["sample_size"], int) and rec["sample_size"] == 1
        assert isinstance(rec["sample_random"], bool)
        assert isinstance(rec["time"], float) and rec["time"] > 0
        assert isinstance(rec["io"], int) and rec["io"] > 0
def test_benchmark_cpu(archiver):