From 4fcde8ad0f13be289390ba895b2f18067c8a3086 Mon Sep 17 00:00:00 2001
From: Mrityunjay Raj
Date: Mon, 16 Feb 2026 22:51:32 +0530
Subject: [PATCH] benchmark cpu: add --json output option, fixes #9166

Add a --json flag to 'borg benchmark cpu' that emits all benchmark
results as a single JSON object for easy machine parsing. In JSON
output, sizes are plain integers (bytes); the plain-text output keeps
human-readable sizes via format_file_size().

Also add tests for both the plain-text and JSON output formats.
---
 src/borg/archiver/benchmark_cmd.py            | 96 +++++++++++++++----
 .../testsuite/archiver/benchmark_cmd_test.py  | 43 +++++++++
 2 files changed, 120 insertions(+), 19 deletions(-)

diff --git a/src/borg/archiver/benchmark_cmd.py b/src/borg/archiver/benchmark_cmd.py
index b1b241c4a..062e7597e 100644
--- a/src/borg/archiver/benchmark_cmd.py
+++ b/src/borg/archiver/benchmark_cmd.py
@@ -8,6 +8,7 @@ import time
 from ..constants import *  # NOQA
 from ..crypto.key import FlexiKey
 from ..helpers import format_file_size
+from ..helpers import json_print
 from ..helpers import msgpack
 from ..helpers import get_reset_ec
 from ..item import Item
@@ -128,6 +129,8 @@ class BenchmarkMixIn:
         """Benchmark CPU-bound operations."""
         from timeit import timeit
 
+        result = {} if args.json else None
+
         random_10M = os.urandom(10 * 1000 * 1000)
         key_256 = os.urandom(32)
         key_128 = os.urandom(16)
@@ -136,8 +139,11 @@ class BenchmarkMixIn:
         import io
         from ..chunkers import get_chunker  # noqa
 
-        print("Chunkers =======================================================")
-        size = "1GB"
+        if not args.json:
+            print("Chunkers =======================================================")
+        else:
+            result["chunkers"] = []
+        size = 1000000000
 
         def chunkit(ch):
             with io.BytesIO(random_10M) as data_file:
@@ -160,31 +166,53 @@ class BenchmarkMixIn:
             ),
             ("fixed,1048576", "ch = get_chunker('fixed', 1048576, sparse=False)", "chunkit(ch)", locals()),
         ]:
-            print(f"{spec:<24} {size:<10} {timeit(func, setup, number=100, globals=vars):.3f}s")
+            dt = timeit(func, setup, number=100, globals=vars)
+            if args.json:
+                algo, _, algo_params = spec.partition(",")
+                result["chunkers"].append({"algo": algo, "algo_params": algo_params, "size": size, "time": dt})
+            else:
+                print(f"{spec:<24} {format_file_size(size):<10} {dt:.3f}s")
 
         from ..checksums import crc32, xxh64
 
-        print("Non-cryptographic checksums / hashes ===========================")
-        size = "1GB"
+        if not args.json:
+            print("Non-cryptographic checksums / hashes ===========================")
+        else:
+            result["checksums"] = []
+        size = 1000000000
         tests = [("xxh64", lambda: xxh64(random_10M)), ("crc32 (zlib)", lambda: crc32(random_10M))]
         for spec, func in tests:
-            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
+            dt = timeit(func, number=100)
+            if args.json:
+                result["checksums"].append({"algo": spec, "size": size, "time": dt})
+            else:
+                print(f"{spec:<24} {format_file_size(size):<10} {dt:.3f}s")
 
         from ..crypto.low_level import hmac_sha256, blake2b_256
 
-        print("Cryptographic hashes / MACs ====================================")
-        size = "1GB"
+        if not args.json:
+            print("Cryptographic hashes / MACs ====================================")
+        else:
+            result["hashes"] = []
+        size = 1000000000
         for spec, func in [
             ("hmac-sha256", lambda: hmac_sha256(key_256, random_10M)),
             ("blake2b-256", lambda: blake2b_256(key_256, random_10M)),
         ]:
-            print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
+            dt = timeit(func, number=100)
+            if args.json:
+                result["hashes"].append({"algo": spec, "size": size, "time": dt})
+            else:
print(f"{spec:<24} {format_file_size(size):<10} {dt:.3f}s") from ..crypto.low_level import AES256_CTR_BLAKE2b, AES256_CTR_HMAC_SHA256 from ..crypto.low_level import AES256_OCB, CHACHA20_POLY1305 - print("Encryption =====================================================") - size = "1GB" + if not args.json: + print("Encryption =====================================================") + else: + result["encryption"] = [] + size = 1000000000 tests = [ ( @@ -211,19 +239,33 @@ class BenchmarkMixIn: ), ] for spec, func in tests: - print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s") + dt = timeit(func, number=100) + if args.json: + result["encryption"].append({"algo": spec, "size": size, "time": dt}) + else: + print(f"{spec:<24} {format_file_size(size):<10} {dt:.3f}s") - print("KDFs (slow is GOOD, use argon2!) ===============================") + if not args.json: + print("KDFs (slow is GOOD, use argon2!) ===============================") + else: + result["kdf"] = [] count = 5 for spec, func in [ ("pbkdf2", lambda: FlexiKey.pbkdf2("mypassphrase", b"salt" * 8, PBKDF2_ITERATIONS, 32)), ("argon2", lambda: FlexiKey.argon2("mypassphrase", 64, b"S" * ARGON2_SALT_BYTES, **ARGON2_ARGS)), ]: - print(f"{spec:<24} {count:<10} {timeit(func, number=count):.3f}s") + dt = timeit(func, number=count) + if args.json: + result["kdf"].append({"algo": spec, "count": count, "time": dt}) + else: + print(f"{spec:<24} {count:<10} {dt:.3f}s") from ..compress import CompressionSpec - print("Compression ====================================================") + if not args.json: + print("Compression ====================================================") + else: + result["compression"] = [] for spec in [ "lz4", "zstd,1", @@ -240,15 +282,30 @@ class BenchmarkMixIn: "lzma,9", ]: compressor = CompressionSpec(spec).compressor - size = "0.1GB" - print(f"{spec:<12} {size:<10} {timeit(lambda: compressor.compress({}, random_10M), number=10):.3f}s") + size = 100000000 + dt = timeit(lambda: compressor.compress({}, random_10M), number=10) + if args.json: + algo, _, algo_params = spec.partition(",") + result["compression"].append({"algo": algo, "algo_params": algo_params, "size": size, "time": dt}) + else: + print(f"{spec:<12} {format_file_size(size):<10} {dt:.3f}s") - print("msgpack ========================================================") + if not args.json: + print("msgpack ========================================================") + else: + result["msgpack"] = [] item = Item(path="foo/bar/baz", mode=660, mtime=1234567) items = [item.as_dict()] * 1000 size = "100k Items" spec = "msgpack" - print(f"{spec:<12} {size:<10} {timeit(lambda: msgpack.packb(items), number=100):.3f}s") + dt = timeit(lambda: msgpack.packb(items), number=100) + if args.json: + result["msgpack"].append({"algo": spec, "count": 100000, "time": dt}) + else: + print(f"{spec:<12} {size:<10} {dt:.3f}s") + + if args.json: + json_print(result) def build_parser_benchmarks(self, subparsers, common_parser, mid_common_parser): from ._common import process_epilog @@ -343,3 +400,4 @@ class BenchmarkMixIn: help="benchmarks Borg CPU-bound operations.", ) subparser.set_defaults(func=self.do_benchmark_cpu) + subparser.add_argument("--json", action="store_true", help="format output as JSON") diff --git a/src/borg/testsuite/archiver/benchmark_cmd_test.py b/src/borg/testsuite/archiver/benchmark_cmd_test.py index 6ad84e170..abc17a656 100644 --- a/src/borg/testsuite/archiver/benchmark_cmd_test.py +++ b/src/borg/testsuite/archiver/benchmark_cmd_test.py @@ -1,3 +1,5 @@ +import json 
+ from ...constants import * # NOQA from . import cmd, RK_ENCRYPTION @@ -6,3 +8,44 @@ def test_benchmark_crud(archiver, monkeypatch): cmd(archiver, "repo-create", RK_ENCRYPTION) monkeypatch.setenv("_BORG_BENCHMARK_CRUD_TEST", "YES") cmd(archiver, "benchmark", "crud", archiver.input_path) + + +def test_benchmark_cpu(archiver): + output = cmd(archiver, "benchmark", "cpu") + # verify all section headers appear in the plain-text output + assert "Chunkers" in output + assert "Non-cryptographic checksums / hashes" in output + assert "Cryptographic hashes / MACs" in output + assert "Encryption" in output + assert "KDFs" in output + assert "Compression" in output + assert "msgpack" in output + + +def test_benchmark_cpu_json(archiver): + output = cmd(archiver, "benchmark", "cpu", "--json") + result = json.loads(output) + assert isinstance(result, dict) + # categories with "size" field (bytes) + for category in ["chunkers", "checksums", "hashes", "encryption"]: + assert isinstance(result[category], list) + assert len(result[category]) > 0 + for entry in result[category]: + assert isinstance(entry["algo"], str) + assert isinstance(entry["size"], int) + assert isinstance(entry["time"], float) + # chunkers and compression also have algo_params + for category in ["chunkers", "compression"]: + for entry in result[category]: + assert "algo_params" in entry + # categories with "count" field + for category in ["kdf", "msgpack"]: + assert isinstance(result[category], list) + assert len(result[category]) > 0 + for entry in result[category]: + assert isinstance(entry["algo"], str) + assert isinstance(entry["count"], int) + assert isinstance(entry["time"], float) + # compression has size field too + for entry in result["compression"]: + assert isinstance(entry["size"], int)
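
Editor's note: below is a minimal sketch (not part of the patch) of a consumer of the new --json output. It assumes a borg build with this patch applied is on PATH; the throughput calculation is only an illustration of what the integer byte sizes enable.

    import json
    import subprocess

    # Run the benchmark; with --json, stdout is a single JSON object whose
    # keys are the categories added by this patch: chunkers, checksums,
    # hashes, encryption, kdf, compression, msgpack.
    proc = subprocess.run(
        ["borg", "benchmark", "cpu", "--json"], capture_output=True, text=True, check=True
    )
    result = json.loads(proc.stdout)

    # Categories with a "size" field carry total bytes processed (int) and
    # elapsed seconds (float), so throughput falls out directly.
    for entry in result["compression"]:
        mb_per_s = entry["size"] / entry["time"] / 1_000_000
        algo = entry["algo"] + ("," + entry["algo_params"] if entry["algo_params"] else "")
        print(f"{algo:<12} {mb_per_s:8.1f} MB/s")

This is exactly the kind of machine parsing the plain-text format made awkward: no column widths or "1GB" strings to scrape, just integers and floats.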