From 1150b8d8f7320a1bd07d81e693a8df14b2cbba1e Mon Sep 17 00:00:00 2001
From: Mrityunjay Raj
Date: Mon, 16 Feb 2026 22:47:40 +0530
Subject: [PATCH] benchmark crud: add --json-lines output option, fixes #9165

Add --json-lines flag to 'borg benchmark crud' that outputs each
measurement as a JSON object (one per line) for easy machine parsing.

Also improve test coverage to validate both human-readable and
JSON-lines output formats.
---
 src/borg/archiver/benchmark_cmd.py            | 42 +++++++++++++++----
 .../testsuite/archiver/benchmark_cmd_test.py  | 37 +++++++++++++++-
 2 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/src/borg/archiver/benchmark_cmd.py b/src/borg/archiver/benchmark_cmd.py
index 062e7597e..028ef11f6 100644
--- a/src/borg/archiver/benchmark_cmd.py
+++ b/src/borg/archiver/benchmark_cmd.py
@@ -1,6 +1,7 @@
 import argparse
 from contextlib import contextmanager
 import functools
+import json
 import os
 import tempfile
 import time
@@ -116,14 +117,38 @@ class BenchmarkMixIn:
         for msg, count, size, random in tests:
             with test_files(args.path, count, size, random) as path:
                 dt_create, dt_update, dt_extract, dt_delete = measurement_run(args.location.canonical_path(), path)
-                total_size_MB = count * size / 1e06
-                file_size_formatted = format_file_size(size)
-                content = "random" if random else "all-zero"
-                fmt = "%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)"
-                print(fmt % ("C", msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
-                print(fmt % ("R", msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
-                print(fmt % ("U", msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
-                print(fmt % ("D", msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))
+                total_size = count * size
+                if args.json_lines:
+                    for cmd_letter, cmd_name, dt in [
+                        ("C", "create1", dt_create),
+                        ("R", "extract", dt_extract),
+                        ("U", "create2", dt_update),
+                        ("D", "delete", dt_delete),
+                    ]:
+                        print(
+                            json.dumps(
+                                {
+                                    "id": f"{cmd_letter}-{msg}",
+                                    "command": cmd_name,
+                                    "sample": msg,
+                                    "sample_count": count,
+                                    "sample_size": size,
+                                    "sample_random": random,
+                                    "time": dt,
+                                    "io": int(total_size / dt),
+                                },
+                                sort_keys=True,
+                            )
+                        )
+                else:
+                    total_size_MB = total_size / 1e06
+                    file_size_formatted = format_file_size(size)
+                    content = "random" if random else "all-zero"
+                    fmt = "%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)"
+                    print(fmt % ("C", msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
+                    print(fmt % ("R", msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
+                    print(fmt % ("U", msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
+                    print(fmt % ("D", msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))
 
     def do_benchmark_cpu(self, args):
         """Benchmark CPU-bound operations."""
@@ -378,6 +403,7 @@
 
         subparser.set_defaults(func=self.do_benchmark_crud)
         subparser.add_argument("path", metavar="PATH", help="path where to create benchmark input data")
+        subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines.")
 
         bench_cpu_epilog = process_epilog(
             """
diff --git a/src/borg/testsuite/archiver/benchmark_cmd_test.py b/src/borg/testsuite/archiver/benchmark_cmd_test.py
index abc17a656..a30757c54 100644
--- a/src/borg/testsuite/archiver/benchmark_cmd_test.py
+++ b/src/borg/testsuite/archiver/benchmark_cmd_test.py
@@ -7,7 +7,42 @@ from . import cmd, RK_ENCRYPTION
 def test_benchmark_crud(archiver, monkeypatch):
     cmd(archiver, "repo-create", RK_ENCRYPTION)
     monkeypatch.setenv("_BORG_BENCHMARK_CRUD_TEST", "YES")
-    cmd(archiver, "benchmark", "crud", archiver.input_path)
+    output = cmd(archiver, "benchmark", "crud", archiver.input_path)
+    # Verify human-readable output contains expected C/R/U/D lines with MB/s
+    for prefix in ("C-Z-TEST", "R-Z-TEST", "U-Z-TEST", "D-Z-TEST", "C-R-TEST", "R-R-TEST", "U-R-TEST", "D-R-TEST"):
+        assert prefix in output
+    assert "MB/s" in output
+
+
+def test_benchmark_crud_json_lines(archiver, monkeypatch):
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    monkeypatch.setenv("_BORG_BENCHMARK_CRUD_TEST", "YES")
+    output = cmd(archiver, "benchmark", "crud", "--json-lines", archiver.input_path)
+    # Filter for JSON lines only; the test harness merges stdout and stderr,
+    # so non-JSON messages (e.g. "Done. Run borg compact...") from inner
+    # commands may appear in the captured output.
+    lines = [line for line in output.splitlines() if line.strip().startswith("{")]
+    # 2 test samples (Z-TEST, R-TEST) x 4 operations (C, R, U, D) = 8 lines
+    assert len(lines) == 8
+    entries = [json.loads(line) for line in lines]
+    # Verify all expected id values are present
+    expected_ids = {"C-Z-TEST", "R-Z-TEST", "U-Z-TEST", "D-Z-TEST", "C-R-TEST", "R-R-TEST", "U-R-TEST", "D-R-TEST"}
+    actual_ids = {e["id"] for e in entries}
+    assert actual_ids == expected_ids
+    for entry in entries:
+        assert isinstance(entry["id"], str)
+        assert entry["command"] in ("create1", "extract", "create2", "delete")
+        assert isinstance(entry["sample"], str)
+        assert entry["sample"] in ("Z-TEST", "R-TEST")
+        assert isinstance(entry["sample_count"], int)
+        assert entry["sample_count"] == 1
+        assert isinstance(entry["sample_size"], int)
+        assert entry["sample_size"] == 1
+        assert isinstance(entry["sample_random"], bool)
+        assert isinstance(entry["time"], float)
+        assert entry["time"] > 0
+        assert isinstance(entry["io"], int)
+        assert entry["io"] > 0
 
 
 def test_benchmark_cpu(archiver):
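Consumption note (not part of the applied diff): each --json-lines record is
one JSON object per line with sorted keys, where "io" is throughput in
bytes/second (total_size / time). An illustrative record follows; the field
values are made up, and the sample name Z-BIG stands in for whatever names
the benchmark's built-in (non-test-mode) sample set uses:

    {"command": "create1", "id": "C-Z-BIG", "io": 104857600, "sample": "Z-BIG", "sample_count": 10, "sample_random": false, "sample_size": 100000000, "time": 9.54}

A minimal consumer sketch in Python, assuming borg is on PATH, the
repository is already configured (e.g. via BORG_REPO), and the data path
/tmp/borg-bench-data is a placeholder:

    import json
    import subprocess

    # Run the benchmark and capture its stdout for line-by-line parsing.
    proc = subprocess.run(
        ["borg", "benchmark", "crud", "--json-lines", "/tmp/borg-bench-data"],
        capture_output=True, text=True, check=True,
    )
    for line in proc.stdout.splitlines():
        line = line.strip()
        if not line.startswith("{"):
            continue  # skip any non-JSON status messages from inner commands
        entry = json.loads(line)
        mb_per_s = entry["io"] / 1e6  # "io" is bytes/second per the patch
        print(f"{entry['id']}: {mb_per_s:.2f} MB/s over {entry['time']:.2f}s")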