diff --git a/docs/faq.rst b/docs/faq.rst index aee29f723..dc4aface8 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -464,8 +464,6 @@ are calculated *before* compression. New compression settings will only be applied to new chunks, not existing chunks. So it's safe to change them. -Use ``borg repo-compress`` to efficiently recompress a complete repository. - Why is backing up an unmodified FAT filesystem slow on Linux? ------------------------------------------------------------- diff --git a/docs/usage.rst b/docs/usage.rst index ce9a82ea4..6921d0bc7 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -40,7 +40,6 @@ Usage usage/repo-space usage/repo-list usage/repo-info - usage/repo-compress usage/repo-delete usage/serve usage/version diff --git a/docs/usage/repo-compress.rst b/docs/usage/repo-compress.rst deleted file mode 100644 index 813db9419..000000000 --- a/docs/usage/repo-compress.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. include:: repo-compress.rst.inc - -Examples -~~~~~~~~ - -:: - - # Recompress repository contents - $ borg repo-compress --progress --compression=zstd,3 - - # Recompress and obfuscate repository contents - $ borg repo-compress --progress --compression=obfuscate,1,zstd,3 diff --git a/docs/usage/repo-compress.rst.inc b/docs/usage/repo-compress.rst.inc deleted file mode 100644 index ca12152d3..000000000 --- a/docs/usage/repo-compress.rst.inc +++ /dev/null @@ -1,69 +0,0 @@ -.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! - -.. _borg_repo-compress: - -borg repo-compress ------------------- -.. code-block:: none - - borg [common options] repo-compress [options] - -.. only:: html - - .. class:: borg-options-table - - +-------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------------------------------+ - | **options** | - +-------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------------------------------+ - | | ``-C COMPRESSION``, ``--compression COMPRESSION`` | select compression algorithm, see the output of the "borg help compression" command for details. | - +-------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------------------------------+ - | | ``-s``, ``--stats`` | print statistics | - +-------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------------------------------+ - | .. class:: borg-common-opt-ref | - | | - | :ref:`common_options` | - +-------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------------------------------+ - - .. raw:: html - - - -.. only:: latex - - - - options - -C COMPRESSION, --compression COMPRESSION select compression algorithm, see the output of the "borg help compression" command for details. - -s, --stats print statistics - - - :ref:`common_options` - | - -Description -~~~~~~~~~~~ - -Repository (re-)compression (and/or re-obfuscation). - -Reads all chunks in the repository and recompresses them if they are not already -using the compression type/level and obfuscation level given via ``--compression``. - -If the outcome of the chunk processing indicates a change in compression -type/level or obfuscation level, the processed chunk is written to the repository. -Please note that the outcome might not always be the desired compression -type/level - if no compression gives a shorter output, that might be chosen. - -Please note that this command can not work in low (or zero) free disk space -conditions. - -If the ``borg repo-compress`` process receives a SIGINT signal (Ctrl-C), the repo -will be committed and compacted and borg will terminate cleanly afterwards. - -Both ``--progress`` and ``--stats`` are recommended when ``borg repo-compress`` -is used interactively. - -You do **not** need to run ``borg compact`` after ``borg repo-compress``. \ No newline at end of file diff --git a/scripts/shell_completions/fish/borg.fish b/scripts/shell_completions/fish/borg.fish index 159c93adb..423ac6e2d 100644 --- a/scripts/shell_completions/fish/borg.fish +++ b/scripts/shell_completions/fish/borg.fish @@ -20,7 +20,6 @@ complete -c borg -f -n __fish_is_first_token -a 'compact' -d 'Free repository sp complete -c borg -f -n __fish_is_first_token -a 'info' -d 'Show archive details' complete -c borg -f -n __fish_is_first_token -a 'mount' -d 'Mount archive or a repository' complete -c borg -f -n __fish_is_first_token -a 'umount' -d 'Unmount the mounted archive' -complete -c borg -f -n __fish_is_first_token -a 'repo-compress' -d 'Repository (re-)compression' complete -c borg -f -n __fish_is_first_token -a 'repo-create' -d 'Create a new, empty repository' complete -c borg -f -n __fish_is_first_token -a 'repo-delete' -d 'Delete a repository' complete -c borg -f -n __fish_is_first_token -a 'repo-info' -d 'Show repository information' @@ -106,12 +105,6 @@ complete -c borg -f -l 'newest' -d 'Consider archives within complete -c borg -f -l 'older' -d 'Consider archives older than TIMESPAN' -n "__fish_seen_subcommand_from analyze" complete -c borg -f -l 'newer' -d 'Consider archives newer than TIMESPAN' -n "__fish_seen_subcommand_from analyze" -# borg repo-compress options -# Define compression methods once at the top -set -l compression_methods "none auto lz4 zstd,1 zstd,2 zstd,3 zstd,4 zstd,5 zstd,6 zstd,7 zstd,8 zstd,9 zstd,10 zstd,11 zstd,12 zstd,13 zstd,14 zstd,15 zstd,16 zstd,17 zstd,18 zstd,19 zstd,20 zstd,21 zstd,22 zlib,1 zlib,2 zlib,3 zlib,4 zlib,5 zlib,6 zlib,7 zlib,8 zlib,9 lzma,0 lzma,1 lzma,2 lzma,3 lzma,4 lzma,5 lzma,6 lzma,7 lzma,8 lzma,9" -complete -c borg -f -s C -l 'compression' -d 'Select compression ALGORITHM,LEVEL [lz4]' -a "$compression_methods" -n "__fish_seen_subcommand_from repo-compress" -complete -c borg -f -s s -l 'stats' -d 'Print statistics' -n "__fish_seen_subcommand_from repo-compress" - # borg create options complete -c borg -f -s n -l 'dry-run' -d 'Do not create a backup archive' -n "__fish_seen_subcommand_from create" complete -c borg -f -s s -l 'stats' -d 'Print verbose statistics' -n "__fish_seen_subcommand_from create" diff --git a/src/borg/archiver/__init__.py b/src/borg/archiver/__init__.py index dadb4c7d8..a2d409cc2 100644 --- a/src/borg/archiver/__init__.py +++ b/src/borg/archiver/__init__.py @@ -81,7 +81,6 @@ from .list_cmd import ListMixIn from .lock_cmds import LocksMixIn from .mount_cmds import MountMixIn from .prune_cmd import PruneMixIn -from .repo_compress_cmd import RepoCompressMixIn from .recreate_cmd import RecreateMixIn from .rename_cmd import RenameMixIn from .repo_create_cmd import RepoCreateMixIn @@ -117,7 +116,6 @@ class Archiver( PruneMixIn, RecreateMixIn, RenameMixIn, - RepoCompressMixIn, RepoCreateMixIn, RepoDeleteMixIn, RepoInfoMixIn, @@ -292,7 +290,6 @@ class Archiver( self.build_parser_locks(subparsers, common_parser, mid_common_parser) self.build_parser_mount_umount(subparsers, common_parser, mid_common_parser) self.build_parser_prune(subparsers, common_parser, mid_common_parser) - self.build_parser_repo_compress(subparsers, common_parser, mid_common_parser) self.build_parser_repo_create(subparsers, common_parser, mid_common_parser) self.build_parser_repo_delete(subparsers, common_parser, mid_common_parser) self.build_parser_repo_info(subparsers, common_parser, mid_common_parser) diff --git a/src/borg/archiver/repo_compress_cmd.py b/src/borg/archiver/repo_compress_cmd.py deleted file mode 100644 index 8fb26e2f7..000000000 --- a/src/borg/archiver/repo_compress_cmd.py +++ /dev/null @@ -1,199 +0,0 @@ -from collections import defaultdict - -from ._common import with_repository, Highlander -from ..constants import * # NOQA -from ..compress import ObfuscateSize, Auto, COMPRESSOR_TABLE -from ..hashindex import ChunkIndex -from ..helpers import sig_int, ProgressIndicatorPercent, Error, CompressionSpec -from ..helpers.argparsing import ArgumentParser -from ..repository import Repository -from ..remote import RemoteRepository -from ..manifest import Manifest - -from ..logger import create_logger - -logger = create_logger() - - -def find_chunks(repository, repo_objs, cache, stats, ctype, clevel, olevel): - """Find and flag chunks that need processing (usually: recompression).""" - compr_keys = stats["compr_keys"] = set() - compr_wanted = ctype, clevel, olevel - recompress_count = 0 - for id, cie in cache.chunks.iteritems(): - chunk_no_data = repository.get(id, read_data=False) - meta = repo_objs.parse_meta(id, chunk_no_data, ro_type=ROBJ_DONTCARE) - compr_found = meta["ctype"], meta["clevel"], meta.get("olevel", -1) - if compr_found != compr_wanted: - flags_compress = cie.flags | ChunkIndex.F_COMPRESS - cache.chunks[id] = cie._replace(flags=flags_compress) - recompress_count += 1 - compr_keys.add(compr_found) - stats[compr_found] += 1 - stats["checked_count"] += 1 - return recompress_count - - -def process_chunks(repository, repo_objs, stats, recompress_ids, olevel): - """Process some chunks (usually: recompress).""" - compr_keys = stats["compr_keys"] - if compr_keys == 0: # work around defaultdict(int) - compr_keys = stats["compr_keys"] = set() - for id, chunk in zip(recompress_ids, repository.get_many(recompress_ids, read_data=True)): - old_size = len(chunk) - stats["old_size"] += old_size - meta, data = repo_objs.parse(id, chunk, ro_type=ROBJ_DONTCARE) - ro_type = meta.pop("type", None) - compr_old = meta["ctype"], meta["clevel"], meta.get("olevel", -1) - if olevel == -1: - # if the chunk was obfuscated, but should not be in future, remove related metadata - meta.pop("olevel", None) - meta.pop("psize", None) - chunk = repo_objs.format(id, meta, data, ro_type=ro_type) - compr_done = meta["ctype"], meta["clevel"], meta.get("olevel", -1) - if compr_done != compr_old: - # we actually changed something - repository.put(id, chunk, wait=False) - repository.async_response(wait=False) - stats["new_size"] += len(chunk) - compr_keys.add(compr_done) - stats[compr_done] += 1 - stats["recompressed_count"] += 1 - else: - # It might be that the old chunk used compression none or lz4 (for whatever reason, - # including the old compressor being a DecidingCompressor) AND we used a - # DecidingCompressor now, which did NOT compress like we wanted, but decided - # to use the same compression (and obfuscation) we already had. - # In this case, we just keep the old chunk and do not rewrite it - - # This is important to avoid rewriting such chunks **again and again**. - stats["new_size"] += old_size - compr_keys.add(compr_old) - stats[compr_old] += 1 - stats["kept_count"] += 1 - - -def format_compression_spec(ctype, clevel, olevel): - obfuscation = "" if olevel == -1 else f"obfuscate,{olevel}," - for cname, cls in COMPRESSOR_TABLE.items(): - if cls.ID == ctype: - cname = f"{cname}" - break - else: - cname = f"{ctype}" - clevel = f",{clevel}" if clevel != 255 else "" - return obfuscation + cname + clevel - - -class RepoCompressMixIn: - @with_repository(cache=True, manifest=True, compatibility=(Manifest.Operation.CHECK,)) - def do_repo_compress(self, args, repository, manifest, cache): - """Repository (re-)compression.""" - - def get_csettings(c): - if isinstance(c, Auto): - return get_csettings(c.compressor) - if isinstance(c, ObfuscateSize): - ctype, clevel, _ = get_csettings(c.compressor) - olevel = c.level - return ctype, clevel, olevel - ctype, clevel, olevel = c.ID, c.level, -1 - return ctype, clevel, olevel - - if not isinstance(repository, (Repository, RemoteRepository)): - raise Error("repo-compress not supported for legacy repositories.") - - repo_objs = manifest.repo_objs - ctype, clevel, olevel = get_csettings(repo_objs.compressor) # desired compression set by --compression - - stats_find = defaultdict(int) - stats_process = defaultdict(int) - recompress_candidate_count = find_chunks(repository, repo_objs, cache, stats_find, ctype, clevel, olevel) - - pi = ProgressIndicatorPercent( - total=recompress_candidate_count, - msg="Recompressing %3.1f%%", - step=0.1, - msgid="repo_compress.process_chunks", - ) - for id, cie in cache.chunks.iteritems(): - if sig_int and sig_int.action_done(): - break - if cie.flags & ChunkIndex.F_COMPRESS: - process_chunks(repository, repo_objs, stats_process, [id], olevel) - pi.show() - pi.finish() - if sig_int: - # Ctrl-C / SIGINT: do not commit - raise Error("Got Ctrl-C / SIGINT.") - else: - while repository.async_response(wait=True) is not None: - pass - if args.stats: - print() - print("Recompression stats:") - print(f"Size: previously {stats_process['old_size']} -> now {stats_process['new_size']} bytes.") - print( - f"Change: " - f"{stats_process['new_size'] - stats_process['old_size']} bytes == " - f"{100.0 * stats_process['new_size'] / stats_process['old_size']:3.2f}%" - ) - print("Found chunks stats (before processing):") - for ck in stats_find["compr_keys"]: - pretty_ck = format_compression_spec(*ck) - print(f"{pretty_ck}: {stats_find[ck]}") - print(f"Total: {stats_find['checked_count']}") - - print(f"Candidates for recompression: {recompress_candidate_count}") - - print("Processed chunks stats (after processing):") - for ck in stats_process["compr_keys"]: - pretty_ck = format_compression_spec(*ck) - print(f"{pretty_ck}: {stats_process[ck]}") - print(f"Recompressed and rewritten: {stats_process['recompressed_count']}") - print(f"Kept as is: {stats_process['kept_count']}") - print(f"Total: {stats_process['recompressed_count'] + stats_process['kept_count']}") - - def build_parser_repo_compress(self, subparsers, common_parser, mid_common_parser): - from ._common import process_epilog - - repo_compress_epilog = process_epilog( - """ - Repository (re-)compression (and/or re-obfuscation). - - Reads all chunks in the repository and recompresses them if they are not already - using the compression type/level and obfuscation level given via ``--compression``. - - If the outcome of the chunk processing indicates a change in compression - type/level or obfuscation level, the processed chunk is written to the repository. - Please note that the outcome might not always be the desired compression - type/level - if no compression gives a shorter output, that might be chosen. - - Please note that this command can not work in low (or zero) free disk space - conditions. - - If the ``borg repo-compress`` process receives a SIGINT signal (Ctrl-C), the repo - will be committed and compacted and borg will terminate cleanly afterwards. - - Both ``--progress`` and ``--stats`` are recommended when ``borg repo-compress`` - is used interactively. - - You do **not** need to run ``borg compact`` after ``borg repo-compress``. - """ - ) - subparser = ArgumentParser( - parents=[common_parser], description=self.do_repo_compress.__doc__, epilog=repo_compress_epilog - ) - subparsers.add_subcommand("repo-compress", subparser, help=self.do_repo_compress.__doc__) - - subparser.add_argument( - "-C", - "--compression", - metavar="COMPRESSION", - dest="compression", - type=CompressionSpec, - default=CompressionSpec("lz4"), - action=Highlander, - help="select compression algorithm, see the output of the " '"borg help compression" command for details.', - ) - - subparser.add_argument("-s", "--stats", dest="stats", action="store_true", help="print statistics") diff --git a/src/borg/testsuite/archiver/repo_compress_cmd_test.py b/src/borg/testsuite/archiver/repo_compress_cmd_test.py deleted file mode 100644 index 67a34ef55..000000000 --- a/src/borg/testsuite/archiver/repo_compress_cmd_test.py +++ /dev/null @@ -1,84 +0,0 @@ -import os - -from ...constants import * # NOQA -from ...repository import Repository, repo_lister -from ...manifest import Manifest -from ...compress import ZSTD, ZLIB, LZ4, CNONE -from ...helpers import bin_to_hex - -from . import create_regular_file, cmd, RK_ENCRYPTION - - -def test_repo_compress(archiver): - def check_compression(ctype, clevel, olevel): - """Check that all chunks in the repo are compressed/obfuscated as expected.""" - repository = Repository(archiver.repository_path, exclusive=True) - with repository: - manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - for id, _ in repo_lister(repository, limit=LIST_SCAN_LIMIT): - chunk = repository.get(id, read_data=True) - meta, data = manifest.repo_objs.parse( - id, chunk, ro_type=ROBJ_DONTCARE - ) # will also decompress according to metadata - m_olevel = meta.get("olevel", -1) - m_psize = meta.get("psize", -1) - print(bin_to_hex(id), meta["ctype"], meta["clevel"], meta["csize"], meta["size"], m_olevel, m_psize) - # this is not as easy as one thinks due to the DecidingCompressor choosing the smallest of - # (desired compressed, lz4 compressed, not compressed). - assert meta["ctype"] in (ctype, LZ4.ID, CNONE.ID) - assert meta["clevel"] in (clevel, 255) # LZ4 and CNONE have level 255 - if olevel != -1: # we expect obfuscation - assert "psize" in meta - assert m_olevel == olevel - else: - assert "psize" not in meta - assert "olevel" not in meta - - create_regular_file(archiver.input_path, "file1", size=1024 * 10) - create_regular_file(archiver.input_path, "file2", contents=os.urandom(1024 * 10)) - cmd(archiver, "repo-create", RK_ENCRYPTION) - - cname, ctype, clevel, olevel = ZLIB.name, ZLIB.ID, 3, -1 - cmd(archiver, "create", "test", "input", "-C", f"{cname},{clevel}") - check_compression(ctype, clevel, olevel) - - cname, ctype, clevel, olevel = ZSTD.name, ZSTD.ID, 1, -1 # change compressor (and level) - cmd(archiver, "repo-compress", "-C", f"{cname},{clevel}") - check_compression(ctype, clevel, olevel) - - cname, ctype, clevel, olevel = ZSTD.name, ZSTD.ID, 3, -1 # only change level - cmd(archiver, "repo-compress", "-C", f"{cname},{clevel}") - check_compression(ctype, clevel, olevel) - - cname, ctype, clevel, olevel = ZSTD.name, ZSTD.ID, 3, 110 # only change to obfuscated - cmd(archiver, "repo-compress", "-C", f"obfuscate,{olevel},{cname},{clevel}") - check_compression(ctype, clevel, olevel) - - cname, ctype, clevel, olevel = ZSTD.name, ZSTD.ID, 3, 112 # only change obfuscation level - cmd(archiver, "repo-compress", "-C", f"obfuscate,{olevel},{cname},{clevel}") - check_compression(ctype, clevel, olevel) - - cname, ctype, clevel, olevel = ZSTD.name, ZSTD.ID, 3, -1 # change to not obfuscated - cmd(archiver, "repo-compress", "-C", f"{cname},{clevel}") - check_compression(ctype, clevel, olevel) - - cname, ctype, clevel, olevel = ZLIB.name, ZLIB.ID, 1, -1 - cmd(archiver, "repo-compress", "-C", f"auto,{cname},{clevel}") - check_compression(ctype, clevel, olevel) - - cname, ctype, clevel, olevel = ZLIB.name, ZLIB.ID, 2, 111 - cmd(archiver, "repo-compress", "-C", f"obfuscate,{olevel},auto,{cname},{clevel}") - check_compression(ctype, clevel, olevel) - - -def test_repo_compress_stats(archiver): - create_regular_file(archiver.input_path, "file1", size=1024 * 10) - create_regular_file(archiver.input_path, "file2", contents=os.urandom(1024 * 10)) - cmd(archiver, "repo-create", RK_ENCRYPTION) - - cname, clevel = ZLIB.name, 3 - cmd(archiver, "create", "test", "input", "-C", f"{cname},{clevel}") - - cname, clevel = ZSTD.name, 1 # change compressor (and level) - output = cmd(archiver, "repo-compress", "-C", f"{cname},{clevel}", "--stats") - assert "Recompression stats:" in output diff --git a/src/borg/testsuite/archiver/restricted_permissions_test.py b/src/borg/testsuite/archiver/restricted_permissions_test.py index 1ce8c3c87..1b98f8c0d 100644 --- a/src/borg/testsuite/archiver/restricted_permissions_test.py +++ b/src/borg/testsuite/archiver/restricted_permissions_test.py @@ -86,11 +86,6 @@ def test_repository_permissions_no_delete(archivers, request, monkeypatch): with pytest.raises(PermissionDenied): cmd(archiver, "check", "--repair") - # Try to repo-compress (and change compression from lz4 to zstd), which should fail. - # It fails because it needs to overwrite existing chunks, which is also disallowed by no-delete. - with pytest.raises(PermissionDenied): - cmd(archiver, "repo-compress", "-C", "zstd") - def test_repository_permissions_read_only(archivers, request, monkeypatch): """Test repository with 'read-only' permissions setting."""