mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 01:41:57 -04:00
compact: also clean up files cache, fixes #8852
This commit is contained in:
parent
708c93a4b0
commit
318c94d750
3 changed files with 127 additions and 8 deletions
|
|
@ -1,9 +1,12 @@
|
|||
import argparse
|
||||
import os
|
||||
from typing import Tuple, Set
|
||||
|
||||
from ._common import with_repository
|
||||
from ..archive import Archive
|
||||
from ..cache import write_chunkindex_to_repo_cache, build_chunkindex_from_repo
|
||||
from ..cache import files_cache_name, discover_files_cache_names
|
||||
from ..helpers import get_cache_dir
|
||||
from ..constants import * # NOQA
|
||||
from ..hashindex import ChunkIndex, ChunkIndexEntry
|
||||
from ..helpers import set_ec, EXIT_ERROR, format_file_size, bin_to_hex
|
||||
|
|
@ -43,6 +46,7 @@ class ArchiveGarbageCollector:
|
|||
(self.missing_chunks, self.total_files, self.total_size, self.archives_count) = self.analyze_archives()
|
||||
self.report_and_delete()
|
||||
self.save_chunk_index()
|
||||
self.cleanup_files_cache()
|
||||
logger.info("Finished compaction / garbage collection...")
|
||||
|
||||
def get_repository_chunks(self) -> ChunkIndex:
|
||||
|
|
@ -71,6 +75,43 @@ class ArchiveGarbageCollector:
|
|||
)
|
||||
self.chunks = None # nothing there (cleared!)
|
||||
|
||||
def cleanup_files_cache(self):
|
||||
"""
|
||||
Clean up files cache files for archive series names that no longer exist in the repository.
|
||||
|
||||
Note: this only works perfectly if the files cache filename suffixes are automatically generated
|
||||
and the user does not manually control them via more than one BORG_FILES_CACHE_SUFFIX env var value.
|
||||
"""
|
||||
logger.info("Cleaning up files cache...")
|
||||
|
||||
cache_dir = os.path.join(get_cache_dir(), self.repository.id_str)
|
||||
if not os.path.exists(cache_dir):
|
||||
logger.debug("Cache directory does not exist, skipping files cache cleanup")
|
||||
return
|
||||
|
||||
# Get all existing archive series names
|
||||
existing_series = set(self.manifest.archives.names())
|
||||
logger.debug(f"Found {len(existing_series)} existing archive series.")
|
||||
|
||||
# Get the set of all existing files cache file names.
|
||||
try:
|
||||
files_cache_names = set(discover_files_cache_names(cache_dir))
|
||||
logger.debug(f"Found {len(files_cache_names)} files cache files.")
|
||||
except (FileNotFoundError, PermissionError) as e:
|
||||
logger.warning(f"Could not access cache directory: {e}")
|
||||
return
|
||||
|
||||
used_files_cache_names = set(files_cache_name(series_name) for series_name in existing_series)
|
||||
unused_files_cache_names = files_cache_names - used_files_cache_names
|
||||
|
||||
for cache_filename in unused_files_cache_names:
|
||||
cache_path = os.path.join(cache_dir, cache_filename)
|
||||
try:
|
||||
os.unlink(cache_path)
|
||||
except (FileNotFoundError, PermissionError) as e:
|
||||
logger.warning(f"Could not access cache file: {e}")
|
||||
logger.info(f"Removed {len(unused_files_cache_names)} unused files cache files.")
|
||||
|
||||
def analyze_archives(self) -> Tuple[Set, int, int, int]:
|
||||
"""Iterate over all items in all archives, create the dicts id -> size of all used chunks."""
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,35 @@ from .platform import SaveFile
|
|||
from .remote import RemoteRepository
|
||||
from .repository import LIST_SCAN_LIMIT, Repository, StoreObjectNotFound, repo_lister
|
||||
|
||||
|
||||
def files_cache_name(archive_name, files_cache_name="files"):
|
||||
"""
|
||||
Return the name of the files cache file for the given archive name.
|
||||
|
||||
:param archive_name: name of the archive (ideally a series name)
|
||||
:param files_cache_name: base name of the files cache file
|
||||
:return: name of the files cache file
|
||||
"""
|
||||
suffix = os.environ.get("BORG_FILES_CACHE_SUFFIX", "")
|
||||
# when using archive series, we automatically make up a separate cache file per series.
|
||||
# when not, the user may manually do that by using the env var.
|
||||
if not suffix:
|
||||
# avoid issues with too complex or long archive_name by hashing it:
|
||||
suffix = bin_to_hex(xxh64(archive_name.encode()))
|
||||
return files_cache_name + "." + suffix
|
||||
|
||||
|
||||
def discover_files_cache_names(path, files_cache_name="files"):
|
||||
"""
|
||||
Return a list of all files cache file names in the given directory.
|
||||
|
||||
:param path: path to the directory to search in
|
||||
:param files_cache_name: base name of the files cache files
|
||||
:return: list of files cache file names
|
||||
"""
|
||||
return [fn for fn in os.listdir(path) if fn.startswith(files_cache_name + ".")]
|
||||
|
||||
|
||||
# chunks is a list of ChunkListEntry
|
||||
FileCacheEntry = namedtuple("FileCacheEntry", "age inode size ctime mtime chunks")
|
||||
|
||||
|
|
@ -495,16 +524,10 @@ class FilesCacheMixin:
|
|||
return files
|
||||
|
||||
def files_cache_name(self):
|
||||
suffix = os.environ.get("BORG_FILES_CACHE_SUFFIX", "")
|
||||
# when using archive series, we automatically make up a separate cache file per series.
|
||||
# when not, the user may manually do that by using the env var.
|
||||
if not suffix:
|
||||
# avoid issues with too complex or long archive_name by hashing it:
|
||||
suffix = bin_to_hex(xxh64(self.archive_name.encode()))
|
||||
return self.FILES_CACHE_NAME + "." + suffix
|
||||
return files_cache_name(self.archive_name, self.FILES_CACHE_NAME)
|
||||
|
||||
def discover_files_cache_names(self, path):
|
||||
return [fn for fn in os.listdir(path) if fn.startswith(self.FILES_CACHE_NAME + ".")]
|
||||
return discover_files_cache_names(path, self.FILES_CACHE_NAME)
|
||||
|
||||
def _read_files_cache(self):
|
||||
"""read files cache from cache directory"""
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
import os
|
||||
import pytest
|
||||
|
||||
from ...constants import * # NOQA
|
||||
from ...helpers import get_cache_dir
|
||||
from ...cache import files_cache_name, discover_files_cache_names
|
||||
from . import cmd, create_src_archive, generate_archiver_tests, RK_ENCRYPTION
|
||||
|
||||
pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA
|
||||
|
|
@ -79,3 +82,55 @@ def test_compact_index_corruption(archivers, request):
|
|||
|
||||
output = cmd(archiver, "compact", "-v", "--stats", exit_code=0)
|
||||
assert "missing objects" not in output
|
||||
|
||||
|
||||
def test_compact_files_cache_cleanup(archivers, request):
|
||||
"""Test that files cache files for deleted archives are removed during compact."""
|
||||
archiver = request.getfixturevalue(archivers)
|
||||
|
||||
# Create repository and archives
|
||||
cmd(archiver, "repo-create", RK_ENCRYPTION)
|
||||
create_src_archive(archiver, "archive1")
|
||||
create_src_archive(archiver, "archive2")
|
||||
create_src_archive(archiver, "archive3")
|
||||
|
||||
# Get repository ID
|
||||
output = cmd(archiver, "repo-info")
|
||||
for line in output.splitlines():
|
||||
if "Repository ID:" in line:
|
||||
repo_id = line.split(":", 1)[1].strip()
|
||||
break
|
||||
else:
|
||||
pytest.fail("Could not find repository ID in info output")
|
||||
|
||||
# Check cache directory for files cache files
|
||||
cache_dir = os.path.join(get_cache_dir(), repo_id)
|
||||
if not os.path.exists(cache_dir):
|
||||
pytest.skip("Cache directory does not exist, skipping test")
|
||||
|
||||
# Get initial files cache files
|
||||
try:
|
||||
initial_cache_files = set(discover_files_cache_names(cache_dir))
|
||||
except (FileNotFoundError, PermissionError):
|
||||
pytest.skip("Could not access cache directory, skipping test")
|
||||
|
||||
# Get expected cache files for remaining archives
|
||||
expected_cache_files = {files_cache_name(name) for name in ["archive1", "archive2", "archive3"]}
|
||||
assert expected_cache_files == initial_cache_files, "Unexpected cache files found"
|
||||
|
||||
# Delete one archive
|
||||
cmd(archiver, "delete", "-a", "archive2")
|
||||
|
||||
# Run compact
|
||||
output = cmd(archiver, "compact", "-v")
|
||||
assert "Cleaning up files cache" in output
|
||||
|
||||
# Check that files cache for deleted archive is removed
|
||||
try:
|
||||
remaining_cache_files = set(discover_files_cache_names(cache_dir))
|
||||
except (FileNotFoundError, PermissionError):
|
||||
pytest.fail("Could not access cache directory after compact")
|
||||
|
||||
# Get expected cache files for remaining archives
|
||||
expected_cache_files = {files_cache_name(name) for name in ["archive1", "archive3"]}
|
||||
assert expected_cache_files == remaining_cache_files, "Unexpected cache files found"
|
||||
|
|
|
|||
Loading…
Reference in a new issue