From d2536de4eea6d5ed3e02dc899084dc8969c00208 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 14 Jun 2020 21:43:28 +0200 Subject: [PATCH] fix hardlinked CACHEDIR.TAG processing, fixes #4911 --- src/borg/archive.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index ae0ca55a0..38649814e 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -2032,24 +2032,33 @@ class ArchiveRecreater: matcher = self.matcher tag_files = [] tagged_dirs = [] - # build hardlink masters, but only for paths ending in CACHE_TAG_NAME, so we can read hard-linked TAGs + + # to support reading hard-linked CACHEDIR.TAGs (aka CACHE_TAG_NAME), similar to hardlink_masters: cachedir_masters = {} + if self.exclude_caches: + # sadly, due to how CACHEDIR.TAG works (filename AND file [header] contents) and + # how borg deals with hardlinks (slave hardlinks referring back to master hardlinks), + # we need to pass over the archive collecting hardlink master paths. + # as seen in issue #4911, the master paths can have arbitrary filenames, + # not just CACHEDIR.TAG. 
+ for item in archive.iter_items(filter=lambda item: os.path.basename(item.path) == CACHE_TAG_NAME): + if stat.S_ISREG(item.mode) and 'chunks' not in item and 'source' in item: + # this is a hardlink slave, referring back to its hardlink master (via item.source) + cachedir_masters[item.source] = None # we know the key (path), but not the value (item) yet + for item in archive.iter_items( - filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)): - if item.path.endswith(CACHE_TAG_NAME): + filter=lambda item: os.path.basename(item.path) == CACHE_TAG_NAME or matcher.match(item.path)): + if self.exclude_caches and item.path in cachedir_masters: cachedir_masters[item.path] = item dir, tag_file = os.path.split(item.path) if tag_file in self.exclude_if_present: exclude(dir, item) - if stat.S_ISREG(item.mode): - if self.exclude_caches and tag_file == CACHE_TAG_NAME: - if 'chunks' in item: - file = open_item(archive, item) - else: - file = open_item(archive, cachedir_masters[item.source]) - if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS): - exclude(dir, item) + elif self.exclude_caches and tag_file == CACHE_TAG_NAME and stat.S_ISREG(item.mode): + content_item = item if 'chunks' in item else cachedir_masters[item.source] + file = open_item(archive, content_item) + if file.read(len(CACHE_TAG_CONTENTS)) == CACHE_TAG_CONTENTS: + exclude(dir, item) matcher.add(tag_files, IECommand.Include) matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse)