diff --git a/src/borg/archive.py b/src/borg/archive.py index b17685429..c27faf67c 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -161,11 +161,11 @@ class DownloadPipeline: for _, data in self.fetch_many(ids): unpacker.feed(data) items = [Item(internal_dict=item) for item in unpacker] - if filter: - items = [item for item in items if filter(item)] for item in items: if 'chunks' in item: item.chunks = [ChunkListEntry(*e) for e in item.chunks] + if filter: + items = [item for item in items if filter(item)] if preload: for item in items: if 'chunks' in item: @@ -422,7 +422,7 @@ Number of files: {0.stats.nfiles}'''.format( return stats def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False, - hardlink_masters=None, original_path=None, pi=None): + hardlink_masters=None, stripped_components=0, original_path=None, pi=None): """ Extract archive item. @@ -432,9 +432,11 @@ Number of files: {0.stats.nfiles}'''.format( :param stdout: write extracted data to stdout :param sparse: write sparse files (chunk-granularity, independent of the original being sparse) :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly + :param stripped_components: stripped leading path components to correct hard link extraction :param original_path: 'path' key as stored in archive :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes) """ + hardlink_masters = hardlink_masters or {} has_damaged_chunks = 'chunks_healthy' in item if dry_run or stdout: if 'chunks' in item: @@ -473,11 +475,11 @@ Number of files: {0.stats.nfiles}'''.format( os.makedirs(os.path.dirname(path)) # Hard link? if 'source' in item: - source = os.path.join(dest, item.source) + source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:]) with backup_io(): if os.path.exists(path): os.unlink(path) - if not hardlink_masters: + if item.source not in hardlink_masters: os.link(source, path) return item.chunks, link_target = hardlink_masters[item.source] diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 619dbd7e9..f5ebd7308 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -417,15 +417,17 @@ class Archiver: self.print_file_status(status, path) @staticmethod - def build_filter(matcher, is_hardlink_master, strip_components=0): + def build_filter(matcher, peek_and_store_hardlink_masters, strip_components): if strip_components: def item_filter(item): - return (is_hardlink_master(item) or - matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:])) + matched = matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:]) + peek_and_store_hardlink_masters(item, matched) + return matched else: def item_filter(item): - return (is_hardlink_master(item) or - matcher.match(item.path)) + matched = matcher.match(item.path) + peek_and_store_hardlink_masters(item, matched) + return matched return item_filter @with_repository() @@ -450,25 +452,22 @@ class Archiver: partial_extract = not matcher.empty() or strip_components hardlink_masters = {} if partial_extract else None - def item_is_hardlink_master(item): - return (partial_extract and stat.S_ISREG(item.mode) and - item.get('hardlink_master', True) and 'source' not in item) + def peek_and_store_hardlink_masters(item, matched): + if (partial_extract and not matched and stat.S_ISREG(item.mode) and + item.get('hardlink_master', True) and 'source' not in item): + hardlink_masters[item.get('path')] = (item.get('chunks'), None) - filter = self.build_filter(matcher, item_is_hardlink_master, strip_components) + filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components) if progress: progress_logger = logging.getLogger(ProgressIndicatorPercent.LOGGER) progress_logger.info('Calculating size') - extracted_size = sum(item.file_size() for item in archive.iter_items(filter)) + extracted_size = sum(item.file_size(hardlink_masters) for item in archive.iter_items(filter)) pi = ProgressIndicatorPercent(total=extracted_size, msg='Extracting files %5.1f%%', step=0.1) else: pi = None for item in archive.iter_items(filter, preload=True): orig_path = item.path - if item_is_hardlink_master(item): - hardlink_masters[orig_path] = (item.get('chunks'), None) - if not matcher.match(item.path): - continue if strip_components: item.path = os.sep.join(orig_path.split(os.sep)[strip_components:]) if not args.dry_run: @@ -489,7 +488,7 @@ class Archiver: archive.extract_item(item, restore_attrs=False) else: archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters, - original_path=orig_path, pi=pi) + stripped_components=strip_components, original_path=orig_path, pi=pi) except BackupOSError as e: self.print_warning('%s: %s', remove_surrogates(orig_path), e) diff --git a/src/borg/item.py b/src/borg/item.py index 0bc336239..052478705 100644 --- a/src/borg/item.py +++ b/src/borg/item.py @@ -157,10 +157,13 @@ class Item(PropDict): part = PropDict._make_property('part', int) - def file_size(self): - if 'chunks' not in self: + def file_size(self, hardlink_masters=None): + hardlink_masters = hardlink_masters or {} + chunks, _ = hardlink_masters.get(self.get('source'), (None, None)) + chunks = self.get('chunks', chunks) + if chunks is None: return 0 - return sum(chunk.size for chunk in self.chunks) + return sum(chunk.size for chunk in chunks) class EncryptedKey(PropDict): diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 9d68e3eea..fd7eb5fc8 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -2204,25 +2204,25 @@ def test_compare_chunk_contents(): class TestBuildFilter: @staticmethod - def item_is_hardlink_master(item): - return False + def peek_and_store_hardlink_masters(item, matched): + pass def test_basic(self): matcher = PatternMatcher() matcher.add([parse_pattern('included')], True) - filter = Archiver.build_filter(matcher, self.item_is_hardlink_master) + filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0) assert filter(Item(path='included')) assert filter(Item(path='included/file')) assert not filter(Item(path='something else')) def test_empty(self): matcher = PatternMatcher(fallback=True) - filter = Archiver.build_filter(matcher, self.item_is_hardlink_master) + filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0) assert filter(Item(path='anything')) def test_strip_components(self): matcher = PatternMatcher(fallback=True) - filter = Archiver.build_filter(matcher, self.item_is_hardlink_master, strip_components=1) + filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, strip_components=1) assert not filter(Item(path='shallow')) assert not filter(Item(path='shallow/')) # can this even happen? paths are normalized... assert filter(Item(path='deep enough/file'))