From 7211bb2211606e97eb57751a4f61a968862de0e8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 29 Oct 2017 10:53:12 +0100 Subject: [PATCH 1/3] get rid of chunks_healthy when rechunking, fixes #3218 --- src/borg/archive.py | 9 +++++++-- src/borg/archiver.py | 13 ++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index a8fbba916..26940739d 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -967,13 +967,14 @@ class ChunksProcessor: def __init__(self, *, key, cache, add_item, write_checkpoint, - checkpoint_interval): + checkpoint_interval, rechunkify): self.key = key self.cache = cache self.add_item = add_item self.write_checkpoint = write_checkpoint self.checkpoint_interval = checkpoint_interval self.last_checkpoint = time.monotonic() + self.rechunkify = rechunkify def write_part_file(self, item, from_chunk, number): item = Item(internal_dict=item.as_dict()) @@ -998,6 +999,10 @@ class ChunksProcessor: return chunk_entry item.chunks = [] + # if we rechunkify, we'll get a fundamentally different chunks list, thus we need + # to get rid of .chunks_healthy, as it might not correspond to .chunks any more. + if self.rechunkify and 'chunks_healthy' in item: + del item.chunks_healthy from_chunk = 0 part_number = 1 for data in chunk_iter: @@ -1891,7 +1896,7 @@ class ArchiveRecreater: target.process_file_chunks = ChunksProcessor( cache=self.cache, key=self.key, add_item=target.add_item, write_checkpoint=target.write_checkpoint, - checkpoint_interval=self.checkpoint_interval).process_file_chunks + checkpoint_interval=self.checkpoint_interval, rechunkify=target.recreate_rechunkify).process_file_chunks target.chunker = Chunker(self.key.chunk_seed, *target.chunker_params) return target diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 553d6be0b..58dd817d7 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -519,7 +519,7 @@ class Archiver: nobsdflags=args.nobsdflags, numeric_owner=args.numeric_owner) cp = ChunksProcessor(cache=cache, key=key, add_item=archive.add_item, write_checkpoint=archive.write_checkpoint, - checkpoint_interval=args.checkpoint_interval) + checkpoint_interval=args.checkpoint_interval, rechunkify=False) fso = FilesystemObjectProcessors(metadata_collector=metadata_collector, cache=cache, key=key, process_file_chunks=cp.process_file_chunks, add_item=archive.add_item, chunker_params=args.chunker_params) @@ -3371,6 +3371,17 @@ class Archiver: deduplicated size of the archives using the previous chunker params. When recompressing expect approx. (throughput / checkpoint-interval) in space usage, assuming all chunks are recompressed. + + If you recently ran borg check --repair and it had to fix lost chunks with all-zero + replacement chunks, please first run another backup for the same data and re-run + borg check --repair afterwards to heal any archives that had lost chunks which are + still generated from the input data. + + Important: running borg recreate to re-chunk will remove the chunks_healthy + metadata of all items with replacement chunks, so healing will not be possible + any more after re-chunking (it is also unlikely it would ever work: due to the + change of chunking parameters, the missing chunk likely will never be seen again + even if you still have the data that produced it). """) subparser = subparsers.add_parser('recreate', parents=[common_parser], add_help=False, description=self.do_recreate.__doc__, From 90186ad12b7f67ebca56250cafbe7d8b3f6ac628 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 29 Oct 2017 11:25:11 +0100 Subject: [PATCH 2/3] get rid of already existing invalid chunks_healthy metadata, see #3218 --- src/borg/archive.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 26940739d..490157808 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1507,7 +1507,12 @@ class ArchiveChecker: has_chunks_healthy = 'chunks_healthy' in item chunks_current = item.chunks chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current - assert len(chunks_current) == len(chunks_healthy) + if has_chunks_healthy and len(chunks_current) != len(chunks_healthy): + # should never happen, but there was issue #3218. + logger.warning('{}: Invalid chunks_healthy metadata removed!'.format(item.path)) + del item.chunks_healthy + has_chunks_healthy = False + chunks_healthy = chunks_current for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy): chunk_id, size, csize = chunk_healthy if chunk_id not in self.chunks: From 7aafcc517a30627d5b76180cc0cc604c7b04cb34 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 29 Oct 2017 14:49:42 +0100 Subject: [PATCH 3/3] recreate: move chunks_healthy when excluding hardlink master, fixes #3228 --- src/borg/archive.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 490157808..65e89d61b 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1768,15 +1768,17 @@ class ArchiveRecreater: if not matcher.match(item.path): self.print_file_status('x', item.path) if item_is_hardlink_master(item): - hardlink_masters[item.path] = (item.get('chunks'), None) + hardlink_masters[item.path] = (item.get('chunks'), item.get('chunks_healthy'), None) continue if target_is_subset and hardlinkable(item.mode) and item.get('source') in hardlink_masters: # master of this hard link is outside the target subset - chunks, new_source = hardlink_masters[item.source] + chunks, chunks_healthy, new_source = hardlink_masters[item.source] if new_source is None: # First item to use this master, move the chunks item.chunks = chunks - hardlink_masters[item.source] = (None, item.path) + if chunks_healthy is not None: + item.chunks_healthy = chunks_healthy + hardlink_masters[item.source] = (None, None, item.path) del item.source else: # Master was already moved, only update this item's source