From e5b880f222cadc09d29a7150c93e364f82c59917 Mon Sep 17 00:00:00 2001 From: Manuel Riel Date: Thu, 31 Jan 2019 12:58:33 +0800 Subject: [PATCH 1/3] FAQ regarding change of compression settings. Fixes #4222 --- docs/faq.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index 55b6fd402..9638dc3fb 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -289,6 +289,15 @@ the :ref:`borg_recreate` command to rewrite all archives with a different ``--exclude`` pattern. See the examples in the :ref:`borg_recreate` manpage for more information. +Can I safely change the compression level or algorithm? +-------------------------------------------------------- + +The compression level and algorithm don't affect deduplication. Chunk ID hashes +are calculated *before* compression. New compression settings +will only be applied to new chunks, not existing chunks. So it's safe +to change them. + + Security ######## From 2378ef48909d4e2dff5993e7265da824c15b8bee Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 1 Feb 2019 23:30:45 +0100 Subject: [PATCH 2/3] add archive name to check --repair output, fixes #3447 so the output does not look duplicated and also informs the user about affected archives. --- src/borg/archive.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index ae611179d..51e4e902b 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1391,7 +1391,7 @@ class ArchiveChecker: if self.repair: self.repository.put(id_, cdata) - def verify_file_chunks(item): + def verify_file_chunks(archive_name, item): """Verifies that all file chunks are present. Missing file chunks will be replaced with new chunks of the same length containing all zeros. 
@@ -1412,7 +1412,7 @@ class ArchiveChecker: chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current if has_chunks_healthy and len(chunks_current) != len(chunks_healthy): # should never happen, but there was issue #3218. - logger.warning('{}: Invalid chunks_healthy metadata removed!'.format(item.path)) + logger.warning('{}: {}: Invalid chunks_healthy metadata removed!'.format(archive_name, item.path)) del item.chunks_healthy has_chunks_healthy = False chunks_healthy = chunks_current @@ -1421,20 +1421,23 @@ class ArchiveChecker: if chunk_id not in self.chunks: # a chunk of the healthy list is missing if chunk_current == chunk_healthy: - logger.error('{}: New missing file chunk detected (Byte {}-{}). ' - 'Replacing with all-zero chunk.'.format(item.path, offset, offset + size)) + logger.error('{}: {}: New missing file chunk detected (Byte {}-{}). ' + 'Replacing with all-zero chunk.'.format( + archive_name, item.path, offset, offset + size)) self.error_found = chunks_replaced = True chunk_id, size, csize, cdata = replacement_chunk(size) add_reference(chunk_id, size, csize, cdata) else: - logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). It has a ' - 'all-zero replacement chunk already.'.format(item.path, offset, offset + size)) + logger.info('{}: {}: Previously missing file chunk is still missing (Byte {}-{}). It has a ' + 'all-zero replacement chunk already.'.format( + archive_name, item.path, offset, offset + size)) chunk_id, size, csize = chunk_current if chunk_id in self.chunks: add_reference(chunk_id, size, csize) else: - logger.warning('{}: Missing all-zero replacement chunk detected (Byte {}-{}). ' - 'Generating new replacement chunk.'.format(item.path, offset, offset + size)) + logger.warning('{}: {}: Missing all-zero replacement chunk detected (Byte {}-{}). 
' + 'Generating new replacement chunk.'.format( + archive_name, item.path, offset, offset + size)) self.error_found = chunks_replaced = True chunk_id, size, csize, cdata = replacement_chunk(size) add_reference(chunk_id, size, csize, cdata) @@ -1443,8 +1446,8 @@ class ArchiveChecker: # normal case, all fine. add_reference(chunk_id, size, csize) else: - logger.info('{}: Healed previously missing file chunk! ' - '(Byte {}-{}).'.format(item.path, offset, offset + size)) + logger.info('{}: {}: Healed previously missing file chunk! ' + '(Byte {}-{}).'.format(archive_name, item.path, offset, offset + size)) add_reference(chunk_id, size, csize) mark_as_possibly_superseded(chunk_current[0]) # maybe orphaned the all-zero replacement chunk chunk_list.append([chunk_id, size, csize]) # list-typed element as chunks_healthy is list-of-lists @@ -1453,7 +1456,7 @@ class ArchiveChecker: # if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later item.chunks_healthy = item.chunks if has_chunks_healthy and chunk_list == chunks_healthy: - logger.info('{}: Completely healed previously damaged file!'.format(item.path)) + logger.info('{}: {}: Completely healed previously damaged file!'.format(archive_name, item.path)) del item.chunks_healthy item.chunks = chunk_list if 'size' in item: @@ -1461,8 +1464,8 @@ class ArchiveChecker: item_chunks_size = item.get_size(compressed=False, from_chunks=True) if item_size != item_chunks_size: # just warn, but keep the inconsistency, so that borg extract can warn about it. 
- logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format( - item.path, item_size, item_chunks_size)) + logger.warning('{}: {}: size inconsistency detected: size {}, chunks size {}'.format( + archive_name, item.path, item_size, item_chunks_size)) def robust_iterator(archive): """Iterates through all archive items @@ -1573,7 +1576,7 @@ class ArchiveChecker: items_buffer.write_chunk = add_callback for item in robust_iterator(archive): if 'chunks' in item: - verify_file_chunks(item) + verify_file_chunks(info.name, item) items_buffer.add(item) items_buffer.flush(flush=True) for previous_item_id in archive.items: From 429645d1741e01261da8f6e6d510a589ad6cb5c9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 4 Feb 2019 17:12:11 +0100 Subject: [PATCH 3/3] borg init --make-parent-dirs parent1/parent2/repo_dir, fixes #4235 --- src/borg/archiver.py | 8 ++++++-- src/borg/remote.py | 20 ++++++++++++++------ src/borg/repository.py | 15 +++++++++++++-- src/borg/testsuite/archiver.py | 15 +++++++++++++++ 4 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index ccd10c609..4e350ed7c 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -125,15 +125,17 @@ def with_repository(fake=False, invert_fake=False, create=False, lock=True, location = args.location # note: 'location' must be always present in args append_only = getattr(args, 'append_only', False) storage_quota = getattr(args, 'storage_quota', None) + make_parent_dirs = getattr(args, 'make_parent_dirs', False) if argument(args, fake) ^ invert_fake: return method(self, args, repository=None, **kwargs) elif location.proto == 'ssh': repository = RemoteRepository(location, create=create, exclusive=argument(args, exclusive), - lock_wait=self.lock_wait, lock=lock, append_only=append_only, args=args) + lock_wait=self.lock_wait, lock=lock, append_only=append_only, + make_parent_dirs=make_parent_dirs, args=args) else: repository = 
Repository(location.path, create=create, exclusive=argument(args, exclusive), lock_wait=self.lock_wait, lock=lock, append_only=append_only, - storage_quota=storage_quota) + storage_quota=storage_quota, make_parent_dirs=make_parent_dirs) with repository: if manifest or cache: kwargs['manifest'], kwargs['key'] = Manifest.load(repository, compatibility) @@ -2957,6 +2959,8 @@ class Archiver: subparser.add_argument('--storage-quota', metavar='QUOTA', dest='storage_quota', default=None, type=parse_storage_quota, help='Set storage quota of the new repository (e.g. 5G, 1.5T). Default: no quota.') + subparser.add_argument('--make-parent-dirs', dest='make_parent_dirs', action='store_true', + help='create the parent directories of the repository directory, if they are missing.') check_epilog = process_epilog(""" The check command verifies the consistency of a repository and the corresponding archives. diff --git a/src/borg/remote.py b/src/borg/remote.py index 738e3f506..4a3bba7a1 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -341,7 +341,8 @@ class RepositoryServer: # pragma: no cover path = path[3:] return os.path.realpath(path) - def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, append_only=False): + def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, append_only=False, + make_parent_dirs=False): logging.debug('Resolving repository path %r', path) path = self._resolve_path(path) logging.debug('Resolved repository path to %r', path) @@ -370,7 +371,8 @@ class RepositoryServer: # pragma: no cover self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock, append_only=append_only, storage_quota=self.storage_quota, - exclusive=exclusive) + exclusive=exclusive, + make_parent_dirs=make_parent_dirs) self.repository.__enter__() # clean exit handled by serve() method return self.repository.id @@ -529,7 +531,8 @@ class RemoteRepository: # If compatibility with 1.0.x is not longer needed, replace all 
checks of this with True and simplify the code dictFormat = False # outside of __init__ for testing of legacy free protocol - def __init__(self, location, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False, args=None): + def __init__(self, location, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False, + make_parent_dirs=False, args=None): self.location = self._location = location self.preload_ids = [] self.msgid = 0 @@ -582,7 +585,8 @@ class RemoteRepository: def do_open(): self.id = self.open(path=self.location.path, create=create, lock_wait=lock_wait, - lock=lock, exclusive=exclusive, append_only=append_only) + lock=lock, exclusive=exclusive, append_only=append_only, + make_parent_dirs=make_parent_dirs) if self.dictFormat: do_open() @@ -745,6 +749,8 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+. raise PathNotAllowed('(unknown)') else: raise PathNotAllowed(args[0].decode()) + elif error == 'ParentPathDoesNotExist': + raise Repository.ParentPathDoesNotExist(args[0].decode()) elif error == 'ObjectNotFound': if old_server: raise Repository.ObjectNotFound('(not available)', self.location.orig) @@ -890,8 +896,10 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+. 
self.ignore_responses |= set(waiting_for) # we lose order here @api(since=parse_version('1.0.0'), - append_only={'since': parse_version('1.0.7'), 'previously': False}) - def open(self, path, create=False, lock_wait=None, lock=True, exclusive=False, append_only=False): + append_only={'since': parse_version('1.0.7'), 'previously': False}, + make_parent_dirs={'since': parse_version('1.1.9'), 'previously': False}) + def open(self, path, create=False, lock_wait=None, lock=True, exclusive=False, append_only=False, + make_parent_dirs=False): """actual remoting is done via self.call in the @api decorator""" @api(since=parse_version('1.0.0')) diff --git a/src/borg/repository.py b/src/borg/repository.py index c391d8ef5..afa6a3f91 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -119,6 +119,9 @@ class Repository: class PathAlreadyExists(Error): """There is already something at {}.""" + class ParentPathDoesNotExist(Error): + """The parent path of the repo directory [{}] does not exist.""" + class InvalidRepository(Error): """{} is not a valid repository. Check repo config.""" @@ -146,7 +149,8 @@ class Repository: """The storage quota ({}) has been exceeded ({}). 
Try deleting some archives.""" def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, - append_only=False, storage_quota=None, check_segment_magic=True): + append_only=False, storage_quota=None, check_segment_magic=True, + make_parent_dirs=False): self.path = os.path.abspath(path) self._location = Location('file://%s' % self.path) self.io = None # type: LoggedIO @@ -167,6 +171,7 @@ class Repository: self.storage_quota_use = 0 self.transaction_doomed = None self.check_segment_magic = check_segment_magic + self.make_parent_dirs = make_parent_dirs def __del__(self): if self.lock: @@ -249,8 +254,14 @@ class Repository: """Create a new empty repository at `path` """ self.check_can_create_repository(path) + if self.make_parent_dirs: + parent_path = os.path.join(path, os.pardir) + os.makedirs(parent_path, exist_ok=True) if not os.path.exists(path): - os.mkdir(path) + try: + os.mkdir(path) + except FileNotFoundError as err: + raise self.ParentPathDoesNotExist(path) from err with open(os.path.join(path, 'README'), 'w') as fd: fd.write(REPOSITORY_README) os.mkdir(os.path.join(path, 'data')) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index f3993f042..85bab0f1c 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -438,6 +438,17 @@ class ArchiverTestCase(ArchiverTestCaseBase): # the interesting parts of info_output2 and info_output should be same self.assert_equal(filter(info_output), filter(info_output2)) + def test_init_parent_dirs(self): + parent_path = os.path.join(self.tmpdir, 'parent1', 'parent2') + repository_path = os.path.join(parent_path, 'repository') + repository_location = self.prefix + repository_path + with pytest.raises(Repository.ParentPathDoesNotExist): + # normal borg init does NOT create missing parent dirs + self.cmd('init', '--encryption=none', repository_location) + # but if told so, it does: + self.cmd('init', '--encryption=none', '--make-parent-dirs', 
repository_location) + assert os.path.exists(parent_path) + def test_unix_socket(self): self.cmd('init', '--encryption=repokey', self.repository_location) try: @@ -2904,6 +2915,10 @@ class ArchiverTestCaseBinary(ArchiverTestCase): EXE = 'borg.exe' FORK_DEFAULT = True + @unittest.skip('does not raise Exception, but sets rc==2') + def test_init_parent_dirs(self): + pass + @unittest.skip('patches objects') def test_init_interrupt(self): pass