# Patch summary (leading commentary; everything before the first "diff --git"
# header is not part of any hunk).
#
# src/borg/archiver.py
#   * import-tar: tarfile.open() is now called with
#     ignore_zeros=args.ignore_zeros.
#   * import-tar help epilog: documents importing several concatenated
#     tarballs into one archive with --ignore-zeros.
#   * import-tar argument parser: adds the --ignore-zeros store_true flag
#     (dest='ignore_zeros', default False).
#
# src/borg/testsuite/archiver.py
#   * create_test_files() gains create_hardlinks=True; the sample hardlink is
#     only created when that flag is set and are_hardlinks_supported() is true.
#   * Adds two tests decorated with @requires_gnutar that concatenate
#     file1.tar and the_rest.tar:
#       - with --ignore-zeros: the extracted tree is compared to 'input' via
#         assert_dirs_equal;
#       - without it: asserts that only 'file1' ends up in 'output'.
#
# NOTE(review): the line structure of this patch was restored from a copy in
# which newlines and indentation had been collapsed. Hunk line counts were
# checked against the "@@" headers, but leading whitespace on context lines is
# a best-effort reconstruction -- verify against the target files (or apply
# with whitespace-tolerant options) before relying on it.
diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index 1ff8da076..82ad00e13 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -1763,7 +1763,8 @@ class Archiver:
                                       log_json=args.log_json, iec=args.iec,
                                       file_status_printer=self.print_file_status)
 
-        tar = tarfile.open(fileobj=tarstream, mode='r|')
+        tar = tarfile.open(fileobj=tarstream, mode='r|',
+                           ignore_zeros=args.ignore_zeros)
 
         while True:
             tarinfo = tar.next()
@@ -4932,6 +4933,10 @@ class Archiver:
 
         import-tar reads POSIX.1-1988 (ustar), POSIX.1-2001 (pax), GNU tar, UNIX V7 tar and SunOS tar
         with extended attributes.
+
+        To import multiple tarballs into a single archive, they can be simply
+        concatenated (e.g. using "cat") into a single file, and imported with an
+        ``--ignore-zeros`` option to skip through the stop markers between them.
         """)
         subparser = subparsers.add_parser('import-tar', parents=[common_parser], add_help=False,
                                           description=self.do_import_tar.__doc__,
@@ -4951,6 +4956,9 @@ class Archiver:
                                help='only display items with the given status characters')
         subparser.add_argument('--json', action='store_true',
                                help='output stats as JSON (implies --stats)')
+        subparser.add_argument('--ignore-zeros', dest='ignore_zeros',
+                               action='store_true', default=False,
+                               help='ignore zero-filled blocks in the input tarball')
 
         archive_group = subparser.add_argument_group('Archive options')
         archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default='',
diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py
index 118e4b3a8..7eb5669b6 100644
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -322,8 +322,12 @@ class ArchiverTestCaseBase(BaseTestCase):
                 contents = b'X' * size
             fd.write(contents)
 
-    def create_test_files(self):
+    def create_test_files(self, create_hardlinks=True):
         """Create a minimal test case including all supported file types
+
+        Args:
+            create_hardlinks: whether to create a sample hardlink. When set to
+                False, the hardlink file will not be created at all.
         """
         # File
         self.create_regular_file('file1', size=1024 * 80)
@@ -333,7 +337,7 @@ class ArchiverTestCaseBase(BaseTestCase):
         # File mode
         os.chmod('input/file1', 0o4755)
         # Hard link
-        if are_hardlinks_supported():
+        if create_hardlinks and are_hardlinks_supported():
             os.link(os.path.join(self.input_path, 'file1'),
                     os.path.join(self.input_path, 'hardlink'))
         # Symlink
@@ -3588,6 +3592,61 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
             self.cmd('extract', self.repository_location + '::dst')
         self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
 
+    @requires_gnutar
+    def test_import_concatenated_tar_with_ignore_zeros(self):
+        # file1 has a hardlink reference to it, but we put it in a separate
+        # tarball, breaking the link during import-tar. It could be any other
+        # file though, so we won't take chances and just avoid hardlinks.
+        self.create_test_files(create_hardlinks=False)
+        os.unlink('input/flagfile')
+
+        with changedir('input'):
+            subprocess.check_call(['tar', 'cf', 'file1.tar', 'file1'])
+            subprocess.check_call(['tar', 'cf', 'the_rest.tar', '--exclude', 'file1*', '.'])
+            with open('concatenated.tar', 'wb') as concatenated:
+                with open('file1.tar', 'rb') as file1:
+                    concatenated.write(file1.read())
+                # Clean up for assert_dirs_equal.
+                os.unlink('file1.tar')
+
+                with open('the_rest.tar', 'rb') as the_rest:
+                    concatenated.write(the_rest.read())
+                # Clean up for assert_dirs_equal.
+                os.unlink('the_rest.tar')
+
+        self.cmd('init', '--encryption=none', self.repository_location)
+        self.cmd('import-tar', '--ignore-zeros', self.repository_location + '::dst', 'input/concatenated.tar')
+        os.unlink('input/concatenated.tar')
+
+        with changedir(self.output_path):
+            self.cmd('extract', self.repository_location + '::dst')
+        self.assert_dirs_equal('input', 'output', ignore_ns=True, ignore_xattrs=True)
+
+    @requires_gnutar
+    def test_import_concatenated_tar_without_ignore_zeros(self):
+        self.create_test_files()
+        os.unlink('input/flagfile')
+
+        with changedir('input'):
+            subprocess.check_call(['tar', 'cf', 'file1.tar', 'file1'])
+            subprocess.check_call(['tar', 'cf', 'the_rest.tar', '--exclude', 'file1*', '.'])
+            with open('concatenated.tar', 'wb') as concatenated:
+                with open('file1.tar', 'rb') as file1:
+                    concatenated.write(file1.read())
+
+                with open('the_rest.tar', 'rb') as the_rest:
+                    concatenated.write(the_rest.read())
+
+        self.cmd('init', '--encryption=none', self.repository_location)
+        self.cmd('import-tar', self.repository_location + '::dst', 'input/concatenated.tar')
+
+        with changedir(self.output_path):
+            self.cmd('extract', self.repository_location + '::dst')
+
+        # Negative test -- assert that only file1 has been extracted, and the_rest has been ignored
+        # due to zero-filled block marker.
+        self.assert_equal(os.listdir('output'), ['file1'])
+
     def test_detect_attic_repo(self):
         path = make_attic_repo(self.repository_path)
         cmds = [