# Patch summary (free-text preamble placed ahead of the first "diff --git" header).
#
# src/borg/archiver/tar_cmds.py
#   * tarfile.open(fileobj=tarstream, mode="r|") additionally receives
#     ignore_zeros=args.ignore_zeros.
#   * A new "--ignore-zeros" argument (action="store_true", dest="ignore_zeros")
#     is registered on the subparser; the added tests pass it to "import-tar".
#   * The triple-quoted help text that precedes subparsers.add_parser(...) gains
#     a paragraph about importing concatenated tarballs.
#
# src/borg/testsuite/archiver/tar_cmds.py (both new tests use @requires_gnutar)
#   * test_import_concatenated_tar_with_ignore_zeros: builds file1.tar and
#     the_rest.tar inside "input", writes both into concatenated.tar, imports it
#     with --ignore-zeros, extracts, and compares "input" against "output" via
#     assert_dirs_equal(..., ignore_ns=True, ignore_xattrs=True).
#   * test_import_concatenated_tar_without_ignore_zeros: same setup without the
#     flag; asserts os.listdir("output") == ["file1"].
#
# NOTE(review): the hunk line breaks below are collapsed into spaces, so the
#   "+"/"-" markers sit mid-line and the original indentation and blank lines
#   cannot be recovered from this text. Presumably an extraction artifact --
#   restore the real line structure before trying to apply this patch.
# NOTE(review): the Python tarfile documentation describes ignore_zeros=True as
#   skipping empty and also invalid blocks; the new help string mentions only
#   zero-filled blocks -- confirm the wording is intended.
# NOTE(review): the second test unlinks the_rest.tar but not file1.tar or
#   input/concatenated.tar; it only inspects "output", so this looks harmless --
#   confirm.
diff --git a/src/borg/archiver/tar_cmds.py b/src/borg/archiver/tar_cmds.py index 8c46645a5..2bbe81854 100644 --- a/src/borg/archiver/tar_cmds.py +++ b/src/borg/archiver/tar_cmds.py @@ -289,7 +289,7 @@ class TarMixIn: file_status_printer=self.print_file_status, ) - tar = tarfile.open(fileobj=tarstream, mode="r|") + tar = tarfile.open(fileobj=tarstream, mode="r|", ignore_zeros=args.ignore_zeros) while True: tarinfo = tar.next() @@ -445,6 +445,9 @@ class TarMixIn: - UNIX V7 tar - SunOS tar with extended attributes + To import multiple tarballs into a single archive, they can be simply + concatenated (e.g. using "cat") into a single file, and imported with an + ``--ignore-zeros`` option to skip through the stop markers between them. """ ) subparser = subparsers.add_parser( @@ -487,6 +490,12 @@ class TarMixIn: help="only display items with the given status characters", ) subparser.add_argument("--json", action="store_true", help="output stats as JSON (implies --stats)") + subparser.add_argument( + "--ignore-zeros", + dest="ignore_zeros", + action="store_true", + help="ignore zero-filled blocks in the input tarball", + ) archive_group = subparser.add_argument_group("Archive options") archive_group.add_argument( diff --git a/src/borg/testsuite/archiver/tar_cmds.py b/src/borg/testsuite/archiver/tar_cmds.py index b1228f5bd..d9be9ce46 100644 --- a/src/borg/testsuite/archiver/tar_cmds.py +++ b/src/borg/testsuite/archiver/tar_cmds.py @@ -143,6 +143,57 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd(f"--repo={self.repository_location}", "extract", "dst") self.assert_dirs_equal("input", "output/input", ignore_ns=True, ignore_xattrs=True) + @requires_gnutar + def test_import_concatenated_tar_with_ignore_zeros(self): + self.create_test_files(create_hardlinks=False) # hardlinks become separate files + os.unlink("input/flagfile") + with changedir("input"): + subprocess.check_call(["tar", "cf", "file1.tar", "file1"]) + subprocess.check_call(["tar", "cf", "the_rest.tar", 
"--exclude", "file1*", "."]) + with open("concatenated.tar", "wb") as concatenated: + with open("file1.tar", "rb") as file1: + concatenated.write(file1.read()) + # Clean up for assert_dirs_equal. + os.unlink("file1.tar") + + with open("the_rest.tar", "rb") as the_rest: + concatenated.write(the_rest.read()) + # Clean up for assert_dirs_equal. + os.unlink("the_rest.tar") + + self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none") + self.cmd(f"--repo={self.repository_location}", "import-tar", "--ignore-zeros", "dst", "input/concatenated.tar") + # Clean up for assert_dirs_equal. + os.unlink("input/concatenated.tar") + + with changedir(self.output_path): + self.cmd(f"--repo={self.repository_location}", "extract", "dst") + self.assert_dirs_equal("input", "output", ignore_ns=True, ignore_xattrs=True) + + @requires_gnutar + def test_import_concatenated_tar_without_ignore_zeros(self): + self.create_test_files(create_hardlinks=False) # hardlinks become separate files + os.unlink("input/flagfile") + with changedir("input"): + subprocess.check_call(["tar", "cf", "file1.tar", "file1"]) + subprocess.check_call(["tar", "cf", "the_rest.tar", "--exclude", "file1*", "."]) + with open("concatenated.tar", "wb") as concatenated: + with open("file1.tar", "rb") as file1: + concatenated.write(file1.read()) + with open("the_rest.tar", "rb") as the_rest: + concatenated.write(the_rest.read()) + os.unlink("the_rest.tar") + + self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none") + self.cmd(f"--repo={self.repository_location}", "import-tar", "dst", "input/concatenated.tar") + + with changedir(self.output_path): + self.cmd(f"--repo={self.repository_location}", "extract", "dst") + + # Negative test -- assert that only file1 has been extracted, and the_rest has been ignored + # due to zero-filled block marker. 
+ self.assert_equal(os.listdir("output"), ["file1"]) + def test_roundtrip_pax_borg(self): self.create_test_files() self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none")