From 3c941ae6041fda9700e0de0a30cb9a4a003e83d0 Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Tue, 21 Mar 2023 17:28:16 +0100 Subject: [PATCH 1/4] Add --ignore-zeros flag to import-tar Fixes #7432. --- src/borg/archiver/tar_cmds.py | 8 +++++++- src/borg/testsuite/archiver/tar_cmds.py | 27 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/borg/archiver/tar_cmds.py b/src/borg/archiver/tar_cmds.py index 8c46645a5..e91d04c12 100644 --- a/src/borg/archiver/tar_cmds.py +++ b/src/borg/archiver/tar_cmds.py @@ -289,7 +289,7 @@ class TarMixIn: file_status_printer=self.print_file_status, ) - tar = tarfile.open(fileobj=tarstream, mode="r|") + tar = tarfile.open(fileobj=tarstream, mode="r|", ignore_zeros=args.ignore_zeros) while True: tarinfo = tar.next() @@ -487,6 +487,12 @@ class TarMixIn: help="only display items with the given status characters", ) subparser.add_argument("--json", action="store_true", help="output stats as JSON (implies --stats)") + subparser.add_argument( + "--ignore-zeros", + dest="ignore_zeros", + action="store_true", + help="ignore zero-filled blocks in the input tarball", + ) archive_group = subparser.add_argument_group("Archive options") archive_group.add_argument( diff --git a/src/borg/testsuite/archiver/tar_cmds.py b/src/borg/testsuite/archiver/tar_cmds.py index b1228f5bd..e2990bee4 100644 --- a/src/borg/testsuite/archiver/tar_cmds.py +++ b/src/borg/testsuite/archiver/tar_cmds.py @@ -143,6 +143,33 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd(f"--repo={self.repository_location}", "extract", "dst") self.assert_dirs_equal("input", "output/input", ignore_ns=True, ignore_xattrs=True) + @requires_gnutar + def test_import_tar_with_ignore_zeros(self): + self.create_test_files(create_hardlinks=False) # hardlinks become separate files + os.unlink("input/flagfile") + with changedir("input"): + subprocess.check_call(["tar", "cf", "file1.tar", "file1"]) + subprocess.check_call(["tar", "cf", "the_rest.tar", "--exclude", "file1*", "."]) + with open("concatenated.tar", "wb") as concatenated: + with open("file1.tar", "rb") as file1: + concatenated.write(file1.read()) + # Clean up for assert_dirs_equal. + os.unlink("file1.tar") + + with open("the_rest.tar", "rb") as the_rest: + concatenated.write(the_rest.read()) + # Clean up for assert_dirs_equal. + os.unlink("the_rest.tar") + + self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none") + self.cmd(f"--repo={self.repository_location}", "import-tar", "--ignore-zeros", "dst", "input/concatenated.tar") + # Clean up for assert_dirs_equal. + os.unlink("input/concatenated.tar") + + with changedir(self.output_path): + self.cmd(f"--repo={self.repository_location}", "extract", "dst") + self.assert_dirs_equal("input", "output", ignore_ns=True, ignore_xattrs=True) + def test_roundtrip_pax_borg(self): self.create_test_files() self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none") From 8071ef62800806abc073a5000980b4fab38d1a86 Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Tue, 21 Mar 2023 23:08:40 +0100 Subject: [PATCH 2/4] Add a negative test for import-tar --ignore-zeros --- src/borg/testsuite/archiver/tar_cmds.py | 32 ++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/borg/testsuite/archiver/tar_cmds.py b/src/borg/testsuite/archiver/tar_cmds.py index e2990bee4..9b91dae2d 100644 --- a/src/borg/testsuite/archiver/tar_cmds.py +++ b/src/borg/testsuite/archiver/tar_cmds.py @@ -144,7 +144,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_dirs_equal("input", "output/input", ignore_ns=True, ignore_xattrs=True) @requires_gnutar - def test_import_tar_with_ignore_zeros(self): + def test_import_concatenated_tar_with_ignore_zeros(self): self.create_test_files(create_hardlinks=False) # hardlinks become separate files os.unlink("input/flagfile") with changedir("input"): @@ -170,6 +170,36 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd(f"--repo={self.repository_location}", "extract", "dst") self.assert_dirs_equal("input", "output", ignore_ns=True, ignore_xattrs=True) + @requires_gnutar + def test_import_concatenated_tar_without_ignore_zeros(self): + self.create_test_files(create_hardlinks=False) # hardlinks become separate files + os.unlink("input/flagfile") + with changedir("input"): + subprocess.check_call(["tar", "cf", "file1.tar", "file1"]) + subprocess.check_call(["tar", "cf", "the_rest.tar", "--exclude", "file1*", "."]) + with open("concatenated.tar", "wb") as concatenated: + with open("file1.tar", "rb") as file1: + concatenated.write(file1.read()) + # Clean up for assert_dirs_equal. + os.unlink("file1.tar") + + with open("the_rest.tar", "rb") as the_rest: + concatenated.write(the_rest.read()) + # Clean up for assert_dirs_equal. + os.unlink("the_rest.tar") + + self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none") + self.cmd(f"--repo={self.repository_location}", "import-tar", "dst", "input/concatenated.tar") + # Clean up for assert_dirs_equal. + os.unlink("input/concatenated.tar") + + with changedir(self.output_path): + self.cmd(f"--repo={self.repository_location}", "extract", "dst") + + # Negative test -- assert that only file1 has been extracted, and the_rest has been ignored + # due to zero-filled block marker. + self.assert_equal(os.listdir("output"), ["file1"]) + def test_roundtrip_pax_borg(self): self.create_test_files() self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none") From c999d5ddedc8392850e335976e5339b19c6ee92b Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Wed, 22 Mar 2023 11:13:19 +0100 Subject: [PATCH 3/4] Explain multiple tarball import with ignore-zeros --- src/borg/archiver/tar_cmds.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/borg/archiver/tar_cmds.py b/src/borg/archiver/tar_cmds.py index e91d04c12..2bbe81854 100644 --- a/src/borg/archiver/tar_cmds.py +++ b/src/borg/archiver/tar_cmds.py @@ -445,6 +445,9 @@ class TarMixIn: - UNIX V7 tar - SunOS tar with extended attributes + To import multiple tarballs into a single archive, they can be simply + concatenated (e.g. using "cat") into a single file, and imported with an + ``--ignore-zeros`` option to skip through the stop markers between them. """ ) subparser = subparsers.add_parser( From 335efbf187ca7ae38ad40a5c0803cfce8812becd Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Wed, 22 Mar 2023 11:19:44 +0100 Subject: [PATCH 4/4] Simplify import-tar ignore-zeros negative test Some of those cleanups are unnecessary since we don't use assert_dirs_equal. --- src/borg/testsuite/archiver/tar_cmds.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/borg/testsuite/archiver/tar_cmds.py b/src/borg/testsuite/archiver/tar_cmds.py index 9b91dae2d..d9be9ce46 100644 --- a/src/borg/testsuite/archiver/tar_cmds.py +++ b/src/borg/testsuite/archiver/tar_cmds.py @@ -180,18 +180,12 @@ class ArchiverTestCase(ArchiverTestCaseBase): with open("concatenated.tar", "wb") as concatenated: with open("file1.tar", "rb") as file1: concatenated.write(file1.read()) - # Clean up for assert_dirs_equal. - os.unlink("file1.tar") - with open("the_rest.tar", "rb") as the_rest: concatenated.write(the_rest.read()) - # Clean up for assert_dirs_equal. os.unlink("the_rest.tar") self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none") self.cmd(f"--repo={self.repository_location}", "import-tar", "dst", "input/concatenated.tar") - # Clean up for assert_dirs_equal. - os.unlink("input/concatenated.tar") with changedir(self.output_path): self.cmd(f"--repo={self.repository_location}", "extract", "dst")