diff --git a/src/borg/archive.py b/src/borg/archive.py index 833616661..d6fa3b105 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1445,9 +1445,20 @@ class TarfileObjectProcessors: @contextmanager def create_helper(self, tarinfo, status=None, type=None): + def s_to_ns(s): + return safe_ns(int(float(s) * 1e9)) + item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type, uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None, - mtime=safe_ns(int(tarinfo.mtime * 1000**3))) + mtime=s_to_ns(tarinfo.mtime)) + if tarinfo.pax_headers: + ph = tarinfo.pax_headers + # note: for mtime this is a bit redundant as it is already done by tarfile module, + # but we just do it in our way to be consistent for sure. + for name in 'atime', 'ctime', 'mtime': + if name in ph: + ns = s_to_ns(ph[name]) + setattr(item, name, ns) yield item, status # if we get here, "with"-block worked ok without error/exception, the item was processed ok... self.add_item(item, stats=self.stats) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index c5341da5e..7eb25bac3 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -1137,7 +1137,8 @@ class Archiver: # The | (pipe) symbol instructs tarfile to use a streaming mode of operation # where it never seeks on the passed fileobj. - tar = tarfile.open(fileobj=tarstream, mode='w|', format=tarfile.GNU_FORMAT) + tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format] + tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format) if progress: pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract') @@ -1168,13 +1169,6 @@ class Archiver: the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item* cannot be represented as a TarInfo object and should be skipped. 
""" - - # If we would use the PAX (POSIX) format (which we currently don't), - # we can support most things that aren't possible with classic tar - # formats, including GNU tar, such as: - # atime, ctime, possibly Linux capabilities (security.* xattrs) - # and various additions supported by GNU tar in POSIX mode. - stream = None tarinfo = tarfile.TarInfo() tarinfo.name = item.path @@ -1236,6 +1230,24 @@ class Archiver: return None, stream return tarinfo, stream + def item_to_paxheaders(item): + """ + Transform (parts of) a Borg *item* into a pax_headers dict. + """ + # When using the PAX (POSIX) format, we can support some things that aren't possible + # with classic tar formats, including GNU tar, such as: + # - atime, ctime (DONE) + # - possibly Linux capabilities, security.* xattrs (TODO) + # - various additions supported by GNU tar in POSIX mode (TODO) + ph = {} + # note: for mtime this is a bit redundant as it is already done by tarfile module, + # but we just do it in our way to be consistent for sure. + for name in 'atime', 'ctime', 'mtime': + if hasattr(item, name): + ns = getattr(item, name) + ph[name] = str(ns / 1e9) + return ph + for item in archive.iter_items(filter, partial_extract=partial_extract, preload=True, hardlink_masters=hardlink_masters): orig_path = item.path @@ -1243,6 +1255,8 @@ class Archiver: item.path = os.sep.join(orig_path.split(os.sep)[strip_components:]) tarinfo, stream = item_to_tarinfo(item, orig_path) if tarinfo: + if args.tar_format == 'PAX': + tarinfo.pax_headers = item_to_paxheaders(item) if output_list: logging.getLogger('borg.output.list').info(remove_surrogates(orig_path)) tar.addfile(tarinfo, stream) @@ -4043,7 +4057,10 @@ class Archiver: read the uncompressed tar stream from stdin and write a compressed/filtered tar stream to stdout. - The generated tarball uses the GNU tar format. 
+ Depending on the ``--tar-format`` option, the generated tarball uses this format: + + - PAX: POSIX.1-2001 (pax) format + - GNU: GNU tar format export-tar is a lossy conversion: BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported. @@ -4071,6 +4088,9 @@ class Archiver: help='filter program to pipe data through') subparser.add_argument('--list', dest='output_list', action='store_true', help='output verbose list of items (files, dirs, ...)') + subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU', + choices=('PAX', 'GNU'), + help='select tar format: PAX or GNU') subparser.add_argument('location', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to export') diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 64d294b03..48637b1f7 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -3409,7 +3409,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 os.unlink('input/flagfile') self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') - self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress') + self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress', '--tar-format=GNU') with changedir('output'): # This probably assumes GNU tar. Note -p switch to extract permissions regardless of umask. 
subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp']) @@ -3424,7 +3424,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 os.unlink('input/flagfile') self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') - list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', '--list') + list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', + '--list', '--tar-format=GNU') assert 'input/file1\n' in list assert 'input/dir2\n' in list with changedir('output'): @@ -3439,7 +3440,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 os.unlink('input/flagfile') self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') - list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--strip-components=1', '--list') + list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', + '--strip-components=1', '--list', '--tar-format=GNU') # --list's path are those before processing with --strip-components assert 'input/file1\n' in list assert 'input/dir2\n' in list @@ -3451,7 +3453,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 @requires_gnutar def test_export_tar_strip_components_links(self): self._extract_hardlinks_setup() - self.cmd('export-tar', self.repository_location + '::test', 'output.tar', '--strip-components=2') + self.cmd('export-tar', self.repository_location + '::test', 'output.tar', + '--strip-components=2', '--tar-format=GNU') with changedir('output'): subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp']) assert os.stat('hardlink').st_nlink == 2 @@ -3463,7 +3466,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 @requires_gnutar def test_extract_hardlinks_tar(self): self._extract_hardlinks_setup() - self.cmd('export-tar', self.repository_location + '::test', 
'output.tar', 'input/dir1') + self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1', '--tar-format=GNU') with changedir('output'): subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp']) assert os.stat('input/dir1/hardlink').st_nlink == 2 @@ -3471,26 +3474,26 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 assert os.stat('input/dir1/aaaa').st_nlink == 2 assert os.stat('input/dir1/source2').st_nlink == 2 - def test_import_tar(self): + def test_import_tar(self, tar_format='PAX'): self.create_test_files() os.unlink('input/flagfile') self.cmd('init', '--encryption=none', self.repository_location) self.cmd('create', self.repository_location + '::src', 'input') - self.cmd('export-tar', self.repository_location + '::src', 'simple.tar') + self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', f'--tar-format={tar_format}') self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar') with changedir(self.output_path): self.cmd('extract', self.repository_location + '::dst') self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True) @requires_gzip - def test_import_tar_gz(self): + def test_import_tar_gz(self, tar_format='GNU'): if not shutil.which('gzip'): pytest.skip('gzip is not installed') self.create_test_files() os.unlink('input/flagfile') self.cmd('init', '--encryption=none', self.repository_location) self.cmd('create', self.repository_location + '::src', 'input') - self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz') + self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz', f'--tar-format={tar_format}') self.cmd('import-tar', self.repository_location + '::dst', 'simple.tgz') with changedir(self.output_path): self.cmd('extract', self.repository_location + '::dst')