From 4151db270cd9413f3fac153e173ca13ad77a539a Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Thu, 17 Mar 2016 17:32:23 +0100 Subject: [PATCH] Redo borg list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - This is compatible except for {formatkeys}, which has been replaced by "borg list --help" - --list-format is deprecated, use --format instead (using deprecated arguments will print a warning and result in an exit code of 1) - borg list now supports the usual [PATH [PATHS…]] syntax and excludes - Additional keys: csize, num_chunks, unique_chunks, NUL - Supports guaranteed_available hashlib hashes (to avoid varying functionality depending on environment) (also, the other hashes are really obscure, like MD-4) --- borg/archiver.py | 121 ++++++++++------------------ borg/helpers.py | 158 ++++++++++++++++++++++++++++++++++++- borg/testsuite/archiver.py | 40 +++++++++- borg/testsuite/helpers.py | 10 ++- 4 files changed, 244 insertions(+), 85 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 1919a1c16..75084453d 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -16,12 +16,12 @@ import textwrap import traceback from . 
import __version__ -from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \ - parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, \ +from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \ + parse_pattern, PathPrefixPattern, to_localtime, timestamp, \ get_cache_dir, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ - EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher + dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ + EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ItemFormatter from .logger import create_logger, setup_logging logger = create_logger() from .compress import Compressor, COMPR_BUFFER @@ -585,79 +585,29 @@ class Archiver: repository = self.open_repository(args) manifest, key = Manifest.load(repository) if args.location.archive: - archive = Archive(repository, key, manifest, args.location.archive) - """use_user_format flag is used to speed up default listing. - When user issues format options, listing is a bit slower, but more keys are available and - precalculated. 
- """ - use_user_format = args.listformat is not None - if use_user_format: - list_format = args.listformat - elif args.short: - list_format = "{path}{LF}" - else: - list_format = "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{LF}" + matcher, _ = self.build_matcher(args.excludes, args.paths) - for item in archive.iter_items(): - mode = stat.filemode(item[b'mode']) - type = mode[0] - size = 0 - if type == '-': - try: - size = sum(size for _, size, _ in item[b'chunks']) - except KeyError: - pass + with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: + archive = Archive(repository, key, manifest, args.location.archive, cache=cache) - mtime = safe_timestamp(item[b'mtime']) - if use_user_format: - atime = safe_timestamp(item.get(b'atime') or item[b'mtime']) - ctime = safe_timestamp(item.get(b'ctime') or item[b'mtime']) - - if b'source' in item: - source = item[b'source'] - if type == 'l': - extra = ' -> %s' % item[b'source'] - else: - mode = 'h' + mode[1:] - extra = ' link to %s' % item[b'source'] + if args.format: + format = args.format + elif args.short: + format = "{path}{NL}" else: - extra = '' - source = '' - - item_data = { - 'mode': mode, - 'user': item[b'user'] or item[b'uid'], - 'group': item[b'group'] or item[b'gid'], - 'size': size, - 'isomtime': format_time(mtime), - 'path': remove_surrogates(item[b'path']), - 'extra': extra, - 'LF': '\n', - } - if use_user_format: - item_data_advanced = { - 'bmode': item[b'mode'], - 'type': type, - 'source': source, - 'linktarget': source, - 'uid': item[b'uid'], - 'gid': item[b'gid'], - 'mtime': mtime, - 'isoctime': format_time(ctime), - 'ctime': ctime, - 'isoatime': format_time(atime), - 'atime': atime, - 'archivename': archive.name, - 'SPACE': ' ', - 'TAB': '\t', - 'CR': '\r', - 'NEWLINE': os.linesep, - } - item_data.update(item_data_advanced) - item_data['formatkeys'] = list(item_data.keys()) - - print(format_line(list_format, item_data), end='') + format = "{mode} {user:6} {group:6} 
{size:8} {isomtime} {path}{extra}{NL}" + formatter = ItemFormatter(archive, format) + if not hasattr(sys.stdout, 'buffer'): + # This is a shim for supporting unit tests replacing sys.stdout with e.g. StringIO, + # which doesn't have an underlying buffer (= lower file object). + def write(bytestring): + sys.stdout.write(bytestring.decode('utf-8', errors='replace')) + else: + write = sys.stdout.buffer.write + for item in archive.iter_items(lambda item: matcher.match(item[b'path'])): + write(formatter.format_item(item).encode('utf-8', errors='surrogateescape')) + repository.close() else: for archive_info in manifest.list_archive_infos(sort_by='ts'): if args.prefix and not archive_info.name.startswith(args.prefix): @@ -944,12 +894,13 @@ class Archiver: def preprocess_args(self, args): deprecations = [ # ('--old', '--new', 'Warning: "--old" has been deprecated. Use "--new" instead.'), + ('--list-format', '--format', 'Warning: "--list-format" has been deprecated. Use "--format" instead.'), ] for i, arg in enumerate(args[:]): for old_name, new_name, warning in deprecations: if arg.startswith(old_name): args[i] = arg.replace(old_name, new_name) - print(warning) + self.print_warning(warning) return args def build_parser(self, args=None, prog=None): @@ -1322,7 +1273,12 @@ class Archiver: list_epilog = textwrap.dedent(""" This command lists the contents of a repository or an archive. - """) + + See the "borg help patterns" command for more help on exclude patterns. 
+ + The following keys are available for --format: + + """) + ItemFormatter.keys_help() subparser = subparsers.add_parser('list', parents=[common_parser], description=self.do_list.__doc__, epilog=list_epilog, @@ -1332,15 +1288,22 @@ class Archiver: subparser.add_argument('--short', dest='short', action='store_true', default=False, help='only print file/directory names, nothing else') - subparser.add_argument('--list-format', dest='listformat', type=str, - help="""specify format for archive file listing - (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}") - Special "{formatkeys}" exists to list available keys""") + subparser.add_argument('--format', '--list-format', dest='format', type=str, + help="""specify format for file listing + (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""") subparser.add_argument('-P', '--prefix', dest='prefix', type=str, help='only consider archive names starting with this prefix') + subparser.add_argument('-e', '--exclude', dest='excludes', + type=parse_pattern, action='append', + metavar="PATTERN", help='exclude paths matching PATTERN') + subparser.add_argument('--exclude-from', dest='exclude_files', + type=argparse.FileType('r'), action='append', + metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', type=location_validator(), help='repository/archive to list contents of') + subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, + help='paths to extract; patterns are supported') mount_epilog = textwrap.dedent(""" This command mounts an archive as a FUSE filesystem. 
This can be useful for diff --git a/borg/helpers.py b/borg/helpers.py index 327953b2f..547141ab6 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1,8 +1,9 @@ import argparse from binascii import hexlify from collections import namedtuple -from functools import wraps +from functools import wraps, partial import grp +import hashlib import os import stat import textwrap @@ -10,6 +11,7 @@ import pwd import re from shutil import get_terminal_size import sys +from string import Formatter import platform import time import unicodedata @@ -548,6 +550,20 @@ def dir_is_tagged(path, exclude_caches, exclude_if_present): return tag_paths +def partial_format(format, mapping): + """ + Apply format.format_map(mapping) while preserving unknown keys + + Does not support attribute access, indexing and ![rsa] conversions + """ + for key, value in mapping.items(): + key = re.escape(key) + format = re.sub(r'(? %s' % source + else: + mode = 'h' + mode[1:] + extra = ' link to %s' % source + item_data['type'] = item_type + item_data['mode'] = mode + item_data['user'] = item[b'user'] or item[b'uid'] + item_data['group'] = item[b'group'] or item[b'gid'] + item_data['uid'] = item[b'uid'] + item_data['gid'] = item[b'gid'] + item_data['path'] = remove_surrogates(item[b'path']) + item_data['bpath'] = item[b'path'] + item_data['source'] = source + item_data['linktarget'] = source + item_data['extra'] = extra + for key in self.used_call_keys: + item_data[key] = self.call_keys[key](item) + return item_data + + def format_item(self, item): + return self.format.format_map(self.get_item_data(item)) + + def calculate_num_chunks(self, item): + return len(item.get(b'chunks', [])) + + def calculate_unique_chunks(self, item): + chunk_index = self.archive.cache.chunks + return sum(1 for chunk_id, _, _ in item.get(b'chunks', []) if chunk_index[chunk_id][0] == 1) + + def calculate_size(self, item): + return sum(size for _, size, _ in item.get(b'chunks', [])) + + def calculate_csize(self, item): + return 
sum(csize for _, _, csize in item.get(b'chunks', [])) + + def hash_item(self, hash_function, item): + if b'chunks' not in item: + return "" + hash = hashlib.new(hash_function) + for chunk in self.archive.pipeline.fetch_many([c[0] for c in item[b'chunks']]): + hash.update(chunk) + return hash.hexdigest() + + def format_time(self, key, item): + return format_time(safe_timestamp(item.get(key) or item[b'mtime'])) + + def time(self, key, item): + return safe_timestamp(item.get(key) or item[b'mtime']) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 8e2f35c65..924a1e540 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -892,16 +892,50 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in('test-2', output) self.assert_not_in('something-else', output) - def test_list_list_format(self): + def test_list_format(self): self.cmd('init', self.repository_location) test_archive = self.repository_location + '::test' self.cmd('create', test_archive, src_dir) + self.cmd('list', '--list-format', '-', test_archive, exit_code=1) + self.archiver.exit_code = 0 # reset exit code for following tests output_1 = self.cmd('list', test_archive) - output_2 = self.cmd('list', '--list-format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive) - output_3 = self.cmd('list', '--list-format', '{mtime:%s} {path}{NL}', test_archive) + output_2 = self.cmd('list', '--format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive) + output_3 = self.cmd('list', '--format', '{mtime:%s} {path}{NL}', test_archive) self.assertEqual(output_1, output_2) self.assertNotEqual(output_1, output_3) + def test_list_hash(self): + self.create_regular_file('empty_file', size=0) + self.create_regular_file('amb', contents=b'a' * 1000000) + self.cmd('init', self.repository_location) + test_archive = self.repository_location + '::test' + self.cmd('create', test_archive, 'input') + output = 
self.cmd('list', '--format', '{sha256} {path}{NL}', test_archive) + assert "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0 input/amb" in output + assert "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 input/empty_file" in output + + def test_list_chunk_counts(self): + self.create_regular_file('empty_file', size=0) + self.create_regular_file('two_chunks') + with open(os.path.join(self.input_path, 'two_chunks'), 'wb') as fd: + fd.write(b'abba' * 2000000) + fd.write(b'baab' * 2000000) + self.cmd('init', self.repository_location) + test_archive = self.repository_location + '::test' + self.cmd('create', test_archive, 'input') + output = self.cmd('list', '--format', '{num_chunks} {unique_chunks} {path}{NL}', test_archive) + assert "0 0 input/empty_file" in output + assert "2 2 input/two_chunks" in output + + def test_list_size(self): + self.create_regular_file('compressible_file', size=10000) + self.cmd('init', self.repository_location) + test_archive = self.repository_location + '::test' + self.cmd('create', '-C', 'lz4', test_archive, 'input') + output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', test_archive) + size, csize, path = output.split("\n")[1].split(" ") + assert int(csize) < int(size) + def test_break_lock(self): self.cmd('init', self.repository_location) self.cmd('break-lock', self.repository_location) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index cdb96b964..3a3f2361d 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -15,7 +15,7 @@ from ..helpers import Location, format_file_size, format_timedelta, make_path_sa yes, TRUISH, FALSISH, DEFAULTISH, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \ - PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern + PatternMatcher, RegexPattern, PathPrefixPattern, 
FnmatchPattern, ShellPattern, partial_format from . import BaseTestCase, environment_variable, FakeInputs @@ -877,3 +877,11 @@ def test_progress_endless_step(capfd): pi.show() out, err = capfd.readouterr() assert err == '.' + + +def test_partial_format(): + assert partial_format('{space:10}', {'space': ' '}) == ' ' * 10 + assert partial_format('{foobar}', {'bar': 'wrong', 'foobar': 'correct'}) == 'correct' + assert partial_format('{unknown_key}', {}) == '{unknown_key}' + assert partial_format('{key}{{escaped_key}}', {}) == '{key}{{escaped_key}}' + assert partial_format('{{escaped_key}}', {'escaped_key': 1234}) == '{{escaped_key}}'