Patch summary (free text ahead of the first "diff --git" header)

Adds three options to "borg create":
  * --paths-from-stdin    read a delimiter-separated list of paths from stdin
  * --paths-from-command  run PATH as a command and read the list from its stdout
  * --paths-delimiter     delimiter for both options above (default: newline);
                          backslash escapes are expanded via eval_escapes()

New helpers: iter_separated() in helpers/misc.py, eval_escapes() in
helpers/parseformat.py.

Review fix folded into this patch: iter_separated() used to split every freshly
read buffer on its own and only afterwards glue the carried-over remainder onto
the first piece, so a multi-character separator that straddled two read() calls
was never recognised. The remainder is now split together with the new data,
and test_iter_separated covers that case with a tiny read_size.

NOTE(review): the line layout of this patch was reconstructed; the indentation
of context lines is inferred from the surrounding Python structure. Confirm
against the target tree, or apply with whitespace-tolerant matching.
NOTE(review): the post-image blob ids on the "index" lines of helpers/misc.py
and testsuite/helpers.py were not recomputed after the fix.

diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index 7315aca0e..029776641 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -27,6 +27,7 @@ try:
     from binascii import unhexlify
     from contextlib import contextmanager
     from datetime import datetime, timedelta
+    from io import TextIOWrapper
 
     from .logger import create_logger, setup_logging
 
@@ -51,7 +52,7 @@ try:
     from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
     from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
     from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
-    from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
+    from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
     from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
     from .helpers import timestamp
     from .helpers import get_cache_dir, os_stat
@@ -73,6 +74,7 @@ try:
     from .helpers import flags_root, flags_dir, flags_special_follow, flags_special
     from .helpers import msgpack
     from .helpers import sig_int
+    from .helpers import iter_separated
     from .nanorst import rst_to_terminal
     from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
     from .patterns import PatternMatcher
@@ -533,6 +535,37 @@ class Archiver:
                 else:
                     status = '-'
                 self.print_file_status(status, path)
+            elif args.paths_from_command or args.paths_from_stdin:
+                paths_sep = eval_escapes(args.paths_delimiter) if args.paths_delimiter is not None else '\n'
+                if args.paths_from_command:
+                    try:
+                        proc = subprocess.Popen(args.paths, stdout=subprocess.PIPE)
+                    except (FileNotFoundError, PermissionError) as e:
+                        self.print_error('Failed to execute command: %s', e)
+                        return self.exit_code
+                    pipe_bin = proc.stdout
+                else:  # args.paths_from_stdin == True
+                    pipe_bin = sys.stdin.buffer
+                pipe = TextIOWrapper(pipe_bin, errors='surrogateescape')
+                for path in iter_separated(pipe, paths_sep):
+                    try:
+                        with backup_io('stat'):
+                            st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
+                        status = self._process_any(path=path, parent_fd=None, name=None, st=st, fso=fso,
+                                                   cache=cache, read_special=args.read_special, dry_run=dry_run)
+                    except (BackupOSError, BackupError) as e:
+                        self.print_warning('%s: %s', path, e)
+                        status = 'E'
+                    if status == 'C':
+                        self.print_warning('%s: file changed while we backed it up', path)
+                    if status is None:
+                        status = '?'
+                    self.print_file_status(status, path)
+                if args.paths_from_command:
+                    rc = proc.wait()
+                    if rc != 0:
+                        self.print_error('Command %r exited with status %d', args.paths[0], rc)
+                        return self.exit_code
             else:
                 for path in args.paths:
                     if path == '-':  # stdin
@@ -3277,6 +3310,13 @@ class Archiver:
         subparser.add_argument('--content-from-command', action='store_true',
                                help='interpret PATH as command and store its stdout. See also section Reading from'
                                     ' stdin below.')
+        subparser.add_argument('--paths-from-stdin', action='store_true',
+                               help='read DELIM-separated list of paths to backup from stdin. Will not '
+                                    'recurse into directories.')
+        subparser.add_argument('--paths-from-command', action='store_true',
+                               help='interpret PATH as command and treat its output as ``--paths-from-stdin``')
+        subparser.add_argument('--paths-delimiter', metavar='DELIM',
+                               help='set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: \\n) ')
 
         exclude_group = define_exclusion_group(subparser, tag_files=True)
         exclude_group.add_argument('--exclude-nodump', dest='exclude_nodump', action='store_true',
@@ -4522,10 +4562,12 @@ class Archiver:
         args = parser.parse_args(args or ['-h'])
         parser.common_options.resolve(args)
         func = get_func(args)
+        if func == self.do_create and args.paths and args.paths_from_stdin:
+            parser.error('Must not pass PATH with ``--paths-from-stdin``.')
         if func == self.do_create and not args.paths:
-            if args.content_from_command:
+            if args.content_from_command or args.paths_from_command:
                 parser.error('No command given.')
-            else:
+            elif not args.paths_from_stdin:
                 # need at least 1 path but args.paths may also be populated from patterns
                 parser.error('Need at least one PATH argument.')
         if not getattr(args, 'lock', True):  # Option --bypass-lock sets args.lock = False
diff --git a/src/borg/helpers/misc.py b/src/borg/helpers/misc.py
index bc7369076..aec90ee7b 100644
--- a/src/borg/helpers/misc.py
+++ b/src/borg/helpers/misc.py
@@ -213,3 +213,21 @@ class ErrorIgnoringTextIOWrapper(io.TextIOWrapper):
                 except OSError:
                     pass
         return len(s)
+
+
+def iter_separated(fd, sep=None, read_size=4096):
+    """Iter over chunks of open file ``fd`` delimited by ``sep``. Doesn't trim."""
+    buf = fd.read(read_size)
+    is_str = isinstance(buf, str)
+    part = '' if is_str else b''
+    sep = sep or ('\n' if is_str else b'\n')
+    while len(buf) > 0:
+        # split the carried-over remainder together with the new data, so that a
+        # multi-character separator straddling two reads is still recognised
+        *full, part = (part + buf).split(sep)
+        yield from full
+        buf = fd.read(read_size)
+    # won't yield an empty part if stream ended with `sep`
+    # or if there was no data before EOF
+    if len(part) > 0:
+        yield part
diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py
index b17da5055..2994c07bc 100644
--- a/src/borg/helpers/parseformat.py
+++ b/src/borg/helpers/parseformat.py
@@ -48,6 +48,11 @@ def remove_surrogates(s, errors='replace'):
     return s.encode('utf-8', errors).decode('utf-8')
 
 
+def eval_escapes(s):
+    """Evaluate literal escape sequences in a string (eg `\\n` -> `\n`)."""
+    return s.encode('ascii', 'backslashreplace').decode('unicode-escape')
+
+
 def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
     for key in keys:
         if isinstance(d.get(key), bytes):
diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py
index b973dc2db..339b6917b 100644
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -1065,6 +1065,47 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         output = self.cmd('create', '--content-from-command', self.repository_location + '::test', exit_code=2)
         assert output.endswith('No command given.\n')
 
+    def test_create_paths_from_stdin(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.create_regular_file("file1", size=1024 * 80)
+        self.create_regular_file("dir1/file2", size=1024 * 80)
+        self.create_regular_file("dir1/file3", size=1024 * 80)
+        self.create_regular_file("file4", size=1024 * 80)
+
+        input_data = b'input/file1\0input/dir1\0input/file4'
+        self.cmd('create', '--paths-from-stdin', '--paths-delimiter', '\\0',
+                 self.repository_location + '::test', input=input_data)
+        archive_list = self.cmd('list', '--json-lines', self.repository_location + '::test')
+        paths = [json.loads(line)['path'] for line in archive_list.split('\n') if line]
+        assert paths == ['input/file1', 'input/dir1', 'input/file4']
+
+    def test_create_paths_from_command(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.create_regular_file("file1", size=1024 * 80)
+        self.create_regular_file("file2", size=1024 * 80)
+        self.create_regular_file("file3", size=1024 * 80)
+        self.create_regular_file("file4", size=1024 * 80)
+
+        input_data = 'input/file1\ninput/file2\ninput/file3'
+        self.cmd('create', '--paths-from-command',
+                 self.repository_location + '::test', '--', 'echo', input_data)
+        archive_list = self.cmd('list', '--json-lines', self.repository_location + '::test')
+        paths = [json.loads(line)['path'] for line in archive_list.split('\n') if line]
+        assert paths == ['input/file1', 'input/file2', 'input/file3']
+
+    def test_create_paths_from_command_with_failed_command(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        output = self.cmd('create', '--paths-from-command', self.repository_location + '::test',
+                          '--', 'sh', '-c', 'exit 73;', exit_code=2)
+        assert output.endswith("Command 'sh' exited with status 73\n")
+        archive_list = json.loads(self.cmd('list', '--json', self.repository_location))
+        assert archive_list['archives'] == []
+
+    def test_create_paths_from_command_missing_command(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        output = self.cmd('create', '--paths-from-command', self.repository_location + '::test', exit_code=2)
+        assert output.endswith('No command given.\n')
+
     def test_create_without_root(self):
         """test create without a root"""
         self.cmd('init', '--encryption=repokey', self.repository_location)
diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py
index f3d80b250..795ea7680 100644
--- a/src/borg/testsuite/helpers.py
+++ b/src/borg/testsuite/helpers.py
@@ -4,6 +4,7 @@ import shutil
 import sys
 from argparse import ArgumentTypeError
 from datetime import datetime, timezone, timedelta
+from io import StringIO, BytesIO
 from time import sleep
 
 import pytest
@@ -27,6 +28,8 @@ from ..helpers import chunkit
 from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
 from ..helpers import popen_with_error_handling
 from ..helpers import dash_open
+from ..helpers import iter_separated
+from ..helpers import eval_escapes
 
 from . import BaseTestCase, FakeInputs
 
@@ -1022,3 +1025,30 @@ def test_dash_open():
     assert dash_open('-', 'w') is sys.stdout
     assert dash_open('-', 'rb') is sys.stdin.buffer
     assert dash_open('-', 'wb') is sys.stdout.buffer
+
+
+def test_iter_separated():
+    # newline and utf-8
+    sep, items = '\n', ['foo', 'bar/baz', 'αáčő']
+    fd = StringIO(sep.join(items))
+    assert list(iter_separated(fd)) == items
+    # null and bogus ending
+    sep, items = '\0', ['foo/bar', 'baz', 'spam']
+    fd = StringIO(sep.join(items) + '\0')
+    assert list(iter_separated(fd, sep=sep)) == ['foo/bar', 'baz', 'spam']
+    # multichar
+    sep, items = 'SEP', ['foo/bar', 'baz', 'spam']
+    fd = StringIO(sep.join(items))
+    assert list(iter_separated(fd, sep=sep)) == items
+    # multichar separator straddling a read boundary
+    fd = StringIO(sep.join(items))
+    assert list(iter_separated(fd, sep=sep, read_size=2)) == items
+    # bytes
+    sep, items = b'\n', [b'foo', b'blop\t', b'gr\xe4ezi']
+    fd = BytesIO(sep.join(items))
+    assert list(iter_separated(fd)) == items
+
+
+def test_eval_escapes():
+    assert eval_escapes('\\n\\0\\x23') == '\n\0#'
+    assert eval_escapes('äç\\n') == 'äç\n'