@staticmethod
def build_matcher(inclexcl_patterns, paths):
    """Create the path matcher for a command from patterns and path arguments.

    :param inclexcl_patterns: include/exclude pattern entries; when non-empty
        they are registered first via ``PatternMatcher.add_inclexcl``.
    :param paths: path strings; when non-empty each one is parsed as a
        ``PathPrefixPattern`` and registered with the value ``True``.
    :return: tuple ``(matcher, include_patterns)`` where ``include_patterns``
        is the list of patterns built from ``paths`` (empty if none given).
    """
    result = PatternMatcher()
    if inclexcl_patterns:
        result.add_inclexcl(inclexcl_patterns)

    path_patterns = []
    if paths:
        path_patterns = [parse_pattern(path, PathPrefixPattern) for path in paths]
        result.add(path_patterns, True)

    # The fallback is True only when no path patterns were given.
    result.fallback = len(path_patterns) == 0
    return result, path_patterns
""" @@ -233,8 +245,7 @@ class Archiver: def do_create(self, args, repository, manifest=None, key=None): """Create new archive""" matcher = PatternMatcher(fallback=True) - if args.excludes: - matcher.add(args.excludes, False) + matcher.add_inclexcl(args.pattern) def create_inner(archive, cache): # Add cache dir to inode_skip list @@ -424,17 +435,7 @@ class Archiver: if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )): logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8') - matcher = PatternMatcher() - if args.excludes: - matcher.add(args.excludes, False) - - include_patterns = [] - - if args.paths: - include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths) - matcher.add(include_patterns, True) - - matcher.fallback = not include_patterns + matcher, include_patterns = self.build_matcher(args.pattern, args.paths) output_list = args.output_list dry_run = args.dry_run @@ -893,8 +894,9 @@ class Archiver: helptext = collections.OrderedDict() helptext['patterns'] = textwrap.dedent(''' - Exclusion patterns support four separate styles, fnmatch, shell, regular - expressions and path prefixes. By default, fnmatch is used. If followed + File patterns support four separate styles, fnmatch, shell, regular + expressions and path prefixes. By default, fnmatch is used for + `--exclude` patterns and shell-style is used for `--pattern`. If followed by a colon (':') the first two characters of a pattern are used as a style selector. Explicit style selection is necessary when a non-default style is desired or when the desired pattern starts with @@ -902,12 +904,12 @@ class Archiver: `Fnmatch `_, selector `fm:` - This is the default style. These patterns use a variant of shell - pattern syntax, with '*' matching any number of characters, '?' 
- matching any single character, '[...]' matching any single - character specified, including ranges, and '[!...]' matching any - character not specified. For the purpose of these patterns, the - path separator ('\\' for Windows and '/' on other systems) is not + This is the default style for --exclude and --exclude-from. + These patterns use a variant of shell pattern syntax, with '*' matching + any number of characters, '?' matching any single character, '[...]' + matching any single character specified, including ranges, and '[!...]' + matching any character not specified. For the purpose of these patterns, + the path separator ('\\' for Windows and '/' on other systems) is not treated specially. Wrap meta-characters in brackets for a literal match (i.e. `[?]` to match the literal character `?`). For a path to match a pattern, it must completely match from start to end, or @@ -918,6 +920,7 @@ class Archiver: Shell-style patterns, selector `sh:` + This is the default style for --pattern and --patterns-from. Like fnmatch patterns these are similar to shell patterns. The difference is that the pattern may include `**/` for matching zero or more directory levels, `*` for matching zero or more arbitrary characters with the @@ -978,7 +981,44 @@ class Archiver: re:^/home/[^/]\.tmp/ sh:/home/*/.thumbnails EOF - $ borg create --exclude-from exclude.txt backup /\n\n''') + $ borg create --exclude-from exclude.txt backup / + + # exclude the contents of /data/docs/ but not /data/docs/pdf + $ borg create -e +/data/docs/pdf -e /data/docs/ backup / + # equivalent: + $ borg create -e +pm:/data/docs/pdf -e -pm:/data/docs/ backup / + + + A more general way to define filename matching patterns may be passed via + `--pattern` and `--patterns-from`. Using these options, you may specify the + backup roots (starting points) and patterns for inclusion/exclusion. A + root path starts with the prefix `R`, followed by a path (a plain path, not a + file pattern). 
An include rule is specified by `+` followed by a pattern. + Exclude rules start with a `-`. + Inclusion patterns are useful to e.g. exclude the contents of a directory + except for some important files in this directory. The first matching pattern + is used so if an include pattern matches before an exclude pattern, the file + is backed up. + + Note that the default pattern style for `--pattern` and `--patterns-from` is + shell style (`sh:`), so those patterns behave like rsync include/exclude patterns. + + An example `--patterns-from` file could look like that:: + + R / + # can be rebuild + - /home/*/.cache + # they're downloads for a reason + - /home/*/Downloads + # susan is a nice person + # include susans home + + /home/susan + # ... and its contents + + /home/susan/* + # don't backup the other home directories + - /home/* + +\n\n''') helptext['placeholders'] = textwrap.dedent(''' Repository (or Archive) URLs, --prefix and --remote-path values support these placeholders: @@ -1339,10 +1379,10 @@ class Archiver: subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS', help='only display items with the given status characters') subparser.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', + type=parse_exclude_pattern, action='append', dest='pattern', metavar="PATTERN", help='exclude paths matching PATTERN') subparser.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', + type=argparse.FileType('r'), action='append', default=[], metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') subparser.add_argument('--exclude-caches', dest='exclude_caches', action='store_true', default=False, @@ -1353,6 +1393,13 @@ class Archiver: subparser.add_argument('--keep-tag-files', dest='keep_tag_files', action='store_true', default=False, help='keep tag files of excluded caches/directories') + subparser.add_argument('--pattern', dest='pattern', + 
type=parse_inclexcl_pattern, action='append', + metavar="PATTERN", help='include/exclude paths matching PATTERN') + subparser.set_defaults(pattern=[]) + subparser.add_argument('--patterns-from', dest='pattern_files', + type=argparse.FileType('r'), action='append', default=[], + metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line') subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval', type=int, default=300, metavar='SECONDS', help='write checkpoint every SECONDS seconds (Default: 300)') @@ -1423,11 +1470,18 @@ class Archiver: default=False, action='store_true', help='do not actually change any files') subparser.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', + type=parse_exclude_pattern, action='append', dest='pattern', metavar="PATTERN", help='exclude paths matching PATTERN') subparser.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', + type=argparse.FileType('r'), action='append', default=[], metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') + subparser.add_argument('--pattern', dest='pattern', + type=parse_inclexcl_pattern, action='append', + metavar="PATTERN", help='include/exclude paths matching PATTERN') + subparser.set_defaults(pattern=[]) + subparser.add_argument('--patterns-from', dest='pattern_files', + type=argparse.FileType('r'), action='append', default=[], + metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line') subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', default=False, help='only obey numeric user and group identifiers') @@ -1982,7 +2036,7 @@ class Archiver: args = self.preprocess_args(args) parser = self.build_parser(args) args = parser.parse_args(args or ['-h']) - update_excludes(args) + update_patterns(args) return args def run(self, args): diff --git a/borg/helpers.py 
def load_patterns(fh):
    """Load and parse include/exclude/root patterns from a file object.

    Every line is stripped of surrounding whitespace. Lines that are empty
    after stripping, or that start with '#', are ignored. All other lines are
    parsed with parse_inclexcl_pattern; any error it raises propagates.

    :param fh: iterable of text lines (e.g. an open text file)
    :return: tuple ``(roots, inclexclpatterns)``: ``roots`` is the list of
        path strings taken from root-path lines, ``inclexclpatterns`` the list
        of all other parsed patterns, both in file order.
    """
    roots = []
    inclexclpatterns = []
    for patternline in (rawline.strip() for rawline in fh):
        # Filter blank and comment lines *before* parsing: the parser rejects
        # an empty string, so a blank line must never reach it.
        if not patternline or patternline.startswith('#'):
            continue
        pattern = parse_inclexcl_pattern(patternline)
        if pattern.ptype is RootPath:
            roots.append(pattern.pattern)
        else:
            inclexclpatterns.append(pattern)
    return roots, inclexclpatterns


def update_patterns(args):
    """Merge patterns from exclude- and pattern-files with those on command line.

    Reads ``args.pattern_files`` (via load_patterns) and ``args.exclude_files``
    (via load_excludes). Root paths found in pattern files are appended to
    ``args.paths``; all loaded patterns are appended to ``args.pattern``,
    pattern-file entries first, exclude-file entries after them.

    Namespaces that lack these attributes (or hold None) are left untouched,
    so the function is safe to call for every subcommand. All given files are
    closed before returning, even if parsing one of them fails.

    :param args: argparse namespace, modified in place
    """
    pattern_files = getattr(args, 'pattern_files', None) or []
    exclude_files = getattr(args, 'exclude_files', None) or []

    new_roots = []
    new_patterns = []
    try:
        for file in pattern_files:
            roots, inclexclpatterns = load_patterns(file)
            new_roots += roots
            new_patterns += inclexclpatterns
        for file in exclude_files:
            new_patterns += load_excludes(file)
    finally:
        # Closing an already exhausted or closed file object is harmless.
        for file in list(pattern_files) + list(exclude_files):
            file.close()

    # Rebind to fresh lists instead of extending in place: the existing list
    # may be the very object registered as the parser default.
    if new_roots:
        args.paths = list(getattr(args, 'paths', None) or []) + new_roots
    if new_patterns:
        args.pattern = list(getattr(args, 'pattern', None) or []) + new_patterns
def parse_exclude_pattern(pattern, fallback=FnmatchPattern):
    """Parse an exclude pattern string into an InclExclPattern.

    The string is parsed with parse_pattern (using ``fallback`` as the
    fallback style) and wrapped with ``ptype`` False, i.e. as an exclude rule.
    No '+'/'-'/'R' prefix is interpreted here.

    :param pattern: pattern string as given to --exclude / an exclude file
    :param fallback: fallback pattern class handed to parse_pattern
    :return: ``InclExclPattern(parsed_pattern, False)``
    """
    return InclExclPattern(parse_pattern(pattern, fallback), False)


def parse_inclexcl_pattern(pattern, fallback=ShellPattern):
    """Parse a prefixed include/exclude/root rule into an InclExclPattern.

    The first character selects the rule type: '-' exclude (``ptype`` False),
    '+' include (``ptype`` True), 'R' or 'r' root path (``ptype`` RootPath).
    Whitespace between the prefix and the rest is dropped. For root paths the
    remaining text is stored as a plain string; otherwise it is parsed with
    parse_pattern using ``fallback`` as the fallback style.

    :param pattern: rule string, e.g. ``"- /home/*/.cache"`` or ``"R /"``
    :param fallback: fallback pattern class handed to parse_pattern
    :return: ``InclExclPattern(pattern_or_path, ptype)``
    :raises argparse.ArgumentTypeError: if the prefix is missing/unknown or
        nothing is left after the prefix
    """
    type_prefix_map = {
        '-': False,
        '+': True,
        'R': RootPath,
        'r': RootPath,
    }
    # A lone prefix character carries no pattern, so it is treated like an
    # unknown prefix. False is a valid rule type, hence the explicit None.
    ptype = type_prefix_map.get(pattern[:1]) if len(pattern) > 1 else None
    remainder = pattern[1:].lstrip()
    if ptype is None or not remainder:
        # Report the text exactly as the user wrote it, not the stripped rest.
        raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(pattern))
    if ptype is RootPath:
        return InclExclPattern(remainder, RootPath)
    return InclExclPattern(parse_pattern(remainder, fallback), ptype)
is_slow_msgpack, \ yes, TRUISH, FALSISH, DEFAULTISH, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \ - ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \ + ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, load_patterns, parse_pattern, \ PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, \ Buffer from . import BaseTestCase, FakeInputs @@ -424,7 +425,7 @@ def test_invalid_unicode_pattern(pattern): (["pp:aaabbb"], None), (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]), ]) -def test_patterns_from_file(tmpdir, lines, expected): +def test_exclude_patterns_from_file(tmpdir, lines, expected): files = [ '/data/something00.txt', '/more/data', '/home', ' #/wsfoobar', @@ -434,7 +435,7 @@ def test_patterns_from_file(tmpdir, lines, expected): def evaluate(filename): matcher = PatternMatcher(fallback=True) - matcher.add(load_excludes(open(filename, "rt")), False) + matcher.add_inclexcl(load_excludes(open(filename, "rt"))) return [path for path in files if matcher.match(path)] exclfile = tmpdir.join("exclude.txt") @@ -445,6 +446,93 @@ def test_patterns_from_file(tmpdir, lines, expected): assert evaluate(str(exclfile)) == (files if expected is None else expected) +@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [ + # "None" means all files, i.e. 
@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
    # (lines of the pattern file, expected root paths, expected number of include/exclude patterns)
    ([], [], 0),
    (["# Comment only"], [], 0),
    (["- *"], [], 1),
    (["+fm:*/something00.txt",
      "-/data"], [], 2),
    (["R /"], ["/"], 0),
    (["R /",
      "# comment"], ["/"], 0),
    (["# comment",
      "- /data",
      "R /home"], ["/home"], 1),
])
def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
    """load_patterns separates root paths from include/exclude patterns."""
    patternfile = tmpdir.join("exclude.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    # Use a context manager so the file handle is closed deterministically.
    with open(str(patternfile), "rt") as fh:
        roots, inclexclpatterns = load_patterns(fh)

    assert roots == expected_roots
    assert len(inclexclpatterns) == expected_numpatterns


@pytest.mark.parametrize("lines", [
    (["X /data"]),  # illegal pattern type prefix
    (["/data"]),  # need a pattern type prefix
])
def test_load_invalid_patterns_from_file(tmpdir, lines):
    """Lines without a valid rule-type prefix are rejected."""
    patternfile = tmpdir.join("exclude.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    with open(str(patternfile), "rt") as fh:
        # Keep only the call under test inside the raises block.
        with pytest.raises(argparse.ArgumentTypeError):
            load_patterns(fh)


@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["- *"], []),
    # default match type is sh: for patterns -> * doesn't match a /
    (["-*/something0?.txt"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
      '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something00.txt"],
     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something0?.txt"],
     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+/*/something0?.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:*/something00.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:/home/leo",
      "-/home/"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
])
def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
    """Patterns loaded from a file select the expected subset of paths."""
    files = [
        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
        '/home', '/home/leo', '/home/leo/t', '/home/other'
    ]

    patternfile = tmpdir.join("exclude.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    with open(str(patternfile), "rt") as fh:
        roots, inclexclpatterns = load_patterns(fh)

    matcher = PatternMatcher(fallback=True)
    matcher.add_inclexcl(inclexclpatterns)
    matched = [path for path in files if matcher.match(path)]

    assert matched == (files if expected is None else expected)