add two new options --pattern and --patterns-from as discussed in #1406

This commit is contained in:
Alexander 'Leo' Bergolth 2016-08-02 16:02:02 +02:00
parent d5bc486dc9
commit 876b670d60
3 changed files with 240 additions and 42 deletions

View file

@ -18,9 +18,9 @@ import collections
from . import __version__ from . import __version__
from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \ from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \
parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, bin_to_hex, \ parse_pattern, parse_exclude_pattern, parse_inclexcl_pattern, PathPrefixPattern, to_localtime, timestamp, \
get_cache_dir, prune_within, prune_split, \ safe_timestamp, bin_to_hex, get_cache_dir, prune_within, prune_split, \
Manifest, NoManifestError, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ Manifest, NoManifestError, remove_surrogates, update_patterns, format_archive, check_extension_modules, Statistics, \
dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, PrefixSpec, is_slow_msgpack, yes, sysinfo, \ dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, PrefixSpec, is_slow_msgpack, yes, sysinfo, \
EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ErrorIgnoringTextIOWrapper EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ErrorIgnoringTextIOWrapper
from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
@ -121,6 +121,18 @@ class Archiver:
if self.output_list and (self.output_filter is None or status in self.output_filter): if self.output_list and (self.output_filter is None or status in self.output_filter):
logger.info("%1s %s", status, remove_surrogates(path)) logger.info("%1s %s", status, remove_surrogates(path))
@staticmethod
def build_matcher(inclexcl_patterns, paths):
    """Construct a PatternMatcher from include/exclude patterns and paths.

    Each entry in *paths* (presumably the PATH arguments from the command
    line — confirm against callers) becomes a PathPrefixPattern include
    rule; *inclexcl_patterns* are added as-is with their own ptype.
    When no include rules exist, the matcher falls back to matching
    every path.

    Returns a (matcher, include_patterns) tuple.
    """
    matcher = PatternMatcher()
    if inclexcl_patterns:
        matcher.add_inclexcl(inclexcl_patterns)
    roots = [parse_pattern(path, PathPrefixPattern) for path in paths] if paths else []
    matcher.add(roots, True)
    # only fall back to "match everything" when there are no include rules
    matcher.fallback = not roots
    return matcher, roots
def do_serve(self, args): def do_serve(self, args):
"""Start in server mode. This command is usually not used manually. """Start in server mode. This command is usually not used manually.
""" """
@ -233,8 +245,7 @@ class Archiver:
def do_create(self, args, repository, manifest=None, key=None): def do_create(self, args, repository, manifest=None, key=None):
"""Create new archive""" """Create new archive"""
matcher = PatternMatcher(fallback=True) matcher = PatternMatcher(fallback=True)
if args.excludes: matcher.add_inclexcl(args.pattern)
matcher.add(args.excludes, False)
def create_inner(archive, cache): def create_inner(archive, cache):
# Add cache dir to inode_skip list # Add cache dir to inode_skip list
@ -424,17 +435,7 @@ class Archiver:
if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )): if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8') logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')
matcher = PatternMatcher() matcher, include_patterns = self.build_matcher(args.pattern, args.paths)
if args.excludes:
matcher.add(args.excludes, False)
include_patterns = []
if args.paths:
include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths)
matcher.add(include_patterns, True)
matcher.fallback = not include_patterns
output_list = args.output_list output_list = args.output_list
dry_run = args.dry_run dry_run = args.dry_run
@ -893,8 +894,9 @@ class Archiver:
helptext = collections.OrderedDict() helptext = collections.OrderedDict()
helptext['patterns'] = textwrap.dedent(''' helptext['patterns'] = textwrap.dedent('''
Exclusion patterns support four separate styles, fnmatch, shell, regular File patterns support four separate styles, fnmatch, shell, regular
expressions and path prefixes. By default, fnmatch is used. If followed expressions and path prefixes. By default, fnmatch is used for
`--exclude` patterns and shell-style is used for `--pattern`. If followed
by a colon (':') the first two characters of a pattern are used as a by a colon (':') the first two characters of a pattern are used as a
style selector. Explicit style selection is necessary when a style selector. Explicit style selection is necessary when a
non-default style is desired or when the desired pattern starts with non-default style is desired or when the desired pattern starts with
@ -902,12 +904,12 @@ class Archiver:
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:` `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
This is the default style. These patterns use a variant of shell This is the default style for --exclude and --exclude-from.
pattern syntax, with '*' matching any number of characters, '?' These patterns use a variant of shell pattern syntax, with '*' matching
matching any single character, '[...]' matching any single any number of characters, '?' matching any single character, '[...]'
character specified, including ranges, and '[!...]' matching any matching any single character specified, including ranges, and '[!...]'
character not specified. For the purpose of these patterns, the matching any character not specified. For the purpose of these patterns,
path separator ('\\' for Windows and '/' on other systems) is not the path separator ('\\' for Windows and '/' on other systems) is not
treated specially. Wrap meta-characters in brackets for a literal treated specially. Wrap meta-characters in brackets for a literal
match (i.e. `[?]` to match the literal character `?`). For a path match (i.e. `[?]` to match the literal character `?`). For a path
to match a pattern, it must completely match from start to end, or to match a pattern, it must completely match from start to end, or
@ -918,6 +920,7 @@ class Archiver:
Shell-style patterns, selector `sh:` Shell-style patterns, selector `sh:`
This is the default style for --pattern and --patterns-from.
Like fnmatch patterns these are similar to shell patterns. The difference Like fnmatch patterns these are similar to shell patterns. The difference
is that the pattern may include `**/` for matching zero or more directory is that the pattern may include `**/` for matching zero or more directory
levels, `*` for matching zero or more arbitrary characters with the levels, `*` for matching zero or more arbitrary characters with the
@ -978,7 +981,44 @@ class Archiver:
re:^/home/[^/]\.tmp/ re:^/home/[^/]\.tmp/
sh:/home/*/.thumbnails sh:/home/*/.thumbnails
EOF EOF
$ borg create --exclude-from exclude.txt backup /\n\n''') $ borg create --exclude-from exclude.txt backup /
# exclude the contents of /data/docs/ but not /data/docs/pdf
$ borg create -e +/data/docs/pdf -e /data/docs/ backup /
# equivalent:
$ borg create -e +pm:/data/docs/pdf -e -pm:/data/docs/ backup /
A more general way to define filename matching patterns may be passed via
`--pattern` and `--patterns-from`. Using these options, you may specify the
backup roots (starting points) and patterns for inclusion/exclusion. A
root path starts with the prefix `R`, followed by a path (a plain path, not a
file pattern). An include rule is specified by `+` followed by a pattern.
Exclude rules start with a `-`.
Inclusion patterns are useful to e.g. exclude the contents of a directory
except for some important files in this directory. The first matching pattern
is used so if an include pattern matches before an exclude pattern, the file
is backed up.
Note that the default pattern style for `--pattern` and `--patterns-from` is
shell style (`sh:`), so those patterns behave like rsync include/exclude patterns.
An example `--patterns-from` file could look like that::
R /
# can be rebuilt
- /home/*/.cache
# they're downloads for a reason
- /home/*/Downloads
# susan is a nice person
# include susans home
+ /home/susan
# ... and its contents
+ /home/susan/*
# don't backup the other home directories
- /home/*
\n\n''')
helptext['placeholders'] = textwrap.dedent(''' helptext['placeholders'] = textwrap.dedent('''
Repository (or Archive) URLs, --prefix and --remote-path values support these Repository (or Archive) URLs, --prefix and --remote-path values support these
placeholders: placeholders:
@ -1339,10 +1379,10 @@ class Archiver:
subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS', subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS',
help='only display items with the given status characters') help='only display items with the given status characters')
subparser.add_argument('-e', '--exclude', dest='excludes', subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append', type=parse_exclude_pattern, action='append', dest='pattern',
metavar="PATTERN", help='exclude paths matching PATTERN') metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files', subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append', type=argparse.FileType('r'), action='append', default=[],
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('--exclude-caches', dest='exclude_caches', subparser.add_argument('--exclude-caches', dest='exclude_caches',
action='store_true', default=False, action='store_true', default=False,
@ -1353,6 +1393,13 @@ class Archiver:
subparser.add_argument('--keep-tag-files', dest='keep_tag_files', subparser.add_argument('--keep-tag-files', dest='keep_tag_files',
action='store_true', default=False, action='store_true', default=False,
help='keep tag files of excluded caches/directories') help='keep tag files of excluded caches/directories')
subparser.add_argument('--pattern', dest='pattern',
type=parse_inclexcl_pattern, action='append',
metavar="PATTERN", help='include/exclude paths matching PATTERN')
subparser.set_defaults(pattern=[])
subparser.add_argument('--patterns-from', dest='pattern_files',
type=argparse.FileType('r'), action='append', default=[],
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval', subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
type=int, default=300, metavar='SECONDS', type=int, default=300, metavar='SECONDS',
help='write checkpoint every SECONDS seconds (Default: 300)') help='write checkpoint every SECONDS seconds (Default: 300)')
@ -1423,11 +1470,18 @@ class Archiver:
default=False, action='store_true', default=False, action='store_true',
help='do not actually change any files') help='do not actually change any files')
subparser.add_argument('-e', '--exclude', dest='excludes', subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append', type=parse_exclude_pattern, action='append', dest='pattern',
metavar="PATTERN", help='exclude paths matching PATTERN') metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files', subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append', type=argparse.FileType('r'), action='append', default=[],
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('--pattern', dest='pattern',
type=parse_inclexcl_pattern, action='append',
metavar="PATTERN", help='include/exclude paths matching PATTERN')
subparser.set_defaults(pattern=[])
subparser.add_argument('--patterns-from', dest='pattern_files',
type=argparse.FileType('r'), action='append', default=[],
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
subparser.add_argument('--numeric-owner', dest='numeric_owner', subparser.add_argument('--numeric-owner', dest='numeric_owner',
action='store_true', default=False, action='store_true', default=False,
help='only obey numeric user and group identifiers') help='only obey numeric user and group identifiers')
@ -1982,7 +2036,7 @@ class Archiver:
args = self.preprocess_args(args) args = self.preprocess_args(args)
parser = self.build_parser(args) parser = self.build_parser(args)
args = parser.parse_args(args or ['-h']) args = parser.parse_args(args or ['-h'])
update_excludes(args) update_patterns(args)
return args return args
def run(self, args): def run(self, args):

View file

@ -312,17 +312,37 @@ def load_excludes(fh):
both line ends are ignored. both line ends are ignored.
""" """
patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#')) patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
return [parse_pattern(pattern) for pattern in patterns if pattern] return [parse_exclude_pattern(pattern)
for pattern in patterns if pattern]
def update_excludes(args): def load_patterns(fh):
"""Merge exclude patterns from files with those on command line.""" """Load and parse include/exclude/root patterns from file object.
if hasattr(args, 'exclude_files') and args.exclude_files: Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
if not hasattr(args, 'excludes') or args.excludes is None: """
args.excludes = [] patternlines = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
for file in args.exclude_files: roots = []
args.excludes += load_excludes(file) inclexclpatterns = []
file.close() for patternline in patternlines:
pattern = parse_inclexcl_pattern(patternline)
if pattern:
if pattern.ptype is RootPath:
roots.append(pattern.pattern)
else:
inclexclpatterns.append(pattern)
return roots, inclexclpatterns
def update_patterns(args):
    """Merge patterns from exclude- and pattern-files with those on the command line.

    For every --patterns-from file, append the root paths it yields to
    ``args.paths`` and its include/exclude patterns to ``args.pattern``;
    then append the patterns from every --exclude-from file. Each file
    object is closed after it has been read.

    Uses ``getattr`` with an empty-list default so subcommands whose
    parsers do not define these options (i.e. ``args`` lacks the
    ``pattern_files`` / ``exclude_files`` attributes) do not crash with
    AttributeError — mirroring the ``hasattr`` guards of the former
    ``update_excludes``.
    """
    for file in getattr(args, 'pattern_files', []):
        roots, inclexclpatterns = load_patterns(file)
        args.paths += roots
        args.pattern += inclexclpatterns
        file.close()
    for file in getattr(args, 'exclude_files', []):
        args.pattern += load_excludes(file)
        file.close()
class PatternMatcher: class PatternMatcher:
@ -338,6 +358,12 @@ class PatternMatcher:
""" """
self._items.extend((i, value) for i in patterns) self._items.extend((i, value) for i in patterns)
def add_inclexcl(self, patterns):
    """Append InclExclPattern entries to the internal pattern list.

    Each entry already carries its own ptype, which match() returns
    when that entry's pattern matches a path.
    """
    for inclexcl in patterns:
        self._items.append(inclexcl)
def match(self, path): def match(self, path):
for (pattern, value) in self._items: for (pattern, value) in self._items:
if pattern.match(path): if pattern.match(path):
@ -489,6 +515,8 @@ _PATTERN_STYLES = set([
_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES) _PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES)
# A parsed pattern entry: `pattern` is the pattern object (or a plain path
# string for root entries), `ptype` is False (exclude), True (include),
# or the RootPath sentinel below.
InclExclPattern = namedtuple('InclExclPattern', 'pattern ptype')
# Sentinel ptype marking an 'R'/'r' (backup root path) entry; compared with `is`.
RootPath = object()
def parse_pattern(pattern, fallback=FnmatchPattern): def parse_pattern(pattern, fallback=FnmatchPattern):
"""Read pattern from string and return an instance of the appropriate implementation class. """Read pattern from string and return an instance of the appropriate implementation class.
@ -506,6 +534,34 @@ def parse_pattern(pattern, fallback=FnmatchPattern):
return cls(pattern) return cls(pattern)
def parse_exclude_pattern(pattern, fallback=FnmatchPattern):
    """Parse an exclude-pattern string and wrap it in an InclExclPattern.

    The resulting entry always carries ptype False (exclude); the pattern
    itself is parsed with parse_pattern() using *fallback* as default style.
    """
    return InclExclPattern(parse_pattern(pattern, fallback), False)
def parse_inclexcl_pattern(pattern, fallback=ShellPattern):
    """Read an include/exclude/root pattern from a string and return an InclExclPattern.

    The first character selects the rule type: '-' exclude, '+' include,
    'R'/'r' backup root path. For root entries the remainder is kept as a
    plain path string; otherwise it is parsed with parse_pattern()
    (shell-style by default for --pattern / --patterns-from).

    Raises argparse.ArgumentTypeError when the type prefix is missing or
    unknown, or when nothing follows the prefix. The error message shows
    the pattern exactly as the user gave it, not the stripped remainder
    (the previous behavior printed the already-mutated string, which was
    empty for inputs like "- ").
    """
    type_prefix_map = {
        '-': False,
        '+': True,
        'R': RootPath,
        'r': RootPath,
    }
    original_pattern = pattern  # keep the unmodified input for error reporting
    ptype = None
    if len(pattern) > 1 and pattern[0] in type_prefix_map:
        ptype = type_prefix_map[pattern[0]]
        # drop the prefix char and any whitespace separating it from the pattern
        pattern = pattern[1:].lstrip()
    if ptype is None or not pattern:
        raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(original_pattern))
    if ptype is RootPath:
        pobj = pattern
    else:
        pobj = parse_pattern(pattern, fallback)
    return InclExclPattern(pobj, ptype)
def timestamp(s): def timestamp(s):
"""Convert a --timestamp=s argument to a datetime object""" """Convert a --timestamp=s argument to a datetime object"""
try: try:

View file

@ -9,12 +9,13 @@ import sys
import msgpack import msgpack
import msgpack.fallback import msgpack.fallback
import time import time
import argparse
from ..helpers import Location, format_file_size, format_timedelta, format_line, PlaceholderError, make_path_safe, \ from ..helpers import Location, format_file_size, format_timedelta, format_line, PlaceholderError, make_path_safe, \
prune_within, prune_split, get_cache_dir, get_keys_dir, get_security_dir, Statistics, is_slow_msgpack, \ prune_within, prune_split, get_cache_dir, get_keys_dir, get_security_dir, Statistics, is_slow_msgpack, \
yes, TRUISH, FALSISH, DEFAULTISH, \ yes, TRUISH, FALSISH, DEFAULTISH, \
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, load_patterns, parse_pattern, \
PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, \ PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, \
Buffer Buffer
from . import BaseTestCase, FakeInputs from . import BaseTestCase, FakeInputs
@ -424,7 +425,7 @@ def test_invalid_unicode_pattern(pattern):
(["pp:aaabbb"], None), (["pp:aaabbb"], None),
(["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]), (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
]) ])
def test_patterns_from_file(tmpdir, lines, expected): def test_exclude_patterns_from_file(tmpdir, lines, expected):
files = [ files = [
'/data/something00.txt', '/more/data', '/home', '/data/something00.txt', '/more/data', '/home',
' #/wsfoobar', ' #/wsfoobar',
@ -434,7 +435,7 @@ def test_patterns_from_file(tmpdir, lines, expected):
def evaluate(filename): def evaluate(filename):
matcher = PatternMatcher(fallback=True) matcher = PatternMatcher(fallback=True)
matcher.add(load_excludes(open(filename, "rt")), False) matcher.add_inclexcl(load_excludes(open(filename, "rt")))
return [path for path in files if matcher.match(path)] return [path for path in files if matcher.match(path)]
exclfile = tmpdir.join("exclude.txt") exclfile = tmpdir.join("exclude.txt")
@ -445,6 +446,93 @@ def test_patterns_from_file(tmpdir, lines, expected):
assert evaluate(str(exclfile)) == (files if expected is None else expected) assert evaluate(str(exclfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
    # empty / comment-only files yield no roots and no patterns
    ([], [], 0),
    (["# Comment only"], [], 0),
    (["- *"], [], 1),
    (["+fm:*/something00.txt",
      "-/data"], [], 2),
    (["R /"], ["/"], 0),
    (["R /",
      "# comment"], ["/"], 0),
    (["# comment",
      "- /data",
      "R /home"], ["/home"], 1),
])
def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
    """load_patterns() separates 'R' root paths from include/exclude
    patterns and ignores comment lines."""
    def evaluate(filename):
        # close the file handle deterministically instead of leaking it
        with open(filename, "rt") as fh:
            roots, inclexclpatterns = load_patterns(fh)
        return roots, len(inclexclpatterns)

    patternfile = tmpdir.join("exclude.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    roots, numpatterns = evaluate(str(patternfile))
    assert roots == expected_roots
    assert numpatterns == expected_numpatterns
@pytest.mark.parametrize("lines", [
    (["X /data"]),  # illegal pattern type prefix
    (["/data"]),    # need a pattern type prefix
])
def test_load_invalid_patterns_from_file(tmpdir, lines):
    """Invalid pattern lines must make load_patterns() raise
    argparse.ArgumentTypeError."""
    patternfile = tmpdir.join("exclude.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))
    # open via context manager so the handle is closed even when the
    # expected exception fires
    with open(str(patternfile), "rt") as fh:
        with pytest.raises(argparse.ArgumentTypeError):
            load_patterns(fh)
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["- *"], []),
    # default match type is sh: for patterns -> * doesn't match a /
    (["-*/something0?.txt"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
      '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something00.txt"],
     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something0?.txt"],
     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+/*/something0?.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:*/something00.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:/home/leo",
      "-/home/"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
])
def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
    """Include/exclude patterns loaded from a file: first match wins;
    fallback=True keeps paths no pattern matched."""
    files = [
        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
        '/home', '/home/leo', '/home/leo/t', '/home/other'
    ]

    def evaluate(filename):
        matcher = PatternMatcher(fallback=True)
        # close the pattern file deterministically instead of leaking the handle
        with open(filename, "rt") as fh:
            roots, inclexclpatterns = load_patterns(fh)
        matcher.add_inclexcl(inclexclpatterns)
        return [path for path in files if matcher.match(path)]

    patternfile = tmpdir.join("exclude.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(patternfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("pattern, cls", [ @pytest.mark.parametrize("pattern, cls", [
("", FnmatchPattern), ("", FnmatchPattern),