From 580496b59263ced0ad496c543752f417b411216c Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Mon, 1 May 2017 16:58:29 +0200 Subject: [PATCH] create patterns module --- src/borg/archive.py | 2 +- src/borg/archiver.py | 4 +- src/borg/helpers.py | 387 +-------------------------- src/borg/patterns.py | 392 +++++++++++++++++++++++++++ src/borg/testsuite/archiver.py | 4 +- src/borg/testsuite/helpers.py | 460 -------------------------------- src/borg/testsuite/patterns.py | 467 +++++++++++++++++++++++++++++++++ 7 files changed, 865 insertions(+), 851 deletions(-) create mode 100644 src/borg/patterns.py create mode 100644 src/borg/testsuite/patterns.py diff --git a/src/borg/archive.py b/src/borg/archive.py index fc209b266..0a0bd9e58 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -36,7 +36,7 @@ from .helpers import StableDict from .helpers import bin_to_hex from .helpers import safe_ns from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi -from .helpers import PathPrefixPattern, FnmatchPattern, IECommand +from .patterns import PathPrefixPattern, FnmatchPattern, IECommand from .item import Item, ArchiveItem from .key import key_factory from .platform import acl_get, acl_set, set_flags, get_flags, swidth diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 70ba07f17..c2d972024 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -51,15 +51,15 @@ from .helpers import Manifest from .helpers import hardlinkable from .helpers import StableDict from .helpers import check_extension_modules -from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo from .helpers import log_multi -from .helpers import PatternMatcher from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm from .helpers import ErrorIgnoringTextIOWrapper from .helpers import ProgressIndicatorPercent from .helpers import basic_json_data, json_print from .helpers import replace_placeholders +from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern +from .patterns import PatternMatcher from .item import Item from .key import key_creator, tam_required_file, tam_required, RepoKey, PassphraseKey from .keymanager import KeyManager diff --git a/src/borg/helpers.py b/src/borg/helpers.py index 8bc9959e9..05e277e12 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -18,14 +18,11 @@ import sys import textwrap import threading import time -import unicodedata import uuid from binascii import hexlify from collections import namedtuple, deque, abc, Counter from datetime import datetime, timezone, timedelta -from enum import Enum -from fnmatch import translate -from functools import wraps, partial, lru_cache +from functools import partial, lru_cache from itertools import islice from operator import attrgetter from string import Formatter @@ -42,7 +39,6 @@ from . import __version_tuple__ as borg_version_tuple from . import chunker from . import crypto from . import hashindex -from . import shellpattern from .constants import * # NOQA @@ -389,387 +385,6 @@ def parse_timestamp(timestamp): return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc) -def parse_patternfile_line(line, roots, ie_commands, fallback): - """Parse a pattern-file line and act depending on which command it represents.""" - ie_command = parse_inclexcl_command(line, fallback=fallback) - if ie_command.cmd is IECommand.RootPath: - roots.append(ie_command.val) - elif ie_command.cmd is IECommand.PatternStyle: - fallback = ie_command.val - else: - # it is some kind of include/exclude command - ie_commands.append(ie_command) - return fallback - - -def load_pattern_file(fileobj, roots, ie_commands, fallback=None): - if fallback is None: - fallback = ShellPattern # ShellPattern is defined later in this module - for line in clean_lines(fileobj): - fallback = parse_patternfile_line(line, roots, ie_commands, fallback) - - -def load_exclude_file(fileobj, patterns): - for patternstr in clean_lines(fileobj): - patterns.append(parse_exclude_pattern(patternstr)) - - -class ArgparsePatternAction(argparse.Action): - def __init__(self, nargs=1, **kw): - super().__init__(nargs=nargs, **kw) - - def __call__(self, parser, args, values, option_string=None): - parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern) - - -class ArgparsePatternFileAction(argparse.Action): - def __init__(self, nargs=1, **kw): - super().__init__(nargs=nargs, **kw) - - def __call__(self, parser, args, values, option_string=None): - """Load and parse patterns from a file. - Lines empty or starting with '#' after stripping whitespace on both line ends are ignored. - """ - filename = values[0] - with open(filename) as f: - self.parse(f, args) - - def parse(self, fobj, args): - load_pattern_file(fobj, args.paths, args.patterns) - - -class ArgparseExcludeFileAction(ArgparsePatternFileAction): - def parse(self, fobj, args): - load_exclude_file(fobj, args.patterns) - - -class PatternMatcher: - """Represents a collection of pattern objects to match paths against. - - *fallback* is a boolean value that *match()* returns if no matching patterns are found. - - """ - def __init__(self, fallback=None): - self._items = [] - - # Value to return from match function when none of the patterns match. - self.fallback = fallback - - # optimizations - self._path_full_patterns = {} # full path -> return value - - # indicates whether the last match() call ended on a pattern for which - # we should recurse into any matching folder. Will be set to True or - # False when calling match(). - self.recurse_dir = None - - # whether to recurse into directories when no match is found - # TODO: allow modification as a config option? - self.recurse_dir_default = True - - self.include_patterns = [] - - # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass? - self.is_include_cmd = { - IECommand.Exclude: False, - IECommand.ExcludeNoRecurse: False, - IECommand.Include: True - } - - def empty(self): - return not len(self._items) and not len(self._path_full_patterns) - - def _add(self, pattern, cmd): - """*cmd* is an IECommand value. - """ - if isinstance(pattern, PathFullPattern): - key = pattern.pattern # full, normalized path - self._path_full_patterns[key] = cmd - else: - self._items.append((pattern, cmd)) - - def add(self, patterns, cmd): - """Add list of patterns to internal list. *cmd* indicates whether the - pattern is an include/exclude pattern, and whether recursion should be - done on excluded folders. - """ - for pattern in patterns: - self._add(pattern, cmd) - - def add_includepaths(self, include_paths): - """Used to add inclusion-paths from args.paths (from commandline). - """ - include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths] - self.add(include_patterns, IECommand.Include) - self.fallback = not include_patterns - self.include_patterns = include_patterns - - def get_unmatched_include_patterns(self): - "Note that this only returns patterns added via *add_includepaths*." - return [p for p in self.include_patterns if p.match_count == 0] - - def add_inclexcl(self, patterns): - """Add list of patterns (of type CmdTuple) to internal list. - """ - for pattern, cmd in patterns: - self._add(pattern, cmd) - - def match(self, path): - """Return True or False depending on whether *path* is matched. - - If no match is found among the patterns in this matcher, then the value - in self.fallback is returned (defaults to None). - - """ - path = normalize_path(path) - # do a fast lookup for full path matches (note: we do not count such matches): - non_existent = object() - value = self._path_full_patterns.get(path, non_existent) - - if value is not non_existent: - # we have a full path match! - # TODO: get from pattern; don't hard-code - self.recurse_dir = True - return value - - # this is the slow way, if we have many patterns in self._items: - for (pattern, cmd) in self._items: - if pattern.match(path, normalize=False): - self.recurse_dir = pattern.recurse_dir - return self.is_include_cmd[cmd] - - # by default we will recurse if there is no match - self.recurse_dir = self.recurse_dir_default - return self.fallback - - -def normalize_path(path): - """normalize paths for MacOS (but do nothing on other platforms)""" - # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match. - # Windows and Unix filesystems allow different forms, so users always have to enter an exact match. - return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path - - -class PatternBase: - """Shared logic for inclusion/exclusion patterns. - """ - PREFIX = NotImplemented - - def __init__(self, pattern, recurse_dir=False): - self.pattern_orig = pattern - self.match_count = 0 - pattern = normalize_path(pattern) - self._prepare(pattern) - self.recurse_dir = recurse_dir - - def match(self, path, normalize=True): - """Return a boolean indicating whether *path* is matched by this pattern. - - If normalize is True (default), the path will get normalized using normalize_path(), - otherwise it is assumed that it already is normalized using that function. - """ - if normalize: - path = normalize_path(path) - matches = self._match(path) - if matches: - self.match_count += 1 - return matches - - def __repr__(self): - return '%s(%s)' % (type(self), self.pattern) - - def __str__(self): - return self.pattern_orig - - def _prepare(self, pattern): - "Should set the value of self.pattern" - raise NotImplementedError - - def _match(self, path): - raise NotImplementedError - - -class PathFullPattern(PatternBase): - """Full match of a path.""" - PREFIX = "pf" - - def _prepare(self, pattern): - self.pattern = os.path.normpath(pattern) - - def _match(self, path): - return path == self.pattern - - -# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path -# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path -# separator to the end of the path before matching. - - -class PathPrefixPattern(PatternBase): - """Literal files or directories listed on the command line - for some operations (e.g. extract, but not create). - If a directory is specified, all paths that start with that - path match as well. A trailing slash makes no difference. - """ - PREFIX = "pp" - - def _prepare(self, pattern): - self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep - - def _match(self, path): - return (path + os.path.sep).startswith(self.pattern) - - -class FnmatchPattern(PatternBase): - """Shell glob patterns to exclude. A trailing slash means to - exclude the contents of a directory, but not the directory itself. - """ - PREFIX = "fm" - - def _prepare(self, pattern): - if pattern.endswith(os.path.sep): - pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep - else: - pattern = os.path.normpath(pattern) + os.path.sep + '*' - - self.pattern = pattern - - # fnmatch and re.match both cache compiled regular expressions. - # Nevertheless, this is about 10 times faster. - self.regex = re.compile(translate(self.pattern)) - - def _match(self, path): - return (self.regex.match(path + os.path.sep) is not None) - - -class ShellPattern(PatternBase): - """Shell glob patterns to exclude. A trailing slash means to - exclude the contents of a directory, but not the directory itself. - """ - PREFIX = "sh" - - def _prepare(self, pattern): - sep = os.path.sep - - if pattern.endswith(sep): - pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep - else: - pattern = os.path.normpath(pattern) + sep + "**" + sep + "*" - - self.pattern = pattern - self.regex = re.compile(shellpattern.translate(self.pattern)) - - def _match(self, path): - return (self.regex.match(path + os.path.sep) is not None) - - -class RegexPattern(PatternBase): - """Regular expression to exclude. - """ - PREFIX = "re" - - def _prepare(self, pattern): - self.pattern = pattern - self.regex = re.compile(pattern) - - def _match(self, path): - # Normalize path separators - if os.path.sep != '/': - path = path.replace(os.path.sep, '/') - - return (self.regex.search(path) is not None) - - -_PATTERN_CLASSES = set([ - FnmatchPattern, - PathFullPattern, - PathPrefixPattern, - RegexPattern, - ShellPattern, -]) - -_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES) - -CmdTuple = namedtuple('CmdTuple', 'val cmd') - - -class IECommand(Enum): - """A command that an InclExcl file line can represent. - """ - RootPath = 1 - PatternStyle = 2 - Include = 3 - Exclude = 4 - ExcludeNoRecurse = 5 - - -def get_pattern_class(prefix): - try: - return _PATTERN_CLASS_BY_PREFIX[prefix] - except KeyError: - raise ValueError("Unknown pattern style: {}".format(prefix)) from None - - -def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True): - """Read pattern from string and return an instance of the appropriate implementation class. - - """ - if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum(): - (style, pattern) = (pattern[:2], pattern[3:]) - cls = get_pattern_class(style) - else: - cls = fallback - return cls(pattern, recurse_dir) - - -def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern): - """Read pattern from string and return an instance of the appropriate implementation class. - """ - epattern_obj = parse_pattern(pattern_str, fallback) - return CmdTuple(epattern_obj, IECommand.Exclude) - - -def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern): - """Read a --patterns-from command from string and return a CmdTuple object.""" - - cmd_prefix_map = { - '-': IECommand.Exclude, - '!': IECommand.ExcludeNoRecurse, - '+': IECommand.Include, - 'R': IECommand.RootPath, - 'r': IECommand.RootPath, - 'P': IECommand.PatternStyle, - 'p': IECommand.PatternStyle, - } - - try: - cmd = cmd_prefix_map[cmd_line_str[0]] - - # remaining text on command-line following the command character - remainder_str = cmd_line_str[1:].lstrip() - - if not remainder_str: - raise ValueError("Missing pattern/information!") - except (IndexError, KeyError, ValueError): - raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str)) - - if cmd is IECommand.RootPath: - # TODO: validate string? - val = remainder_str - elif cmd is IECommand.PatternStyle: - # then remainder_str is something like 're' or 'sh' - try: - val = get_pattern_class(remainder_str) - except ValueError: - raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str)) - else: - # determine recurse_dir based on command type - recurse_dir = cmd not in [IECommand.ExcludeNoRecurse] - val = parse_pattern(remainder_str, fallback, recurse_dir) - - return CmdTuple(val, cmd) - - def timestamp(s): """Convert a --timestamp=s argument to a datetime object""" try: diff --git a/src/borg/patterns.py b/src/borg/patterns.py new file mode 100644 index 000000000..88cae3570 --- /dev/null +++ b/src/borg/patterns.py @@ -0,0 +1,392 @@ +import argparse +import os.path +import re +import sys +import unicodedata +from collections import namedtuple +from enum import Enum +from fnmatch import translate + +from . import shellpattern +from .helpers import clean_lines + + +def parse_patternfile_line(line, roots, ie_commands, fallback): + """Parse a pattern-file line and act depending on which command it represents.""" + ie_command = parse_inclexcl_command(line, fallback=fallback) + if ie_command.cmd is IECommand.RootPath: + roots.append(ie_command.val) + elif ie_command.cmd is IECommand.PatternStyle: + fallback = ie_command.val + else: + # it is some kind of include/exclude command + ie_commands.append(ie_command) + return fallback + + +def load_pattern_file(fileobj, roots, ie_commands, fallback=None): + if fallback is None: + fallback = ShellPattern # ShellPattern is defined later in this module + for line in clean_lines(fileobj): + fallback = parse_patternfile_line(line, roots, ie_commands, fallback) + + +def load_exclude_file(fileobj, patterns): + for patternstr in clean_lines(fileobj): + patterns.append(parse_exclude_pattern(patternstr)) + + +class ArgparsePatternAction(argparse.Action): + def __init__(self, nargs=1, **kw): + super().__init__(nargs=nargs, **kw) + + def __call__(self, parser, args, values, option_string=None): + parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern) + + +class ArgparsePatternFileAction(argparse.Action): + def __init__(self, nargs=1, **kw): + super().__init__(nargs=nargs, **kw) + + def __call__(self, parser, args, values, option_string=None): + """Load and parse patterns from a file. + Lines empty or starting with '#' after stripping whitespace on both line ends are ignored. + """ + filename = values[0] + with open(filename) as f: + self.parse(f, args) + + def parse(self, fobj, args): + load_pattern_file(fobj, args.paths, args.patterns) + + +class ArgparseExcludeFileAction(ArgparsePatternFileAction): + def parse(self, fobj, args): + load_exclude_file(fobj, args.patterns) + + +class PatternMatcher: + """Represents a collection of pattern objects to match paths against. + + *fallback* is a boolean value that *match()* returns if no matching patterns are found. + + """ + def __init__(self, fallback=None): + self._items = [] + + # Value to return from match function when none of the patterns match. + self.fallback = fallback + + # optimizations + self._path_full_patterns = {} # full path -> return value + + # indicates whether the last match() call ended on a pattern for which + # we should recurse into any matching folder. Will be set to True or + # False when calling match(). + self.recurse_dir = None + + # whether to recurse into directories when no match is found + # TODO: allow modification as a config option? + self.recurse_dir_default = True + + self.include_patterns = [] + + # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass? + self.is_include_cmd = { + IECommand.Exclude: False, + IECommand.ExcludeNoRecurse: False, + IECommand.Include: True + } + + def empty(self): + return not len(self._items) and not len(self._path_full_patterns) + + def _add(self, pattern, cmd): + """*cmd* is an IECommand value. + """ + if isinstance(pattern, PathFullPattern): + key = pattern.pattern # full, normalized path + self._path_full_patterns[key] = cmd + else: + self._items.append((pattern, cmd)) + + def add(self, patterns, cmd): + """Add list of patterns to internal list. *cmd* indicates whether the + pattern is an include/exclude pattern, and whether recursion should be + done on excluded folders. + """ + for pattern in patterns: + self._add(pattern, cmd) + + def add_includepaths(self, include_paths): + """Used to add inclusion-paths from args.paths (from commandline). + """ + include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths] + self.add(include_patterns, IECommand.Include) + self.fallback = not include_patterns + self.include_patterns = include_patterns + + def get_unmatched_include_patterns(self): + "Note that this only returns patterns added via *add_includepaths*." + return [p for p in self.include_patterns if p.match_count == 0] + + def add_inclexcl(self, patterns): + """Add list of patterns (of type CmdTuple) to internal list. + """ + for pattern, cmd in patterns: + self._add(pattern, cmd) + + def match(self, path): + """Return True or False depending on whether *path* is matched. + + If no match is found among the patterns in this matcher, then the value + in self.fallback is returned (defaults to None). + + """ + path = normalize_path(path) + # do a fast lookup for full path matches (note: we do not count such matches): + non_existent = object() + value = self._path_full_patterns.get(path, non_existent) + + if value is not non_existent: + # we have a full path match! + # TODO: get from pattern; don't hard-code + self.recurse_dir = True + return value + + # this is the slow way, if we have many patterns in self._items: + for (pattern, cmd) in self._items: + if pattern.match(path, normalize=False): + self.recurse_dir = pattern.recurse_dir + return self.is_include_cmd[cmd] + + # by default we will recurse if there is no match + self.recurse_dir = self.recurse_dir_default + return self.fallback + + +def normalize_path(path): + """normalize paths for MacOS (but do nothing on other platforms)""" + # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match. + # Windows and Unix filesystems allow different forms, so users always have to enter an exact match. + return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path + + +class PatternBase: + """Shared logic for inclusion/exclusion patterns. + """ + PREFIX = NotImplemented + + def __init__(self, pattern, recurse_dir=False): + self.pattern_orig = pattern + self.match_count = 0 + pattern = normalize_path(pattern) + self._prepare(pattern) + self.recurse_dir = recurse_dir + + def match(self, path, normalize=True): + """Return a boolean indicating whether *path* is matched by this pattern. + + If normalize is True (default), the path will get normalized using normalize_path(), + otherwise it is assumed that it already is normalized using that function. + """ + if normalize: + path = normalize_path(path) + matches = self._match(path) + if matches: + self.match_count += 1 + return matches + + def __repr__(self): + return '%s(%s)' % (type(self), self.pattern) + + def __str__(self): + return self.pattern_orig + + def _prepare(self, pattern): + "Should set the value of self.pattern" + raise NotImplementedError + + def _match(self, path): + raise NotImplementedError + + +class PathFullPattern(PatternBase): + """Full match of a path.""" + PREFIX = "pf" + + def _prepare(self, pattern): + self.pattern = os.path.normpath(pattern) + + def _match(self, path): + return path == self.pattern + + +# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path +# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path +# separator to the end of the path before matching. + + +class PathPrefixPattern(PatternBase): + """Literal files or directories listed on the command line + for some operations (e.g. extract, but not create). + If a directory is specified, all paths that start with that + path match as well. A trailing slash makes no difference. + """ + PREFIX = "pp" + + def _prepare(self, pattern): + self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + + def _match(self, path): + return (path + os.path.sep).startswith(self.pattern) + + +class FnmatchPattern(PatternBase): + """Shell glob patterns to exclude. A trailing slash means to + exclude the contents of a directory, but not the directory itself. + """ + PREFIX = "fm" + + def _prepare(self, pattern): + if pattern.endswith(os.path.sep): + pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep + else: + pattern = os.path.normpath(pattern) + os.path.sep + '*' + + self.pattern = pattern + + # fnmatch and re.match both cache compiled regular expressions. + # Nevertheless, this is about 10 times faster. + self.regex = re.compile(translate(self.pattern)) + + def _match(self, path): + return (self.regex.match(path + os.path.sep) is not None) + + +class ShellPattern(PatternBase): + """Shell glob patterns to exclude. A trailing slash means to + exclude the contents of a directory, but not the directory itself. + """ + PREFIX = "sh" + + def _prepare(self, pattern): + sep = os.path.sep + + if pattern.endswith(sep): + pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep + else: + pattern = os.path.normpath(pattern) + sep + "**" + sep + "*" + + self.pattern = pattern + self.regex = re.compile(shellpattern.translate(self.pattern)) + + def _match(self, path): + return (self.regex.match(path + os.path.sep) is not None) + + +class RegexPattern(PatternBase): + """Regular expression to exclude. + """ + PREFIX = "re" + + def _prepare(self, pattern): + self.pattern = pattern + self.regex = re.compile(pattern) + + def _match(self, path): + # Normalize path separators + if os.path.sep != '/': + path = path.replace(os.path.sep, '/') + + return (self.regex.search(path) is not None) + + +_PATTERN_CLASSES = set([ + FnmatchPattern, + PathFullPattern, + PathPrefixPattern, + RegexPattern, + ShellPattern, +]) + +_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES) + +CmdTuple = namedtuple('CmdTuple', 'val cmd') + + +class IECommand(Enum): + """A command that an InclExcl file line can represent. + """ + RootPath = 1 + PatternStyle = 2 + Include = 3 + Exclude = 4 + ExcludeNoRecurse = 5 + + +def get_pattern_class(prefix): + try: + return _PATTERN_CLASS_BY_PREFIX[prefix] + except KeyError: + raise ValueError("Unknown pattern style: {}".format(prefix)) from None + + +def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True): + """Read pattern from string and return an instance of the appropriate implementation class. + + """ + if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum(): + (style, pattern) = (pattern[:2], pattern[3:]) + cls = get_pattern_class(style) + else: + cls = fallback + return cls(pattern, recurse_dir) + + +def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern): + """Read pattern from string and return an instance of the appropriate implementation class. + """ + epattern_obj = parse_pattern(pattern_str, fallback) + return CmdTuple(epattern_obj, IECommand.Exclude) + + +def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern): + """Read a --patterns-from command from string and return a CmdTuple object.""" + + cmd_prefix_map = { + '-': IECommand.Exclude, + '!': IECommand.ExcludeNoRecurse, + '+': IECommand.Include, + 'R': IECommand.RootPath, + 'r': IECommand.RootPath, + 'P': IECommand.PatternStyle, + 'p': IECommand.PatternStyle, + } + + try: + cmd = cmd_prefix_map[cmd_line_str[0]] + + # remaining text on command-line following the command character + remainder_str = cmd_line_str[1:].lstrip() + + if not remainder_str: + raise ValueError("Missing pattern/information!") + except (IndexError, KeyError, ValueError): + raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str)) + + if cmd is IECommand.RootPath: + # TODO: validate string? + val = remainder_str + elif cmd is IECommand.PatternStyle: + # then remainder_str is something like 're' or 'sh' + try: + val = get_pattern_class(remainder_str) + except ValueError: + raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str)) + else: + # determine recurse_dir based on command type + recurse_dir = cmd not in [IECommand.ExcludeNoRecurse] + val = parse_pattern(remainder_str, fallback, recurse_dir) + + return CmdTuple(val, cmd) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index c065feb16..3aa2c1b5c 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -33,12 +33,12 @@ from ..archiver import Archiver from ..cache import Cache from ..constants import * # NOQA from ..crypto import bytes_to_long, num_aes_blocks -from ..helpers import PatternMatcher, parse_pattern, Location, get_security_dir +from ..helpers import Location, get_security_dir from ..helpers import Manifest from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR from ..helpers import bin_to_hex -from ..helpers import IECommand from ..helpers import MAX_S +from ..patterns import IECommand, PatternMatcher, parse_pattern from ..item import Item from ..key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError from ..keymanager import RepoIdMismatch, NotABorgKeyFile diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index 047e41c84..7eb421168 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -23,9 +23,6 @@ from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless -from ..helpers import load_exclude_file, load_pattern_file -from ..helpers import parse_pattern, PatternMatcher -from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern from ..helpers import swidth_slice from ..helpers import chunkit from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS @@ -244,463 +241,6 @@ class FormatTimedeltaTestCase(BaseTestCase): ) -def check_patterns(files, pattern, expected): - """Utility for testing patterns. - """ - assert all([f == os.path.normpath(f) for f in files]), "Pattern matchers expect normalized input paths" - - matched = [f for f in files if pattern.match(f)] - - assert matched == (files if expected is None else expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. all match the given pattern - ("/", []), - ("/home", ["/home"]), - ("/home///", ["/home"]), - ("/./home", ["/home"]), - ("/home/user", ["/home/user"]), - ("/home/user2", ["/home/user2"]), - ("/home/user/.bashrc", ["/home/user/.bashrc"]), - ]) -def test_patterns_full(pattern, expected): - files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ] - - check_patterns(files, PathFullPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. all match the given pattern - ("", []), - ("relative", []), - ("relative/path/", ["relative/path"]), - ("relative/path", ["relative/path"]), - ]) -def test_patterns_full_relative(pattern, expected): - files = ["relative/path", "relative/path2", ] - - check_patterns(files, PathFullPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. all match the given pattern - ("/", None), - ("/./", None), - ("", []), - ("/home/u", []), - ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]), - ("/etc", ["/etc/server/config", "/etc/server/hosts"]), - ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]), - ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), - ("/srv", ["/srv/messages", "/srv/dmesg"]), - ]) -def test_patterns_prefix(pattern, expected): - files = [ - "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", - "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", - ] - - check_patterns(files, PathPrefixPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. all match the given pattern - ("", []), - ("foo", []), - ("relative", ["relative/path1", "relative/two"]), - ("more", ["more/relative"]), - ]) -def test_patterns_prefix_relative(pattern, expected): - files = ["relative/path1", "relative/two", "more/relative"] - - check_patterns(files, PathPrefixPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. all match the given pattern - ("/*", None), - ("/./*", None), - ("*", None), - ("*/*", None), - ("*///*", None), - ("/home/u", []), - ("/home/*", - ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", - "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), - ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), - ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]), - ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), - ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), - ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), - ("/srv*", ["/srv/messages", "/srv/dmesg"]), - ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), - ]) -def test_patterns_fnmatch(pattern, expected): - files = [ - "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", - "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", - "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", - ] - - check_patterns(files, FnmatchPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. all match the given pattern - ("*", None), - ("**/*", None), - ("/**/*", None), - ("/./*", None), - ("*/*", None), - ("*///*", None), - ("/home/u", []), - ("/home/*", - ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", - "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), - ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), - ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]), - ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]), - ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]), - ("*/.pr????e", []), - ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), - ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), - ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), - ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), - ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]), - ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]), - ("/srv/*", ["/srv/messages", "/srv/dmesg"]), - ("/srv2/**", ["/srv2", "/srv2/blafasel"]), - ("/srv2/**/", ["/srv2/blafasel"]), - ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]), - ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]), - ]) -def test_patterns_shell(pattern, expected): - files = [ - "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", - "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg", - "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", - ] - - check_patterns(files, ShellPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. all match the given pattern - ("", None), - (".*", None), - ("^/", None), - ("^abc$", []), - ("^[^/]", []), - ("^(?!/srv|/foo|/opt)", - ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", - "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]), - ]) -def test_patterns_regex(pattern, expected): - files = [ - '/srv/data', '/foo/bar', '/home', - '/home/user/.profile', '/home/user/.bashrc', - '/home/user2/.profile', '/home/user2/public_html/index.html', - '/opt/log/messages.txt', '/opt/log/dmesg.txt', - "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", - ] - - obj = RegexPattern(pattern) - assert str(obj) == pattern - assert obj.pattern == pattern - - check_patterns(files, obj, expected) - - -def test_regex_pattern(): - # The forward slash must match the platform-specific path separator - assert RegexPattern("^/$").match("/") - assert RegexPattern("^/$").match(os.path.sep) - assert not RegexPattern(r"^\\$").match("/") - - -def use_normalized_unicode(): - return sys.platform in ("darwin",) - - -def _make_test_patterns(pattern): - return [PathPrefixPattern(pattern), - FnmatchPattern(pattern), - RegexPattern("^{}/foo$".format(pattern)), - ShellPattern(pattern), - ] - - -@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}")) -def test_composed_unicode_pattern(pattern): - assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") == use_normalized_unicode() - - -@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}")) -def test_decomposed_unicode_pattern(pattern): - assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") == use_normalized_unicode() - assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - -@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1"))) -def test_invalid_unicode_pattern(pattern): - assert not pattern.match("ba/foo") - assert pattern.match(str(b"ba\x80/foo", "latin1")) - - -@pytest.mark.parametrize("lines, expected", [ - # "None" means all files, i.e. none excluded - ([], None), - (["# Comment only"], None), - (["*"], []), - (["# Comment", - "*/something00.txt", - " *whitespace* ", - # Whitespace before comment - " #/ws*", - # Empty line - "", - "# EOF"], - ["/more/data", "/home", " #/wsfoobar"]), - (["re:.*"], []), - (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]), - ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]), - (["", "", "", - "# This is a test with mixed pattern styles", - # Case-insensitive pattern - "re:(?i)BAR|ME$", - "", - "*whitespace*", - "fm:*/something00*"], - ["/more/data"]), - ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]), - ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]), - (["pp:./"], None), - (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]), - (["pp:aaabbb"], None), - (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]), - (["/nomatch", "/more/*"], - ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']), - # the order of exclude patterns shouldn't matter - (["/more/*", "/nomatch"], - ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']), - ]) -def test_exclude_patterns_from_file(tmpdir, lines, expected): - files = [ - '/data/something00.txt', '/more/data', '/home', - ' #/wsfoobar', - '\tstart/whitespace', - '/whitespace/end\t', - ] - - def evaluate(filename): - patterns = [] - load_exclude_file(open(filename, "rt"), patterns) - matcher = PatternMatcher(fallback=True) - matcher.add_inclexcl(patterns) - return [path for path in files if matcher.match(path)] - - exclfile = tmpdir.join("exclude.txt") - - with exclfile.open("wt") as fh: - fh.write("\n".join(lines)) - - assert evaluate(str(exclfile)) == (files if expected is None else expected) - - -@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [ - # "None" means all files, i.e. none excluded - ([], [], 0), - (["# Comment only"], [], 0), - (["- *"], [], 1), - (["+fm:*/something00.txt", - "-/data"], [], 2), - (["R /"], ["/"], 0), - (["R /", - "# comment"], ["/"], 0), - (["# comment", - "- /data", - "R /home"], ["/home"], 1), -]) -def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns): - def evaluate(filename): - roots = [] - inclexclpatterns = [] - load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) - return roots, len(inclexclpatterns) - patternfile = tmpdir.join("patterns.txt") - - with patternfile.open("wt") as fh: - fh.write("\n".join(lines)) - - roots, numpatterns = evaluate(str(patternfile)) - assert roots == expected_roots - assert numpatterns == expected_numpatterns - - -def test_switch_patterns_style(): - patterns = """\ - +0_initial_default_is_shell - p fm - +1_fnmatch - P re - +2_regex - +3_more_regex - P pp - +4_pathprefix - p fm - p sh - +5_shell - """ - pattern_file = io.StringIO(patterns) - roots, patterns = [], [] - load_pattern_file(pattern_file, roots, patterns) - assert len(patterns) == 6 - assert isinstance(patterns[0].val, ShellPattern) - assert isinstance(patterns[1].val, FnmatchPattern) - assert isinstance(patterns[2].val, RegexPattern) - assert isinstance(patterns[3].val, RegexPattern) - assert isinstance(patterns[4].val, PathPrefixPattern) - assert isinstance(patterns[5].val, ShellPattern) - - -@pytest.mark.parametrize("lines", [ - (["X /data"]), # illegal pattern type prefix - (["/data"]), # need a pattern type prefix -]) -def test_load_invalid_patterns_from_file(tmpdir, lines): - patternfile = tmpdir.join("patterns.txt") - with patternfile.open("wt") as fh: - fh.write("\n".join(lines)) - filename = str(patternfile) - with pytest.raises(argparse.ArgumentTypeError): - roots = [] - inclexclpatterns = [] - load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) - - -@pytest.mark.parametrize("lines, expected", [ - # "None" means all files, i.e. none excluded - ([], None), - (["# Comment only"], None), - (["- *"], []), - # default match type is sh: for patterns -> * doesn't match a / - (["-*/something0?.txt"], - ['/data', '/data/something00.txt', '/data/subdir/something01.txt', - '/home', '/home/leo', '/home/leo/t', '/home/other']), - (["-fm:*/something00.txt"], - ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']), - (["-fm:*/something0?.txt"], - ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']), - (["+/*/something0?.txt", - "-/data"], - ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']), - (["+fm:*/something00.txt", - "-/data"], - ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']), - # include /home/leo and exclude the rest of /home: - (["+/home/leo", - "-/home/*"], - ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']), - # wrong order, /home/leo is already excluded by -/home/*: - (["-/home/*", - "+/home/leo"], - ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']), - (["+fm:/home/leo", - "-/home/"], - ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']), -]) -def test_inclexcl_patterns_from_file(tmpdir, lines, expected): - files = [ - '/data', '/data/something00.txt', '/data/subdir/something01.txt', - '/home', '/home/leo', '/home/leo/t', '/home/other' - ] - - def evaluate(filename): - matcher = PatternMatcher(fallback=True) - roots = [] - inclexclpatterns = [] - load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) - matcher.add_inclexcl(inclexclpatterns) - return [path for path in files if matcher.match(path)] - - patternfile = tmpdir.join("patterns.txt") - - with patternfile.open("wt") as fh: - fh.write("\n".join(lines)) - - assert evaluate(str(patternfile)) == (files if expected is None else expected) - - -@pytest.mark.parametrize("pattern, cls", [ - ("", FnmatchPattern), - - # Default style - ("*", FnmatchPattern), - ("/data/*", FnmatchPattern), - - # fnmatch style - ("fm:", FnmatchPattern), - ("fm:*", FnmatchPattern), - ("fm:/data/*", FnmatchPattern), - ("fm:fm:/data/*", FnmatchPattern), - - # Regular expression - ("re:", RegexPattern), - ("re:.*", RegexPattern), - ("re:^/something/", RegexPattern), - ("re:re:^/something/", RegexPattern), - - # Path prefix - ("pp:", PathPrefixPattern), - ("pp:/", PathPrefixPattern), - ("pp:/data/", PathPrefixPattern), - ("pp:pp:/data/", PathPrefixPattern), - - # Shell-pattern style - ("sh:", ShellPattern), - ("sh:*", ShellPattern), - ("sh:/data/*", ShellPattern), - ("sh:sh:/data/*", ShellPattern), - ]) -def test_parse_pattern(pattern, cls): - assert isinstance(parse_pattern(pattern), cls) - - -@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"]) -def test_parse_pattern_error(pattern): - with pytest.raises(ValueError): - parse_pattern(pattern) - - -def test_pattern_matcher(): - pm = PatternMatcher() - - assert pm.fallback is None - - for i in ["", "foo", "bar"]: - assert pm.match(i) is None - - # add extra entries to aid in testing - for target in ["A", "B", "Empty", "FileNotFound"]: - pm.is_include_cmd[target] = target - - pm.add([RegexPattern("^a")], "A") - pm.add([RegexPattern("^b"), RegexPattern("^z")], "B") - pm.add([RegexPattern("^$")], "Empty") - pm.fallback = "FileNotFound" - - assert pm.match("") == "Empty" - assert pm.match("aaa") == "A" - assert pm.match("bbb") == "B" - assert pm.match("ccc") == "FileNotFound" - assert pm.match("xyz") == "FileNotFound" - assert pm.match("z") == "B" - - assert PatternMatcher(fallback="hey!").fallback == "hey!" - - def test_chunkerparams(): assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095) assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095) diff --git a/src/borg/testsuite/patterns.py b/src/borg/testsuite/patterns.py new file mode 100644 index 000000000..ff447888f --- /dev/null +++ b/src/borg/testsuite/patterns.py @@ -0,0 +1,467 @@ +import argparse +import io +import os.path +import sys + +import pytest + +from ..patterns import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern +from ..patterns import load_exclude_file, load_pattern_file +from ..patterns import parse_pattern, PatternMatcher + + +def check_patterns(files, pattern, expected): + """Utility for testing patterns. + """ + assert all([f == os.path.normpath(f) for f in files]), "Pattern matchers expect normalized input paths" + + matched = [f for f in files if pattern.match(f)] + + assert matched == (files if expected is None else expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("/", []), + ("/home", ["/home"]), + ("/home///", ["/home"]), + ("/./home", ["/home"]), + ("/home/user", ["/home/user"]), + ("/home/user2", ["/home/user2"]), + ("/home/user/.bashrc", ["/home/user/.bashrc"]), + ]) +def test_patterns_full(pattern, expected): + files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ] + + check_patterns(files, PathFullPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("", []), + ("relative", []), + ("relative/path/", ["relative/path"]), + ("relative/path", ["relative/path"]), + ]) +def test_patterns_full_relative(pattern, expected): + files = ["relative/path", "relative/path2", ] + + check_patterns(files, PathFullPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("/", None), + ("/./", None), + ("", []), + ("/home/u", []), + ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc", ["/etc/server/config", "/etc/server/hosts"]), + ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv", ["/srv/messages", "/srv/dmesg"]), + ]) +def test_patterns_prefix(pattern, expected): + files = [ + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", + ] + + check_patterns(files, PathPrefixPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("", []), + ("foo", []), + ("relative", ["relative/path1", "relative/two"]), + ("more", ["more/relative"]), + ]) +def test_patterns_prefix_relative(pattern, expected): + files = ["relative/path1", "relative/two", "more/relative"] + + check_patterns(files, PathPrefixPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("/*", None), + ("/./*", None), + ("*", None), + ("*/*", None), + ("*///*", None), + ("/home/u", []), + ("/home/*", + ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]), + ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), + ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv*", ["/srv/messages", "/srv/dmesg"]), + ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ]) +def test_patterns_fnmatch(pattern, expected): + files = [ + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] + + check_patterns(files, FnmatchPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("*", None), + ("**/*", None), + ("/**/*", None), + ("/./*", None), + ("*/*", None), + ("*///*", None), + ("/home/u", []), + ("/home/*", + ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]), + ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]), + ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]), + ("*/.pr????e", []), + ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), + ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]), + ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]), + ("/srv/*", ["/srv/messages", "/srv/dmesg"]), + ("/srv2/**", ["/srv2", "/srv2/blafasel"]), + ("/srv2/**/", ["/srv2/blafasel"]), + ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]), + ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]), + ]) +def test_patterns_shell(pattern, expected): + files = [ + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg", + "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] + + check_patterns(files, ShellPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("", None), + (".*", None), + ("^/", None), + ("^abc$", []), + ("^[^/]", []), + ("^(?!/srv|/foo|/opt)", + ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", + "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]), + ]) +def test_patterns_regex(pattern, expected): + files = [ + '/srv/data', '/foo/bar', '/home', + '/home/user/.profile', '/home/user/.bashrc', + '/home/user2/.profile', '/home/user2/public_html/index.html', + '/opt/log/messages.txt', '/opt/log/dmesg.txt', + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] + + obj = RegexPattern(pattern) + assert str(obj) == pattern + assert obj.pattern == pattern + + check_patterns(files, obj, expected) + + +def test_regex_pattern(): + # The forward slash must match the platform-specific path separator + assert RegexPattern("^/$").match("/") + assert RegexPattern("^/$").match(os.path.sep) + assert not RegexPattern(r"^\\$").match("/") + + +def use_normalized_unicode(): + return sys.platform in ("darwin",) + + +def _make_test_patterns(pattern): + return [PathPrefixPattern(pattern), + FnmatchPattern(pattern), + RegexPattern("^{}/foo$".format(pattern)), + ShellPattern(pattern), + ] + + +@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}")) +def test_composed_unicode_pattern(pattern): + assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") == use_normalized_unicode() + + +@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}")) +def test_decomposed_unicode_pattern(pattern): + assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") == use_normalized_unicode() + assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + +@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1"))) +def test_invalid_unicode_pattern(pattern): + assert not pattern.match("ba/foo") + assert pattern.match(str(b"ba\x80/foo", "latin1")) + + +@pytest.mark.parametrize("lines, expected", [ + # "None" means all files, i.e. none excluded + ([], None), + (["# Comment only"], None), + (["*"], []), + (["# Comment", + "*/something00.txt", + " *whitespace* ", + # Whitespace before comment + " #/ws*", + # Empty line + "", + "# EOF"], + ["/more/data", "/home", " #/wsfoobar"]), + (["re:.*"], []), + (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]), + ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]), + (["", "", "", + "# This is a test with mixed pattern styles", + # Case-insensitive pattern + "re:(?i)BAR|ME$", + "", + "*whitespace*", + "fm:*/something00*"], + ["/more/data"]), + ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]), + ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]), + (["pp:./"], None), + (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]), + (["pp:aaabbb"], None), + (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]), + (["/nomatch", "/more/*"], + ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']), + # the order of exclude patterns shouldn't matter + (["/more/*", "/nomatch"], + ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']), + ]) +def test_exclude_patterns_from_file(tmpdir, lines, expected): + files = [ + '/data/something00.txt', '/more/data', '/home', + ' #/wsfoobar', + '\tstart/whitespace', + '/whitespace/end\t', + ] + + def evaluate(filename): + patterns = [] + load_exclude_file(open(filename, "rt"), patterns) + matcher = PatternMatcher(fallback=True) + matcher.add_inclexcl(patterns) + return [path for path in files if matcher.match(path)] + + exclfile = tmpdir.join("exclude.txt") + + with exclfile.open("wt") as fh: + fh.write("\n".join(lines)) + + assert evaluate(str(exclfile)) == (files if expected is None else expected) + + +@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [ + # "None" means all files, i.e. none excluded + ([], [], 0), + (["# Comment only"], [], 0), + (["- *"], [], 1), + (["+fm:*/something00.txt", + "-/data"], [], 2), + (["R /"], ["/"], 0), + (["R /", + "# comment"], ["/"], 0), + (["# comment", + "- /data", + "R /home"], ["/home"], 1), +]) +def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns): + def evaluate(filename): + roots = [] + inclexclpatterns = [] + load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) + return roots, len(inclexclpatterns) + patternfile = tmpdir.join("patterns.txt") + + with patternfile.open("wt") as fh: + fh.write("\n".join(lines)) + + roots, numpatterns = evaluate(str(patternfile)) + assert roots == expected_roots + assert numpatterns == expected_numpatterns + + +def test_switch_patterns_style(): + patterns = """\ + +0_initial_default_is_shell + p fm + +1_fnmatch + P re + +2_regex + +3_more_regex + P pp + +4_pathprefix + p fm + p sh + +5_shell + """ + pattern_file = io.StringIO(patterns) + roots, patterns = [], [] + load_pattern_file(pattern_file, roots, patterns) + assert len(patterns) == 6 + assert isinstance(patterns[0].val, ShellPattern) + assert isinstance(patterns[1].val, FnmatchPattern) + assert isinstance(patterns[2].val, RegexPattern) + assert isinstance(patterns[3].val, RegexPattern) + assert isinstance(patterns[4].val, PathPrefixPattern) + assert isinstance(patterns[5].val, ShellPattern) + + +@pytest.mark.parametrize("lines", [ + (["X /data"]), # illegal pattern type prefix + (["/data"]), # need a pattern type prefix +]) +def test_load_invalid_patterns_from_file(tmpdir, lines): + patternfile = tmpdir.join("patterns.txt") + with patternfile.open("wt") as fh: + fh.write("\n".join(lines)) + filename = str(patternfile) + with pytest.raises(argparse.ArgumentTypeError): + roots = [] + inclexclpatterns = [] + load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) + + +@pytest.mark.parametrize("lines, expected", [ + # "None" means all files, i.e. none excluded + ([], None), + (["# Comment only"], None), + (["- *"], []), + # default match type is sh: for patterns -> * doesn't match a / + (["-*/something0?.txt"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', + '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["-fm:*/something00.txt"], + ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["-fm:*/something0?.txt"], + ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["+/*/something0?.txt", + "-/data"], + ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["+fm:*/something00.txt", + "-/data"], + ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']), + # include /home/leo and exclude the rest of /home: + (["+/home/leo", + "-/home/*"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']), + # wrong order, /home/leo is already excluded by -/home/*: + (["-/home/*", + "+/home/leo"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']), + (["+fm:/home/leo", + "-/home/"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']), +]) +def test_inclexcl_patterns_from_file(tmpdir, lines, expected): + files = [ + '/data', '/data/something00.txt', '/data/subdir/something01.txt', + '/home', '/home/leo', '/home/leo/t', '/home/other' + ] + + def evaluate(filename): + matcher = PatternMatcher(fallback=True) + roots = [] + inclexclpatterns = [] + load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) + matcher.add_inclexcl(inclexclpatterns) + return [path for path in files if matcher.match(path)] + + patternfile = tmpdir.join("patterns.txt") + + with patternfile.open("wt") as fh: + fh.write("\n".join(lines)) + + assert evaluate(str(patternfile)) == (files if expected is None else expected) + + +@pytest.mark.parametrize("pattern, cls", [ + ("", FnmatchPattern), + + # Default style + ("*", FnmatchPattern), + ("/data/*", FnmatchPattern), + + # fnmatch style + ("fm:", FnmatchPattern), + ("fm:*", FnmatchPattern), + ("fm:/data/*", FnmatchPattern), + ("fm:fm:/data/*", FnmatchPattern), + + # Regular expression + ("re:", RegexPattern), + ("re:.*", RegexPattern), + ("re:^/something/", RegexPattern), + ("re:re:^/something/", RegexPattern), + + # Path prefix + ("pp:", PathPrefixPattern), + ("pp:/", PathPrefixPattern), + ("pp:/data/", PathPrefixPattern), + ("pp:pp:/data/", PathPrefixPattern), + + # Shell-pattern style + ("sh:", ShellPattern), + ("sh:*", ShellPattern), + ("sh:/data/*", ShellPattern), + ("sh:sh:/data/*", ShellPattern), + ]) +def test_parse_pattern(pattern, cls): + assert isinstance(parse_pattern(pattern), cls) + + +@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"]) +def test_parse_pattern_error(pattern): + with pytest.raises(ValueError): + parse_pattern(pattern) + + +def test_pattern_matcher(): + pm = PatternMatcher() + + assert pm.fallback is None + + for i in ["", "foo", "bar"]: + assert pm.match(i) is None + + # add extra entries to aid in testing + for target in ["A", "B", "Empty", "FileNotFound"]: + pm.is_include_cmd[target] = target + + pm.add([RegexPattern("^a")], "A") + pm.add([RegexPattern("^b"), RegexPattern("^z")], "B") + pm.add([RegexPattern("^$")], "Empty") + pm.fallback = "FileNotFound" + + assert pm.match("") == "Empty" + assert pm.match("aaa") == "A" + assert pm.match("bbb") == "B" + assert pm.match("ccc") == "FileNotFound" + assert pm.match("xyz") == "FileNotFound" + assert pm.match("z") == "B" + + assert PatternMatcher(fallback="hey!").fallback == "hey!"