From 13ddfdf4a3b64b109dde3a7ba5333a32e14be758 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 15:00:58 -0400 Subject: [PATCH] Move pattern normalization decision into decorator Using a decorator moves the duplicate code in the init methods into a single decorator method, while still retaining the same runtime overhead (zero for the non-OSX path, one extra function call plus the call to unicodedata.normalize for OSX). The pattern classes are visually much cleaner, and the duplicate code is limited to the two lines normalizing the pattern on OSX. Because the decoration happens at class definition time (vs instance init time for the previous approach), the OSX and non-OSX test cases can no longer be called in the same run, so I also removed the OSX test case monkey patching and uncommented the platform skipif decorator. --- borg/helpers.py | 52 +++++++++++++++++++-------------------- borg/testsuite/helpers.py | 15 +---------- 2 files changed, 26 insertions(+), 41 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index ecf138125..0da9918f8 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1,6 +1,7 @@ import argparse import binascii from collections import namedtuple +from functools import wraps import grp import os import pwd @@ -222,9 +223,22 @@ def exclude_path(path, patterns): # unify the two cases, we add a path separator to the end of # the path before matching. -##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -##### For discussion only, don't merge this code! -##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+def normalized(func): + """ Decorator for the Pattern match methods, returning a wrapper that + normalizes OSX paths to match the normalized pattern on OSX, and + returning the original method on other platforms""" + @wraps(func) + def normalize_wrapper(self, path): + return func(self, unicodedata.normalize("NFD", path)) + + if sys.platform in ('darwin',): + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + return normalize_wrapper + else: + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + return func class IncludePattern: """Literal files or directories listed on the command line @@ -233,23 +247,15 @@ class IncludePattern: path match as well. A trailing slash makes no difference. """ def __init__(self, pattern): - def match(path): - return (path+os.path.sep).startswith(self.pattern) - - # HFS+ converts paths to a canonical form, so users shouldn't be - # required to enter an exact match if sys.platform in ('darwin',): - # repository paths will be mostly in NFD, as the OSX exception list - # to NFD is small, so normalize to that form for best performance pattern = unicodedata.normalize("NFD", pattern) - self.match = lambda p: match(unicodedata.normalize("NFD", p)) - # Windows and Unix filesystems allow different forms, so users - # always have to enter an exact match - else: - self.match = match self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep + @normalized + def match(self, path): + return (path+os.path.sep).startswith(self.pattern) + def __repr__(self): return '%s(%s)' % (type(self), self.pattern) @@ -259,30 +265,22 @@ class ExcludePattern(IncludePattern): exclude the contents of a directory, but not the directory itself. 
""" def __init__(self, pattern): - def match(path): - return self.regex.match(path+os.path.sep) is not None - if pattern.endswith(os.path.sep): self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep else: self.pattern = os.path.normpath(pattern)+os.path.sep+'*' - # HFS+ converts paths to a canonical form, so users shouldn't be - # required to enter an exact match if sys.platform in ('darwin',): - # repository paths will be mostly in NFD, as the OSX exception list - # to NFD is small, so normalize to that form for best performance self.pattern = unicodedata.normalize("NFD", self.pattern) - self.match = lambda p: match(unicodedata.normalize("NFD", p)) - # Windows and Unix filesystems allow different forms, so users - # always have to enter an exact match - else: - self.match = match # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. self.regex = re.compile(translate(self.pattern)) + @normalized + def match(self, path): + return self.regex.match(path+os.path.sep) is not None + def __repr__(self): return '%s(%s)' % (type(self), self.pattern) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 077c171b2..f755df22a 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -212,21 +212,8 @@ class PatternNonAsciiTestCase(BaseTestCase): assert e.match(str(b"ba\x80/foo", 'latin1')) -#@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') +@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') class OSXPatternNormalizationTestCase(BaseTestCase): - # monkey patch sys.platform to allow testing on non-OSX during development - # remove and uncomment OSX-only decorator before push - def setUp(self): - self.oldplatform = sys.platform - sys.platform = 'darwin' - pass - - # monkey patch sys.platform to allow testing on non-OSX during development - # remove and uncomment OSX-only decorator before push - def 
tearDown(self): - sys.platform = self.oldplatform - pass - def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' i = IncludePattern(pattern)