Merge pull request #1911 from enkore/f/itemnt

Work on metadata handling speed
This commit is contained in:
enkore 2016-12-03 18:06:27 +01:00 committed by GitHub
commit 7e5ed40e2f
10 changed files with 48 additions and 79 deletions

View file

@ -50,6 +50,7 @@ compress_source = 'src/borg/compress.pyx'
crypto_source = 'src/borg/crypto.pyx'
chunker_source = 'src/borg/chunker.pyx'
hashindex_source = 'src/borg/hashindex.pyx'
item_source = 'src/borg/item.pyx'
platform_posix_source = 'src/borg/platform/posix.pyx'
platform_linux_source = 'src/borg/platform/linux.pyx'
platform_darwin_source = 'src/borg/platform/darwin.pyx'
@ -60,6 +61,7 @@ cython_sources = [
crypto_source,
chunker_source,
hashindex_source,
item_source,
platform_posix_source,
platform_linux_source,
@ -83,6 +85,7 @@ try:
'src/borg/crypto.c',
'src/borg/chunker.c', 'src/borg/_chunker.c',
'src/borg/hashindex.c', 'src/borg/_hashindex.c',
'src/borg/item.c',
'src/borg/platform/posix.c',
'src/borg/platform/linux.c',
'src/borg/platform/freebsd.c',
@ -99,6 +102,7 @@ except ImportError:
crypto_source = crypto_source.replace('.pyx', '.c')
chunker_source = chunker_source.replace('.pyx', '.c')
hashindex_source = hashindex_source.replace('.pyx', '.c')
item_source = item_source.replace('.pyx', '.c')
platform_posix_source = platform_posix_source.replace('.pyx', '.c')
platform_linux_source = platform_linux_source.replace('.pyx', '.c')
platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
@ -358,7 +362,8 @@ if not on_rtd:
Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
Extension('borg.crypto', [crypto_source], libraries=crypto_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
Extension('borg.chunker', [chunker_source]),
Extension('borg.hashindex', [hashindex_source])
Extension('borg.hashindex', [hashindex_source]),
Extension('borg.item', [item_source]),
]
if sys.platform.startswith(('linux', 'freebsd', 'darwin')):
ext_modules.append(Extension('borg.platform.posix', [platform_posix_source]))

View file

@ -29,12 +29,11 @@ from .helpers import Error, IntegrityError
from .helpers import uid2user, user2uid, gid2group, group2gid
from .helpers import parse_timestamp, to_localtime
from .helpers import format_time, format_timedelta, format_file_size, file_status
from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates, swidth_slice
from .helpers import decode_dict, StableDict
from .helpers import int_to_bigint, bigint_to_int, bin_to_hex
from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates
from .helpers import StableDict
from .helpers import bin_to_hex
from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
from .helpers import PathPrefixPattern, FnmatchPattern
from .helpers import consume, chunkit
from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
from .item import Item, ArchiveItem
from .key import key_factory
@ -125,19 +124,22 @@ class BackupOSError(Exception):
return str(self.os_error)
@contextmanager
def backup_io():
    """Translate OSError raised inside the block into BackupOSError."""
    try:
        yield
    except OSError as exc:
        raise BackupOSError(exc) from exc
class BackupIO:
    """Reusable context manager translating OSError into BackupOSError.

    Unlike a @contextmanager generator, a single module-level instance can
    be entered many times without per-use allocation.
    """

    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Only OSError (and subclasses) are wrapped; anything else propagates.
        if exc_type is not None and issubclass(exc_type, OSError):
            raise BackupOSError(exc_val) from exc_val


backup_io = BackupIO()
def backup_io_iter(iterator):
while True:
try:
with backup_io():
with backup_io:
item = next(iterator)
except StopIteration:
return
@ -475,13 +477,13 @@ Number of files: {0.stats.nfiles}'''.format(
pass
mode = item.mode
if stat.S_ISREG(mode):
with backup_io():
with backup_io:
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
# Hard link?
if 'source' in item:
source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:])
with backup_io():
with backup_io:
if os.path.exists(path):
os.unlink(path)
if item.source not in hardlink_masters:
@ -490,24 +492,24 @@ Number of files: {0.stats.nfiles}'''.format(
item.chunks, link_target = hardlink_masters[item.source]
if link_target:
# Hard link was extracted previously, just link
with backup_io():
with backup_io:
os.link(link_target, path)
return
# Extract chunks, since the item which had the chunks was not extracted
with backup_io():
with backup_io:
fd = open(path, 'wb')
with fd:
ids = [c.id for c in item.chunks]
for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
if pi:
pi.show(increase=len(data), info=[remove_surrogates(item.path)])
with backup_io():
with backup_io:
if sparse and self.zeros.startswith(data):
# all-zero chunk: create a hole in a sparse file
fd.seek(len(data), 1)
else:
fd.write(data)
with backup_io():
with backup_io:
pos = fd.tell()
fd.truncate(pos)
fd.flush()
@ -519,7 +521,7 @@ Number of files: {0.stats.nfiles}'''.format(
# Update master entry with extracted file path, so that following hardlinks don't extract twice.
hardlink_masters[item.get('source') or original_path] = (None, path)
return
with backup_io():
with backup_io:
# No repository access beyond this point.
if stat.S_ISDIR(mode):
if not os.path.exists(path):
@ -705,7 +707,7 @@ Number of files: {0.stats.nfiles}'''.format(
def stat_ext_attrs(self, st, path):
attrs = {}
with backup_io():
with backup_io:
xattrs = xattr.get_all(path, follow_symlinks=False)
bsdflags = get_flags(path, st)
acl_get(path, attrs, st, self.numeric_owner)
@ -742,7 +744,7 @@ Number of files: {0.stats.nfiles}'''.format(
return 'b' # block device
def process_symlink(self, path, st):
with backup_io():
with backup_io:
source = os.readlink(path)
item = Item(path=make_path_safe(path), source=source)
item.update(self.stat_attrs(st, path))
@ -854,7 +856,7 @@ Number of files: {0.stats.nfiles}'''.format(
else:
compress = self.compression_decider1.decide(path)
self.file_compression_logger.debug('%s -> compression %s', path, compress['name'])
with backup_io():
with backup_io:
fh = Archive._open_rb(path)
with os.fdopen(fh, 'rb') as fd:
self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd, fh)), compress=compress)

View file

@ -24,7 +24,7 @@ logger = create_logger()
from . import __version__
from . import helpers
from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
from .archive import BackupOSError, CHUNKER_PARAMS
from .archive import BackupOSError
from .cache import Cache
from .constants import * # NOQA
from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR

View file

@ -15,7 +15,7 @@ from .hashindex import ChunkIndex, ChunkIndexEntry
from .helpers import Location
from .helpers import Error
from .helpers import get_cache_dir, get_security_dir
from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
from .helpers import bin_to_hex
from .helpers import format_file_size
from .helpers import yes
from .helpers import remove_surrogates
@ -350,7 +350,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
# this is to avoid issues with filesystem snapshots and mtime granularity.
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
entry = FileCacheEntry(*msgpack.unpackb(item))
if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
if entry.age == 0 and entry.mtime < self._newest_mtime or \
entry.age > 0 and entry.age < ttl:
msgpack.pack((path_hash, entry), fd)
pi.output('Saving cache config')
@ -567,7 +567,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
if not entry:
return None
entry = FileCacheEntry(*msgpack.unpackb(entry))
if (entry.size == st.st_size and bigint_to_int(entry.mtime) == st.st_mtime_ns and
if (entry.size == st.st_size and entry.mtime == st.st_mtime_ns and
(ignore_inode or entry.inode == st.st_ino)):
self.files[path_hash] = msgpack.packb(entry._replace(age=0))
return entry.chunk_ids
@ -577,6 +577,6 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
def memorize_file(self, path_hash, st, ids):
if not (self.do_files and stat.S_ISREG(st.st_mode)):
return
entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids)
entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=st.st_mtime_ns, chunk_ids=ids)
self.files[path_hash] = msgpack.packb(entry)
self._newest_mtime = max(self._newest_mtime or 0, st.st_mtime_ns)

View file

@ -86,7 +86,7 @@ class PlaceholderError(Error):
def check_extension_modules():
from . import platform, compress
from . import platform, compress, item
if hashindex.API_VERSION != 4:
raise ExtensionModuleError
if chunker.API_VERSION != 2:
@ -97,6 +97,8 @@ def check_extension_modules():
raise ExtensionModuleError
if platform.API_VERSION != platform.OS_API_VERSION != 5:
raise ExtensionModuleError
if item.API_VERSION != 1:
raise ExtensionModuleError
ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
@ -691,7 +693,7 @@ def SortBySpec(text):
def safe_timestamp(item_timestamp_ns):
try:
return datetime.fromtimestamp(bigint_to_int(item_timestamp_ns) / 1e9)
return datetime.fromtimestamp(item_timestamp_ns / 1e9)
except OverflowError:
# likely a broken file time and datetime did not want to go beyond year 9999
return datetime(9999, 12, 31, 23, 59, 59)
@ -1090,24 +1092,6 @@ class StableDict(dict):
return sorted(super().items())
def bigint_to_int(mtime):
    """Return *mtime* as an int.

    Values stored as little-endian signed bytes (see int_to_bigint) are
    decoded; plain ints are passed through unchanged.
    """
    if not isinstance(mtime, bytes):
        return mtime
    return int.from_bytes(mtime, 'little', signed=True)
def int_to_bigint(value):
    """Encode ints wider than 63 bits as little-endian signed bytes.

    Ints that fit in a signed 64-bit msgpack integer are returned
    unchanged; bigint_to_int is the inverse.
    """
    if value.bit_length() <= 63:
        return value
    nbytes = (value.bit_length() + 9) // 8  # room for the sign bit, rounded up
    return value.to_bytes(nbytes, 'little', signed=True)
def is_slow_msgpack():
return msgpack.Packer is msgpack.fallback.Packer

View file

@ -1,8 +1,9 @@
from .constants import ITEM_KEYS
from .helpers import safe_encode, safe_decode
from .helpers import bigint_to_int, int_to_bigint
from .helpers import StableDict
API_VERSION = 1
class PropDict:
"""
@ -151,9 +152,9 @@ class Item(PropDict):
rdev = PropDict._make_property('rdev', int)
bsdflags = PropDict._make_property('bsdflags', int)
atime = PropDict._make_property('atime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
ctime = PropDict._make_property('ctime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
mtime = PropDict._make_property('mtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
atime = PropDict._make_property('atime', int)
ctime = PropDict._make_property('ctime', int)
mtime = PropDict._make_property('mtime', int)
hardlink_master = PropDict._make_property('hardlink_master', bool)

View file

@ -14,7 +14,7 @@ logger = create_logger()
from .constants import * # NOQA
from .compress import Compressor, get_compressor
from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256, blake2b_256
from .crypto import AES, bytes_to_long, bytes_to_int, num_aes_blocks, hmac_sha256, blake2b_256
from .helpers import Chunk
from .helpers import Error, IntegrityError
from .helpers import yes

View file

@ -220,7 +220,7 @@ def test_key_length_msgpacked_items():
def test_backup_io():
with pytest.raises(BackupOSError):
with backup_io():
with backup_io:
raise OSError(123)

View file

@ -18,7 +18,7 @@ from ..helpers import prune_within, prune_split
from ..helpers import get_cache_dir, get_keys_dir, get_security_dir
from ..helpers import is_slow_msgpack
from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
from ..helpers import StableDict, bin_to_hex
from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams, Chunk
from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
from ..helpers import load_excludes
@ -27,19 +27,7 @@ from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPat
from ..helpers import swidth_slice
from ..helpers import chunkit
from . import BaseTestCase, environment_variable, FakeInputs
class BigIntTestCase(BaseTestCase):
    def test_bigint(self):
        # small values pass through int_to_bigint untouched
        for small in (0, 2**63 - 1, -2**63 + 1):
            self.assert_equal(int_to_bigint(small), small)
        # boundary values that no longer fit 63 bits get byte-encoded
        self.assert_equal(int_to_bigint(2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\x00')
        self.assert_equal(int_to_bigint(-2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\xff')
        # encode/decode round-trips for very large magnitudes
        for big in (-2**70, 2**70):
            self.assert_equal(bigint_to_int(int_to_bigint(big)), big)
from . import BaseTestCase, FakeInputs
def test_bin_to_hex():

View file

@ -77,17 +77,6 @@ def test_item_int_property():
item.mode = "invalid"
def test_item_bigint_property():
    """atime round-trips for both a small int and a >64-bit int."""
    item = Item()
    small_value, big_value = 42, 2 ** 65
    item.atime = small_value
    assert item.atime == small_value
    assert item.as_dict() == {'atime': small_value}
    item.atime = big_value
    assert item.atime == big_value
    # big values are serialized as little-endian signed bytes
    assert item.as_dict() == {'atime': b'\0' * 8 + b'\x02'}
def test_item_user_group_none():
item = Item()
item.user = None