mirror of https://github.com/borgbackup/borg.git
synced 2026-03-26 04:14:45 -04:00

Merge pull request #1911 from enkore/f/itemnt

Work on metadata handling speed

Commit 7e5ed40e2f
10 changed files with 48 additions and 79 deletions

setup.py (7 changed lines)
@@ -50,6 +50,7 @@ compress_source = 'src/borg/compress.pyx'
 crypto_source = 'src/borg/crypto.pyx'
 chunker_source = 'src/borg/chunker.pyx'
 hashindex_source = 'src/borg/hashindex.pyx'
+item_source = 'src/borg/item.pyx'
 platform_posix_source = 'src/borg/platform/posix.pyx'
 platform_linux_source = 'src/borg/platform/linux.pyx'
 platform_darwin_source = 'src/borg/platform/darwin.pyx'
@@ -60,6 +61,7 @@ cython_sources = [
     crypto_source,
     chunker_source,
     hashindex_source,
+    item_source,
 
     platform_posix_source,
     platform_linux_source,
@@ -83,6 +85,7 @@ try:
             'src/borg/crypto.c',
             'src/borg/chunker.c', 'src/borg/_chunker.c',
             'src/borg/hashindex.c', 'src/borg/_hashindex.c',
+            'src/borg/item.c',
             'src/borg/platform/posix.c',
             'src/borg/platform/linux.c',
             'src/borg/platform/freebsd.c',
@@ -99,6 +102,7 @@ except ImportError:
     crypto_source = crypto_source.replace('.pyx', '.c')
     chunker_source = chunker_source.replace('.pyx', '.c')
     hashindex_source = hashindex_source.replace('.pyx', '.c')
+    item_source = item_source.replace('.pyx', '.c')
     platform_posix_source = platform_posix_source.replace('.pyx', '.c')
     platform_linux_source = platform_linux_source.replace('.pyx', '.c')
     platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
@@ -358,7 +362,8 @@ if not on_rtd:
     Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.crypto', [crypto_source], libraries=crypto_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.chunker', [chunker_source]),
-    Extension('borg.hashindex', [hashindex_source])
+    Extension('borg.hashindex', [hashindex_source]),
+    Extension('borg.item', [item_source]),
 ]
 if sys.platform.startswith(('linux', 'freebsd', 'darwin')):
     ext_modules.append(Extension('borg.platform.posix', [platform_posix_source]))
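The setup.py hunks follow the file's existing four-step pattern for every Cython module: name the .pyx source, list the generated .c file for source distributions, fall back to the .c file when Cython is unavailable, and register the Extension. A condensed, standalone sketch of that fallback step (not borg's actual setup.py):

# Sketch of the .pyx -> .c fallback used above; standalone illustration only.
item_source = 'src/borg/item.pyx'
try:
    from Cython.Build import cythonize  # Cython present: compile the .pyx directly
except ImportError:
    # no Cython: build from the pre-generated C file shipped in the sdist
    item_source = item_source.replace('.pyx', '.c')
print(item_source)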
src/borg/archive.py

@@ -29,12 +29,11 @@ from .helpers import Error, IntegrityError
 from .helpers import uid2user, user2uid, gid2group, group2gid
 from .helpers import parse_timestamp, to_localtime
 from .helpers import format_time, format_timedelta, format_file_size, file_status
-from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates, swidth_slice
-from .helpers import decode_dict, StableDict
-from .helpers import int_to_bigint, bigint_to_int, bin_to_hex
+from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates
+from .helpers import StableDict
+from .helpers import bin_to_hex
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
 from .helpers import consume, chunkit
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
 from .item import Item, ArchiveItem
 from .key import key_factory
@@ -125,19 +124,22 @@ class BackupOSError(Exception):
         return str(self.os_error)
 
 
-@contextmanager
-def backup_io():
-    """Context manager changing OSError to BackupOSError."""
-    try:
-        yield
-    except OSError as os_error:
-        raise BackupOSError(os_error) from os_error
+class BackupIO:
+    def __enter__(self):
+        pass
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type and issubclass(exc_type, OSError):
+            raise BackupOSError(exc_val) from exc_val
+
+
+backup_io = BackupIO()
 
 
 def backup_io_iter(iterator):
     while True:
         try:
-            with backup_io():
+            with backup_io:
                 item = next(iterator)
         except StopIteration:
             return
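This is the core speed change in archive.py: backup_io was a @contextmanager-decorated generator, so every `with backup_io():` on the per-file hot path allocated a fresh generator object. The new BackupIO class is stateless, and the single module-level `backup_io` instance is entered directly (`with backup_io:`, no call), skipping that allocation. A minimal standalone sketch of the new pattern:

class BackupOSError(Exception):
    """Wraps an OSError raised while reading or writing backup data."""
    def __init__(self, os_error):
        self.os_error = os_error

class BackupIO:
    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        # translate any OSError leaving the block; other exceptions pass through
        if exc_type and issubclass(exc_type, OSError):
            raise BackupOSError(exc_val) from exc_val

backup_io = BackupIO()  # one reusable, stateless instance

try:
    with backup_io:  # note: no call parentheses anymore
        open('/nonexistent/path', 'rb')
except BackupOSError as exc:
    print('translated:', exc.os_error)

Because the instance keeps no state, it can be re-entered any number of times; the generator-based version had to build a new context manager on every use. All of the following hunks are the mechanical `with backup_io():` to `with backup_io:` renames at the call sites.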
@@ -475,13 +477,13 @@ Number of files: {0.stats.nfiles}'''.format(
             pass
         mode = item.mode
         if stat.S_ISREG(mode):
-            with backup_io():
+            with backup_io:
                 if not os.path.exists(os.path.dirname(path)):
                     os.makedirs(os.path.dirname(path))
             # Hard link?
             if 'source' in item:
                 source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:])
-                with backup_io():
+                with backup_io:
                     if os.path.exists(path):
                         os.unlink(path)
                     if item.source not in hardlink_masters:
@@ -490,24 +492,24 @@ Number of files: {0.stats.nfiles}'''.format(
                 item.chunks, link_target = hardlink_masters[item.source]
                 if link_target:
                     # Hard link was extracted previously, just link
-                    with backup_io():
+                    with backup_io:
                         os.link(link_target, path)
                     return
                 # Extract chunks, since the item which had the chunks was not extracted
-        with backup_io():
+        with backup_io:
             fd = open(path, 'wb')
         with fd:
             ids = [c.id for c in item.chunks]
             for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
                 if pi:
                     pi.show(increase=len(data), info=[remove_surrogates(item.path)])
-                with backup_io():
+                with backup_io:
                     if sparse and self.zeros.startswith(data):
                         # all-zero chunk: create a hole in a sparse file
                         fd.seek(len(data), 1)
                     else:
                         fd.write(data)
-            with backup_io():
+            with backup_io:
                 pos = fd.tell()
                 fd.truncate(pos)
                 fd.flush()
@@ -519,7 +521,7 @@ Number of files: {0.stats.nfiles}'''.format(
                 # Update master entry with extracted file path, so that following hardlinks don't extract twice.
                 hardlink_masters[item.get('source') or original_path] = (None, path)
             return
-        with backup_io():
+        with backup_io:
             # No repository access beyond this point.
             if stat.S_ISDIR(mode):
                 if not os.path.exists(path):
@@ -705,7 +707,7 @@ Number of files: {0.stats.nfiles}'''.format(
 
     def stat_ext_attrs(self, st, path):
         attrs = {}
-        with backup_io():
+        with backup_io:
             xattrs = xattr.get_all(path, follow_symlinks=False)
             bsdflags = get_flags(path, st)
             acl_get(path, attrs, st, self.numeric_owner)
@@ -742,7 +744,7 @@ Number of files: {0.stats.nfiles}'''.format(
             return 'b'  # block device
 
     def process_symlink(self, path, st):
-        with backup_io():
+        with backup_io:
             source = os.readlink(path)
         item = Item(path=make_path_safe(path), source=source)
         item.update(self.stat_attrs(st, path))
@@ -854,7 +856,7 @@ Number of files: {0.stats.nfiles}'''.format(
         else:
             compress = self.compression_decider1.decide(path)
         self.file_compression_logger.debug('%s -> compression %s', path, compress['name'])
-        with backup_io():
+        with backup_io:
             fh = Archive._open_rb(path)
         with os.fdopen(fh, 'rb') as fd:
             self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd, fh)), compress=compress)
src/borg/archiver.py

@@ -24,7 +24,7 @@ logger = create_logger()
 from . import __version__
 from . import helpers
 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
-from .archive import BackupOSError, CHUNKER_PARAMS
+from .archive import BackupOSError
 from .cache import Cache
 from .constants import *  # NOQA
 from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
src/borg/cache.py

@@ -15,7 +15,7 @@ from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Location
 from .helpers import Error
 from .helpers import get_cache_dir, get_security_dir
-from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
+from .helpers import bin_to_hex
 from .helpers import format_file_size
 from .helpers import yes
 from .helpers import remove_surrogates
@@ -350,7 +350,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                 # this is to avoid issues with filesystem snapshots and mtime granularity.
                 # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
                 entry = FileCacheEntry(*msgpack.unpackb(item))
-                if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
+                if entry.age == 0 and entry.mtime < self._newest_mtime or \
                    entry.age > 0 and entry.age < ttl:
                     msgpack.pack((path_hash, entry), fd)
         pi.output('Saving cache config')
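With bigint_to_int() gone, entry.mtime compares as a plain int. Since the condition above mixes `and`/`or` without parentheses, it is worth spelling out how it groups; the sketch below does that. FileCacheEntry's definition is not part of this diff, so the namedtuple shape here is an assumption inferred from the fields used in these hunks, and `newest_mtime` stands in for self._newest_mtime:

from collections import namedtuple

# assumed shape, inferred from the fields used in the hunks above
FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')

def keep(entry, newest_mtime, ttl):
    # `and` binds tighter than `or`, so the unparenthesized condition means:
    return (entry.age == 0 and entry.mtime < newest_mtime) or \
           (entry.age > 0 and entry.age < ttl)

print(keep(FileCacheEntry(0, 1, 0, 5, []), newest_mtime=10, ttl=20))  # True: seen this run, mtime settled
print(keep(FileCacheEntry(3, 1, 0, 5, []), newest_mtime=10, ttl=20))  # True: from an older run, within TTL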
@@ -567,7 +567,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         if not entry:
             return None
         entry = FileCacheEntry(*msgpack.unpackb(entry))
-        if (entry.size == st.st_size and bigint_to_int(entry.mtime) == st.st_mtime_ns and
+        if (entry.size == st.st_size and entry.mtime == st.st_mtime_ns and
                 (ignore_inode or entry.inode == st.st_ino)):
             self.files[path_hash] = msgpack.packb(entry._replace(age=0))
             return entry.chunk_ids
@@ -577,6 +577,6 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
     def memorize_file(self, path_hash, st, ids):
         if not (self.do_files and stat.S_ISREG(st.st_mode)):
             return
-        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids)
+        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=st.st_mtime_ns, chunk_ids=ids)
        self.files[path_hash] = msgpack.packb(entry)
        self._newest_mtime = max(self._newest_mtime or 0, st.st_mtime_ns)
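memorize_file now stores st.st_mtime_ns directly: msgpack encodes Python ints natively as long as they fit in 64 bits, and a signed 64-bit nanosecond timestamp reaches to the year 2262, so no bigint transcoding is needed on this hot path. A small round-trip sketch, again assuming the FileCacheEntry shape inferred above (requires the msgpack package):

from collections import namedtuple
import msgpack  # pip install msgpack

FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')

entry = FileCacheEntry(age=0, inode=1234, size=4096,
                       mtime=1480000000 * 10**9,  # st_mtime_ns as a plain int
                       chunk_ids=[b'\x00' * 32])
packed = msgpack.packb(entry)                      # a namedtuple packs as a plain array
restored = FileCacheEntry(*msgpack.unpackb(packed))
assert restored.mtime == entry.mtime               # the int round-trips without helpers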
src/borg/helpers.py

@@ -86,7 +86,7 @@ class PlaceholderError(Error):
 
 
 def check_extension_modules():
-    from . import platform, compress
+    from . import platform, compress, item
     if hashindex.API_VERSION != 4:
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
@@ -97,6 +97,8 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION != 5:
         raise ExtensionModuleError
+    if item.API_VERSION != 1:
+        raise ExtensionModuleError
 
 
 ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
@@ -691,7 +693,7 @@ def SortBySpec(text):
 
 def safe_timestamp(item_timestamp_ns):
     try:
-        return datetime.fromtimestamp(bigint_to_int(item_timestamp_ns) / 1e9)
+        return datetime.fromtimestamp(item_timestamp_ns / 1e9)
     except OverflowError:
         # likely a broken file time and datetime did not want to go beyond year 9999
         return datetime(9999, 12, 31, 23, 59, 59)
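safe_timestamp now receives the nanosecond timestamp as a plain int, so the bigint_to_int() call disappears. The function after this change, made standalone:

from datetime import datetime

def safe_timestamp(item_timestamp_ns):
    try:
        return datetime.fromtimestamp(item_timestamp_ns / 1e9)
    except OverflowError:
        # likely a broken file time; datetime cannot represent years beyond 9999
        return datetime(9999, 12, 31, 23, 59, 59)

print(safe_timestamp(1480000000 * 10**9))  # a plausible 2016 timestamp, shown in local time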
@@ -1090,24 +1092,6 @@ class StableDict(dict):
         return sorted(super().items())
 
 
-def bigint_to_int(mtime):
-    """Convert bytearray to int
-    """
-    if isinstance(mtime, bytes):
-        return int.from_bytes(mtime, 'little', signed=True)
-    return mtime
-
-
-def int_to_bigint(value):
-    """Convert integers larger than 64 bits to bytearray
-
-    Smaller integers are left alone
-    """
-    if value.bit_length() > 63:
-        return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
-    return value
-
-
 def is_slow_msgpack():
     return msgpack.Packer is msgpack.fallback.Packer
 
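For context: these helpers existed because msgpack can only encode integers that fit in 64 bits, so out-of-range values (e.g. damaged timestamps) were serialized as little-endian byte strings. The removed code, reassembled as a standalone round-trip check (the asserts mirror the BigIntTestCase deleted from the test suite further below):

def bigint_to_int(mtime):
    """Convert bytearray back to int; plain ints pass through."""
    if isinstance(mtime, bytes):
        return int.from_bytes(mtime, 'little', signed=True)
    return mtime

def int_to_bigint(value):
    """Convert integers larger than 64 bits to bytearray; smaller ones are left alone."""
    if value.bit_length() > 63:
        return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
    return value

assert int_to_bigint(2**63 - 1) == 2**63 - 1            # fits in signed 64-bit: untouched
assert int_to_bigint(2**63) == b'\x00\x00\x00\x00\x00\x00\x00\x80\x00'  # 9-byte little-endian
assert bigint_to_int(int_to_bigint(-2**70)) == -2**70   # round-trips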
src/borg/item.pyx

@@ -1,8 +1,9 @@
 from .constants import ITEM_KEYS
 from .helpers import safe_encode, safe_decode
-from .helpers import bigint_to_int, int_to_bigint
 from .helpers import StableDict
 
+API_VERSION = 1
+
 
 class PropDict:
     """
@@ -151,9 +152,9 @@ class Item(PropDict):
     rdev = PropDict._make_property('rdev', int)
     bsdflags = PropDict._make_property('bsdflags', int)
 
-    atime = PropDict._make_property('atime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
-    ctime = PropDict._make_property('ctime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
-    mtime = PropDict._make_property('mtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int)
+    atime = PropDict._make_property('atime', int)
+    ctime = PropDict._make_property('ctime', int)
+    mtime = PropDict._make_property('mtime', int)
 
     hardlink_master = PropDict._make_property('hardlink_master', bool)
 
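The timestamp properties drop their encode/decode hooks and become plain typed ints, so every attribute access skips a conversion call. _make_property itself is not shown in this diff; the sketch below is a hypothetical minimal reconstruction, only meant to illustrate what removing the 'bigint' transcoder pair changes:

class PropDict:
    """Hypothetical minimal stand-in; the real PropDict lives in src/borg/item.pyx."""
    def __init__(self, **kw):
        self._dict = dict(kw)

    def as_dict(self):
        return dict(self._dict)

    @staticmethod
    def _make_property(key, value_type, type_name=None, encode=None, decode=None):
        def _get(self):
            value = self._dict[key]
            return decode(value) if decode is not None else value

        def _set(self, value):
            if not isinstance(value, value_type):
                raise TypeError('%s must be %s' % (key, type_name or value_type.__name__))
            self._dict[key] = encode(value) if encode is not None else value

        return property(_get, _set)

class Item(PropDict):
    # after this commit: a plain int, no per-access transcoding
    mtime = PropDict._make_property('mtime', int)

item = Item()
item.mtime = 1480000000 * 10**9
assert item.as_dict() == {'mtime': 1480000000 * 10**9}

The real class does more (key validation against ITEM_KEYS, defaults); this toy only shows the encode/decode path that the commit removes for timestamps.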
src/borg/key.py

@@ -14,7 +14,7 @@ logger = create_logger()
 
 from .constants import *  # NOQA
 from .compress import Compressor, get_compressor
-from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256, blake2b_256
+from .crypto import AES, bytes_to_long, bytes_to_int, num_aes_blocks, hmac_sha256, blake2b_256
 from .helpers import Chunk
 from .helpers import Error, IntegrityError
 from .helpers import yes
src/borg/testsuite/archive.py

@@ -220,7 +220,7 @@ def test_key_length_msgpacked_items():
 
 def test_backup_io():
     with pytest.raises(BackupOSError):
-        with backup_io():
+        with backup_io:
             raise OSError(123)
 
 
src/borg/testsuite/helpers.py

@@ -18,7 +18,7 @@ from ..helpers import prune_within, prune_split
 from ..helpers import get_cache_dir, get_keys_dir, get_security_dir
 from ..helpers import is_slow_msgpack
 from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
-from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
+from ..helpers import StableDict, bin_to_hex
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams, Chunk
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import load_excludes
@@ -27,19 +27,7 @@ from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPat
 from ..helpers import swidth_slice
 from ..helpers import chunkit
 
-from . import BaseTestCase, environment_variable, FakeInputs
-
-
-class BigIntTestCase(BaseTestCase):
-
-    def test_bigint(self):
-        self.assert_equal(int_to_bigint(0), 0)
-        self.assert_equal(int_to_bigint(2**63-1), 2**63-1)
-        self.assert_equal(int_to_bigint(-2**63+1), -2**63+1)
-        self.assert_equal(int_to_bigint(2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\x00')
-        self.assert_equal(int_to_bigint(-2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\xff')
-        self.assert_equal(bigint_to_int(int_to_bigint(-2**70)), -2**70)
-        self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)
+from . import BaseTestCase, FakeInputs
 
 
 def test_bin_to_hex():
src/borg/testsuite/item.py

@@ -77,17 +77,6 @@ def test_item_int_property():
         item.mode = "invalid"
 
 
-def test_item_bigint_property():
-    item = Item()
-    small, big = 42, 2 ** 65
-    item.atime = small
-    assert item.atime == small
-    assert item.as_dict() == {'atime': small}
-    item.atime = big
-    assert item.atime == big
-    assert item.as_dict() == {'atime': b'\0' * 8 + b'\x02'}
-
-
 def test_item_user_group_none():
     item = Item()
     item.user = None