From ba6706395a41f949b673b191b369b91216979e0b Mon Sep 17 00:00:00 2001 From: Suryansh Pal Date: Sun, 8 Mar 2026 17:36:01 +0530 Subject: [PATCH] use xxhash from PyPI, fixes #6535 --- .github/workflows/ci.yml | 23 ++---- .github/workflows/codeql-analysis.yml | 2 +- .readthedocs.yaml | 1 - Brewfile | 1 - Vagrantfile | 9 +- docs/global.rst.inc | 1 + docs/installation.rst | 13 ++- pyproject.toml | 1 + scripts/Dockerfile.linux-run | 1 - setup.py | 6 +- src/borg/archiver/benchmark_cmd.py | 5 +- src/borg/cache.py | 9 +- src/borg/checksums.pyi | 7 -- src/borg/checksums.pyx | 82 ------------------- src/borg/crypto/file_integrity.py | 5 +- src/borg/helpers/parseformat.py | 4 +- src/borg/legacyremote.py | 7 +- src/borg/legacyrepository.py | 6 +- src/borg/remote.py | 7 +- src/borg/repoobj.py | 7 +- src/borg/repository.py | 7 +- src/borg/storelocking.py | 7 +- src/borg/testsuite/archiver/check_cmd_test.py | 7 +- src/borg/testsuite/checksums_test.py | 29 +++---- src/borg/testsuite/legacyrepository_test.py | 7 +- src/borg/testsuite/repository_test.py | 6 +- 26 files changed, 85 insertions(+), 175 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 78fc9f47b..0d458f3c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,7 +79,7 @@ jobs: run: | sudo apt-get update sudo apt-get install -y pkg-config build-essential - sudo apt-get install -y libssl-dev libacl1-dev libxxhash-dev liblz4-dev + sudo apt-get install -y libssl-dev libacl1-dev liblz4-dev - name: Install Python dependencies run: | @@ -200,7 +200,7 @@ jobs: run: | sudo apt-get update sudo apt-get install -y pkg-config build-essential - sudo apt-get install -y libssl-dev libacl1-dev libxxhash-dev liblz4-dev + sudo apt-get install -y libssl-dev libacl1-dev liblz4-dev sudo apt-get install -y bash zsh fish # for shell completion tests sudo apt-get install -y rclone openssh-server curl if [[ "$TOXENV" == *"llfuse"* ]]; then @@ -436,7 +436,7 @@ jobs: freebsd) export IGNORE_OSVERSION=yes sudo -E pkg update -f - sudo -E pkg install -y xxhash liblz4 pkgconf + sudo -E pkg install -y liblz4 pkgconf sudo -E pkg install -y fusefs-libs sudo -E kldload fusefs sudo -E sysctl vfs.usermount=1 @@ -492,7 +492,7 @@ jobs: echo "https://ftp.NetBSD.org/pub/pkgsrc/packages/NetBSD/${arch}/10.1/All" | sudo tee /usr/pkg/etc/pkgin/repositories.conf > /dev/null sudo -E pkgin update sudo -E pkgin -y upgrade - sudo -E pkgin -y install lz4 xxhash git + sudo -E pkgin -y install lz4 git sudo -E pkgin -y install rust sudo -E pkgin -y install pkg-config sudo -E pkgin -y install py311-pip py311-virtualenv py311-tox @@ -526,7 +526,7 @@ jobs: ;; openbsd) - sudo -E pkg_add xxhash lz4 git + sudo -E pkg_add lz4 git sudo -E pkg_add rust sudo -E pkg_add openssl%3.4 sudo -E pkg_add py3-pip py3-virtualenv py3-tox @@ -542,14 +542,6 @@ jobs: sudo python3 -m ensurepip sudo python3 -m pip install virtualenv - # install libxxhash from source - git clone --depth 1 https://github.com/Cyan4973/xxHash.git - cd xxHash - sudo gmake install INSTALL=/usr/gnu/bin/install PREFIX=/usr/local - cd .. - export PKG_CONFIG_PATH="/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH:-}" - export LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH:-}" - python3 -m venv .venv . .venv/bin/activate python -V @@ -564,12 +556,12 @@ jobs: haiku) pkgman refresh - pkgman install -y git pkgconfig lz4 xxhash + pkgman install -y git pkgconfig lz4 pkgman install -y openssl3 pkgman install -y rust_bin pkgman install -y python3.10 pkgman install -y cffi - pkgman install -y lz4_devel xxhash_devel openssl3_devel libffi_devel + pkgman install -y lz4_devel openssl3_devel libffi_devel # there is no pkgman package for tox, so we install it into a venv python3 -m ensurepip --upgrade @@ -579,7 +571,6 @@ jobs: export PKG_CONFIG_PATH="/system/develop/lib/pkgconfig:/system/lib/pkgconfig:${PKG_CONFIG_PATH:-}" export BORG_LIBLZ4_PREFIX=/system/develop - export BORG_LIBXXHASH_PREFIX=/system/develop export BORG_OPENSSL_PREFIX=/system/develop pip install -r requirements.d/development.txt pip install -e . diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index f9404e196..e02fc6add 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -66,7 +66,7 @@ jobs: run: | sudo apt-get update sudo apt-get install -y pkg-config build-essential - sudo apt-get install -y libssl-dev libacl1-dev libxxhash-dev liblz4-dev + sudo apt-get install -y libssl-dev libacl1-dev liblz4-dev # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v4 diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 390028f89..415f4b024 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -16,7 +16,6 @@ build: - libacl1-dev - libssl-dev - liblz4-dev - - libxxhash-dev python: install: diff --git a/Brewfile b/Brewfile index f1fbecc13..8c9c6c03f 100644 --- a/Brewfile +++ b/Brewfile @@ -1,6 +1,5 @@ brew 'pkgconf' brew 'lz4' -brew 'xxhash' brew 'openssl@3' # osxfuse (aka macFUSE) is only required for "borg mount", diff --git a/Vagrantfile b/Vagrantfile index 312467e33..b79bb1869 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -16,7 +16,7 @@ def packages_debianoid(user) apt-get -y -qq dist-upgrade # for building borgbackup and dependencies: apt install -y pkg-config - apt install -y libssl-dev libacl1-dev libxxhash-dev liblz4-dev || true + apt install -y libssl-dev libacl1-dev liblz4-dev || true apt install -y libfuse-dev fuse || true apt install -y libfuse3-dev fuse3 || true apt install -y locales || true @@ -38,7 +38,7 @@ def packages_freebsd # install all the (security and other) updates, base system freebsd-update --not-running-from-cron fetch install # for building borgbackup and dependencies: - pkg install -y xxhash liblz4 pkgconf + pkg install -y liblz4 pkgconf pkg install -y fusefs-libs || true pkg install -y fusefs-libs3 || true pkg install -y rust @@ -83,7 +83,6 @@ def packages_openbsd rm comp$(uname -r | tr -d .).tgz pkg_add bash chsh -s bash vagrant - pkg_add xxhash pkg_add lz4 pkg_add git # no fakeroot pkg_add rust @@ -99,7 +98,7 @@ def packages_netbsd echo 'https://ftp.NetBSD.org/pub/pkgsrc/packages/NetBSD/$arch/9.3/All' > /usr/pkg/etc/pkgin/repositories.conf pkgin update pkgin -y upgrade - pkg_add lz4 xxhash git + pkg_add lz4 git pkg_add rust pkg_add bash chsh -s bash vagrant @@ -129,7 +128,7 @@ end def packages_openindiana return <<-EOF pkg install gcc-13 git - pkg install pkg-config libxxhash + pkg install pkg-config pkg install python-313 ln -sf /usr/bin/python3.13 /usr/bin/python3 ln -sf /usr/bin/python3.13-config /usr/bin/python3-config diff --git a/docs/global.rst.inc b/docs/global.rst.inc index 308fd9d58..0a1fe9f5c 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -28,3 +28,4 @@ .. _userspace filesystems: https://en.wikipedia.org/wiki/Filesystem_in_Userspace .. _Cython: https://cython.org/ .. _virtualenv: https://pypi.org/project/virtualenv/ +.. _python-xxhash: https://github.com/ifduyue/python-xxhash/ diff --git a/docs/installation.rst b/docs/installation.rst index 143533f7b..4c8388358 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -164,7 +164,6 @@ development header files (sometimes in a separate `-dev` or `-devel` package). * `Python 3`_ >= 3.10.0 * OpenSSL_ >= 1.1.1 (LibreSSL will not work) * libacl_ (which depends on libattr_) -* libxxhash_ >= 0.8.1 * liblz4_ >= 1.7.0 (r129) * libffi (required for argon2-cffi-bindings) * pkg-config (cli tool) - Borg uses this to discover header and library @@ -200,7 +199,7 @@ Arch Linux Install the runtime and build dependencies:: - pacman -S python python-pip python-virtualenv openssl acl xxhash lz4 base-devel + pacman -S python python-pip python-virtualenv openssl acl lz4 base-devel pacman -S fuse2 # needed for llfuse pacman -S fuse3 # needed for pyfuse3 @@ -216,7 +215,7 @@ Install the dependencies with development headers:: sudo apt-get install python3 python3-dev python3-pip python3-virtualenv \ libacl1-dev \ libssl-dev \ - liblz4-dev libxxhash-dev \ + liblz4-dev \ libffi-dev \ build-essential pkg-config sudo apt-get install libfuse-dev fuse # needed for llfuse @@ -234,7 +233,7 @@ Install the dependencies with development headers:: sudo dnf install python3 python3-devel python3-pip python3-virtualenv \ libacl-devel \ openssl-devel \ - lz4-devel xxhash-devel \ + lz4-devel \ libffi-devel \ pkgconf sudo dnf install gcc gcc-c++ redhat-rpm-config @@ -251,7 +250,7 @@ Install the dependencies automatically using zypper:: Alternatively, you can enumerate all build dependencies in the command line:: sudo zypper install python3 python3-devel \ - libacl-devel openssl-devel xxhash-devel liblz4-devel \ + libacl-devel openssl-devel liblz4-devel \ libffi-devel \ python3-Cython python3-Sphinx python3-msgpack-python python3-pkgconfig pkgconf \ python3-pytest python3-setuptools python3-setuptools_scm \ @@ -302,7 +301,7 @@ and commands to make FUSE work for using the mount command. pkg install -y python3 pkgconf pkg install openssl - pkg install liblz4 xxhash + pkg install liblz4 pkg install fusefs-libs # needed for llfuse pkg install -y git python3 -m ensurepip # to install pip for Python3 @@ -346,7 +345,7 @@ Use the Cygwin installer to install the dependencies:: python39 python39-devel python39-setuptools python39-pip python39-wheel python39-virtualenv - libssl-devel libxxhash-devel liblz4-devel + libssl-devel liblz4-devel binutils gcc-g++ git make openssh Make sure to use a virtual environment to avoid confusions with any Python installed on Windows. diff --git a/pyproject.toml b/pyproject.toml index 59d9f5521..bd175a5cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "argon2-cffi", "shtab>=1.8.0", "backports-zstd; python_version < '3.14'", # for python < 3.14. + "xxhash>=2.0.0", ] [project.optional-dependencies] diff --git a/scripts/Dockerfile.linux-run b/scripts/Dockerfile.linux-run index f6a1220cf..3c077ed46 100644 --- a/scripts/Dockerfile.linux-run +++ b/scripts/Dockerfile.linux-run @@ -8,7 +8,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ pkg-config \ libssl-dev \ libacl1-dev \ - libxxhash-dev \ liblz4-dev \ libfuse3-dev \ fuse3 \ diff --git a/setup.py b/setup.py index 033d2dfa4..1c2ea3def 100644 --- a/setup.py +++ b/setup.py @@ -163,11 +163,7 @@ if not on_rtd: dict(extra_compile_args=cflags), ) - checksums_ext_kwargs = members_appended( - dict(sources=[checksums_source]), - lib_ext_kwargs(pc, "BORG_LIBXXHASH_PREFIX", "xxhash", "libxxhash", ">= 0.7.3"), - dict(extra_compile_args=cflags), - ) + checksums_ext_kwargs = members_appended(dict(sources=[checksums_source]), dict(extra_compile_args=cflags)) if sys.platform == "linux": linux_ext_kwargs = members_appended( diff --git a/src/borg/archiver/benchmark_cmd.py b/src/borg/archiver/benchmark_cmd.py index 11e9b7800..cf346e65a 100644 --- a/src/borg/archiver/benchmark_cmd.py +++ b/src/borg/archiver/benchmark_cmd.py @@ -216,14 +216,15 @@ class BenchmarkMixIn: else: print(f"{spec:<24} {format_file_size(size):<10} {dt:.3f}s") - from ..checksums import crc32, xxh64 + from xxhash import xxh64 + from ..checksums import crc32 if not args.json: print("Non-cryptographic checksums / hashes ===========================") else: result["checksums"] = [] size = 1000000000 - tests = [("xxh64", lambda: xxh64(random_10M)), ("crc32 (zlib)", lambda: crc32(random_10M))] + tests = [("xxh64", lambda: xxh64(random_10M).digest()), ("crc32 (zlib)", lambda: crc32(random_10M))] for spec, func in tests: dt = timeit(func, number=number_default) if args.json: diff --git a/src/borg/cache.py b/src/borg/cache.py index b51ae67c0..c69553f84 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -8,6 +8,8 @@ from datetime import datetime, timezone, timedelta from pathlib import Path from time import perf_counter +from xxhash import xxh64 + from borgstore.backends.errors import PermissionDenied from .logger import create_logger @@ -19,7 +21,6 @@ files_cache_logger = create_logger("borg.debug.files_cache") from borgstore.store import ItemInfo from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM, TIME_DIFFERS2_NS -from .checksums import xxh64 from .hashindex import ChunkIndex, ChunkIndexEntry from .helpers import Error from .helpers import get_cache_dir, get_security_dir @@ -52,7 +53,7 @@ def files_cache_name(archive_name, files_cache_name="files"): # when not, the user may manually do that by using the env var. if not suffix: # avoid issues with too complex or long archive_name by hashing it: - suffix = bin_to_hex(xxh64(archive_name.encode())) + suffix = xxh64(archive_name.encode()).hexdigest() return files_cache_name + "." + suffix @@ -745,7 +746,7 @@ def write_chunkindex_to_repo_cache( if clear: # if we don't need the in-memory chunks index anymore: chunks.clear() # free memory, immediately - new_hash = bin_to_hex(xxh64(data, seed=CHUNKINDEX_HASH_SEED)) + new_hash = xxh64(data, seed=CHUNKINDEX_HASH_SEED).hexdigest() cached_hashes = list_chunkindex_hashes(repository) if force_write or new_hash not in cached_hashes: # when an updated chunks index is stored into the cache, we also store its hash as part of the name. @@ -786,7 +787,7 @@ def read_chunkindex_from_repo_cache(repository, hash): except StoreObjectNotFound: logger.debug(f"{cache_name} not found in the repository.") else: - if xxh64(chunks_data, seed=CHUNKINDEX_HASH_SEED) == hex_to_bin(hash): + if xxh64(chunks_data, seed=CHUNKINDEX_HASH_SEED).digest() == hex_to_bin(hash): logger.debug(f"{cache_name} is valid.") with io.BytesIO(chunks_data) as f: chunks = ChunkIndex.read(f) diff --git a/src/borg/checksums.pyi b/src/borg/checksums.pyi index adbb10578..35bab571f 100644 --- a/src/borg/checksums.pyi +++ b/src/borg/checksums.pyi @@ -1,8 +1 @@ def crc32(data: bytes, value: int = 0) -> int: ... -def xxh64(data: bytes, seed: int = 0) -> bytes: ... - -class StreamingXXH64: - def __init__(self, seed: int = 0) -> None: ... - def update(self, data: bytes) -> None: ... - def digest(self) -> bytes: ... - def hexdigest(self) -> str: ... diff --git a/src/borg/checksums.pyx b/src/borg/checksums.pyx index 9d938c61e..ccf15f198 100644 --- a/src/borg/checksums.pyx +++ b/src/borg/checksums.pyx @@ -1,88 +1,6 @@ import zlib -from .platformflags import is_darwin -from .helpers import bin_to_hex - -from libc.stdint cimport uint32_t -from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release -from cpython.bytes cimport PyBytes_FromStringAndSize - - -cdef extern from "xxhash.h": - ctypedef struct XXH64_canonical_t: - char digest[8] - - ctypedef struct XXH64_state_t: - pass # opaque - - ctypedef unsigned long long XXH64_hash_t - - ctypedef enum XXH_errorcode: - XXH_OK, - XXH_ERROR - - XXH64_state_t* XXH64_createState() - XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) - XXH64_hash_t XXH64(const void* input, size_t length, unsigned long long seed) - - XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) - XXH_errorcode XXH64_update(XXH64_state_t* statePtr, const void* input, size_t length) - XXH64_hash_t XXH64_digest(const XXH64_state_t* statePtr) - - void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) - XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) - - -cdef Py_buffer ro_buffer(object data) except *: - cdef Py_buffer view - PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) - return view - # Borg 2.0 repositories do not compute CRC32 over large amounts of data, # so speed does not matter much anymore, and we can just use zlib.crc32. crc32 = zlib.crc32 - - -def xxh64(data, seed=0): - cdef unsigned long long _seed = seed - cdef XXH64_hash_t hash - cdef XXH64_canonical_t digest - cdef Py_buffer data_buf = ro_buffer(data) - try: - hash = XXH64(data_buf.buf, data_buf.len, _seed) - finally: - PyBuffer_Release(&data_buf) - XXH64_canonicalFromHash(&digest, hash) - return PyBytes_FromStringAndSize( digest.digest, 8) - - -cdef class StreamingXXH64: - cdef XXH64_state_t* state - - def __cinit__(self, seed=0): - self.state = XXH64_createState() - cdef unsigned long long _seed = seed - if XXH64_reset(self.state, _seed) != XXH_OK: - raise Exception('XXH64_reset failed') - - def __dealloc__(self): - XXH64_freeState(self.state) - - def update(self, data): - cdef Py_buffer data_buf = ro_buffer(data) - try: - if XXH64_update(self.state, data_buf.buf, data_buf.len) != XXH_OK: - raise Exception('XXH64_update failed') - finally: - PyBuffer_Release(&data_buf) - - def digest(self): - cdef XXH64_hash_t hash - cdef XXH64_canonical_t digest - hash = XXH64_digest(self.state) - XXH64_canonicalFromHash(&digest, hash) - return PyBytes_FromStringAndSize( digest.digest, 8) - - def hexdigest(self): - return bin_to_hex(self.digest()) diff --git a/src/borg/crypto/file_integrity.py b/src/borg/crypto/file_integrity.py index fbce69b5f..e5310a136 100644 --- a/src/borg/crypto/file_integrity.py +++ b/src/borg/crypto/file_integrity.py @@ -5,9 +5,10 @@ from hmac import compare_digest from collections.abc import Callable from pathlib import Path +from xxhash import xxh64 + from ..helpers import IntegrityError from ..logger import create_logger -from ..checksums import StreamingXXH64 logger = create_logger() @@ -112,7 +113,7 @@ class SHA512FileHashingWrapper(FileHashingWrapper): class XXH64FileHashingWrapper(FileHashingWrapper): ALGORITHM = "XXH64" - FACTORY = StreamingXXH64 + FACTORY = xxh64 SUPPORTED_ALGORITHMS = { diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 9d3151235..00dc14e5b 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -899,11 +899,11 @@ class ItemFormatter(BaseFormatter): return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys) def __init__(self, archive, format): - from ..checksums import StreamingXXH64 + from xxhash import xxh64 static_data = {"archivename": archive.name, "archiveid": archive.fpr} | self.FIXED_KEYS super().__init__(format, static_data) - self.xxh64 = StreamingXXH64 + self.xxh64 = xxh64 self.archive = archive # track which keys were requested in the format string self.format_keys = {f[1] for f in Formatter().parse(format)} diff --git a/src/borg/legacyremote.py b/src/borg/legacyremote.py index cedeb375a..3fbc58608 100644 --- a/src/borg/legacyremote.py +++ b/src/borg/legacyremote.py @@ -14,6 +14,8 @@ import textwrap import time from subprocess import Popen, PIPE +from xxhash import xxh64 + from . import __version__ from .compress import Compressor from .constants import * # NOQA @@ -30,7 +32,6 @@ from .logger import create_logger from .helpers import msgpack from .legacyrepository import LegacyRepository from .version import parse_version, format_version -from .checksums import xxh64 from .helpers.datastruct import EfficientCollectionQueue from .platform import is_win32 @@ -911,13 +912,13 @@ def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None def pack(data): csize, decrypted = data meta, compressed = compressor.compress({}, decrypted) - return cache_struct.pack(csize, xxh64(compressed), meta["ctype"], meta["clevel"]) + compressed + return cache_struct.pack(csize, xxh64(compressed).digest(), meta["ctype"], meta["clevel"]) + compressed def unpack(data): data = memoryview(data) csize, checksum, ctype, clevel = cache_struct.unpack(data[: cache_struct.size]) compressed = data[cache_struct.size :] - if checksum != xxh64(compressed): + if checksum != xxh64(compressed).digest(): raise IntegrityError("detected corrupted data in metadata cache") meta = dict(ctype=ctype, clevel=clevel, csize=len(compressed)) _, decrypted = compressor.decompress(meta, compressed) diff --git a/src/borg/legacyrepository.py b/src/borg/legacyrepository.py index 504556ba6..9ef97a1a1 100644 --- a/src/borg/legacyrepository.py +++ b/src/borg/legacyrepository.py @@ -12,6 +12,8 @@ from functools import partial from itertools import islice from collections.abc import Callable +import xxhash + from .constants import * # NOQA from .hashindex import NSIndex1Entry, NSIndex1 from .helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size, parse_file_size @@ -26,7 +28,7 @@ from .logger import create_logger from .manifest import Manifest, NoManifestError from .platform import SaveFile, SyncFile, sync_dir, safe_fadvise from .repoobj import RepoObj -from .checksums import crc32, StreamingXXH64 +from .checksums import crc32 from .crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError logger = create_logger(__name__) @@ -1559,7 +1561,7 @@ class LoggedIO: data.release() def entry_hash(self, *data): - h = StreamingXXH64() + h = xxhash.xxh64() for d in data: h.update(d) return h.digest() diff --git a/src/borg/remote.py b/src/borg/remote.py index 927c1a3c9..5fdafb422 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -17,6 +17,8 @@ import time import traceback from subprocess import Popen, PIPE +from xxhash import xxh64 + import borg.logger from . import __version__ from .compress import Compressor @@ -37,7 +39,6 @@ from .helpers import msgpack from .legacyrepository import LegacyRepository from .repository import Repository, StoreObjectNotFound from .version import parse_version, format_version -from .checksums import xxh64 from .helpers.datastruct import EfficientCollectionQueue from .platform import is_win32 @@ -1251,13 +1252,13 @@ def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None def pack(data): csize, decrypted = data meta, compressed = compressor.compress({}, decrypted) - return cache_struct.pack(csize, xxh64(compressed), meta["ctype"], meta["clevel"]) + compressed + return cache_struct.pack(csize, xxh64(compressed).digest(), meta["ctype"], meta["clevel"]) + compressed def unpack(data): data = memoryview(data) csize, checksum, ctype, clevel = cache_struct.unpack(data[: cache_struct.size]) compressed = data[cache_struct.size :] - if checksum != xxh64(compressed): + if checksum != xxh64(compressed).digest(): raise IntegrityError("detected corrupted data in metadata cache") meta = dict(ctype=ctype, clevel=clevel, csize=len(compressed)) _, decrypted = compressor.decompress(meta, compressed) diff --git a/src/borg/repoobj.py b/src/borg/repoobj.py index 4a17e8ab4..a5aecf3da 100644 --- a/src/borg/repoobj.py +++ b/src/borg/repoobj.py @@ -1,8 +1,9 @@ from collections import namedtuple from struct import Struct +from xxhash import xxh64 + from .constants import * # NOQA -from .checksums import xxh64 from .helpers import msgpack, workarounds from .helpers.errors import IntegrityError from .compress import Compressor, LZ4_COMPRESSOR, get_compressor @@ -66,7 +67,9 @@ class RepoObj: data_encrypted = self.key.encrypt(id, data_compressed) meta_packed = msgpack.packb(meta) meta_encrypted = self.key.encrypt(id, meta_packed) - hdr = self.ObjHeader(len(meta_encrypted), len(data_encrypted), xxh64(meta_encrypted), xxh64(data_encrypted)) + hdr = self.ObjHeader( + len(meta_encrypted), len(data_encrypted), xxh64(meta_encrypted).digest(), xxh64(data_encrypted).digest() + ) hdr_packed = self.obj_header.pack(*hdr) return hdr_packed + meta_encrypted + data_encrypted diff --git a/src/borg/repository.py b/src/borg/repository.py index 3987c572d..a3f8aa8cc 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -2,13 +2,14 @@ import os import time from pathlib import Path +from xxhash import xxh64 + from borgstore.store import Store from borgstore.store import ObjectNotFound as StoreObjectNotFound from borgstore.backends.errors import BackendError as StoreBackendError from borgstore.backends.errors import BackendDoesNotExist as StoreBackendDoesNotExist from borgstore.backends.errors import BackendAlreadyExists as StoreBackendAlreadyExists -from .checksums import xxh64 from .constants import * # NOQA from .hashindex import ChunkIndex, ChunkIndexEntry from .helpers import Error, ErrorWithTraceback, IntegrityError @@ -306,12 +307,12 @@ class Repository: meta = obj[hdr_size : hdr_size + hdr.meta_size] if hdr.meta_size != len(meta): log_error("metadata size incorrect.") - elif hdr.meta_hash != xxh64(meta): + elif hdr.meta_hash != xxh64(meta).digest(): log_error("metadata does not match checksum.") data = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size] if hdr.data_size != len(data): log_error("data size incorrect.") - elif hdr.data_hash != xxh64(data): + elif hdr.data_hash != xxh64(data).digest(): log_error("data does not match checksum.") else: log_error("too small.") diff --git a/src/borg/storelocking.py b/src/borg/storelocking.py index 25a9af30e..ce30bdbe6 100644 --- a/src/borg/storelocking.py +++ b/src/borg/storelocking.py @@ -3,11 +3,12 @@ import json import random import time +from xxhash import xxh64 + from borgstore.store import ObjectNotFound from . import platform -from .checksums import xxh64 -from .helpers import Error, ErrorWithTraceback, bin_to_hex +from .helpers import Error, ErrorWithTraceback from .logger import create_logger logger = create_logger(__name__) @@ -100,7 +101,7 @@ class Lock: timestamp = now.isoformat(timespec="milliseconds") lock = dict(exclusive=exclusive, hostid=self.id[0], processid=self.id[1], threadid=self.id[2], time=timestamp) value = json.dumps(lock).encode("utf-8") - key = bin_to_hex(xxh64(value)) + key = xxh64(value).hexdigest() logger.debug(f"LOCK-CREATE: creating lock in store. key: {key}, lock: {lock}.") self.store.store(f"locks/{key}", value) if update_last_refresh: diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py index 3554a1416..c392b5241 100644 --- a/src/borg/testsuite/archiver/check_cmd_test.py +++ b/src/borg/testsuite/archiver/check_cmd_test.py @@ -367,7 +367,12 @@ def test_verify_data(archivers, request, init_args): # note: it only works like tested here for a highly engineered data corruption attack, # because with accidental corruption, usually already the xxh64 low-level check fails. def fake_xxh64(data, seed=0): - return b"fakefake" + # xxhash.xxh64.digest() returns -> bytes + class FakeDigest: + def digest(self): + return b"fakefake" + + return FakeDigest() import borg.repoobj import borg.repository diff --git a/src/borg/testsuite/checksums_test.py b/src/borg/testsuite/checksums_test.py index 030f36156..a799bf6e8 100644 --- a/src/borg/testsuite/checksums_test.py +++ b/src/borg/testsuite/checksums_test.py @@ -1,26 +1,23 @@ -from .. import checksums -from ..helpers import bin_to_hex, hex_to_bin +from xxhash import xxh64 + +from ..helpers import hex_to_bin def test_xxh64(): - assert bin_to_hex(checksums.xxh64(b"test", 123)) == "2b81b9401bef86cf" - assert bin_to_hex(checksums.xxh64(b"test")) == "4fdcca5ddb678139" + assert xxh64(b"test", 123).hexdigest() == "2b81b9401bef86cf" + assert xxh64(b"test").hexdigest() == "4fdcca5ddb678139" assert ( - bin_to_hex( - checksums.xxh64( - hex_to_bin( - "6f663f01c118abdea553373d5eae44e7dac3b6829b46b9bbeff202b6c592c22d724" - "fb3d25a347cca6c5b8f20d567e4bb04b9cfa85d17f691590f9a9d32e8ccc9102e9d" - "cf8a7e6716280cd642ce48d03fdf114c9f57c20d9472bb0f81c147645e6fa3d331" - ) + xxh64( + hex_to_bin( + "6f663f01c118abdea553373d5eae44e7dac3b6829b46b9bbeff202b6c592c22d724" + "fb3d25a347cca6c5b8f20d567e4bb04b9cfa85d17f691590f9a9d32e8ccc9102e9d" + "cf8a7e6716280cd642ce48d03fdf114c9f57c20d9472bb0f81c147645e6fa3d331" ) - ) + ).hexdigest() == "35d5d2f545d9511a" ) - -def test_streaming_xxh64(): - hasher = checksums.StreamingXXH64(123) + hasher = xxh64(seed=123) hasher.update(b"te") hasher.update(b"st") - assert bin_to_hex(hasher.digest()) == hasher.hexdigest() == "2b81b9401bef86cf" + assert hasher.hexdigest() == "2b81b9401bef86cf" diff --git a/src/borg/testsuite/legacyrepository_test.py b/src/borg/testsuite/legacyrepository_test.py index 31bb5936f..a97a094c9 100644 --- a/src/borg/testsuite/legacyrepository_test.py +++ b/src/borg/testsuite/legacyrepository_test.py @@ -1,11 +1,12 @@ import logging import os import sys + from unittest.mock import patch import pytest +from xxhash import xxh64 -from ..checksums import xxh64 from ..hashindex import NSIndex1 from ..helpers import Location from ..helpers import IntegrityError @@ -74,7 +75,7 @@ def get_path(repository): def fchunk(data, meta=b""): # Create a raw chunk that has a valid RepoObj layout but does not use encryption or compression. - hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta), xxh64(data)) + hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest()) assert isinstance(data, bytes) chunk = hdr + meta + data return chunk @@ -149,7 +150,7 @@ def test_multiple_transactions(repo_fixtures, request): def test_read_data(repo_fixtures, request): with get_repository_from_fixture(repo_fixtures, request) as repository: meta, data = b"meta", b"data" - hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta), xxh64(data)) + hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest()) chunk_complete = hdr + meta + data chunk_short = hdr + meta repository.put(H(0), chunk_complete) diff --git a/src/borg/testsuite/repository_test.py b/src/borg/testsuite/repository_test.py index 67afa6584..112d2094d 100644 --- a/src/borg/testsuite/repository_test.py +++ b/src/borg/testsuite/repository_test.py @@ -3,8 +3,8 @@ import os import sys import pytest +from xxhash import xxh64 -from ..checksums import xxh64 from ..helpers import Location from ..helpers import IntegrityError from ..platformflags import is_win32 @@ -57,7 +57,7 @@ def reopen(repository, exclusive: bool | None = True, create=False): def fchunk(data, meta=b""): # Format chunk: create a raw chunk that has a valid RepoObj layout, but does not use encryption or compression. - hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta), xxh64(data)) + hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest()) assert isinstance(data, bytes) chunk = hdr + meta + data return chunk @@ -99,7 +99,7 @@ def test_basic_operations(repo_fixtures, request): def test_read_data(repo_fixtures, request): with get_repository_from_fixture(repo_fixtures, request) as repository: meta, data = b"meta", b"data" - hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta), xxh64(data)) + hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest()) chunk_complete = hdr + meta + data chunk_short = hdr + meta repository.put(H(0), chunk_complete)