mirror of
https://github.com/borgbackup/borg.git
synced 2026-05-28 04:03:21 -04:00
legacy: move NSIndex1 to borg.legacy.hashindex, refs #9556
This commit is contained in:
parent
180130ba4e
commit
fb6192bf86
5 changed files with 113 additions and 113 deletions
|
|
@ -1,7 +1,9 @@
|
|||
from typing import NamedTuple, Tuple, Type, IO, Iterator, Any
|
||||
from typing import NamedTuple, Tuple, Type, IO, Iterator, Any, MutableMapping
|
||||
|
||||
PATH_OR_FILE = str | IO
|
||||
|
||||
# Type stub: HTProxyMixin is declared as a MutableMapping; the body is elided here.
class HTProxyMixin(MutableMapping): ...
|
||||
|
||||
class ChunkIndexEntry(NamedTuple):
|
||||
flags: int
|
||||
size: int
|
||||
|
|
@ -22,16 +24,6 @@ class ChunkIndex:
|
|||
def __getitem__(self, key: bytes) -> Type[ChunkIndexEntry]: ...
|
||||
def __setitem__(self, key: bytes, value: CIE) -> None: ...
|
||||
|
||||
# Type stub for the value record of the legacy NSIndex1 index.
class NSIndex1Entry(NamedTuple):
    segment: int  # first field of the record
    offset: int  # second field of the record
|
||||
|
||||
# Type stub for the legacy index class: only the members listed below are declared.
class NSIndex1:  # legacy
    # iteration over the index; arguments and the yielded item type are left untyped in this stub
    def iteritems(self, *args, **kwargs) -> Iterator: ...
    # membership test by bytes key
    def __contains__(self, key: bytes) -> bool: ...
    # lookup by bytes key; the value type is not narrowed in this stub
    def __getitem__(self, key: bytes) -> Any: ...
    # store a value under a bytes key
    def __setitem__(self, key: bytes, value: Any) -> None: ...
|
||||
|
||||
class FuseVersionsIndexEntry(NamedTuple):
|
||||
version: int
|
||||
hash: bytes
|
||||
|
|
|
|||
|
|
@ -143,103 +143,3 @@ class FuseVersionsIndex(HTProxyMixin, MutableMapping):
|
|||
"""
|
||||
def __init__(self):
|
||||
self.ht = HashTableNT(key_size=16, value_type=FuseVersionsIndexEntry, value_format=FuseVersionsIndexEntryFormat)
|
||||
|
||||
|
||||
# Value record of the legacy index and the matching per-field struct format codes.
_NSINDEX1_FIELDS = ("segment", "offset")
NSIndex1Entry = namedtuple("NSIndex1Entry", _NSINDEX1_FIELDS)
NSIndex1EntryFormatT = namedtuple("NSIndex1EntryFormatT", _NSINDEX1_FIELDS)
# "I" is the struct code for an unsigned 32-bit integer, one per field.
NSIndex1EntryFormat = NSIndex1EntryFormatT._make(("I", "I"))
|
||||
|
||||
|
||||
class NSIndex1(HTProxyMixin, MutableMapping):
    """
    Mapping from key256 to (segment32, offset32), as used by the legacy repository index of Borg 1.x.

    Entries live in a ``HashTableNT`` (``self.ht``). ``read()`` / ``write()`` convert between that
    table and a file made of a ``HEADER_FMT`` header followed by fixed-size key/value buckets.
    """
    MAX_VALUE = 2**32 - 1  # borghash has the full uint32_t range
    MAGIC = b"BORG_IDX"  # borg 1.x
    HEADER_FMT = "<8sIIBB"  # magic, entries, buckets, ksize, vsize
    KEY_SIZE = 32
    VALUE_SIZE = 8

    def __init__(self, capacity=1000, path=None, usable=None):
        """
        Create an empty index and optionally load it from *path*.

        :param capacity: capacity passed to the hash table.
        :param path: file name (str) or open binary file object to load from; ignored if falsy.
        :param usable: if not None, replaces *capacity* by ``usable * 2``.
        """
        if usable is not None:
            capacity = usable * 2  # load factor 0.5
        self.ht = HashTableNT(key_size=self.KEY_SIZE, value_type=NSIndex1Entry, value_format=NSIndex1EntryFormat,
                              capacity=capacity)
        if path:
            self._read(path)

    def iteritems(self, marker=None):
        """
        Yield ``(key, value)`` pairs in the hash table's iteration order.

        With a *marker*, everything up to and including the entry whose key equals *marker*
        is skipped; if no key matches, nothing is yielded.
        """
        do_yield = marker is None
        for key, value in self.ht.items():
            if do_yield:
                yield key, value
            else:
                do_yield = key == marker

    @classmethod
    def read(cls, path):
        """Return a new index loaded from *path* (file name or open binary file object)."""
        return cls(path=path)

    def size(self):
        """Return the size reported by the underlying hash table."""
        return self.ht.size()  # not quite correct as this is not the on-disk read-only format.

    def write(self, path):
        """Write the index to *path*: a str is opened as a file name, anything else is used as a file object."""
        if isinstance(path, str):
            with open(path, 'wb') as fd:
                self._write_fd(fd)
        else:
            self._write_fd(path)

    def _read(self, path):
        """Load the index from *path*: a str is opened as a file name, anything else is used as a file object."""
        if isinstance(path, str):
            with open(path, 'rb') as fd:
                self._read_fd(fd)
        else:
            self._read_fd(path)

    def _write_fd(self, fd):
        """
        Write the header and then every entry as ``key + raw value`` to the file object *fd*.

        The file is written densely: the header's bucket count equals the entry count.
        """
        used = len(self.ht)
        header_bytes = struct.pack(self.HEADER_FMT, self.MAGIC, used, used, self.KEY_SIZE, self.VALUE_SIZE)
        fd.write(header_bytes)
        # record the header as a separate integrity-hash part if supported
        hash_part = getattr(fd, "hash_part", None)
        if hash_part:
            hash_part("HashHeader")
        count = 0
        for key, _ in self.ht.items():
            # items() yields the converted value, but the file needs the table's raw form
            value = self.ht._get_raw(key)
            fd.write(key)
            fd.write(value)
            count += 1
        # internal consistency check: the header already announced `used` entries
        assert count == used

    def _read_fd(self, fd):
        """
        Load all used buckets from the seekable binary file object *fd* into the hash table.

        :raises ValueError: if the header is too short, the magic or the key/value sizes do not
            match this class, or the file size disagrees with the bucket count in the header.
        """
        header_size = struct.calcsize(self.HEADER_FMT)
        header_bytes = fd.read(header_size)
        if len(header_bytes) < header_size:
            raise ValueError("Invalid file: file is too short (header).")
        # verify the header as a separate integrity-hash part if supported
        hash_part = getattr(fd, "hash_part", None)
        if hash_part:
            hash_part("HashHeader")
        magic, _entries, buckets, ksize, vsize = struct.unpack(self.HEADER_FMT, header_bytes)
        if magic != self.MAGIC:
            raise ValueError(f"Invalid file: magic {self.MAGIC.decode()} not found.")
        # ksize / vsize come from the file, so validate them with real exceptions:
        # an assert would be stripped under "python -O" and raise a different exception type.
        if ksize != self.KEY_SIZE:
            raise ValueError(f"Invalid file: invalid key size {ksize}, expected {self.KEY_SIZE}.")
        if vsize != self.VALUE_SIZE:
            raise ValueError(f"Invalid file: invalid value size {vsize}, expected {self.VALUE_SIZE}.")
        buckets_size = buckets * (ksize + vsize)
        current_pos = fd.tell()
        end_of_file = fd.seek(0, os.SEEK_END)
        if current_pos + buckets_size != end_of_file:
            raise ValueError("Invalid file: file size does not match (buckets).")
        fd.seek(current_pos)
        for _ in range(buckets):
            key = fd.read(ksize)
            value = fd.read(vsize)
            if value.startswith(b'\xFF\xFF\xFF\xFF'):  # LE for 0xffffffff (empty/unused bucket)
                continue
            if value.startswith(b'\xFE\xFF\xFF\xFF'):  # LE for 0xfffffffe (deleted/tombstone bucket)
                continue
            self.ht._set_raw(key, value)
        # internal consistency check: the size test above guarantees we stop exactly at EOF
        pos = fd.tell()
        assert pos == end_of_file
|
||||
|
|
|
|||
108
src/borg/legacy/hashindex.py
Normal file
108
src/borg/legacy/hashindex.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
from collections.abc import MutableMapping
|
||||
from collections import namedtuple
|
||||
import os
|
||||
import struct
|
||||
|
||||
from borghash import HashTableNT
|
||||
|
||||
from ..hashindex import HTProxyMixin
|
||||
|
||||
|
||||
# Record type for one legacy index value, plus a same-shaped record of struct format codes.
NSIndex1Entry = namedtuple("NSIndex1Entry", ["segment", "offset"])
NSIndex1EntryFormatT = namedtuple("NSIndex1EntryFormatT", ["segment", "offset"])
# Both fields use struct code "I" (unsigned 32-bit integer).
NSIndex1EntryFormat = NSIndex1EntryFormatT("I", "I")
|
||||
|
||||
|
||||
class NSIndex1(HTProxyMixin, MutableMapping):
    """
    Mapping from key256 to (segment32, offset32), as used by the legacy repository index of Borg 1.x.

    Entries are kept in a ``HashTableNT`` (``self.ht``). ``read()`` / ``write()`` translate between
    that table and a file consisting of a ``HEADER_FMT`` header followed by fixed-size
    key/value buckets.
    """

    MAX_VALUE = 2**32 - 1  # borghash has the full uint32_t range
    MAGIC = b"BORG_IDX"  # borg 1.x
    HEADER_FMT = "<8sIIBB"  # magic, entries, buckets, ksize, vsize
    KEY_SIZE = 32
    VALUE_SIZE = 8

    def __init__(self, capacity=1000, path=None, usable=None):
        """
        Create an empty index, optionally filling it from *path*.

        :param capacity: capacity passed to the hash table.
        :param path: file name (str) or already opened binary file object to load from;
            skipped if falsy.
        :param usable: if not None, overrides *capacity* with ``usable * 2``.
        """
        if usable is not None:
            capacity = usable * 2  # load factor 0.5
        self.ht = HashTableNT(
            key_size=self.KEY_SIZE, value_type=NSIndex1Entry, value_format=NSIndex1EntryFormat, capacity=capacity
        )
        if path:
            self._read(path)

    def iteritems(self, marker=None):
        """
        Yield ``(key, value)`` pairs in the hash table's iteration order.

        If *marker* is given, nothing is yielded until the entry whose key equals *marker*
        has been passed; the marker entry itself is not yielded. If no key matches,
        nothing is yielded at all.
        """
        do_yield = marker is None
        for key, value in self.ht.items():
            if do_yield:
                yield key, value
            else:
                do_yield = key == marker

    @classmethod
    def read(cls, path):
        """Alternate constructor: return an index loaded from *path* (file name or file object)."""
        return cls(path=path)

    def size(self):
        """Return the size reported by the underlying hash table."""
        return self.ht.size()  # not quite correct as this is not the on-disk read-only format.

    def write(self, path):
        """Serialize the index to *path*; a str is opened as a file name, anything else is written to directly."""
        if isinstance(path, str):
            with open(path, "wb") as fd:
                self._write_fd(fd)
        else:
            self._write_fd(path)

    def _read(self, path):
        """Load the index from *path*; a str is opened as a file name, anything else is read from directly."""
        if isinstance(path, str):
            with open(path, "rb") as fd:
                self._read_fd(fd)
        else:
            self._read_fd(path)

    def _write_fd(self, fd):
        """
        Write the header followed by ``key + raw value`` for every entry to the file object *fd*.

        Only used entries are written, so the header carries the same number for
        "entries" and "buckets".
        """
        used = len(self.ht)
        header_bytes = struct.pack(self.HEADER_FMT, self.MAGIC, used, used, self.KEY_SIZE, self.VALUE_SIZE)
        fd.write(header_bytes)
        # if fd offers a hash_part() hook, call it right after the header with the name "HashHeader"
        hash_part = getattr(fd, "hash_part", None)
        if hash_part:
            hash_part("HashHeader")
        count = 0
        for key, _ in self.ht.items():
            # items() yields the converted value, but the file needs the table's raw form
            value = self.ht._get_raw(key)
            fd.write(key)
            fd.write(value)
            count += 1
        # internal consistency check: the header already announced `used` entries
        assert count == used

    def _read_fd(self, fd):
        """
        Load all used buckets from the seekable binary file object *fd* into the hash table.

        :raises ValueError: if the header is too short, the magic or the key/value sizes do not
            match this class, or the file size disagrees with the bucket count in the header.
        """
        header_size = struct.calcsize(self.HEADER_FMT)
        header_bytes = fd.read(header_size)
        if len(header_bytes) < header_size:
            raise ValueError("Invalid file: file is too short (header).")
        # if fd offers a hash_part() hook, call it right after the header with the name "HashHeader"
        hash_part = getattr(fd, "hash_part", None)
        if hash_part:
            hash_part("HashHeader")
        magic, _entries, buckets, ksize, vsize = struct.unpack(self.HEADER_FMT, header_bytes)
        if magic != self.MAGIC:
            raise ValueError(f"Invalid file: magic {self.MAGIC.decode()} not found.")
        # ksize / vsize come from the file, so validate them with real exceptions:
        # an assert would be stripped under "python -O" and raise a different exception type.
        if ksize != self.KEY_SIZE:
            raise ValueError(f"Invalid file: invalid key size {ksize}, expected {self.KEY_SIZE}.")
        if vsize != self.VALUE_SIZE:
            raise ValueError(f"Invalid file: invalid value size {vsize}, expected {self.VALUE_SIZE}.")
        buckets_size = buckets * (ksize + vsize)
        current_pos = fd.tell()
        end_of_file = fd.seek(0, os.SEEK_END)
        if current_pos + buckets_size != end_of_file:
            raise ValueError("Invalid file: file size does not match (buckets).")
        fd.seek(current_pos)
        for _ in range(buckets):
            key = fd.read(ksize)
            value = fd.read(vsize)
            if value.startswith(b"\xff\xff\xff\xff"):  # LE for 0xffffffff (empty/unused bucket)
                continue
            if value.startswith(b"\xfe\xff\xff\xff"):  # LE for 0xfffffffe (deleted/tombstone bucket)
                continue
            self.ht._set_raw(key, value)
        # internal consistency check: the size test above guarantees we stop exactly at EOF
        pos = fd.tell()
        assert pos == end_of_file
|
||||
|
|
@ -16,7 +16,7 @@ from zlib import crc32
|
|||
import xxhash
|
||||
|
||||
from ..constants import * # NOQA
|
||||
from ..hashindex import NSIndex1Entry, NSIndex1
|
||||
from .hashindex import NSIndex1Entry, NSIndex1
|
||||
from ..helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size, parse_file_size
|
||||
from ..helpers import Location
|
||||
from ..helpers import ProgressIndicatorPercent
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from unittest.mock import patch
|
|||
import pytest
|
||||
from xxhash import xxh64
|
||||
|
||||
from ..hashindex import NSIndex1
|
||||
from ..legacy.hashindex import NSIndex1
|
||||
from ..helpers import Location
|
||||
from ..helpers import IntegrityError
|
||||
from ..helpers import msgpack
|
||||
|
|
|
|||
Loading…
Reference in a new issue