spelling: hard link(s)

This commit is contained in:
Thomas Waldmann 2025-09-23 15:11:31 +02:00
parent 2c7bec0149
commit 5aa536df4c
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
15 changed files with 61 additions and 60 deletions

View file

@ -240,7 +240,7 @@ def stat_update_check(st_old, st_curr):
# in this case, we dispatched to wrong handler - abort
raise BackupRaceConditionError("file type changed (race condition), skipping file")
if st_old.st_ino != st_curr.st_ino:
# in this case, the hardlinks-related code in create_helper has the wrong inode - abort!
# in this case, the hard-link-related code in create_helper has the wrong inode - abort!
raise BackupRaceConditionError("file inode changed (race condition), skipping file")
# looks ok, we are still dealing with the same thing - return current stat:
return st_curr
@ -290,7 +290,7 @@ class DownloadPipeline:
"""
Preloads the content data chunks of an item (if any).
optimize_hardlinks can be set to True if item chunks only need to be preloaded for
1st hardlink, but not for any further hardlink to same inode / with same hlid.
1st hard link, but not for any further hard link to same inode / with same hlid.
Returns True if chunks were preloaded.
Warning: if data chunks are preloaded then all data chunks have to be retrieved,
@ -305,7 +305,7 @@ class DownloadPipeline:
elif hlid in self.hlids_preloaded:
preload_chunks = False
else:
# not having the hardlink's chunks already preloaded for other hardlink to same inode
# not having the hard link's chunks already preloaded for other hard link to same inode
preload_chunks = True
self.hlids_preloaded.add(hlid)
else:
@ -730,15 +730,15 @@ Duration: {0.duration}
link_target = hlm.retrieve(id=item.hlid)
if link_target is not None and has_link:
if not dry_run:
# another hardlink to same inode (same hlid) was extracted previously, just link to it
# another hard link to same inode (same hlid) was extracted previously, just link to it
with backup_io("link"):
os.link(link_target, path, follow_symlinks=False)
hardlink_set = True
yield hardlink_set
if not hardlink_set:
if "hlid" in item and has_link:
# Update entry with extracted item path, so that following hardlinks don't extract twice.
# We have hardlinking support, so we will hardlink not extract.
# Update entry with extracted item path, so that following hard links don't extract twice.
# We have hardlinking support, so we will hard link, not extract.
hlm.remember(id=item.hlid, info=path)
else:
# Broken platform with no hardlinking support.
@ -765,7 +765,7 @@ Duration: {0.duration}
:param dry_run: do not write any data
:param stdout: write extracted data to stdout
:param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
:param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
:param hlm: maps hlid to link_target for extracting subtrees with hard links correctly
:param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
:param continue_extraction: continue a previously interrupted extraction of the same archive
"""
@ -791,7 +791,7 @@ Duration: {0.duration}
if dry_run or stdout:
with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
if not hardlink_set:
# it does not really set hardlinks due to dry_run, but we need to behave same
# it does not really set hard links due to dry_run, but we need to behave same
# as non-dry_run concerning fetching preloaded chunks from the pipeline or
# it would get stuck.
if "chunks" in item:
@ -1248,7 +1248,7 @@ class FilesystemObjectProcessors:
hl_chunks = None
update_map = False
if hardlinked:
status = "h" # hardlink
status = "h" # hard link
nothing = object()
chunks = self.hlm.retrieve(id=(st.st_ino, st.st_dev), default=nothing)
if chunks is nothing:
@ -1261,7 +1261,7 @@ class FilesystemObjectProcessors:
self.add_item(item, stats=self.stats)
if update_map:
# remember the hlid of this fs object and if the item has chunks,
# also remember them, so we do not have to re-chunk a hardlink.
# also remember them, so we do not have to re-chunk a hard link.
chunks = item.chunks if "chunks" in item else None
self.hlm.remember(id=(st.st_ino, st.st_dev), info=chunks)
@ -1394,13 +1394,13 @@ class FilesystemObjectProcessors:
# this needs to be done early, so that part files also get the patched mode.
item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
# we begin processing chunks now.
if hl_chunks is not None: # create_helper gave us chunks from a previous hardlink
if hl_chunks is not None: # create_helper gave us chunks from a previous hard link
item.chunks = []
for chunk_id, chunk_size in hl_chunks:
# process one-by-one, so we will know in item.chunks how far we got
chunk_entry = cache.reuse_chunk(chunk_id, chunk_size, self.stats)
item.chunks.append(chunk_entry)
else: # normal case, no "2nd+" hardlink
else: # normal case, no "2nd+" hard link
if not is_special_file:
hashed_path = safe_encode(item.path) # path as in archive item!
started_hashing = time.monotonic()

View file

@ -696,7 +696,7 @@ class CreateMixIn:
- 'd' = directory
- 'b' = block device
- 'c' = char device
- 'h' = regular file, hardlink (to already seen inodes)
- 'h' = regular file, hard link (to already seen inodes)
- 's' = symlink
- 'f' = fifo

View file

@ -140,8 +140,8 @@ class TarMixIn:
tarinfo.uname = item.get("user", "")
tarinfo.gname = item.get("group", "")
# The linkname in tar has 2 uses:
# for symlinks it means the destination, while for hardlinks it refers to the file.
# Since hardlinks in tar have a different type code (LNKTYPE) the format might
# for symlinks it means the destination, while for hard links it refers to the file.
# Since hard links in tar have a different type code (LNKTYPE) the format might
# support hardlinking arbitrary objects (including symlinks and directories), but
# whether implementations actually support that is a whole different question...
tarinfo.linkname = ""
@ -152,7 +152,7 @@ class TarMixIn:
if "hlid" in item:
linkname = hlm.retrieve(id=item.hlid)
if linkname is not None:
# the first hardlink was already added to the archive, add a tar-hardlink reference to it.
# the first hard link was already added to the archive, add a tar hard link reference to it.
tarinfo.type = tarfile.LNKTYPE
tarinfo.linkname = linkname
else:
@ -309,7 +309,7 @@ class TarMixIn:
elif tarinfo.issym():
status = tfo.process_symlink(tarinfo=tarinfo, status="s", type=stat.S_IFLNK)
elif tarinfo.islnk():
# tar uses a hardlink model like: the first instance of a hardlink is stored as a regular file,
# tar uses a hard link model like: the first instance of a hard link is stored as a regular file,
# later instances are special entries referencing back to the first instance.
status = tfo.process_hardlink(tarinfo=tarinfo, status="h", type=stat.S_IFREG)
elif tarinfo.isblk():

View file

@ -422,7 +422,7 @@ class FuseBackend:
else:
inode = item_inode
self._items[inode] = item
# remember extracted item path, so that following hardlinks don't extract twice.
# remember extracted item path, so that following hard links don't extract twice.
hlm.remember(id=item.hlid, info=path)
else:
inode = item_inode

View file

@ -320,7 +320,7 @@ def to_sanitized_path(path):
class HardLinkManager:
"""
Manage hardlinks (and avoid code duplication doing so).
Manage hard links (and avoid code duplication doing so).
A) When creating a borg2 archive from the filesystem, we have to maintain a mapping like:
(dev, ino) -> (hlid, chunks) # for fs_hl_targets
@ -328,17 +328,18 @@ class HardLinkManager:
B) When extracting a borg2 archive to the filesystem, we have to maintain a mapping like:
hlid -> path
If we encounter the same hlid again later, we hardlink to the path of the already extracted content of same hlid.
If we encounter the same hlid again later, we hard link to the path of the already extracted
content of same hlid.
C) When transferring from a borg1 archive, we need:
path -> chunks_correct # for borg1_hl_targets, chunks_correct must be either from .chunks_healthy or .chunks.
If we encounter a regular file item with source == path later, we reuse chunks_correct
and create the same hlid = hardlink_id_from_path(source).
D) When importing a tar file (simplified 1-pass way for now, not creating borg hardlink items):
D) When importing a tar file (simplified 1-pass way for now, not creating borg hard link items):
path -> chunks
If we encounter a LNK tar entry later with linkname==path, we re-use the chunks and create a regular file item.
For better hardlink support (including the very first hardlink item for each group of same-target hardlinks),
For better hard link support (including the very first hard link item for each group of same-target hard links),
we would need a 2-pass processing, which is not yet implemented.
"""
@ -357,12 +358,12 @@ class HardLinkManager:
return "source" in item and self.borg1_hardlinkable(item.mode)
def hardlink_id_from_path(self, path):
"""compute a hardlink id from a path"""
"""compute a hard link id from a path"""
assert isinstance(path, str)
return hashlib.sha256(path.encode("utf-8", errors="surrogateescape")).digest()
def hardlink_id_from_inode(self, *, ino, dev):
"""compute a hardlink id from an inode"""
"""compute a hard link id from an inode"""
assert isinstance(ino, int)
assert isinstance(dev, int)
return hashlib.sha256(f"{ino}/{dev}".encode()).digest()
@ -414,11 +415,11 @@ def secure_erase(path, *, avoid_collateral_damage):
If avoid_collateral_damage is True, we only secure erase if the total link count is 1,
otherwise we just do a normal "delete" (unlink) without first overwriting it with random.
This avoids other hardlinks pointing to same inode as <path> getting damaged, but might be less secure.
A typical scenario where this is useful are quick "hardlink copies" of bigger directories.
This avoids other hard links pointing to same inode as <path> getting damaged, but might be less secure.
A typical scenario where this is useful is quick "hard link copies" of bigger directories.
If avoid_collateral_damage is False, we always secure erase.
If there are hardlinks pointing to the same inode as <path>, they will contain random garbage afterwards.
If there are hard links pointing to the same inode as <path>, they will contain random garbage afterwards.
"""
path_obj = Path(path)
with path_obj.open("r+b") as fd:
@ -435,7 +436,7 @@ def safe_unlink(path):
Safely unlink (delete) *path*.
If we run out of space while deleting the file, we try truncating it first.
BUT we truncate only if path is the only hardlink referring to this content.
BUT we truncate only if path is the only hard link referring to this content.
Use this when deleting potentially large files when recovering
from a VFS error such as ENOSPC. It can help a full file system
@ -452,9 +453,9 @@ def safe_unlink(path):
# we ran out of space while trying to delete the file.
st = path_obj.stat()
if st.st_nlink > 1:
# rather give up here than cause collateral damage to the other hardlink.
# rather give up here than cause collateral damage to the other hard link.
raise
# no other hardlink! try to recover free space by truncating this file.
# no other hard link! try to recover free space by truncating this file.
try:
# Do not create *path* if it does not exist, open for truncation in r+b mode (=O_RDWR|O_BINARY).
with open(path, "r+b") as fd:

View file

@ -945,7 +945,7 @@ class ItemFormatter(BaseFormatter):
return len(item.get("chunks", []))
def calculate_size(self, item):
# note: does not support hardlink slaves, they will be size 0
# note: does not support hard link slaves, they will be size 0
return item.get_size()
def hash_item(self, hash_function, item):

View file

@ -338,7 +338,7 @@ class LegacyRepository:
if os.path.isfile(config_path):
link_error_msg = (
"Failed to erase old repository config file securely (hardlinks not supported). "
"Failed to erase old repository config file securely (hard links not supported). "
"Old repokey data, if any, might persist on physical storage."
)
try:

View file

@ -39,7 +39,7 @@ KF_ENCRYPTION = "--encryption=keyfile-chacha20-poly1305"
src_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "archiver"))
src_file = "archiver/__init__.py" # relative path of one file in src_dir
requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason="hardlinks not supported")
requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason="hard links not supported")
def exec_cmd(*args, archiver=None, fork=False, exe=None, input=b"", binary_output=False, **kw):

View file

@ -46,7 +46,7 @@ def test_symlink_extract(archivers, request):
@pytest.mark.skipif(
not are_symlinks_supported() or not are_hardlinks_supported() or is_darwin,
reason="symlinks or hardlinks or hardlinked symlinks not supported",
reason="symbolic links or hard links or hard-linked sym-links not supported",
)
def test_hardlinked_symlinks_extract(archivers, request):
archiver = request.getfixturevalue(archivers)
@ -323,7 +323,7 @@ def test_extract_hardlinks_twice(archivers, request):
# if issue #5603 happens, extraction gives rc == 1 (triggering AssertionError) and warnings like:
# input/a/hardlink: link: [Errno 2] No such file or directory: 'input/a/hardlink' -> 'input/a/hardlink'
# input/b/hardlink: link: [Errno 2] No such file or directory: 'input/a/hardlink' -> 'input/b/hardlink'
# otherwise, when fixed, the hardlinks should be there and have a link count of 2
# otherwise, when fixed, the hard links should be there and have a link count of 2
assert os.stat("input/a/hardlink").st_nlink == 2
assert os.stat("input/b/hardlink").st_nlink == 2
@ -690,12 +690,12 @@ def test_extract_continue(archivers, request):
file1_st = os.stat("input/file1")
# simulate a partially extracted file2 (smaller size, archived mtime not yet set)
file2_st = os.stat("input/file2")
# make a hardlink, so it does not free the inode when unlinking input/file2
# make a hard link, so it does not free the inode when unlinking input/file2
os.link("input/file2", "hardlink-to-keep-inode-f2")
os.truncate("input/file2", 123) # -> incorrect size, incorrect mtime
# simulate file3 has not yet been extracted
file3_st = os.stat("input/file3")
# make a hardlink, so it does not free the inode when unlinking input/file3
# make a hard link, so it does not free the inode when unlinking input/file3
os.link("input/file3", "hardlink-to-keep-inode-f3")
os.remove("input/file3")
time.sleep(1) # needed due to timestamp granularity of apple hfs+

View file

@ -113,12 +113,12 @@ def test_fuse(archivers, request):
assert same_ts_ns(sti1.st_ctime * 1e9, sto1.st_ctime * 1e9)
assert same_ts_ns(sti1.st_mtime * 1e9, sto1.st_mtime * 1e9)
if are_hardlinks_supported():
# note: there is another hardlink to this, see below
# note: there is another hard link to this, see below
assert sti1.st_nlink == sto1.st_nlink == 2
# read
with open(in_fn, "rb") as in_f, open(out_fn, "rb") as out_f:
assert in_f.read() == out_f.read()
# hardlink (to 'input/file1')
# hard link (to 'input/file1')
if are_hardlinks_supported():
in_fn = "input/hardlink"
out_fn = os.path.join(mountpoint, "archive", "input", "hardlink")
@ -191,7 +191,7 @@ def test_fuse_versions_view(archivers, request):
hl3 = os.path.join(mountpoint, "input", "hardlink3", "hardlink3.00001")
assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino
assert open(hl3, "rb").read() == b"123456"
# similar again, but exclude the 1st hardlink:
# similar again, but exclude the 1st hard link:
with fuse_mount(archiver, mountpoint, "-o", "versions", "-e", "input/hardlink1"):
if are_hardlinks_supported():
hl2 = os.path.join(mountpoint, "input", "hardlink2", "hardlink2.00001")

View file

@ -59,7 +59,7 @@ def test_recreate_exclude_keep_tagged(archivers, request):
_assert_test_keep_tagged(archiver)
@pytest.mark.skipif(not are_hardlinks_supported(), reason="hardlinks not supported")
@pytest.mark.skipif(not are_hardlinks_supported(), reason="hard links not supported")
def test_recreate_hardlinked_tags(archivers, request): # test for issue #4911
archiver = request.getfixturevalue(archivers)
cmd(archiver, "repo-create", "--encryption=none")
@ -69,11 +69,11 @@ def test_recreate_hardlinked_tags(archivers, request): # test for issue #4911
os.mkdir(os.path.join(archiver.input_path, "subdir")) # to make sure the tag is encountered *after* file1
os.link(
os.path.join(archiver.input_path, "file1"), os.path.join(archiver.input_path, "subdir", CACHE_TAG_NAME)
) # correct tag name, hardlink to file1
) # correct tag name, hard link to file1
cmd(archiver, "create", "test", "input")
# in the "test" archive, we now have, in this order:
# - a regular file item for "file1"
# - a hardlink item for "CACHEDIR.TAG" referring back to file1 for its contents
# - a hard link item for "CACHEDIR.TAG" referring back to file1 for its contents
cmd(archiver, "recreate", "test", "--exclude-caches", "--keep-exclude-tags")
# if issue #4911 is present, the recreate will crash with a KeyError for "input/file1"
@ -113,7 +113,7 @@ def test_recreate_basic(archivers, request):
assert "dir2/file3" not in listing
@pytest.mark.skipif(not are_hardlinks_supported(), reason="hardlinks not supported")
@pytest.mark.skipif(not are_hardlinks_supported(), reason="hard links not supported")
def test_recreate_subtree_hardlinks(archivers, request):
archiver = request.getfixturevalue(archivers)
# This is essentially the same problem set as in test_extract_hardlinks

View file

@ -105,7 +105,7 @@ def test_extract_hardlinks_tar(archivers, request):
def test_import_tar(archivers, request, tar_format="PAX"):
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path, create_hardlinks=False) # hardlinks become separate files
create_test_files(archiver.input_path, create_hardlinks=False) # hard links become separate files
os.unlink("input/flagfile")
cmd(archiver, "repo-create", "--encryption=none")
cmd(archiver, "create", "src", "input")
@ -148,7 +148,7 @@ def test_import_tar_with_dotdot(archivers, request):
@requires_gzip
def test_import_tar_gz(archivers, request, tar_format="GNU"):
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path, create_hardlinks=False) # hardlinks become separate files
create_test_files(archiver.input_path, create_hardlinks=False) # hard links become separate files
os.unlink("input/flagfile")
cmd(archiver, "repo-create", "--encryption=none")
cmd(archiver, "create", "src", "input")
@ -162,7 +162,7 @@ def test_import_tar_gz(archivers, request, tar_format="GNU"):
@requires_gnutar
def test_import_concatenated_tar_with_ignore_zeros(archivers, request):
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path, create_hardlinks=False) # hardlinks become separate files
create_test_files(archiver.input_path, create_hardlinks=False) # hard links become separate files
os.unlink("input/flagfile")
with changedir("input"):
subprocess.check_call(["tar", "cf", "file1.tar", "file1"])
@ -191,7 +191,7 @@ def test_import_concatenated_tar_with_ignore_zeros(archivers, request):
@requires_gnutar
def test_import_concatenated_tar_without_ignore_zeros(archivers, request):
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path, create_hardlinks=False) # hardlinks become separate files
create_test_files(archiver.input_path, create_hardlinks=False) # hard links become separate files
os.unlink("input/flagfile")
with changedir("input"):

View file

@ -119,24 +119,24 @@ def test_transfer_upgrade(archivers, request, monkeypatch):
if key in e:
e[key] = convert_tz(e[key], repo12_tzoffset, None)
# borg 1 used hardlink slaves linking back to their hardlink masters.
# borg 2 uses symmetric approach: just normal items. if they are hardlinks,
# borg 1 used hard link slaves linking back to their hard link masters.
# borg 2 uses a symmetric approach: just normal items. if they are hard links,
# each item has normal attributes, including the chunks list, size. additionally,
# they have a hlid and same hlid means same inode / belonging to same set of hardlinks.
# they have a hlid and same hlid means same inode / belonging to same set of hard links.
hardlink = bool(g.get("hlid")) # note: json has "" as hlid if there is no hlid in the item
if hardlink:
hardlinks[g["path"]] = g["hlid"]
if e["mode"].startswith("h"):
# fix expectations: borg1 signalled a hardlink slave with "h"
# borg2 treats all hardlinks symmetrically as normal files
# fix expectations: borg1 signalled a hard link slave with "h"
# borg2 treats all hard links symmetrically as normal files
e["mode"] = g["mode"][0] + e["mode"][1:]
# borg1 used source/linktarget to link back to hardlink master
# borg1 used source/linktarget to link back to hard link master
assert e["source"] != ""
assert e["linktarget"] != ""
# fix expectations: borg2 does not use source/linktarget any more for hardlinks
# fix expectations: borg2 does not use source/linktarget any more for hard links
e["source"] = ""
e["linktarget"] = ""
# borg 1 has size == 0 for hardlink slaves, borg 2 has the real file size
# borg 1 has size == 0 for hard link slaves, borg 2 has the real file size
assert e["size"] == 0
assert g["size"] >= 0
# fix expectation for size
@ -160,7 +160,7 @@ def test_transfer_upgrade(archivers, request, monkeypatch):
assert g == e
if name == "archive1":
# hardlinks referring to same inode have same hlid
# hard links referring to same inode have same hlid
assert hardlinks["tmp/borgtest/hardlink1"] == hardlinks["tmp/borgtest/hardlink2"]
repo_path = f"{original_location}2"

View file

@ -237,7 +237,7 @@ def test_dash_open():
assert dash_open("-", "wb") is sys.stdout.buffer
@pytest.mark.skipif(not are_hardlinks_supported(), reason="hardlinks not supported")
@pytest.mark.skipif(not are_hardlinks_supported(), reason="hard links not supported")
def test_safe_unlink_is_safe(tmpdir):
contents = b"Hello, world\n"
victim = tmpdir / "victim"
@ -250,7 +250,7 @@ def test_safe_unlink_is_safe(tmpdir):
assert victim.read_binary() == contents
@pytest.mark.skipif(not are_hardlinks_supported(), reason="hardlinks not supported")
@pytest.mark.skipif(not are_hardlinks_supported(), reason="hard links not supported")
def test_safe_unlink_is_safe_ENOSPC(tmpdir, monkeypatch):
contents = b"Hello, world\n"
victim = tmpdir / "victim"

View file

@ -90,7 +90,7 @@ class UpgraderFrom12To20:
item.chunks = chunks
for chunk_id, chunk_size in chunks:
self.cache.reuse_chunk(chunk_id, chunk_size, self.archive.stats)
del item.source # not used for hardlinks any more, replaced by hlid
del item.source # not used for hard links anymore, replaced by hlid
# make sure we only have desired stuff in the new item. specifically, make sure to get rid of:
# - 'acl' remnants of bug in attic <= 0.13
# - 'hardlink_master' (superseded by hlid)