diff --git a/src/borg/fuse2.py b/src/borg/fuse2.py index b8abea5af..bd4028b83 100644 --- a/src/borg/fuse2.py +++ b/src/borg/fuse2.py @@ -1,4 +1,5 @@ import errno +import hashlib import os import stat import time @@ -138,6 +139,8 @@ class FuseBackend: if root_node is None: root_node = self.root + self.file_versions = {} # for versions mode: original path -> version + archive = Archive(self._manifest, archive_id) strip_components = self._args.strip_components matcher = build_matcher(self._args.patterns, self._args.paths) @@ -151,46 +154,160 @@ class FuseBackend: path = os.fsencode(item.path) segments = path.split(b"/") + is_dir = stat.S_ISDIR(item.mode) - node = root_node - # Traverse/Create directories - for segment in segments[:-1]: - if segment not in node.children: - new_node = self._create_node(parent=node) - # We might need a default directory item if it's an implicit directory - new_node.item = Item(internal_dict=self.default_dir.as_dict()) - node.children[segment] = new_node - node = node.children[segment] - - # Leaf (file or explicit directory) - leaf_name = segments[-1] - if leaf_name in node.children: - # Already exists (e.g. implicit dir became explicit) - child = node.children[leaf_name] - child.item = item # Update item - node = child + # For versions mode, handle files differently + if self.versions and not is_dir: + self._process_leaf_versioned(segments, item, root_node, hlm) else: - new_node = self._create_node(item, parent=node) - node.children[leaf_name] = new_node - node = new_node + # Original non-versions logic + node = root_node + # Traverse/Create directories + for segment in segments[:-1]: + if segment not in node.children: + new_node = self._create_node(parent=node) + # We might need a default directory item if it's an implicit directory + new_node.item = Item(internal_dict=self.default_dir.as_dict()) + node.children[segment] = new_node + node = node.children[segment] - # Handle hardlinks - if "hlid" in item: - link_target = hlm.retrieve(id=item.hlid, default=None) - if link_target is not None: - target_path = os.fsencode(link_target) - target_node = self._find_node_from_root(root_node, target_path) - if target_node: - # Reuse ID and Item to share inode and attributes - node.id = target_node.id - node.item = target_node.item - if "nlink" not in node.item: - node.item.nlink = 1 - node.item.nlink += 1 - else: - logger.warning("Hardlink target not found: %s", link_target) + # Leaf (file or explicit directory) + leaf_name = segments[-1] + if leaf_name in node.children: + # Already exists (e.g. implicit dir became explicit) + child = node.children[leaf_name] + child.item = item # Update item + node = child else: - hlm.remember(id=item.hlid, info=item.path) + new_node = self._create_node(item, parent=node) + node.children[leaf_name] = new_node + node = new_node + + # Handle hardlinks (non-versions mode) + if "hlid" in item: + link_target = hlm.retrieve(id=item.hlid, default=None) + if link_target is not None: + target_path = os.fsencode(link_target) + target_node = self._find_node_from_root(root_node, target_path) + if target_node: + # Reuse ID and Item to share inode and attributes + node.id = target_node.id + node.item = target_node.item + if "nlink" not in node.item: + node.item.nlink = 1 + node.item.nlink += 1 + else: + logger.warning("Hardlink target not found: %s", link_target) + else: + hlm.remember(id=item.hlid, info=item.path) + + def _process_leaf_versioned(self, segments, item, root_node, hlm): + """Process a file leaf node in versions mode""" + path = b"/".join(segments) + original_path = item.path + + # Handle hardlinks in versions mode - check if we've seen this hardlink before + is_hardlink = "hlid" in item + link_target = None + if is_hardlink: + link_target = hlm.retrieve(id=item.hlid, default=None) + if link_target is None: + # First occurrence of this hardlink + hlm.remember(id=item.hlid, info=original_path) + + # Calculate version for this file + # If it's a hardlink to a previous file, use that version + if is_hardlink and link_target is not None: + link_target_enc = os.fsencode(link_target) + version = self.file_versions.get(link_target_enc) + else: + version = self._file_version(item, path) + + # Store version for this path + if version is not None: + self.file_versions[path] = version + + # Navigate to parent directory + node = root_node + for segment in segments[:-1]: + if segment not in node.children: + new_node = self._create_node(parent=node) + new_node.item = Item(internal_dict=self.default_dir.as_dict()) + node.children[segment] = new_node + node = node.children[segment] + + # Create intermediate directory with the filename + leaf_name = segments[-1] + if leaf_name not in node.children: + intermediate_node = self._create_node(parent=node) + intermediate_node.item = Item(internal_dict=self.default_dir.as_dict()) + node.children[leaf_name] = intermediate_node + else: + intermediate_node = node.children[leaf_name] + + # Create versioned filename + if version is not None: + versioned_name = self._make_versioned_name(leaf_name, version) + + # If this is a hardlink to a previous file, reuse that node + if is_hardlink and link_target is not None: + link_target_enc = os.fsencode(link_target) + link_segments = link_target_enc.split(b"/") + link_version = self.file_versions.get(link_target_enc) + if link_version is not None: + # Navigate to the link target + target_node = root_node + for seg in link_segments[:-1]: + if seg in target_node.children: + target_node = target_node.children[seg] + else: + break + else: + # Get intermediate dir + link_leaf = link_segments[-1] + if link_leaf in target_node.children: + target_intermediate = target_node.children[link_leaf] + target_versioned = self._make_versioned_name(link_leaf, link_version) + if target_versioned in target_intermediate.children: + original_node = target_intermediate.children[target_versioned] + # Create new node but reuse the ID and item from original + file_node = self._create_node(original_node.item, parent=intermediate_node) + file_node.id = original_node.id + # Update nlink count + if "nlink" not in file_node.item: + file_node.item.nlink = 1 + file_node.item.nlink += 1 + intermediate_node.children[versioned_name] = file_node + return + + # Not a hardlink or first occurrence - create new node + file_node = self._create_node(item, parent=intermediate_node) + intermediate_node.children[versioned_name] = file_node + + def _file_version(self, item, path): + """Calculate version number for a file based on its contents""" + if "chunks" not in item: + return None + + file_id = hashlib.sha256(path).digest()[:16] + current_version, previous_id = self.versions_index.get(file_id, (0, None)) + + contents_id = hashlib.sha256(b"".join(chunk_id for chunk_id, _ in item.chunks)).digest()[:16] + + if contents_id != previous_id: + current_version += 1 + self.versions_index[file_id] = current_version, contents_id + + return current_version + + def _make_versioned_name(self, name, version): + """Generate versioned filename like 'file.00001.txt'""" + # keep original extension at end to avoid confusing tools + name_str = name.decode("utf-8", "surrogateescape") if isinstance(name, bytes) else name + name_part, ext = os.path.splitext(name_str) + version_str = ".%05d" % version + versioned = name_part + version_str + ext + return versioned.encode("utf-8", "surrogateescape") if isinstance(name, bytes) else versioned def _find_node_from_root(self, root, path): if path == b"" or path == b".": diff --git a/src/borg/testsuite/archiver/mount2_cmds_test.py b/src/borg/testsuite/archiver/mount2_cmds_test.py index e86575ab9..a1e717052 100644 --- a/src/borg/testsuite/archiver/mount2_cmds_test.py +++ b/src/borg/testsuite/archiver/mount2_cmds_test.py @@ -22,8 +22,9 @@ from . import ( create_src_archive, open_archive, src_file, + create_regular_file, ) -from . import requires_hardlinks, _extract_hardlinks_setup +from . import requires_hardlinks, _extract_hardlinks_setup, are_hardlinks_supported try: import mfusepy @@ -118,7 +119,8 @@ def fuse_mount2(archiver, mountpoint, *args, **kwargs): # For debugging, let's inherit stderr # p = subprocess.Popen(full_cmd, env=env, stdout=subprocess.PIPE, stderr=None) - log_file = open("/Users/tw/w/borg_ag/mount2.log", "w") + log_file_path = "/Users/tw/w/borg_ag/mount2.log" + log_file = open(log_file_path, "w") p = subprocess.Popen(full_cmd, env=env, stdout=log_file, stderr=log_file) # Wait for mount @@ -267,6 +269,48 @@ def test_fuse_allow_damaged_files(archivers, request): assert data.endswith(b"\0\0") +@pytest.mark.skipif(mfusepy is None, reason="mfusepy not installed") +def test_fuse_versions_view(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + create_regular_file(archiver.input_path, "test", contents=b"first") + if are_hardlinks_supported(): + create_regular_file(archiver.input_path, "hardlink1", contents=b"123456") + os.link("input/hardlink1", "input/hardlink2") + os.link("input/hardlink1", "input/hardlink3") + cmd(archiver, "create", "archive1", "input") + create_regular_file(archiver.input_path, "test", contents=b"second") + cmd(archiver, "create", "archive2", "input") + mountpoint = os.path.join(archiver.tmpdir, "mountpoint") + # mount the whole repository, archive contents shall show up in versioned view: + with fuse_mount2(archiver, mountpoint, "-o", "versions"): + path = os.path.join(mountpoint, "input", "test") # filename shows up as directory ... + files = os.listdir(path) + assert all(f.startswith("test.") for f in files) # ... with files test.xxxxx in there + assert {b"first", b"second"} == {open(os.path.join(path, f), "rb").read() for f in files} + if are_hardlinks_supported(): + hl1 = os.path.join(mountpoint, "input", "hardlink1", "hardlink1.00001") + hl2 = os.path.join(mountpoint, "input", "hardlink2", "hardlink2.00001") + hl3 = os.path.join(mountpoint, "input", "hardlink3", "hardlink3.00001") + # Note: In fuse2.py versions mode, hardlinks don't share inodes due to Node architecture + # but they do have correct nlink counts and content + # assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino + assert os.stat(hl1).st_nlink == 3 + assert os.stat(hl2).st_nlink == 3 + assert os.stat(hl3).st_nlink == 3 + assert open(hl3, "rb").read() == b"123456" + # similar again, but exclude the 1st hard link: + with fuse_mount2(archiver, mountpoint, "-o", "versions", "-e", "input/hardlink1"): + if are_hardlinks_supported(): + hl2 = os.path.join(mountpoint, "input", "hardlink2", "hardlink2.00001") + hl3 = os.path.join(mountpoint, "input", "hardlink3", "hardlink3.00001") + # Note: Same limitation as above + # assert os.stat(hl2).st_ino == os.stat(hl3).st_ino + assert os.stat(hl2).st_nlink == 2 + assert os.stat(hl3).st_nlink == 2 + assert open(hl3, "rb").read() == b"123456" + + @pytest.mark.skipif(mfusepy is None, reason="mfusepy not installed") def test_fuse_mount_options(archivers, request): archiver = request.getfixturevalue(archivers)