From 98d189d088df501d8533c992dd43aca6e0518509 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 8 Feb 2026 08:12:59 +0100 Subject: [PATCH 1/2] extract: do not delete existing directory if possible, fixes #4233 A pre-existing directory might be a btrfs subvolume that was created by the user ahead of time when restoring several nested subvolumes from a single archive. If the archive item to be extracted is a directory and there is already a directory at the destination path, do not remove (and recreate) it, but just use it. That way, btrfs subvolumes (which look like directories) are not deleted. Fix originally contributed by @intelfx in #7866, but needed more work, so I thought more about the implications and added a test. Note: In the past, we first removed (empty) directories, then created a fresh one, then called restore_attrs for that. That produced correct metadata, but only for the case of an EMPTY exisiting directory. If the existing directory was not empty, the simply os.rmdir we tried did not work anyway and did not remove the existing directory. Usually we extract to an empty base directory, thus encountering this edge case is mostly limited to continuing a previous extraction. In that case, calling restore_attrs again on a directory that already has existing attrs should be harmless, because they are identical. --- src/borg/archive.py | 18 +++++++++++++++--- .../testsuite/archiver/extract_cmd_test.py | 17 +++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 11deb8bd8..6839a1465 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -831,10 +831,16 @@ Duration: {0.duration} st = os.stat(path, follow_symlinks=False) if continue_extraction and same_item(item, st): return # done! we already have fully extracted this file in a previous run. - elif stat.S_ISDIR(st.st_mode): - os.rmdir(path) - else: + if not stat.S_ISDIR(st.st_mode): os.unlink(path) + elif stat.S_ISDIR(item.mode): + # if we have an existing directory and we want to extract a directory, + # we just use the existing one and do not remove it. + # This fixes the issue that the existing directory might be a BTRFS subvolume. + # If we removed it, we would lose the subvolume, see #4233. + pass + else: + os.rmdir(path) # only works for empty directories except UnicodeEncodeError: raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None except OSError: @@ -883,6 +889,12 @@ Duration: {0.duration} if not os.path.exists(path): os.mkdir(path) if restore_attrs: + # note: if we did not create the directory freshly, existing attributes + # might get mixed up with the archived attributes. this is acceptable, + # considering we usually extract into an empty base directory. + # when continuing an extraction, the existing attributes and the archived + # attributes should be identical anyway. + # Also, we want to avoid #4223 (losing btrfs subvolumes). self.restore_attrs(path, item) elif stat.S_ISLNK(mode): make_parent(path) diff --git a/src/borg/testsuite/archiver/extract_cmd_test.py b/src/borg/testsuite/archiver/extract_cmd_test.py index 5326d2e6d..097b39a29 100644 --- a/src/borg/testsuite/archiver/extract_cmd_test.py +++ b/src/borg/testsuite/archiver/extract_cmd_test.py @@ -791,3 +791,20 @@ def test_extract_file_with_missing_chunk(archivers, request): output = cmd(archiver, "extract", "archive") # TODO: this is a bit dirty still: no warning/error rc, no filename output for the damaged file. assert f"repository object {bin_to_hex(chunk.id)} missing, returning {chunk.size} zero bytes." in output + + +def test_extract_existing_directory(archivers, request): + # if we extract a directory and there is already a directory at that location, + # we should just use the existing directory and not remove/recreate it. + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + os.mkdir("input/dir") + cmd(archiver, "create", "test", "input") + with changedir("output"): + # create pre-existing directory: + os.makedirs("input/dir", exist_ok=True) + st1 = os.stat("input/dir") + # extract + cmd(archiver, "extract", "test") + st2 = os.stat("input/dir") + assert st1.st_ino == st2.st_ino From b85ad47fda590fd26381071ebae25fdfb896b898 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 8 Feb 2026 11:37:38 +0100 Subject: [PATCH 2/2] extract --continue: optimize processing of already existing dirs if an already existing fs directory has the correct (as archived) mtime, we have already extracted it in a previous borg extract run and we do not need and should not call restore_attrs for it again. if the directory exists, but does not have the correct mtime, restore_attrs will be called and its attributes will be extracted (and mtime set to correct value). --- src/borg/archive.py | 13 +++-- .../testsuite/archiver/extract_cmd_test.py | 57 ++++++++++++------- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 6839a1465..0c83fd2f1 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -782,12 +782,17 @@ Duration: {0.duration} def same_item(item, st): """Is the archived item the same as the filesystem item at the same path with stat st?""" - if not stat.S_ISREG(st.st_mode): - # we only "optimize" for regular files. + is_file = stat.S_ISREG(st.st_mode) + is_dir = stat.S_ISDIR(st.st_mode) + if not (is_file or is_dir): + # we only "optimize" for regular files and directories. # other file types are less frequent and have no content extraction we could "optimize away". return False - if item.mode != st.st_mode or item.size != st.st_size: - # the size check catches incomplete previous file extraction + if item.mode != st.st_mode: + # we want to extract a different type of file than what is present in the filesystem. + return False + if is_file and item.size != st.st_size: + # the size check catches incomplete previous regular file extraction return False if item.get("mtime") != st.st_mtime_ns: # note: mtime is "extracted" late, after xattrs and ACLs, but before flags. diff --git a/src/borg/testsuite/archiver/extract_cmd_test.py b/src/borg/testsuite/archiver/extract_cmd_test.py index 097b39a29..fe41c23a0 100644 --- a/src/borg/testsuite/archiver/extract_cmd_test.py +++ b/src/borg/testsuite/archiver/extract_cmd_test.py @@ -1,5 +1,6 @@ import errno import os +from pathlib import Path import shutil import stat from unittest.mock import patch @@ -707,51 +708,67 @@ def test_extract_continue(archivers, request): archiver = request.getfixturevalue(archivers) CONTENTS1, CONTENTS2, CONTENTS3 = b"contents1" * 100, b"contents2" * 200, b"contents3" * 300 cmd(archiver, "repo-create", RK_ENCRYPTION) - create_regular_file(archiver.input_path, "file1", contents=CONTENTS1) - create_regular_file(archiver.input_path, "file2", contents=CONTENTS2) - create_regular_file(archiver.input_path, "file3", contents=CONTENTS3) + create_regular_file(archiver.input_path, "dir1/file1", contents=CONTENTS1) + create_regular_file(archiver.input_path, "dir2/file2", contents=CONTENTS2) + create_regular_file(archiver.input_path, "dir3/file3", contents=CONTENTS3) cmd(archiver, "create", "arch", "input") + granularity_sleep() + with changedir("output"): # we simulate an interrupted/partial extraction: cmd(archiver, "extract", "arch") - # do not modify file1, it stands for a successfully extracted file - file1_st = os.stat("input/file1") + # do not modify dir1 and file1, they stand for a successfully extracted files + dir1_st = os.stat("input/dir1") + file1_st = os.stat("input/dir1/file1") + # simulate a partially extracted dir2 (wrong mtime) # simulate a partially extracted file2 (smaller size, archived mtime not yet set) - file2_st = os.stat("input/file2") + dir2_st = os.stat("input/dir2") + file2_st = os.stat("input/dir2/file2") # make a hard link, so it does not free the inode when unlinking input/file2 - os.link("input/file2", "hardlink-to-keep-inode-f2") - os.truncate("input/file2", 123) # -> incorrect size, incorrect mtime - # simulate file3 has not yet been extracted - file3_st = os.stat("input/file3") + os.link("input/dir2/file2", "hardlink-to-keep-inode-f2") + os.truncate("input/dir2/file2", 123) # -> incorrect size, incorrect mtime + Path("input/dir2").touch() # -> mtime "incorrect" (not as archived) + # simulate dir3 and file3 have not yet been extracted + dir3_st = os.stat("input/dir3") + file3_st = os.stat("input/dir3/file3") # make a hard link, so it does not free the inode when unlinking input/file3 - os.link("input/file3", "hardlink-to-keep-inode-f3") - os.remove("input/file3") + os.link("input/dir3/file3", "hardlink-to-keep-inode-f3") + os.remove("input/dir3/file3") + os.rmdir("input/dir3") + granularity_sleep() with changedir("output"): # now try to continue extracting, using the same archive, same output dir: cmd(archiver, "extract", "arch", "--continue") - now_file1_st = os.stat("input/file1") + now_dir1_st = os.stat("input/dir1") + now_file1_st = os.stat("input/dir1/file1") + assert dir1_st.st_ino == now_dir1_st.st_ino # dir1 was NOT extracted again + assert dir1_st.st_mtime_ns == now_dir1_st.st_mtime_ns # dir1 has correct mtime assert file1_st.st_ino == now_file1_st.st_ino # file1 was NOT extracted again assert file1_st.st_mtime_ns == now_file1_st.st_mtime_ns # has correct mtime - new_file2_st = os.stat("input/file2") + now_dir2_st = os.stat("input/dir2") + new_file2_st = os.stat("input/dir2/file2") + assert dir2_st.st_ino == now_dir2_st.st_ino # dir2 was not removed/recreated + assert dir2_st.st_mtime_ns == now_dir2_st.st_mtime_ns # dir2 mtime was fixed assert file2_st.st_ino != new_file2_st.st_ino # file2 was extracted again assert file2_st.st_mtime_ns == new_file2_st.st_mtime_ns # has correct mtime - new_file3_st = os.stat("input/file3") - assert file3_st.st_ino != new_file3_st.st_ino # file3 was extracted again - assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns # has correct mtime + new_dir3_st = os.stat("input/dir3") + new_file3_st = os.stat("input/dir3/file3") + assert dir3_st.st_mtime_ns == new_dir3_st.st_mtime_ns # dir3 was extracted again + assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns # file3 was extracted again # windows has a strange ctime behaviour when deleting and recreating a file if not is_win32: assert file1_st.st_ctime_ns == now_file1_st.st_ctime_ns # file not extracted again assert file2_st.st_ctime_ns != new_file2_st.st_ctime_ns # file extracted again assert file3_st.st_ctime_ns != new_file3_st.st_ctime_ns # file extracted again # check if all contents (and thus also file sizes) are correct: - with open("input/file1", "rb") as f: + with open("input/dir1/file1", "rb") as f: assert f.read() == CONTENTS1 - with open("input/file2", "rb") as f: + with open("input/dir2/file2", "rb") as f: assert f.read() == CONTENTS2 - with open("input/file3", "rb") as f: + with open("input/dir3/file3", "rb") as f: assert f.read() == CONTENTS3