mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 01:41:57 -04:00
Merge pull request #9288 from ThomasWaldmann/directory-extraction-master
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (Haiku, false, haiku, r1beta5) (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.7) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (Haiku, false, haiku, r1beta5) (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.7) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run
extract: do not delete existing directory if possible, fixes #4233
This commit is contained in:
commit
8da5de3670
2 changed files with 78 additions and 27 deletions
|
|
@ -782,12 +782,17 @@ Duration: {0.duration}
|
|||
|
||||
def same_item(item, st):
|
||||
"""Is the archived item the same as the filesystem item at the same path with stat st?"""
|
||||
if not stat.S_ISREG(st.st_mode):
|
||||
# we only "optimize" for regular files.
|
||||
is_file = stat.S_ISREG(st.st_mode)
|
||||
is_dir = stat.S_ISDIR(st.st_mode)
|
||||
if not (is_file or is_dir):
|
||||
# we only "optimize" for regular files and directories.
|
||||
# other file types are less frequent and have no content extraction we could "optimize away".
|
||||
return False
|
||||
if item.mode != st.st_mode or item.size != st.st_size:
|
||||
# the size check catches incomplete previous file extraction
|
||||
if item.mode != st.st_mode:
|
||||
# we want to extract a different type of file than what is present in the filesystem.
|
||||
return False
|
||||
if is_file and item.size != st.st_size:
|
||||
# the size check catches incomplete previous regular file extraction
|
||||
return False
|
||||
if item.get("mtime") != st.st_mtime_ns:
|
||||
# note: mtime is "extracted" late, after xattrs and ACLs, but before flags.
|
||||
|
|
@ -831,10 +836,16 @@ Duration: {0.duration}
|
|||
st = os.stat(path, follow_symlinks=False)
|
||||
if continue_extraction and same_item(item, st):
|
||||
return # done! we already have fully extracted this file in a previous run.
|
||||
elif stat.S_ISDIR(st.st_mode):
|
||||
os.rmdir(path)
|
||||
else:
|
||||
if not stat.S_ISDIR(st.st_mode):
|
||||
os.unlink(path)
|
||||
elif stat.S_ISDIR(item.mode):
|
||||
# if we have an existing directory and we want to extract a directory,
|
||||
# we just use the existing one and do not remove it.
|
||||
# This fixes the issue that the existing directory might be a BTRFS subvolume.
|
||||
# If we removed it, we would lose the subvolume, see #4233.
|
||||
pass
|
||||
else:
|
||||
os.rmdir(path) # only works for empty directories
|
||||
except UnicodeEncodeError:
|
||||
raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None
|
||||
except OSError:
|
||||
|
|
@ -883,6 +894,12 @@ Duration: {0.duration}
|
|||
if not os.path.exists(path):
|
||||
os.mkdir(path)
|
||||
if restore_attrs:
|
||||
# note: if we did not create the directory freshly, existing attributes
|
||||
# might get mixed up with the archived attributes. this is acceptable,
|
||||
# considering we usually extract into an empty base directory.
|
||||
# when continuing an extraction, the existing attributes and the archived
|
||||
# attributes should be identical anyway.
|
||||
# Also, we want to avoid #4223 (losing btrfs subvolumes).
|
||||
self.restore_attrs(path, item)
|
||||
elif stat.S_ISLNK(mode):
|
||||
make_parent(path)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import errno
|
||||
import os
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
import stat
|
||||
from unittest.mock import patch
|
||||
|
|
@ -707,51 +708,67 @@ def test_extract_continue(archivers, request):
|
|||
archiver = request.getfixturevalue(archivers)
|
||||
CONTENTS1, CONTENTS2, CONTENTS3 = b"contents1" * 100, b"contents2" * 200, b"contents3" * 300
|
||||
cmd(archiver, "repo-create", RK_ENCRYPTION)
|
||||
create_regular_file(archiver.input_path, "file1", contents=CONTENTS1)
|
||||
create_regular_file(archiver.input_path, "file2", contents=CONTENTS2)
|
||||
create_regular_file(archiver.input_path, "file3", contents=CONTENTS3)
|
||||
create_regular_file(archiver.input_path, "dir1/file1", contents=CONTENTS1)
|
||||
create_regular_file(archiver.input_path, "dir2/file2", contents=CONTENTS2)
|
||||
create_regular_file(archiver.input_path, "dir3/file3", contents=CONTENTS3)
|
||||
cmd(archiver, "create", "arch", "input")
|
||||
|
||||
granularity_sleep()
|
||||
|
||||
with changedir("output"):
|
||||
# we simulate an interrupted/partial extraction:
|
||||
cmd(archiver, "extract", "arch")
|
||||
# do not modify file1, it stands for a successfully extracted file
|
||||
file1_st = os.stat("input/file1")
|
||||
# do not modify dir1 and file1, they stand for a successfully extracted files
|
||||
dir1_st = os.stat("input/dir1")
|
||||
file1_st = os.stat("input/dir1/file1")
|
||||
# simulate a partially extracted dir2 (wrong mtime)
|
||||
# simulate a partially extracted file2 (smaller size, archived mtime not yet set)
|
||||
file2_st = os.stat("input/file2")
|
||||
dir2_st = os.stat("input/dir2")
|
||||
file2_st = os.stat("input/dir2/file2")
|
||||
# make a hard link, so it does not free the inode when unlinking input/file2
|
||||
os.link("input/file2", "hardlink-to-keep-inode-f2")
|
||||
os.truncate("input/file2", 123) # -> incorrect size, incorrect mtime
|
||||
# simulate file3 has not yet been extracted
|
||||
file3_st = os.stat("input/file3")
|
||||
os.link("input/dir2/file2", "hardlink-to-keep-inode-f2")
|
||||
os.truncate("input/dir2/file2", 123) # -> incorrect size, incorrect mtime
|
||||
Path("input/dir2").touch() # -> mtime "incorrect" (not as archived)
|
||||
# simulate dir3 and file3 have not yet been extracted
|
||||
dir3_st = os.stat("input/dir3")
|
||||
file3_st = os.stat("input/dir3/file3")
|
||||
# make a hard link, so it does not free the inode when unlinking input/file3
|
||||
os.link("input/file3", "hardlink-to-keep-inode-f3")
|
||||
os.remove("input/file3")
|
||||
os.link("input/dir3/file3", "hardlink-to-keep-inode-f3")
|
||||
os.remove("input/dir3/file3")
|
||||
os.rmdir("input/dir3")
|
||||
|
||||
granularity_sleep()
|
||||
|
||||
with changedir("output"):
|
||||
# now try to continue extracting, using the same archive, same output dir:
|
||||
cmd(archiver, "extract", "arch", "--continue")
|
||||
now_file1_st = os.stat("input/file1")
|
||||
now_dir1_st = os.stat("input/dir1")
|
||||
now_file1_st = os.stat("input/dir1/file1")
|
||||
assert dir1_st.st_ino == now_dir1_st.st_ino # dir1 was NOT extracted again
|
||||
assert dir1_st.st_mtime_ns == now_dir1_st.st_mtime_ns # dir1 has correct mtime
|
||||
assert file1_st.st_ino == now_file1_st.st_ino # file1 was NOT extracted again
|
||||
assert file1_st.st_mtime_ns == now_file1_st.st_mtime_ns # has correct mtime
|
||||
new_file2_st = os.stat("input/file2")
|
||||
now_dir2_st = os.stat("input/dir2")
|
||||
new_file2_st = os.stat("input/dir2/file2")
|
||||
assert dir2_st.st_ino == now_dir2_st.st_ino # dir2 was not removed/recreated
|
||||
assert dir2_st.st_mtime_ns == now_dir2_st.st_mtime_ns # dir2 mtime was fixed
|
||||
assert file2_st.st_ino != new_file2_st.st_ino # file2 was extracted again
|
||||
assert file2_st.st_mtime_ns == new_file2_st.st_mtime_ns # has correct mtime
|
||||
new_file3_st = os.stat("input/file3")
|
||||
assert file3_st.st_ino != new_file3_st.st_ino # file3 was extracted again
|
||||
assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns # has correct mtime
|
||||
new_dir3_st = os.stat("input/dir3")
|
||||
new_file3_st = os.stat("input/dir3/file3")
|
||||
assert dir3_st.st_mtime_ns == new_dir3_st.st_mtime_ns # dir3 was extracted again
|
||||
assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns # file3 was extracted again
|
||||
# windows has a strange ctime behaviour when deleting and recreating a file
|
||||
if not is_win32:
|
||||
assert file1_st.st_ctime_ns == now_file1_st.st_ctime_ns # file not extracted again
|
||||
assert file2_st.st_ctime_ns != new_file2_st.st_ctime_ns # file extracted again
|
||||
assert file3_st.st_ctime_ns != new_file3_st.st_ctime_ns # file extracted again
|
||||
# check if all contents (and thus also file sizes) are correct:
|
||||
with open("input/file1", "rb") as f:
|
||||
with open("input/dir1/file1", "rb") as f:
|
||||
assert f.read() == CONTENTS1
|
||||
with open("input/file2", "rb") as f:
|
||||
with open("input/dir2/file2", "rb") as f:
|
||||
assert f.read() == CONTENTS2
|
||||
with open("input/file3", "rb") as f:
|
||||
with open("input/dir3/file3", "rb") as f:
|
||||
assert f.read() == CONTENTS3
|
||||
|
||||
|
||||
|
|
@ -791,3 +808,20 @@ def test_extract_file_with_missing_chunk(archivers, request):
|
|||
output = cmd(archiver, "extract", "archive")
|
||||
# TODO: this is a bit dirty still: no warning/error rc, no filename output for the damaged file.
|
||||
assert f"repository object {bin_to_hex(chunk.id)} missing, returning {chunk.size} zero bytes." in output
|
||||
|
||||
|
||||
def test_extract_existing_directory(archivers, request):
|
||||
# if we extract a directory and there is already a directory at that location,
|
||||
# we should just use the existing directory and not remove/recreate it.
|
||||
archiver = request.getfixturevalue(archivers)
|
||||
cmd(archiver, "repo-create", RK_ENCRYPTION)
|
||||
os.mkdir("input/dir")
|
||||
cmd(archiver, "create", "test", "input")
|
||||
with changedir("output"):
|
||||
# create pre-existing directory:
|
||||
os.makedirs("input/dir", exist_ok=True)
|
||||
st1 = os.stat("input/dir")
|
||||
# extract
|
||||
cmd(archiver, "extract", "test")
|
||||
st2 = os.stat("input/dir")
|
||||
assert st1.st_ino == st2.st_ino
|
||||
|
|
|
|||
Loading…
Reference in a new issue