extract --continue: optimize processing of already existing dirs

If an already existing fs directory has the correct (as archived) mtime,
we have already extracted it in a previous borg extract run, so we do not
need to (and should not) call restore_attrs for it again.

If the directory exists, but does not have the correct mtime, restore_attrs
will be called and the directory's attributes will be restored (and its
mtime set to the correct value).
This commit is contained in:
Thomas Waldmann 2026-02-08 11:37:38 +01:00
parent 98d189d088
commit b85ad47fda
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
2 changed files with 46 additions and 24 deletions

View file

@ -782,12 +782,17 @@ Duration: {0.duration}
def same_item(item, st):
"""Is the archived item the same as the filesystem item at the same path with stat st?"""
if not stat.S_ISREG(st.st_mode):
# we only "optimize" for regular files.
is_file = stat.S_ISREG(st.st_mode)
is_dir = stat.S_ISDIR(st.st_mode)
if not (is_file or is_dir):
# we only "optimize" for regular files and directories.
# other file types are less frequent and have no content extraction we could "optimize away".
return False
if item.mode != st.st_mode or item.size != st.st_size:
# the size check catches incomplete previous file extraction
if item.mode != st.st_mode:
# we want to extract a different type of file than what is present in the filesystem.
return False
if is_file and item.size != st.st_size:
# the size check catches incomplete previous regular file extraction
return False
if item.get("mtime") != st.st_mtime_ns:
# note: mtime is "extracted" late, after xattrs and ACLs, but before flags.

View file

@ -1,5 +1,6 @@
import errno
import os
from pathlib import Path
import shutil
import stat
from unittest.mock import patch
@ -707,51 +708,67 @@ def test_extract_continue(archivers, request):
archiver = request.getfixturevalue(archivers)
CONTENTS1, CONTENTS2, CONTENTS3 = b"contents1" * 100, b"contents2" * 200, b"contents3" * 300
cmd(archiver, "repo-create", RK_ENCRYPTION)
create_regular_file(archiver.input_path, "file1", contents=CONTENTS1)
create_regular_file(archiver.input_path, "file2", contents=CONTENTS2)
create_regular_file(archiver.input_path, "file3", contents=CONTENTS3)
create_regular_file(archiver.input_path, "dir1/file1", contents=CONTENTS1)
create_regular_file(archiver.input_path, "dir2/file2", contents=CONTENTS2)
create_regular_file(archiver.input_path, "dir3/file3", contents=CONTENTS3)
cmd(archiver, "create", "arch", "input")
granularity_sleep()
with changedir("output"):
# we simulate an interrupted/partial extraction:
cmd(archiver, "extract", "arch")
# do not modify file1, it stands for a successfully extracted file
file1_st = os.stat("input/file1")
# do not modify dir1 and file1, they stand for a successfully extracted directory and file
dir1_st = os.stat("input/dir1")
file1_st = os.stat("input/dir1/file1")
# simulate a partially extracted dir2 (wrong mtime)
# simulate a partially extracted file2 (smaller size, archived mtime not yet set)
file2_st = os.stat("input/file2")
dir2_st = os.stat("input/dir2")
file2_st = os.stat("input/dir2/file2")
# make a hard link, so it does not free the inode when unlinking input/file2
os.link("input/file2", "hardlink-to-keep-inode-f2")
os.truncate("input/file2", 123) # -> incorrect size, incorrect mtime
# simulate file3 has not yet been extracted
file3_st = os.stat("input/file3")
os.link("input/dir2/file2", "hardlink-to-keep-inode-f2")
os.truncate("input/dir2/file2", 123) # -> incorrect size, incorrect mtime
Path("input/dir2").touch() # -> mtime "incorrect" (not as archived)
# simulate dir3 and file3 have not yet been extracted
dir3_st = os.stat("input/dir3")
file3_st = os.stat("input/dir3/file3")
# make a hard link, so it does not free the inode when unlinking input/file3
os.link("input/file3", "hardlink-to-keep-inode-f3")
os.remove("input/file3")
os.link("input/dir3/file3", "hardlink-to-keep-inode-f3")
os.remove("input/dir3/file3")
os.rmdir("input/dir3")
granularity_sleep()
with changedir("output"):
# now try to continue extracting, using the same archive, same output dir:
cmd(archiver, "extract", "arch", "--continue")
now_file1_st = os.stat("input/file1")
now_dir1_st = os.stat("input/dir1")
now_file1_st = os.stat("input/dir1/file1")
assert dir1_st.st_ino == now_dir1_st.st_ino # dir1 was NOT extracted again
assert dir1_st.st_mtime_ns == now_dir1_st.st_mtime_ns # dir1 has correct mtime
assert file1_st.st_ino == now_file1_st.st_ino # file1 was NOT extracted again
assert file1_st.st_mtime_ns == now_file1_st.st_mtime_ns # has correct mtime
new_file2_st = os.stat("input/file2")
now_dir2_st = os.stat("input/dir2")
new_file2_st = os.stat("input/dir2/file2")
assert dir2_st.st_ino == now_dir2_st.st_ino # dir2 was not removed/recreated
assert dir2_st.st_mtime_ns == now_dir2_st.st_mtime_ns # dir2 mtime was fixed
assert file2_st.st_ino != new_file2_st.st_ino # file2 was extracted again
assert file2_st.st_mtime_ns == new_file2_st.st_mtime_ns # has correct mtime
new_file3_st = os.stat("input/file3")
assert file3_st.st_ino != new_file3_st.st_ino # file3 was extracted again
assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns # has correct mtime
new_dir3_st = os.stat("input/dir3")
new_file3_st = os.stat("input/dir3/file3")
assert dir3_st.st_mtime_ns == new_dir3_st.st_mtime_ns # dir3 was extracted again
assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns # file3 was extracted again
# windows has a strange ctime behaviour when deleting and recreating a file
if not is_win32:
assert file1_st.st_ctime_ns == now_file1_st.st_ctime_ns # file not extracted again
assert file2_st.st_ctime_ns != new_file2_st.st_ctime_ns # file extracted again
assert file3_st.st_ctime_ns != new_file3_st.st_ctime_ns # file extracted again
# check if all contents (and thus also file sizes) are correct:
with open("input/file1", "rb") as f:
with open("input/dir1/file1", "rb") as f:
assert f.read() == CONTENTS1
with open("input/file2", "rb") as f:
with open("input/dir2/file2", "rb") as f:
assert f.read() == CONTENTS2
with open("input/file3", "rb") as f:
with open("input/dir3/file3", "rb") as f:
assert f.read() == CONTENTS3