fix race condition with data loss potential, fixes #3536

if we detect the conditions for this (rare) race,
abort reading the file and retry.

The caller (_process_any) will retry up to MAX_RETRIES times
before giving up. If it gives up, a warning is logged,
the file is not written to the archive, and it won't
be memorized in the files cache either.

Thus, the file will be read/chunked/hashed again at
the next borg create run.
This commit is contained in:
Thomas Waldmann 2024-09-20 13:58:42 +02:00
parent c100e7b1f5
commit b60378cf0e
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
2 changed files with 21 additions and 4 deletions

View file

@ -1376,6 +1376,7 @@ class FilesystemObjectProcessors:
# Only chunkify the file if needed
changed_while_backup = False
if "chunks" not in item:
start_reading = time.time_ns()
with backup_io("read"):
self.process_file_chunks(
item,
@ -1385,13 +1386,25 @@ class FilesystemObjectProcessors:
backup_io_iter(self.chunker.chunkify(None, fd)),
)
self.stats.chunking_time = self.chunker.chunking_time
end_reading = time.time_ns()
if not is_win32: # TODO for win32
with backup_io("fstat2"):
st2 = os.fstat(fd)
# special files:
# - fifos change naturally, because they are fed from the other side. no problem.
# - blk/chr devices don't change ctime anyway.
changed_while_backup = not is_special_file and st.st_ctime_ns != st2.st_ctime_ns
if is_special_file:
# special files:
# - fifos change naturally, because they are fed from the other side. no problem.
# - blk/chr devices don't change ctime anyway.
pass
elif st.st_ctime_ns != st2.st_ctime_ns:
# ctime was changed, this is either a metadata or a data change.
changed_while_backup = True
elif start_reading - TIME_DIFFERS1_NS < st2.st_ctime_ns < end_reading + TIME_DIFFERS1_NS:
# this handles a very special race condition, see #3536:
# - the file was changed right before st.ctime was determined.
# - then, shortly afterwards, but already while we were reading the file, the
# file was changed again, but st2.ctime is the same due to ctime granularity.
# as the file ctime is compared to the local clock here, widen the interval by TIME_DIFFERS1_NS.
changed_while_backup = True
if changed_while_backup:
# regular file changed while we backed it up, might be inconsistent/corrupt!
if last_try:

View file

@ -113,6 +113,10 @@ CH_DATA, CH_ALLOC, CH_HOLE = 0, 1, 2
# files cache mode that the "borg create" command (CLI UI) uses by default
FILES_CACHE_MODE_UI_DEFAULT = "ctime,size,inode"
# disables the files cache - most borg commands do not use it at all
FILES_CACHE_MODE_DISABLED = "d"

# tolerance in nanoseconds (20 ms) for timestamp comparisons: it accounts for
# clocks being slightly out-of-sync and for timestamp granularity.
# it can't be raised much (like e.g. to 2s) without causing issues.
TIME_DIFFERS1_NS = 20_000_000

# return codes returned by borg command
# everything done, no problems
EXIT_SUCCESS = 0
# reached normal end of operation, but there were issues (generic warning)
EXIT_WARNING = 1