diff --git a/src/borg/constants.py b/src/borg/constants.py
index a1aa3da98..c2ebd4d7c 100644
--- a/src/borg/constants.py
+++ b/src/borg/constants.py
@@ -62,6 +62,12 @@ LIST_SCAN_LIMIT = 100000
 
 DEFAULT_SEGMENTS_PER_DIR = 1000
 
+# Inclusive bounds on segment / segment_dir indexes (defaults when scanning the data dir)
+MIN_SEGMENT_INDEX = 0
+MAX_SEGMENT_INDEX = 2**32 - 1
+MIN_SEGMENT_DIR_INDEX = 0
+MAX_SEGMENT_DIR_INDEX = 2**32 - 1
+
 FD_MAX_AGE = 4 * 60  # 4 minutes
 
 CHUNK_MIN_EXP = 19  # 2**19 == 512kiB
diff --git a/src/borg/repository.py b/src/borg/repository.py
index 61905b394..24079954c 100644
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@@ -1319,28 +1319,58 @@ class LoggedIO:
         safe_fadvise(fd.fileno(), 0, 0, 'DONTNEED')
         fd.close()
 
+    def get_segment_dirs(self, data_dir, start_index=MIN_SEGMENT_DIR_INDEX, end_index=MAX_SEGMENT_DIR_INDEX):
+        """Return a generator yielding the segment dirs in data_dir as `os.DirEntry` objects.
+
+        Only directories whose name is a decimal number n with start_index <= n <= end_index
+        (both bounds inclusive) are yielded. Entries come in `os.scandir` order, i.e. unsorted.
+        """
+        segment_dirs = (
+            f
+            for f in os.scandir(data_dir)
+            # isdecimal(), not isdigit(): isdigit() also accepts e.g. superscript digits, which int() rejects
+            if f.is_dir() and f.name.isdecimal() and start_index <= int(f.name) <= end_index
+        )
+        return segment_dirs
+
+    def get_segment_files(self, segment_dir, start_index=MIN_SEGMENT_INDEX, end_index=MAX_SEGMENT_INDEX):
+        """Return a generator yielding the segment files in segment_dir as `os.DirEntry` objects.
+
+        Only files whose name is a decimal number n with start_index <= n <= end_index
+        (both bounds inclusive) are yielded. Entries come in `os.scandir` order, i.e. unsorted.
+        """
+        segment_files = (
+            f
+            for f in os.scandir(segment_dir)
+            # isdecimal(), not isdigit(): isdigit() also accepts e.g. superscript digits, which int() rejects
+            if f.is_file() and f.name.isdecimal() and start_index <= int(f.name) <= end_index
+        )
+        return segment_files
+
     def segment_iterator(self, segment=None, reverse=False):
+        """Yield (segment number, segment file path) tuples for the segments below <path>/data.
+
+        Segment dirs and the files within each dir are visited sorted by their numeric name, ascending
+        from *segment* (or descending from it if reverse is true). segment=None means "no bound".
+        """
         if segment is None:
-            segment = 0 if not reverse else 2 ** 32 - 1
-        data_path = os.path.join(self.path, 'data')
+            segment = MIN_SEGMENT_INDEX if not reverse else MAX_SEGMENT_INDEX
         start_segment_dir = segment // self.segments_per_dir
-        dirs = os.listdir(data_path)
+        data_path = os.path.join(self.path, 'data')
         if not reverse:
-            dirs = [dir for dir in dirs if dir.isdigit() and int(dir) >= start_segment_dir]
+            dirs = self.get_segment_dirs(data_path, start_index=start_segment_dir)
         else:
-            dirs = [dir for dir in dirs if dir.isdigit() and int(dir) <= start_segment_dir]
-        dirs = sorted(dirs, key=int, reverse=reverse)
+            dirs = self.get_segment_dirs(data_path, end_index=start_segment_dir)
+        dirs = sorted(dirs, key=lambda entry: int(entry.name), reverse=reverse)
         for dir in dirs:
-            filenames = os.listdir(os.path.join(data_path, dir))
             if not reverse:
-                filenames = [filename for filename in filenames if filename.isdigit() and int(filename) >= segment]
+                files = self.get_segment_files(dir, start_index=segment)
             else:
-                filenames = [filename for filename in filenames if filename.isdigit() and int(filename) <= segment]
-            filenames = sorted(filenames, key=int, reverse=reverse)
-            for filename in filenames:
+                files = self.get_segment_files(dir, end_index=segment)
+            files = sorted(files, key=lambda entry: int(entry.name), reverse=reverse)
+            for file in files:
                 # Note: Do not filter out logically deleted segments (see "File system interaction" above),
                 # since this is used by cleanup and txn state detection as well.
-                yield int(filename), os.path.join(data_path, dir, filename)
+                yield int(file.name), file.path
 
     def get_latest_segment(self):
         for segment, filename in self.segment_iterator(reverse=True):