diff --git a/borg/archive.py b/borg/archive.py index ab4ddb0d9..1ce93ab5d 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -46,6 +46,11 @@ flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0) flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0) +def is_special(mode): + # file types that get special treatment in --read-special mode + return stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode) + + class BackupOSError(Exception): """ Wrapper for OSError raised while accessing backup files. @@ -589,9 +594,16 @@ Number of files: {0.stats.nfiles}'''.format( return status else: self.hard_links[st.st_ino, st.st_dev] = safe_path - path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape')) + is_special_file = is_special(st.st_mode) + if not is_special_file: + path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape')) + ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode) + else: + # in --read-special mode, we may be called for special files. + # there should be no information in the cache about special files processed in + # read-special mode, but we better play safe as this was wrong in the past: + path_hash = ids = None first_run = not cache.files - ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode) if first_run: logger.debug('Processing files ...') chunks = None @@ -616,10 +628,17 @@ Number of files: {0.stats.nfiles}'''.format( chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats)) if self.show_progress: self.stats.show_progress(item=item, dt=0.2) - cache.memorize_file(path_hash, st, [c[0] for c in chunks]) + if not is_special_file: + # we must not memorize special files, because the contents of e.g. a + # block or char device will change without its mtime/size/inode changing. 
+ cache.memorize_file(path_hash, st, [c[0] for c in chunks]) status = status or 'M' # regular file, modified (if not 'A' already) item[b'chunks'] = chunks item.update(self.stat_attrs(st, path)) + if is_special_file: + # we processed a special file like a regular file. reflect that in mode, + # so it can be extracted / accessed in FUSE mount like a regular file: + item[b'mode'] = stat.S_IFREG | stat.S_IMODE(item[b'mode']) self.stats.nfiles += 1 self.add_item(item) return status diff --git a/borg/archiver.py b/borg/archiver.py index c8d4b1355..49e0bdbaa 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -29,7 +29,7 @@ from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader from .repository import Repository from .cache import Cache from .key import key_creator, RepoKey, PassphraseKey -from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS +from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS, is_special from .remote import RepositoryServer, RemoteRepository, cache_if_remote has_lchflags = hasattr(os, 'lchflags') @@ -256,15 +256,7 @@ class Archiver: return try: - # usually, do not follow symlinks (if we have a symlink, we want to - # backup it as such). - # but if we are in --read-special mode, we later process as - # a regular file (we open and read the symlink target file's content). - # thus, in read_special mode, we also want to stat the symlink target - # file, for consistency. if we did not, we also have issues extracting - # this file, as it would be in the archive as a symlink, not as the - # target's file type (which could be e.g. a block device). 
- st = os.stat(path, follow_symlinks=read_special) + st = os.lstat(path) except OSError as e: self.print_warning('%s: %s', path, e) return @@ -277,7 +269,7 @@ class Archiver: # Ignore if nodump flag is set if has_lchflags and (st.st_flags & stat.UF_NODUMP): return - if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode): + if stat.S_ISREG(st.st_mode): if not dry_run: try: status = archive.process_file(path, st, cache, self.ignore_inode) @@ -309,13 +301,26 @@ class Archiver: read_special=read_special, dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): if not dry_run: - status = archive.process_symlink(path, st) + if not read_special: + status = archive.process_symlink(path, st) + else: + st_target = os.stat(path) + if is_special(st_target.st_mode): + status = archive.process_file(path, st_target, cache) + else: + status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): if not dry_run: - status = archive.process_fifo(path, st) + if not read_special: + status = archive.process_fifo(path, st) + else: + status = archive.process_file(path, st, cache) elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode): if not dry_run: - status = archive.process_dev(path, st) + if not read_special: + status = archive.process_dev(path, st) + else: + status = archive.process_file(path, st, cache) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets return @@ -1134,7 +1139,8 @@ class Archiver: 'lzma,0 .. lzma,9 == lzma (with level 0..9).') subparser.add_argument('--read-special', dest='read_special', action='store_true', default=False, - help='open and read special files as if they were regular files') + help='open and read block and char device files as well as FIFOs as if they were ' + 'regular files. 
Also follows symlinks pointing to these kinds of files.') subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', default=False, help='do not create a backup archive') diff --git a/docs/usage.rst b/docs/usage.rst index e1a30060b..ca8d536db 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -651,32 +651,34 @@ For more details, see :ref:`chunker_details`. --read-special ~~~~~~~~~~~~~~ -The option ``--read-special`` is not intended for normal, filesystem-level (full or -partly-recursive) backups. You only give this option if you want to do something -rather ... special -- and if you have hand-picked some files that you want to treat -that way. +The --read-special option is special - you do not want to use it for normal +full-filesystem backups, but rather after carefully picking some targets for it. -``borg create --read-special`` will open all files without doing any special -treatment according to the file type (the only exception here are directories: -they will be recursed into). Just imagine what happens if you do ``cat -filename`` --- the content you will see there is what borg will backup for that -filename. +The option ``--read-special`` triggers special treatment for block and char +device files as well as FIFOs. Instead of storing them as such a device (or +FIFO), they will get opened, their content will be read and in the backup +archive they will show up like a regular file. -So, for example, symlinks will be followed, block device content will be read, -named pipes / UNIX domain sockets will be read. +Symlinks will also get special treatment if (and only if) they point to such +a special file: instead of storing them as a symlink, the target special file +will get processed as described above. -You need to be careful with what you give as filename when using ``--read-special``, -e.g. if you give ``/dev/zero``, your backup will never terminate. 
+One intended use case of this is backing up the contents of one or multiple +block devices, e.g. LVM snapshots or inactive LVs or disk partitions. -The given files' metadata is saved as it would be saved without -``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) -- but -additionally, also the content read from it will be saved for it. +You need to be careful about what you include when using ``--read-special``, +e.g. if you include ``/dev/zero``, your backup will never terminate. Restoring such files' content is currently only supported one at a time via ``--stdout`` option (and you have to redirect stdout to where ever it shall go, maybe directly into an existing device file of your choice or indirectly via ``dd``). +To some extent, mounting a backup archive with the backups of special files +via ``borg mount`` and then loop-mounting the image files from inside the mount +point will work. If you plan to access a lot of data in there, it likely will +scale and perform better if you do not work via the FUSE mount. + Example +++++++