From f3d95893b20b26c3690188907de2ff93d08495d9 Mon Sep 17 00:00:00 2001 From: Christopher Loessl Date: Sun, 20 Apr 2025 15:33:42 +0200 Subject: [PATCH] feat(backup): add possibility to exclude macOS cloud-only files --- changelog/unreleased/issue-5352 | 11 ++++ cmd/restic/cmd_backup.go | 7 ++- doc/040_backup.rst | 10 ++-- internal/fs/stat.go | 2 +- internal/fs/stat_bsd.go | 4 +- internal/fs/stat_darwin.go | 56 ++++++++++++++++++++ internal/fs/stat_darwin_test.go | 91 +++++++++++++++++++++++++++++++++ internal/fs/stat_test.go | 11 ++++ 8 files changed, 180 insertions(+), 12 deletions(-) create mode 100644 changelog/unreleased/issue-5352 create mode 100644 internal/fs/stat_darwin.go create mode 100644 internal/fs/stat_darwin_test.go diff --git a/changelog/unreleased/issue-5352 b/changelog/unreleased/issue-5352 new file mode 100644 index 000000000..c67ae0da1 --- /dev/null +++ b/changelog/unreleased/issue-5352 @@ -0,0 +1,11 @@ +Enhancement: Add support for --exclude-cloud-files on macOS (e.g. iCloud drive) + +Restic treated files stored in iCloud drive as though they were regular files. +This caused restic to download all files (including files marked as cloud only) while iterating over them. + +Restic now allows the user to exclude these files when backing up with the `--exclude-cloud-files` option. + +Works from Sonoma (macOS 14.0) onwards. Older macOS versions materialize files when `stat` is called on the file. 
+ +https://github.com/restic/restic/pull/4990 +https://github.com/restic/restic/issues/5352 diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 6d6483b19..b8ad97000 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -138,7 +138,9 @@ func (opts *BackupOptions) AddFlags(f *pflag.FlagSet) { f.BoolVar(&opts.NoScan, "no-scan", false, "do not run scanner to estimate size of backup") if runtime.GOOS == "windows" { f.BoolVar(&opts.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)") - f.BoolVar(&opts.ExcludeCloudFiles, "exclude-cloud-files", false, "excludes online-only cloud files (such as OneDrive Files On-Demand)") + } + if runtime.GOOS == "windows" || runtime.GOOS == "darwin" { + f.BoolVar(&opts.ExcludeCloudFiles, "exclude-cloud-files", false, "excludes online-only cloud files (such as OneDrive, iCloud drive, …)") } f.BoolVar(&opts.SkipIfUnchanged, "skip-if-unchanged", false, "skip snapshot creation if identical to parent snapshot") @@ -352,9 +354,6 @@ func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS, warnf fu } if opts.ExcludeCloudFiles && !opts.Stdin && !opts.StdinCommand { - if runtime.GOOS != "windows" { - return nil, errors.Fatalf("exclude-cloud-files is only supported on Windows") - } f, err := archiver.RejectCloudFiles(warnf) if err != nil { return nil, err diff --git a/doc/040_backup.rst b/doc/040_backup.rst index a2092e486..3abd2aeb9 100644 --- a/doc/040_backup.rst +++ b/doc/040_backup.rst @@ -29,11 +29,11 @@ again: start scan on [/home/user/work] start backup on [/home/user/work] scan finished in 1.837s: 5307 files, 1.720 GiB - + Files: 5307 new, 0 changed, 0 unmodified Dirs: 1867 new, 0 changed, 0 unmodified Added to the repository: 1.200 GiB (1.103 GiB stored) - + processed 5307 files, 1.720 GiB in 0:12 snapshot 40dc1520 saved @@ -117,7 +117,7 @@ repository (since all data is already there). This is de-duplication at work! 
start scan on [/home/user/work] start backup on [/home/user/work] scan finished in 1.881s: 5307 files, 1.720 GiB - + Files: 0 new, 0 changed, 5307 unmodified Dirs: 0 new, 0 changed, 1867 unmodified Added to the repository: 0 B (0 B stored) @@ -257,7 +257,7 @@ the corresponding folder and use relative paths. start scan on [.] start backup on [.] scan finished in 1.814s: 5307 files, 1.720 GiB - + Files: 0 new, 0 changed, 5307 unmodified Dirs: 0 new, 0 changed, 1867 unmodified Added to the repository: 0 B (0 B stored) @@ -298,7 +298,7 @@ the exclude options are: - ``--iexclude-file`` Same as ``exclude-file`` but ignores cases like in ``--iexclude`` - ``--exclude-if-present foo`` Specified one or more times to exclude a folder's content if it contains a file called ``foo`` (optionally having a given header, no wildcards for the file name supported) - ``--exclude-larger-than size`` Specified once to exclude files larger than the given size -- ``--exclude-cloud-files`` Specified once to exclude online-only cloud files (such as OneDrive Files On-Demand), currently only supported on Windows +- ``--exclude-cloud-files`` Specified once to exclude online-only cloud files (such as OneDrive Files On-Demand, iCloud drive), currently only supported on Windows and macOS Please see ``restic help backup`` for more specific information about each exclude option. 
diff --git a/internal/fs/stat.go b/internal/fs/stat.go index 4c55e6105..e582b5539 100644 --- a/internal/fs/stat.go +++ b/internal/fs/stat.go @@ -25,7 +25,7 @@ type ExtendedFileInfo struct { ModTime time.Time // last (content) modification time stamp ChangeTime time.Time // last status change time stamp - //nolint:unused // only used on Windows + //nolint:unused // only used on Windows/Darwin sys any // Value returned by os.FileInfo.Sys() } diff --git a/internal/fs/stat_bsd.go b/internal/fs/stat_bsd.go index 95238be77..5a3704194 100644 --- a/internal/fs/stat_bsd.go +++ b/internal/fs/stat_bsd.go @@ -1,5 +1,5 @@ -//go:build freebsd || darwin || netbsd -// +build freebsd darwin netbsd +//go:build freebsd || netbsd +// +build freebsd netbsd package fs diff --git a/internal/fs/stat_darwin.go b/internal/fs/stat_darwin.go new file mode 100644 index 000000000..ee39e8bfa --- /dev/null +++ b/internal/fs/stat_darwin.go @@ -0,0 +1,56 @@ +//go:build darwin +// +build darwin + +package fs + +import ( + "fmt" + "os" + "syscall" + "time" + + "golang.org/x/sys/unix" +) + +// extendedStat extracts info into an ExtendedFileInfo for macOS. +func extendedStat(fi os.FileInfo) *ExtendedFileInfo { + s := fi.Sys().(*syscall.Stat_t) + + return &ExtendedFileInfo{ + Name: fi.Name(), + Mode: fi.Mode(), + + DeviceID: uint64(s.Dev), + Inode: uint64(s.Ino), + Links: uint64(s.Nlink), + UID: s.Uid, + GID: s.Gid, + Device: uint64(s.Rdev), + BlockSize: int64(s.Blksize), + Blocks: s.Blocks, + Size: s.Size, + + AccessTime: time.Unix(s.Atimespec.Unix()), + ModTime: time.Unix(s.Mtimespec.Unix()), + ChangeTime: time.Unix(s.Ctimespec.Unix()), + + sys: s, + } +} + +// RecallOnDataAccess checks if a file is available locally on the disk or if the file is +// just a dataless file which must be downloaded from a remote server. This is typically used +// in cloud syncing services (e.g. iCloud drive) to prevent downloading files from cloud storage +// until they are accessed.
+func (fi *ExtendedFileInfo) RecallOnDataAccess() (bool, error) { + extAttribute, ok := fi.sys.(*syscall.Stat_t) + if !ok { + return false, fmt.Errorf("could not determine file attributes: %s", fi.Name) + } + const mask uint32 = unix.SF_DATALESS // 0x40000000 + if extAttribute.Flags&mask == mask { + return true, nil + } + + return false, nil +} diff --git a/internal/fs/stat_darwin_test.go b/internal/fs/stat_darwin_test.go new file mode 100644 index 000000000..c8768e29d --- /dev/null +++ b/internal/fs/stat_darwin_test.go @@ -0,0 +1,91 @@ +package fs_test + +import ( + iofs "io/fs" + "os" + "path/filepath" + "syscall" + "testing" + "time" + + "github.com/restic/restic/internal/fs" + rtest "github.com/restic/restic/internal/test" + "golang.org/x/sys/unix" +) + +func TestRecallOnDataAccessRealFile(t *testing.T) { + // create a temp file for testing + tempdir := rtest.TempDir(t) + filename := filepath.Join(tempdir, "regular-file") + err := os.WriteFile(filename, []byte("foobar"), 0640) + rtest.OK(t, err) + + fi, err := os.Stat(filename) + rtest.OK(t, err) + + xs := fs.ExtendedStat(fi) + + // ensure we can check attrs without error + recall, err := xs.RecallOnDataAccess() + rtest.Assert(t, err == nil, "err should be nil", err) + rtest.Assert(t, recall == false, "RecallOnDataAccess should be false") +} + +// mockFileInfo implements os.FileInfo for mocking file attributes +type mockFileInfo struct { + Flags uint32 +} + +func (m mockFileInfo) IsDir() bool { + return false +} +func (m mockFileInfo) ModTime() time.Time { + return time.Now() +} +func (m mockFileInfo) Mode() iofs.FileMode { + return 0 +} +func (m mockFileInfo) Name() string { + return "test" +} +func (m mockFileInfo) Size() int64 { + return 0 +} +func (m mockFileInfo) Sys() any { + return &syscall.Stat_t{ + Flags: m.Flags, + } +} + +func TestRecallOnDataAccessMockCloudFile(t *testing.T) { + fi := mockFileInfo{ + Flags: unix.SF_DATALESS, + } + xs := fs.ExtendedStat(fi) + + recall, err := xs.RecallOnDataAccess() 
+ rtest.Assert(t, err == nil, "err should be nil", err) + rtest.Assert(t, recall, "RecallOnDataAccess should be true") +} + +func TestRecallOnDataAccessMockRegularFile(t *testing.T) { + fi := mockFileInfo{ + Flags: 0, + } + xs := fs.ExtendedStat(fi) + + recall, err := xs.RecallOnDataAccess() + rtest.Assert(t, err == nil, "err should be nil", err) + rtest.Assert(t, recall == false, "RecallOnDataAccess should be false") +} + +func TestRecallOnDataAccessMockError(t *testing.T) { + efi := &fs.ExtendedFileInfo{ + Name: "test-file-name", + } + + recall, err := efi.RecallOnDataAccess() + rtest.Assert(t, err != nil, "err should be set", err) + rtest.Assert(t, err.Error() == "could not determine file attributes: test-file-name", "err message not correct", err) + rtest.Assert(t, recall == false, "RecallOnDataAccess should be false") +} diff --git a/internal/fs/stat_test.go b/internal/fs/stat_test.go index d52415c1d..8a55decd7 100644 --- a/internal/fs/stat_test.go +++ b/internal/fs/stat_test.go @@ -27,3 +27,14 @@ func TestExtendedStat(t *testing.T) { t.Errorf("extFI.ModTime does not match, want %v, got %v", fi.ModTime(), extFI.ModTime) } } + +func TestNilExtendPanic(t *testing.T) { + defer func() { + if r := recover(); r != nil { + rtest.Assert(t, r == "os.FileInfo is nil", "Panic message does not match, want %v, got %v", "os.FileInfo is nil", r) + } else { + rtest.Assert(t, false, "Expected panic, but no panic occurred") + } + }() + _ = ExtendedStat(nil) +}