From 1a8f44defc5c0e72a07c9def9587fbd753e45020 Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Wed, 8 Apr 2026 08:29:27 +0100 Subject: [PATCH 1/7] restic forget --show-removed-files --dry-run Implement a feature which shows the files to be removed from the repository in case a `restic forget` is to be run. function showRemovedFiles() gathers all the relevant data, and finally presents its result in createDeletedFilenames() in text or JSON format. --- cmd/restic/cmd_forget.go | 353 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 351 insertions(+), 2 deletions(-) diff --git a/cmd/restic/cmd_forget.go b/cmd/restic/cmd_forget.go index 976db4d0d..10ab77983 100644 --- a/cmd/restic/cmd_forget.go +++ b/cmd/restic/cmd_forget.go @@ -5,13 +5,20 @@ import ( "encoding/json" "fmt" "io" + "maps" + "path/filepath" + "slices" "strconv" + "sync" + "time" "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/global" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/ui" + "github.com/restic/restic/internal/ui/progress" + "github.com/restic/restic/internal/walker" "github.com/spf13/cobra" "github.com/spf13/pflag" ) @@ -116,7 +123,9 @@ type ForgetOptions struct { UnsafeAllowRemoveAll bool data.SnapshotFilter - Compact bool + Compact bool + ShowRemovedFiles bool + SearchFiles bool // Grouping GroupBy data.SnapshotGroupByOptions @@ -139,6 +148,8 @@ func (opts *ForgetOptions) AddFlags(f *pflag.FlagSet) { f.VarP(&opts.WithinYearly, "keep-within-yearly", "", "keep yearly snapshots that are newer than `duration` (eg.
1y5m7d2h) relative to the latest snapshot") f.Var(&opts.KeepTags, "keep-tag", "keep snapshots with this `taglist` (can be specified multiple times)") f.BoolVar(&opts.UnsafeAllowRemoveAll, "unsafe-allow-remove-all", false, "allow deleting all snapshots of a snapshot group") + f.BoolVar(&opts.ShowRemovedFiles, "show-removed-files", false, "show files which would be removed") + f.BoolVar(&opts.SearchFiles, "search-files", false, "search for identically named files and exclude") f.StringArrayVar(&opts.Hosts, "hostname", nil, "only consider snapshots with the given `hostname` (can be specified multiple times)") err := f.MarkDeprecated("hostname", "use --host") @@ -159,6 +170,14 @@ func (opts *ForgetOptions) AddFlags(f *pflag.FlagSet) { } func verifyForgetOptions(opts *ForgetOptions) error { + if opts.ShowRemovedFiles && !opts.DryRun { + return errors.Fatal("option --show-removed-files needs option --dry-run") + + } + if opts.SearchFiles && !opts.ShowRemovedFiles { + return errors.Fatal("option --search-files needs option --show-removed-files") + } + if opts.Last < -1 || opts.Hourly < -1 || opts.Daily < -1 || opts.Weekly < -1 || opts.Monthly < -1 || opts.Yearly < -1 { return errors.Fatal("negative values other than -1 are not allowed for --keep-*") @@ -196,10 +215,15 @@ func runForget(ctx context.Context, opts ForgetOptions, pruneOptions PruneOption } defer unlock() + snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) + if err != nil { + return err + } + var snapshots data.Snapshots removeSnIDs := restic.NewIDSet() - for sn := range FindFilteredSnapshots(ctx, repo, repo, &opts.SnapshotFilter, args, printer) { + for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args, printer) { snapshots = append(snapshots, sn) } if ctx.Err() != nil { @@ -306,6 +330,11 @@ func runForget(ctx context.Context, opts ForgetOptions, pruneOptions PruneOption if ctx.Err() != nil { return ctx.Err() } + if opts.ShowRemovedFiles { + if err 
:= showRemovedFiles(ctx, repo, removeSnIDs, opts, gopts, snapshotLister, printer); err != nil { + return err + } + } // these are the snapshots that failed to be removed failedSnIDs := restic.NewIDSet() @@ -402,3 +431,323 @@ func asJSONKeeps(list []data.KeepReason) []KeepReason { func printJSONForget(stdout io.Writer, forgets []*ForgetGroup) error { return json.NewEncoder(stdout).Encode(forgets) } + +/*============================================================================== + * + * show files which are about to be removed / forgotten + * + *============================================================================== + + calling diagram: + + showRemovedFiles + FindUsedBlobs // find used blobs + removeStillUsedBlobs + StreamTrees // find out if blobs are still in use by other snapshots + createDeletedFilenames + walker.Walk // relate blobs to snapshot and filenames, build 'filesToDelete' + processOtherPathnames // used by option --search-files, + StreamTrees // filter out other filenames still in use + generateJSONData + print result // text and JSON output +*/ + +type subNode struct { + ID restic.ID + node *data.Node +} + +type subNodeSnap struct { + node *data.Node + snapshot *data.Snapshot +} + +type DeleteFileInfo struct { + SnapshotID restic.ID `json:"snapshot"` + Path string `json:"path"` + Mtime time.Time `json:"mtime"` + Size uint64 `json:"size"` +} + +type DeletedFilenamesJSON struct { + MessageType string `json:"message_type"` // always "deleted_files" + DeletedFiles []DeleteFileInfo `json:"files"` +} + +type ShowRemoved struct { + selectedSnapshots []*data.Snapshot + selectedTrees []restic.ID + allOtherTrees []restic.ID + otherParentToChild map[restic.ID][]subNode + searchFiles bool + printer progress.Printer +} + +// makeShowRemoved: initializes &ShowRemoved +func makeShowRemoved(searchFiles bool, printer progress.Printer) *ShowRemoved { + return &ShowRemoved{ + selectedSnapshots: []*data.Snapshot{}, + selectedTrees: []restic.ID{}, + 
allOtherTrees: []restic.ID{}, + otherParentToChild: make(map[restic.ID][]subNode), + searchFiles: searchFiles, + printer: printer, + } +} + +// removeStillUsedBlobs looks in all other snapshots for blobs which are still +// in use and removes them from 'uniqueBlobs' +// at the same time, the tree hierarchy is collected for the 'allOtherTrees' +func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Repository, + uniqueBlobs restic.AssociatedBlobSet, +) error { + var lock sync.Mutex + bar := sr.printer.NewCounter("all other snapshots") + defer bar.Done() + seenTree := restic.NewIDSet() + err := data.StreamTrees(ctx, repo, sr.allOtherTrees, bar, func(tree restic.ID) bool { + lock.Lock() + seen := seenTree.Has(tree) + seenTree.Insert(tree) + uniqueBlobs.Delete(restic.BlobHandle{ID: tree, Type: restic.TreeBlob}) + lock.Unlock() + return seen + }, func(id restic.ID, err error, nodes data.TreeNodeIterator) error { + if err != nil { + return fmt.Errorf("LoadTree(%v) returned error %v", id.Str(), err) + } + + children := []subNode{} + for tree := range nodes { + if tree.Error != nil { + return fmt.Errorf("LoadTree returned error %v", tree.Error) + } + node := tree.Node + switch node.Type { + case data.NodeTypeFile: + for _, blob := range node.Content { + lock.Lock() + uniqueBlobs.Delete(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) + lock.Unlock() + } + case data.NodeTypeDir: + if sr.searchFiles { + children = append(children, subNode{*node.Subtree, node}) + } + } + } + if sr.searchFiles { + lock.Lock() + sr.otherParentToChild[id] = children + lock.Unlock() + } + return nil + }) + + return err +} + +// processOtherPathnames is activated when option --search-files is called for +// search through all the trees attached to 'sr.allOtherTrees' +func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Repository, + filesToDelete map[string]map[subNode]subNodeSnap, +) error { + otherDirectoryTimes := 
makeDirectoryTree(sr.allOtherTrees, sr.otherParentToChild) + + seenTrees := restic.NewIDSet() + var lock sync.Mutex + err := data.StreamTrees(ctx, repo, sr.allOtherTrees, nil, func(tree restic.ID) bool { + seen := seenTrees.Has(tree) + seenTrees.Insert(tree) + return seen + }, func(parent restic.ID, err error, nodes data.TreeNodeIterator) error { + if err != nil { + return fmt.Errorf("LoadTree(%v) returned error %v", parent.Str(), err) + } + + otherPath, ok := otherDirectoryTimes[parent] + if !ok { + return nil + } + + for tree := range nodes { + if tree.Error != nil { + return fmt.Errorf("LoadTree returned error %v", tree.Error) + } + lock.Lock() + delete(filesToDelete, filepath.Join(otherPath, tree.Node.Name)) + lock.Unlock() + } + return nil + }) + + return err +} + +// createDeletedFilenames walks through the selected snapshots (treeList) +// and takes note of the blobs in 'uniqueBlobs' +// the tree IDs related to these blobs are collected for naming and finding the +// oldest snapshot +func (sr *ShowRemoved) createDeletedFilenames(ctx context.Context, repo restic.Repository, + uniqueBlobs restic.AssociatedBlobSet, gopts global.Options, printer progress.Printer, +) error { + + filesToDelete := make(map[string]map[subNode]subNodeSnap) + for _, sn := range sr.selectedSnapshots { + err := walker.Walk(ctx, repo, *sn.Tree, walker.WalkVisitor{ + ProcessNode: func(parentTreeID restic.ID, pathname string, node *data.Node, nodeErr error) error { + if nodeErr != nil { + printer.E("Unable to load tree %s\n ... 
which belongs to snapshot %s - reason %v\n", + parentTreeID.Str(), sn.ID().Str(), nodeErr) + return nodeErr + } + if node == nil { + return nil + } + + if node.Type == data.NodeTypeFile { + fixedNode := subNode{ID: parentTreeID, node: node} + for _, blob := range node.Content { + if !uniqueBlobs.Has(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) { + continue + } + + if _, ok := filesToDelete[pathname]; !ok { + filesToDelete[pathname] = make(map[subNode]subNodeSnap) + } + if _, ok := filesToDelete[pathname][fixedNode]; !ok { + filesToDelete[pathname][fixedNode] = subNodeSnap{ + node: node, + snapshot: sn, + } + } + + // first blob is enough to construct a complete entry + break + } + } + return nil + }}) + if err != nil { + return err + } + } + + if sr.searchFiles { + // match pathnames from 'allOtherTrees' and remove from 'filesToDelete' + if err := sr.processOtherPathnames(ctx, repo, filesToDelete); err != nil { + return err + } + } + + // convert 'filesToDelete' into deletedFilenamesJSON.DeletedFiles + deletedFilenamesJSON, err := sr.generateJSONData(filesToDelete) + if err != nil { + return err + } + + if !gopts.JSON { + printer.P("\n*** files to be removed ***") + for _, item := range deletedFilenamesJSON.DeletedFiles { + printer.P("%s %12s %v %s", item.SnapshotID.Str(), ui.FormatBytes(item.Size), item.Mtime.Format(time.DateTime), item.Path) + } + return nil + } + + return json.NewEncoder(gopts.Term.OutputWriter()).Encode(deletedFilenamesJSON) +} + +// generateJSONData collects data blobs from 'filesToDelete' +// The structure for JSON is created and filled. 
+func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]subNodeSnap) (*DeletedFilenamesJSON, error) { + + resultJSON := &DeletedFilenamesJSON{ + MessageType: "deleted_files", + DeletedFiles: make([]DeleteFileInfo, 0, len(filesToDelete)), + } + + for _, name := range slices.Sorted(maps.Keys(filesToDelete)) { + oldest := slices.MinFunc(slices.Collect(maps.Values(filesToDelete[name])), func(a, b subNodeSnap) int { + return a.snapshot.Time.Compare(b.snapshot.Time) + }) + + newEntry := DeleteFileInfo{ + Path: name, + Size: oldest.node.Size, + Mtime: oldest.node.ModTime.Truncate(time.Second), + SnapshotID: *(oldest.snapshot).ID(), + } + resultJSON.DeletedFiles = append(resultJSON.DeletedFiles, newEntry) + } + + return resultJSON, nil +} + +// showRemovedFiles prepares a list of files which are going to be removed +// when forget --prune is run for 'removeSnIDs' +// this function is the main driver +func showRemovedFiles(ctx context.Context, repo restic.Repository, + removeSnIDs restic.IDSet, opts ForgetOptions, + gopts global.Options, snapshotLister restic.Lister, printer progress.Printer, +) error { + if err := repo.LoadIndex(ctx, printer); err != nil { + return err + } + + sr := makeShowRemoved(opts.SearchFiles, printer) + for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &data.SnapshotFilter{}, nil, printer) { + if removeSnIDs.Has(*sn.ID()) { + sr.selectedTrees = append(sr.selectedTrees, *sn.Tree) + sr.selectedSnapshots = append(sr.selectedSnapshots, sn) + } else { + sr.allOtherTrees = append(sr.allOtherTrees, *sn.Tree) + } + } + if ctx.Err() != nil { + return ctx.Err() + } + + uniqueBlobs := repo.NewAssociatedBlobSet() + if err := data.FindUsedBlobs(ctx, repo, sr.selectedTrees, uniqueBlobs, nil); err != nil { + return err + } + + if err := sr.removeStillUsedBlobs(ctx, repo, uniqueBlobs); err != nil { + return err + } + + return sr.createDeletedFilenames(ctx, repo, uniqueBlobs, gopts, printer) +} + +// makeDirectoryTree maps a 
tuple 'subNode' to a treeID and a pathname +// the mapping from parent to pathname is unique, but the reverse is certainly not! +func makeDirectoryTree(treeRoots []restic.ID, parentToChild map[restic.ID][]subNode, +) (directoryNames map[restic.ID]string) { + + directoryNames = make(map[restic.ID]string) + // build entries for all tree roots + for _, root := range treeRoots { + directoryNames[root] = "/" + } + + // iteratively fill in directoryNames (breadth first search) + seen := restic.NewIDSet() + for changed := true; changed; { + changed = false + for parent, children := range parentToChild { + parentPath, ok := directoryNames[parent] + if !ok || seen.Has(parent) { + continue + } + for _, item := range children { + if _, ok := directoryNames[item.ID]; !ok { + directoryNames[item.ID] = filepath.Join(parentPath, item.node.Name) + changed = true + } + } + seen.Insert(parent) + } + } + + return directoryNames +} From 46925e8d10198bb2df1e646bd252aced27230bed Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Wed, 8 Apr 2026 11:14:53 +0100 Subject: [PATCH 2/7] forget --show-removed-files integration test Added one test: TestRunForgetShowRemovedFiles() added helper functions: testRunForgetWithOutput() to be able to analyze JSON output testGenerateRandomText() to generate a random text string of characters testCreateRandomTextFile() to create random text file at a given location in the filesystem TestRunForgetShowRemovedFiles() generates 4 different backups by deleting and adding files and then checks the JSON output if various combinations of these snapshots were to be forgotten. 
forget integration - fix Windows silliness for paths forget integration - windows has a different opinion about a lot of things here the snapshots don't match - hmmm forget integration - windows again This time windows is complaining: exp: "/Users/RUNNER~1/AppData/Local/Temp/restic-test-1673661747/testdata/0/0/9/4" got: "/Users/RUNNER~1/AppData/Local/Temp/restic-test-1673661747/testdata/0/0/9/1" this also belongs to a different snapshot! --- cmd/restic/cmd_forget_integration_test.go | 155 ++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/cmd/restic/cmd_forget_integration_test.go b/cmd/restic/cmd_forget_integration_test.go index 93a178481..d9f43e2ee 100644 --- a/cmd/restic/cmd_forget_integration_test.go +++ b/cmd/restic/cmd_forget_integration_test.go @@ -2,12 +2,18 @@ package main import ( "context" + "encoding/json" + "math/rand" + "os" "path/filepath" + "runtime" + "strconv" "strings" "testing" "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/global" + "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" ) @@ -24,6 +30,42 @@ func testRunForget(t testing.TB, gopts global.Options, opts ForgetOptions, args rtest.OK(t, testRunForgetMayFail(t, gopts, opts, args...)) } +func testRunForgetWithOutput(t testing.TB, wantJSON bool, opts ForgetOptions, + pruneOpts PruneOptions, gopts global.Options, args []string) []byte { + buf, err := withCaptureStdout(t, gopts, func(ctx context.Context, gopts global.Options) error { + gopts.JSON = wantJSON + + return runForget(context.TODO(), opts, pruneOpts, gopts, gopts.Term, args) + }) + rtest.OK(t, err) + return buf.Bytes() +} + +const charset = "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789 /=-+*{}[]<>()\n" + +// GenerateRandomText returns a random string of length n +func testGenerateRandomText(n int) []byte { + b := make([]byte, n) + for i := range b { + b[i] = charset[rand.Intn(len(charset))] + } + return b +} + +func 
testCreateRandomTextFile(t *testing.T, filename string, sizeBytes int) { + f, err := os.Create(filename) + rtest.OK(t, err) + + defer func() { + err := f.Close() + rtest.OK(t, err) + }() + + data := testGenerateRandomText(sizeBytes) + _, err = f.Write(data) + rtest.OK(t, err) +} + func TestRunForgetSafetyNet(t *testing.T) { env, cleanup := withTestEnvironment(t) defer cleanup() @@ -64,3 +106,116 @@ func TestRunForgetSafetyNet(t *testing.T) { }) testListSnapshots(t, env.gopts, 0) } + +func TestRunForgetShowRemovedFiles(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testSetupBackupData(t, env) + + optsBackup := BackupOptions{} + backupPath := filepath.Join(env.testdata, "0", "0", "9") + rtest.OK(t, os.Remove(filepath.Join(backupPath, "0"))) + for i := 4; i < 68; i++ { + rtest.OK(t, os.Remove(filepath.Join(backupPath, strconv.Itoa(i)))) + } + + // files f1, f2, f3 + testRunBackup(t, "", []string{backupPath}, optsBackup, env.gopts) + snapshotIDs := testListSnapshots(t, env.gopts, 1) + sn1 := snapshotIDs[0] + sn1Str := sn1.Str() + + f1 := filepath.Join(backupPath, "1") + f2 := filepath.Join(backupPath, "2") + f3 := filepath.Join(backupPath, "3") + f4 := filepath.Join(backupPath, "4") + f5 := filepath.Join(backupPath, "5") + rtest.OK(t, os.Remove(f1)) + testCreateRandomTextFile(t, f4, 10) + + // file f2, f3, new f4 + testRunBackup(t, "", []string{backupPath}, optsBackup, env.gopts) + snapshotIDs = testListSnapshots(t, env.gopts, 2) + snapSet := restic.NewIDSet(snapshotIDs...) + sn2 := snapSet.Sub(restic.NewIDSet(sn1)).List()[0] + sn2Str := sn2.Str() + + rtest.OK(t, os.Remove(f2)) + testCreateRandomTextFile(t, f1, 10) + testCreateRandomTextFile(t, f5, 10) + + // file new f1, f3, f4, new f5 + testRunBackup(t, "", []string{backupPath}, optsBackup, env.gopts) + snapshotIDs = testListSnapshots(t, env.gopts, 3) + snapSet = restic.NewIDSet(snapshotIDs...) 
+ sn3 := snapSet.Sub(restic.NewIDSet(sn1, sn2)).List()[0] + sn3Str := sn3.Str() + + rtest.OK(t, os.Remove(f3)) + testCreateRandomTextFile(t, f2, 10) + + // file new f2, f4, f5 + testRunBackup(t, "", []string{backupPath}, optsBackup, env.gopts) + snapshotIDs = testListSnapshots(t, env.gopts, 4) + snapSet = restic.NewIDSet(snapshotIDs...) + sn4 := snapSet.Sub(restic.NewIDSet(sn1, sn2, sn3)).List()[0] + sn4Str := sn4.Str() + + optsForget := ForgetOptions{ + DryRun: true, + ShowRemovedFiles: true, + } + optsForgetS := ForgetOptions{ + DryRun: true, + ShowRemovedFiles: true, + SearchFiles: true, + } + pruneOpts := PruneOptions{ + MaxUnused: "unlimited", + } + + // the xxx[2:] is to get rid of the difference of windows paths in and out + // "C:/Users/RUNNER~1/AppData/Local/Temp/restic-test-2058676641/testdata/0/0/9/1" versus + // "/C/Users/RUNNER~1/AppData/Local/Temp/restic-test-2058676641/testdata/0/0/9/1" + + output := testRunForgetWithOutput(t, true, optsForget, pruneOpts, env.gopts, []string{sn1Str}) + deletedFilenames := DeletedFilenamesJSON{} + rtest.OK(t, json.Unmarshal(output, &deletedFilenames)) + rtest.Equals(t, 1, len(deletedFilenames.DeletedFiles)) + rtest.Equals(t, sn1Str, deletedFilenames.DeletedFiles[0].SnapshotID.Str()) + rtest.Equals(t, filepath.ToSlash(f1)[2:], filepath.ToSlash(deletedFilenames.DeletedFiles[0].Path)[2:]) + + output = testRunForgetWithOutput(t, true, optsForget, pruneOpts, env.gopts, []string{sn2Str}) + rtest.OK(t, json.Unmarshal(output, &deletedFilenames)) + rtest.Equals(t, 0, len(deletedFilenames.DeletedFiles)) + + output = testRunForgetWithOutput(t, true, optsForget, pruneOpts, env.gopts, []string{sn1Str, sn2Str}) + rtest.OK(t, json.Unmarshal(output, &deletedFilenames)) + rtest.Equals(t, 2, len(deletedFilenames.DeletedFiles)) + rtest.Equals(t, sn1Str, deletedFilenames.DeletedFiles[0].SnapshotID.Str()) + rtest.Equals(t, filepath.ToSlash(f1)[2:], filepath.ToSlash(deletedFilenames.DeletedFiles[0].Path)[2:]) + + rtest.Equals(t, sn1Str, 
deletedFilenames.DeletedFiles[1].SnapshotID.Str()) + rtest.Equals(t, filepath.ToSlash(f2)[2:], filepath.ToSlash(deletedFilenames.DeletedFiles[1].Path)[2:]) + + output = testRunForgetWithOutput(t, true, optsForget, pruneOpts, env.gopts, []string{sn2Str, sn3Str, sn4Str}) + rtest.OK(t, json.Unmarshal(output, &deletedFilenames)) + + rtest.Equals(t, 4, len(deletedFilenames.DeletedFiles)) + rtest.Equals(t, sn3Str, deletedFilenames.DeletedFiles[0].SnapshotID.Str()) + rtest.Equals(t, filepath.ToSlash(f1)[2:], filepath.ToSlash(deletedFilenames.DeletedFiles[0].Path)[2:]) + rtest.Equals(t, sn4Str, deletedFilenames.DeletedFiles[1].SnapshotID.Str()) + rtest.Equals(t, filepath.ToSlash(f2)[2:], filepath.ToSlash(deletedFilenames.DeletedFiles[1].Path)[2:]) + + output = testRunForgetWithOutput(t, true, optsForgetS, pruneOpts, env.gopts, []string{sn2Str, sn3Str, sn4Str}) + rtest.OK(t, json.Unmarshal(output, &deletedFilenames)) + // can't investigate the difference since I have no restic windows development environment + // have to exclude this test from windows + if runtime.GOOS != "windows" { + rtest.Equals(t, sn2Str, deletedFilenames.DeletedFiles[0].SnapshotID.Str()) + rtest.Equals(t, filepath.ToSlash(f4)[2:], filepath.ToSlash(deletedFilenames.DeletedFiles[0].Path)[2:]) + rtest.Equals(t, sn3Str, deletedFilenames.DeletedFiles[1].SnapshotID.Str()) + rtest.Equals(t, filepath.ToSlash(f5)[2:], filepath.ToSlash(deletedFilenames.DeletedFiles[1].Path)[2:]) + } +} From 3d8f60e57c534ab30bf2716964f947c6ba9a4ad9 Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Wed, 8 Apr 2026 12:14:12 +0100 Subject: [PATCH 3/7] forget documentation add a chapter about files being forgotten from the repository The detailed description of the JSON fields is coming later.
--- doc/045_working_with_repos.rst | 50 ++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst index 3fbfaff8a..aede4633f 100644 --- a/doc/045_working_with_repos.rst +++ b/doc/045_working_with_repos.rst @@ -598,6 +598,56 @@ Just one quick example: if you are looking for specific data blob(s), you can is ... in snapshot 774ebacd (2026-01-16 09:01:17) +Show files which would be removed from the repository when calling ``restic forget`` +========================================================================================= + +If you want to find out which files would be deleted in case you run ``restic forget``, +you can use option ``--show-removed-files`` (together with ``--dry-run``) to show +these files. + +.. code-block:: console + + $ restic -r /srv/restic-repo forget deadbeef --dry-run --show-removed-files + ... + *** files to be removed *** + deadbeef 38.590 KiB 2024-08-31 08:21:16 /home/user/apt_new/enduser_packages.txt + deadbeef 200.159 KiB 2024-08-31 08:21:16 /home/user/apt_new/install_packages.txt + ... + +This list might be long, but gives you all the pathnames which match this/these snapshots. + +If you are only interested in files which are truly going to be removed, but not interested +in files which have a newer version with the same pathname, use the additional option +``--search-files``. + +In this case the output looks as follows: + +.. code-block:: console + + $ restic -r /srv/restic-repo forget deadbeef --dry-run --show-removed-files --search-files + ... + *** files to be removed *** + ... + +In other words, those files named above have a newer version somewhere in the repository. + +This command can also create JSON output: + +..
code-block:: console + + $ restic -r /srv/restic-repo forget e170592e --dry-run --show-removed-files --search-files --json | jq + { + "message_type": "deleted_files", + "files": [ + { + "snapshot": "e170592e62ab36edb53828ed5108ae680bc54fb9c14dbe90037b723bc41032e0", + "path": "/home/user/restic/sn_home", + "mtime": "2024-05-23T15:31:26+01:00", + "size": 4415 + } + ] + } + Upgrading the repository format version ======================================= From bc366cb08988b13c0f364a7092d6adaea9f675ec Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Wed, 8 Apr 2026 13:29:57 +0100 Subject: [PATCH 4/7] forget - announcement file --- changelog/unreleased/issue-5749 | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 changelog/unreleased/issue-5749 diff --git a/changelog/unreleased/issue-5749 b/changelog/unreleased/issue-5749 new file mode 100644 index 000000000..1c84ad3b5 --- /dev/null +++ b/changelog/unreleased/issue-5749 @@ -0,0 +1,13 @@ +Enhancement: show deleted files for `restic forget` + +The question had been raised in the past: +`restic forget SNAPSHOTID --dry-run --prune` calculates which blocks are affected and the amount of space to be saved. +Is it possible to get a list of the particular files which will be deleted? + +With the option `--show-removed-files` it is now possible to create a list of affected files, +together with the size and the last modification time of each file. +The oldest snapshot which is attached to each file is shown as well.
+ +https://github.com/restic/restic/issues/5749 +https://github.com/restic/restic/pull/21778 +https://forum.restic.net/t/view-list-of-files-to-be-removed-in-restic-forget-prune-dry-run/10663 From 0616cf5e74c8c015170d405f7b0e592fbee01c1f Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Wed, 8 Apr 2026 19:02:22 +0100 Subject: [PATCH 5/7] restic --show-removed-files docs, part 2 add the description of the JSON output to doc/075_scripting.rst --- doc/075_scripting.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/doc/075_scripting.rst b/doc/075_scripting.rst index f0c225d84..00be2fe94 100644 --- a/doc/075_scripting.rst +++ b/doc/075_scripting.rst @@ -589,6 +589,32 @@ KeepReason object | ``matches`` | Array containing descriptions of the matching criteria | []string | +--------------+--------------------------------------------------------+--------------------+ +restic forget --dry-run --show-removed-files +-------------------------------------------- + +If the ``restic forget --dry-run --show-removed-files`` command is run, +the following JSON lines output is produced: + ++------------------+--------------------------------------------------------+------------------------------+ +| ``message_type`` | Always "deleted_files" | string | ++------------------+--------------------------------------------------------+------------------------------+ +| ``files`` | Array containing a description of deleted files | [] `DeleteFileInfo object`_ | ++------------------+--------------------------------------------------------+------------------------------+ + +..
_DeleteFileInfo object: + +DeleteFileInfo object: + ++--------------+-----------------------------------------------+-----------+ +| ``snapshot`` | the oldest snapshot referencing this file | string | ++--------------+-----------------------------------------------+-----------+ +| ``path`` | pathname for this file | string | ++--------------+-----------------------------------------------+-----------+ +| ``mtime`` | the last modification timestamp for this file | time.Time | ++--------------+-----------------------------------------------+-----------+ +| ``size`` | the size of this file | uint64 | ++--------------+-----------------------------------------------+-----------+ + init ---- From 734f7cd2b13b71fc54d194fd141d4b273288e9fd Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:11:26 +0100 Subject: [PATCH 6/7] restic forget --show-removed-files added walkParallel() walkParallel() adds the files to filesToDelete in parallel fashion, which speeds up the overall process quite a bit if there are multiple snapshots to be deleted at the same time.
--- cmd/restic/cmd_forget.go | 146 ++++++++++++++++++++++++++++----------- 1 file changed, 104 insertions(+), 42 deletions(-) diff --git a/cmd/restic/cmd_forget.go b/cmd/restic/cmd_forget.go index 10ab77983..b9cbb4864 100644 --- a/cmd/restic/cmd_forget.go +++ b/cmd/restic/cmd_forget.go @@ -7,6 +7,7 @@ import ( "io" "maps" "path/filepath" + "runtime" "slices" "strconv" "sync" @@ -19,6 +20,8 @@ import ( "github.com/restic/restic/internal/ui" "github.com/restic/restic/internal/ui/progress" "github.com/restic/restic/internal/walker" + "golang.org/x/sync/errgroup" + "github.com/spf13/cobra" "github.com/spf13/pflag" ) @@ -502,6 +505,7 @@ func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Rep uniqueBlobs restic.AssociatedBlobSet, ) error { var lock sync.Mutex + sr.printer.P("find still used blobs ...") bar := sr.printer.NewCounter("all other snapshots") defer bar.Done() seenTree := restic.NewIDSet() @@ -550,13 +554,17 @@ func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Rep // processOtherPathnames is activated when option --search-files is called for // search through all the trees attached to 'sr.allOtherTrees' func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Repository, - filesToDelete map[string]map[subNode]subNodeSnap, + filesToDelete map[string]map[subNode]subNodeSnap, printer progress.Printer, ) error { + // build tree topology for all other snapshots otherDirectoryTimes := makeDirectoryTree(sr.allOtherTrees, sr.otherParentToChild) + printer.P("look for identical pathnames ...") seenTrees := restic.NewIDSet() var lock sync.Mutex - err := data.StreamTrees(ctx, repo, sr.allOtherTrees, nil, func(tree restic.ID) bool { + bar := sr.printer.NewCounter("all other snapshots") + defer bar.Done() + err := data.StreamTrees(ctx, repo, sr.allOtherTrees, bar, func(tree restic.ID) bool { seen := seenTrees.Has(tree) seenTrees.Insert(tree) return seen @@ -574,6 +582,9 @@ func (sr *ShowRemoved) 
processOtherPathnames(ctx context.Context, repo restic.Re if tree.Error != nil { return fmt.Errorf("LoadTree returned error %v", tree.Error) } + if tree.Node.Type != data.NodeTypeFile { + continue + } lock.Lock() delete(filesToDelete, filepath.Join(otherPath, tree.Node.Name)) lock.Unlock() @@ -592,59 +603,31 @@ func (sr *ShowRemoved) createDeletedFilenames(ctx context.Context, repo restic.R uniqueBlobs restic.AssociatedBlobSet, gopts global.Options, printer progress.Printer, ) error { + printer.P("build file list to be deleted ...") filesToDelete := make(map[string]map[subNode]subNodeSnap) - for _, sn := range sr.selectedSnapshots { - err := walker.Walk(ctx, repo, *sn.Tree, walker.WalkVisitor{ - ProcessNode: func(parentTreeID restic.ID, pathname string, node *data.Node, nodeErr error) error { - if nodeErr != nil { - printer.E("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", - parentTreeID.Str(), sn.ID().Str(), nodeErr) - return nodeErr - } - if node == nil { - return nil - } - - if node.Type == data.NodeTypeFile { - fixedNode := subNode{ID: parentTreeID, node: node} - for _, blob := range node.Content { - if !uniqueBlobs.Has(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) { - continue - } - - if _, ok := filesToDelete[pathname]; !ok { - filesToDelete[pathname] = make(map[subNode]subNodeSnap) - } - if _, ok := filesToDelete[pathname][fixedNode]; !ok { - filesToDelete[pathname][fixedNode] = subNodeSnap{ - node: node, - snapshot: sn, - } - } - - // first blob is enough to construct a complete entry - break - } - } - return nil - }}) - if err != nil { - return err - } + now := time.Now() + if err := walkParallel(ctx, repo, sr.selectedSnapshots, uniqueBlobs, filesToDelete, printer); err != nil { + return err } + printer.P("file list built") + printer.VV("time to build delete list %.1f seconds", time.Since(now).Seconds()) if sr.searchFiles { // match pathnames from 'allOtherTrees' and remove from 'filesToDelete' - if err := 
sr.processOtherPathnames(ctx, repo, filesToDelete); err != nil { + now = time.Now() + if err := sr.processOtherPathnames(ctx, repo, filesToDelete, printer); err != nil { return err } + printer.VV("time to find identical pathnames %.1f seconds", time.Since(now).Seconds()) } // convert 'filesToDelete' into deletedFilenamesJSON.DeletedFiles + now = time.Now() deletedFilenamesJSON, err := sr.generateJSONData(filesToDelete) if err != nil { return err } + printer.VV("time to generate output %.1f seconds", time.Since(now).Seconds()) if !gopts.JSON { printer.P("\n*** files to be removed ***") @@ -707,15 +690,18 @@ func showRemovedFiles(ctx context.Context, repo restic.Repository, return ctx.Err() } + now := time.Now() uniqueBlobs := repo.NewAssociatedBlobSet() if err := data.FindUsedBlobs(ctx, repo, sr.selectedTrees, uniqueBlobs, nil); err != nil { return err } + printer.VV("time to gather used blobs %.1f seconds", time.Since(now).Seconds()) + now = time.Now() if err := sr.removeStillUsedBlobs(ctx, repo, uniqueBlobs); err != nil { return err } - + printer.VV("time to remove still used blobs %.1f seconds", time.Since(now).Seconds()) return sr.createDeletedFilenames(ctx, repo, uniqueBlobs, gopts, printer) } @@ -751,3 +737,79 @@ func makeDirectoryTree(treeRoots []restic.ID, parentToChild map[restic.ID][]subN return directoryNames } + +// walkParallel walks all the snapshoots in selectedSnapshots in parallel +// it generates the delete file list from the blobs in 'uniqueBlobs' +func walkParallel(ctx context.Context, repo restic.Repository, selectedSnapshots []*data.Snapshot, + uniqueBlobs restic.AssociatedBlobSet, filesToDelete map[string]map[subNode]subNodeSnap, + printer progress.Printer, +) error { + + var lock sync.Mutex + chanSnapshot := make(chan *data.Snapshot) + wg, wgCtx := errgroup.WithContext(ctx) + + // go routine 1: dispense snapshots + wg.Go(func() error { + for _, sn := range selectedSnapshots { + chanSnapshot <- sn + } + + close(chanSnapshot) + return nil + }) 
+ + worker := func() error { + for sn := range chanSnapshot { + err := walker.Walk(wgCtx, repo, *sn.Tree, walker.WalkVisitor{ + ProcessNode: func(parentTreeID restic.ID, pathname string, node *data.Node, nodeErr error) error { + if nodeErr != nil { + printer.E("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", + parentTreeID.Str(), sn.ID().Str(), nodeErr) + return nodeErr + } + if node == nil { + return nil + } + + if node.Type == data.NodeTypeFile { + fixedNode := subNode{ID: parentTreeID, node: node} + for _, blob := range node.Content { + if !uniqueBlobs.Has(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) { + continue + } + + lock.Lock() + if _, ok := filesToDelete[pathname]; !ok { + filesToDelete[pathname] = make(map[subNode]subNodeSnap) + } + if _, ok := filesToDelete[pathname][fixedNode]; !ok { + filesToDelete[pathname][fixedNode] = subNodeSnap{ + node: node, + snapshot: sn, + } + } + lock.Unlock() + + // first blob is enough to construct a complete entry + break + } + } + + return nil + }}) + if err != nil { + return err + } + } + + return nil + } + + // go routine 2 .. 
n+1: workers + for i := 0; i < runtime.GOMAXPROCS(0); i++ { + wg.Go(worker) + } + + return wg.Wait() +} From 33e5579d22a15858f2e65e83d46b8ee4a6786f8b Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Mon, 20 Apr 2026 07:55:33 +0100 Subject: [PATCH 7/7] restic forget --show-removed-files - cleanup General code cleanup: * removed debugging timing output * converted 'filesToDelete' to a map[string]map[*data.Snapshot]*data.Node, could be simplified since using walker.Walk() * changed generateJSONData() accordingly * ignoring Tree blobs in 'uniqueBlobs' completely, since directory removals are completely ignored --- cmd/restic/cmd_forget.go | 235 ++++++++++++++++++--------------------- 1 file changed, 108 insertions(+), 127 deletions(-) diff --git a/cmd/restic/cmd_forget.go b/cmd/restic/cmd_forget.go index b9cbb4864..883677558 100644 --- a/cmd/restic/cmd_forget.go +++ b/cmd/restic/cmd_forget.go @@ -460,11 +460,6 @@ type subNode struct { node *data.Node } -type subNodeSnap struct { - node *data.Node - snapshot *data.Snapshot -} - type DeleteFileInfo struct { SnapshotID restic.ID `json:"snapshot"` Path string `json:"path"` @@ -483,18 +478,16 @@ type ShowRemoved struct { allOtherTrees []restic.ID otherParentToChild map[restic.ID][]subNode searchFiles bool - printer progress.Printer } // makeShowRemoved: initializes &ShowRemoved -func makeShowRemoved(searchFiles bool, printer progress.Printer) *ShowRemoved { +func makeShowRemoved(searchFiles bool) *ShowRemoved { return &ShowRemoved{ selectedSnapshots: []*data.Snapshot{}, selectedTrees: []restic.ID{}, allOtherTrees: []restic.ID{}, otherParentToChild: make(map[restic.ID][]subNode), searchFiles: searchFiles, - printer: printer, } } @@ -502,19 +495,18 @@ func makeShowRemoved(searchFiles bool, printer progress.Printer) *ShowRemoved { // in use and removes them from 'uniqueBlobs' // at the same time, the tree hierarchy is collected for the 'allOtherTrees' func (sr *ShowRemoved) 
removeStillUsedBlobs(ctx context.Context, repo restic.Repository, - uniqueBlobs restic.AssociatedBlobSet, + uniqueBlobs restic.AssociatedBlobSet, printer progress.Printer, ) error { var lock sync.Mutex - sr.printer.P("find still used blobs ...") - bar := sr.printer.NewCounter("all other snapshots") + printer.P("find used blobs in all other snapshots ...") + bar := printer.NewCounter("all other snapshots") + bar.SetMax(uint64(len(sr.allOtherTrees))) defer bar.Done() + seenTree := restic.NewIDSet() err := data.StreamTrees(ctx, repo, sr.allOtherTrees, bar, func(tree restic.ID) bool { - lock.Lock() seen := seenTree.Has(tree) seenTree.Insert(tree) - uniqueBlobs.Delete(restic.BlobHandle{ID: tree, Type: restic.TreeBlob}) - lock.Unlock() return seen }, func(id restic.ID, err error, nodes data.TreeNodeIterator) error { if err != nil { @@ -522,11 +514,11 @@ func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Rep } children := []subNode{} - for tree := range nodes { - if tree.Error != nil { - return fmt.Errorf("LoadTree returned error %v", tree.Error) + for nodeIter := range nodes { + if nodeIter.Error != nil { + return fmt.Errorf("LoadTree returned error %v", nodeIter.Error) } - node := tree.Node + node := nodeIter.Node switch node.Type { case data.NodeTypeFile: for _, blob := range node.Content { @@ -554,16 +546,18 @@ func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Rep // processOtherPathnames is activated when option --search-files is called for // search through all the trees attached to 'sr.allOtherTrees' func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Repository, - filesToDelete map[string]map[subNode]subNodeSnap, printer progress.Printer, + filesToDelete map[string]map[*data.Snapshot]*data.Node, printer progress.Printer, ) error { // build tree topology for all other snapshots otherDirectoryTimes := makeDirectoryTree(sr.allOtherTrees, sr.otherParentToChild) - printer.P("look for 
identical pathnames ...") - seenTrees := restic.NewIDSet() + printer.P("look for identical pathnames in all other snapshots ...") var lock sync.Mutex - bar := sr.printer.NewCounter("all other snapshots") + bar := printer.NewCounter("all other snapshots") + bar.SetMax(uint64(len(sr.allOtherTrees))) defer bar.Done() + + seenTrees := restic.NewIDSet() err := data.StreamTrees(ctx, repo, sr.allOtherTrees, bar, func(tree restic.ID) bool { seen := seenTrees.Has(tree) seenTrees.Insert(tree) @@ -578,15 +572,16 @@ func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Re return nil } - for tree := range nodes { - if tree.Error != nil { - return fmt.Errorf("LoadTree returned error %v", tree.Error) + for nodeIter := range nodes { + if nodeIter.Error != nil { + return fmt.Errorf("LoadTree returned error %v", nodeIter.Error) } - if tree.Node.Type != data.NodeTypeFile { + if nodeIter.Node.Type != data.NodeTypeFile { continue } + lock.Lock() - delete(filesToDelete, filepath.Join(otherPath, tree.Node.Name)) + delete(filesToDelete, filepath.Join(otherPath, nodeIter.Node.Name)) lock.Unlock() } return nil @@ -595,6 +590,78 @@ func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Re return err } +// walkParallel walks all the snapshots in selectedSnapshots in parallel +// it generates the delete file list from the blobs in 'uniqueBlobs' +func walkParallel(ctx context.Context, repo restic.Repository, selectedSnapshots []*data.Snapshot, + uniqueBlobs restic.AssociatedBlobSet, filesToDelete map[string]map[*data.Snapshot]*data.Node, + printer progress.Printer, +) error { + + var lock sync.Mutex + chanSnapshot := make(chan *data.Snapshot) + wg, wgCtx := errgroup.WithContext(ctx) + bar := printer.NewCounter("walk selected snapshots") + bar.SetMax(uint64(len(selectedSnapshots))) + defer bar.Done() + + // go routine 1: dispense snapshots + wg.Go(func() error { + for _, sn := range selectedSnapshots { + chanSnapshot <- sn + } + + 
close(chanSnapshot) + return nil + }) + + worker := func() error { + for sn := range chanSnapshot { + err := walker.Walk(wgCtx, repo, *sn.Tree, walker.WalkVisitor{ + ProcessNode: func(parentTreeID restic.ID, pathname string, node *data.Node, nodeErr error) error { + if nodeErr != nil { + printer.E("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", + parentTreeID.Str(), sn.ID().Str(), nodeErr) + return nodeErr + } + if node == nil || node.Type != data.NodeTypeFile { + return nil + } + + for _, blob := range node.Content { + if !uniqueBlobs.Has(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) { + continue + } + + lock.Lock() + if _, ok := filesToDelete[pathname]; !ok { + filesToDelete[pathname] = make(map[*data.Snapshot]*data.Node) + } + filesToDelete[pathname][sn] = node + lock.Unlock() + + // first blob is enough to construct a complete entry + break + } + + return nil + }}) + if err != nil { + return err + } + bar.Add(1) + } + + return nil + } + + // go routine 2 .. 
n+1: workers + for i := 0; i < runtime.GOMAXPROCS(0); i++ { + wg.Go(worker) + } + + return wg.Wait() +} + // createDeletedFilenames walks through the selected snapshots (treeList) // and takes note of the blobs in 'uniqueBlobs' // the tree IDs related to these blobs are collected for naming and finding the @@ -604,30 +671,23 @@ func (sr *ShowRemoved) createDeletedFilenames(ctx context.Context, repo restic.R ) error { printer.P("build file list to be deleted ...") - filesToDelete := make(map[string]map[subNode]subNodeSnap) - now := time.Now() + filesToDelete := make(map[string]map[*data.Snapshot]*data.Node) if err := walkParallel(ctx, repo, sr.selectedSnapshots, uniqueBlobs, filesToDelete, printer); err != nil { return err } - printer.P("file list built") - printer.VV("time to build delete list %.1f seconds", time.Since(now).Seconds()) if sr.searchFiles { - // match pathnames from 'allOtherTrees' and remove from 'filesToDelete' - now = time.Now() + // match identical pathnames from 'allOtherTrees' and remove from 'filesToDelete' if err := sr.processOtherPathnames(ctx, repo, filesToDelete, printer); err != nil { return err } - printer.VV("time to find identical pathnames %.1f seconds", time.Since(now).Seconds()) } // convert 'filesToDelete' into deletedFilenamesJSON.DeletedFiles - now = time.Now() deletedFilenamesJSON, err := sr.generateJSONData(filesToDelete) if err != nil { return err } - printer.VV("time to generate output %.1f seconds", time.Since(now).Seconds()) if !gopts.JSON { printer.P("\n*** files to be removed ***") @@ -642,7 +702,7 @@ func (sr *ShowRemoved) createDeletedFilenames(ctx context.Context, repo restic.R // generateJSONData collects data blobs from 'filesToDelete' // The structure for JSON is created and filled. 
-func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]subNodeSnap) (*DeletedFilenamesJSON, error) { +func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[*data.Snapshot]*data.Node) (*DeletedFilenamesJSON, error) { resultJSON := &DeletedFilenamesJSON{ MessageType: "deleted_files", @@ -650,15 +710,16 @@ func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]sub } for _, name := range slices.Sorted(maps.Keys(filesToDelete)) { - oldest := slices.MinFunc(slices.Collect(maps.Values(filesToDelete[name])), func(a, b subNodeSnap) int { - return a.snapshot.Time.Compare(b.snapshot.Time) + oldest := slices.MinFunc(slices.Collect(maps.Keys(filesToDelete[name])), func(a, b *data.Snapshot) int { + return a.Time.Compare(b.Time) }) + node := filesToDelete[name][oldest] newEntry := DeleteFileInfo{ Path: name, - Size: oldest.node.Size, - Mtime: oldest.node.ModTime.Truncate(time.Second), - SnapshotID: *(oldest.snapshot).ID(), + Size: node.Size, + Mtime: node.ModTime.Truncate(time.Second), + SnapshotID: *oldest.ID(), } resultJSON.DeletedFiles = append(resultJSON.DeletedFiles, newEntry) } @@ -669,15 +730,14 @@ func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]sub // showRemovedFiles prepares a list of files which are going to be removed // when forget --prune is run for 'removeSnIDs' // this function is the main driver -func showRemovedFiles(ctx context.Context, repo restic.Repository, - removeSnIDs restic.IDSet, opts ForgetOptions, - gopts global.Options, snapshotLister restic.Lister, printer progress.Printer, +func showRemovedFiles(ctx context.Context, repo restic.Repository, removeSnIDs restic.IDSet, + opts ForgetOptions, gopts global.Options, snapshotLister restic.Lister, printer progress.Printer, ) error { if err := repo.LoadIndex(ctx, printer); err != nil { return err } - sr := makeShowRemoved(opts.SearchFiles, printer) + sr := makeShowRemoved(opts.SearchFiles) for sn := range 
FindFilteredSnapshots(ctx, snapshotLister, repo, &data.SnapshotFilter{}, nil, printer) { if removeSnIDs.Has(*sn.ID()) { sr.selectedTrees = append(sr.selectedTrees, *sn.Tree) @@ -690,18 +750,15 @@ func showRemovedFiles(ctx context.Context, repo restic.Repository, return ctx.Err() } - now := time.Now() + printer.P("find used blobs for selected snapshots ...") uniqueBlobs := repo.NewAssociatedBlobSet() if err := data.FindUsedBlobs(ctx, repo, sr.selectedTrees, uniqueBlobs, nil); err != nil { return err } - printer.VV("time to gather used blobs %.1f seconds", time.Since(now).Seconds()) - now = time.Now() - if err := sr.removeStillUsedBlobs(ctx, repo, uniqueBlobs); err != nil { + if err := sr.removeStillUsedBlobs(ctx, repo, uniqueBlobs, printer); err != nil { return err } - printer.VV("time to remove still used blobs %.1f seconds", time.Since(now).Seconds()) return sr.createDeletedFilenames(ctx, repo, uniqueBlobs, gopts, printer) } @@ -737,79 +794,3 @@ func makeDirectoryTree(treeRoots []restic.ID, parentToChild map[restic.ID][]subN return directoryNames } - -// walkParallel walks all the snapshoots in selectedSnapshots in parallel -// it generates the delete file list from the blobs in 'uniqueBlobs' -func walkParallel(ctx context.Context, repo restic.Repository, selectedSnapshots []*data.Snapshot, - uniqueBlobs restic.AssociatedBlobSet, filesToDelete map[string]map[subNode]subNodeSnap, - printer progress.Printer, -) error { - - var lock sync.Mutex - chanSnapshot := make(chan *data.Snapshot) - wg, wgCtx := errgroup.WithContext(ctx) - - // go routine 1: dispense snapshots - wg.Go(func() error { - for _, sn := range selectedSnapshots { - chanSnapshot <- sn - } - - close(chanSnapshot) - return nil - }) - - worker := func() error { - for sn := range chanSnapshot { - err := walker.Walk(wgCtx, repo, *sn.Tree, walker.WalkVisitor{ - ProcessNode: func(parentTreeID restic.ID, pathname string, node *data.Node, nodeErr error) error { - if nodeErr != nil { - printer.E("Unable to load 
tree %s\n ... which belongs to snapshot %s - reason %v\n", - parentTreeID.Str(), sn.ID().Str(), nodeErr) - return nodeErr - } - if node == nil { - return nil - } - - if node.Type == data.NodeTypeFile { - fixedNode := subNode{ID: parentTreeID, node: node} - for _, blob := range node.Content { - if !uniqueBlobs.Has(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) { - continue - } - - lock.Lock() - if _, ok := filesToDelete[pathname]; !ok { - filesToDelete[pathname] = make(map[subNode]subNodeSnap) - } - if _, ok := filesToDelete[pathname][fixedNode]; !ok { - filesToDelete[pathname][fixedNode] = subNodeSnap{ - node: node, - snapshot: sn, - } - } - lock.Unlock() - - // first blob is enough to construct a complete entry - break - } - } - - return nil - }}) - if err != nil { - return err - } - } - - return nil - } - - // go routine 2 .. n+1: workers - for i := 0; i < runtime.GOMAXPROCS(0); i++ { - wg.Go(worker) - } - - return wg.Wait() -}