restic forget --show-removed-files - cleanup

General code cleanup:
* removed debugging timing output
* converted 'filesToDelete' to a map[string]map[*data.Snapshot]*data.Node,
  could be simplified since using walker.Walk()
* changed generateJSONData() accordingly
* ignoring Tree blobs in 'uniqueBlobs' completely, since directory
  removals are completely ignored
This commit is contained in:
Winfried Plappert 2026-04-20 07:55:33 +01:00
parent 734f7cd2b1
commit 33e5579d22

View file

@ -460,11 +460,6 @@ type subNode struct {
node *data.Node
}
type subNodeSnap struct {
node *data.Node
snapshot *data.Snapshot
}
type DeleteFileInfo struct {
SnapshotID restic.ID `json:"snapshot"`
Path string `json:"path"`
@ -483,18 +478,16 @@ type ShowRemoved struct {
allOtherTrees []restic.ID
otherParentToChild map[restic.ID][]subNode
searchFiles bool
printer progress.Printer
}
// makeShowRemoved: initializes &ShowRemoved
func makeShowRemoved(searchFiles bool, printer progress.Printer) *ShowRemoved {
func makeShowRemoved(searchFiles bool) *ShowRemoved {
return &ShowRemoved{
selectedSnapshots: []*data.Snapshot{},
selectedTrees: []restic.ID{},
allOtherTrees: []restic.ID{},
otherParentToChild: make(map[restic.ID][]subNode),
searchFiles: searchFiles,
printer: printer,
}
}
@ -502,19 +495,18 @@ func makeShowRemoved(searchFiles bool, printer progress.Printer) *ShowRemoved {
// in use and removes them from 'uniqueBlobs'
// at the same time, the tree hierarchy is collected for the 'allOtherTrees'
func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Repository,
uniqueBlobs restic.AssociatedBlobSet,
uniqueBlobs restic.AssociatedBlobSet, printer progress.Printer,
) error {
var lock sync.Mutex
sr.printer.P("find still used blobs ...")
bar := sr.printer.NewCounter("all other snapshots")
printer.P("find used blobs in all other snapshots ...")
bar := printer.NewCounter("all other snapshots")
bar.SetMax(uint64(len(sr.allOtherTrees)))
defer bar.Done()
seenTree := restic.NewIDSet()
err := data.StreamTrees(ctx, repo, sr.allOtherTrees, bar, func(tree restic.ID) bool {
lock.Lock()
seen := seenTree.Has(tree)
seenTree.Insert(tree)
uniqueBlobs.Delete(restic.BlobHandle{ID: tree, Type: restic.TreeBlob})
lock.Unlock()
return seen
}, func(id restic.ID, err error, nodes data.TreeNodeIterator) error {
if err != nil {
@ -522,11 +514,11 @@ func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Rep
}
children := []subNode{}
for tree := range nodes {
if tree.Error != nil {
return fmt.Errorf("LoadTree returned error %v", tree.Error)
for nodeIter := range nodes {
if nodeIter.Error != nil {
return fmt.Errorf("LoadTree returned error %v", nodeIter.Error)
}
node := tree.Node
node := nodeIter.Node
switch node.Type {
case data.NodeTypeFile:
for _, blob := range node.Content {
@ -554,16 +546,18 @@ func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Rep
// processOtherPathnames is activated when option --search-files is called for
// search through all the trees attached to 'sr.allOtherTrees'
func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Repository,
filesToDelete map[string]map[subNode]subNodeSnap, printer progress.Printer,
filesToDelete map[string]map[*data.Snapshot]*data.Node, printer progress.Printer,
) error {
// build tree topology for all other snapshots
otherDirectoryTimes := makeDirectoryTree(sr.allOtherTrees, sr.otherParentToChild)
printer.P("look for identical pathnames ...")
seenTrees := restic.NewIDSet()
printer.P("look for identical pathnames in all other snapshots ...")
var lock sync.Mutex
bar := sr.printer.NewCounter("all other snapshots")
bar := printer.NewCounter("all other snapshots")
bar.SetMax(uint64(len(sr.allOtherTrees)))
defer bar.Done()
seenTrees := restic.NewIDSet()
err := data.StreamTrees(ctx, repo, sr.allOtherTrees, bar, func(tree restic.ID) bool {
seen := seenTrees.Has(tree)
seenTrees.Insert(tree)
@ -578,15 +572,16 @@ func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Re
return nil
}
for tree := range nodes {
if tree.Error != nil {
return fmt.Errorf("LoadTree returned error %v", tree.Error)
for nodeIter := range nodes {
if nodeIter.Error != nil {
return fmt.Errorf("LoadTree returned error %v", nodeIter.Error)
}
if tree.Node.Type != data.NodeTypeFile {
if nodeIter.Node.Type != data.NodeTypeFile {
continue
}
lock.Lock()
delete(filesToDelete, filepath.Join(otherPath, tree.Node.Name))
delete(filesToDelete, filepath.Join(otherPath, nodeIter.Node.Name))
lock.Unlock()
}
return nil
@ -595,6 +590,78 @@ func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Re
return err
}
// walkParallel walks all the snapshoots in selectedSnapshots in parallel
// it generates the delete file list from the blobs in 'uniqueBlobs'
func walkParallel(ctx context.Context, repo restic.Repository, selectedSnapshots []*data.Snapshot,
uniqueBlobs restic.AssociatedBlobSet, filesToDelete map[string]map[*data.Snapshot]*data.Node,
printer progress.Printer,
) error {
var lock sync.Mutex
chanSnapshot := make(chan *data.Snapshot)
wg, wgCtx := errgroup.WithContext(ctx)
bar := printer.NewCounter("walk selected snapshots")
bar.SetMax(uint64(len(selectedSnapshots)))
defer bar.Done()
// go routine 1: dispense snapshots
wg.Go(func() error {
for _, sn := range selectedSnapshots {
chanSnapshot <- sn
}
close(chanSnapshot)
return nil
})
worker := func() error {
for sn := range chanSnapshot {
err := walker.Walk(wgCtx, repo, *sn.Tree, walker.WalkVisitor{
ProcessNode: func(parentTreeID restic.ID, pathname string, node *data.Node, nodeErr error) error {
if nodeErr != nil {
printer.E("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n",
parentTreeID.Str(), sn.ID().Str(), nodeErr)
return nodeErr
}
if node == nil || node.Type != data.NodeTypeFile {
return nil
}
for _, blob := range node.Content {
if !uniqueBlobs.Has(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) {
continue
}
lock.Lock()
if _, ok := filesToDelete[pathname]; !ok {
filesToDelete[pathname] = make(map[*data.Snapshot]*data.Node)
}
filesToDelete[pathname][sn] = node
lock.Unlock()
// first blob is enough to construct a complete entry
break
}
return nil
}})
if err != nil {
return err
}
bar.Add(1)
}
return nil
}
// go routine 2 .. n+1: workers
for i := 0; i < runtime.GOMAXPROCS(0); i++ {
wg.Go(worker)
}
return wg.Wait()
}
// createDeletedFilenames walks through the selected snapshots (treeList)
// and takes note of the blobs in 'uniqueBlobs'
// the tree IDs related to these blobs are collected for naming and finding the
@ -604,30 +671,23 @@ func (sr *ShowRemoved) createDeletedFilenames(ctx context.Context, repo restic.R
) error {
printer.P("build file list to be deleted ...")
filesToDelete := make(map[string]map[subNode]subNodeSnap)
now := time.Now()
filesToDelete := make(map[string]map[*data.Snapshot]*data.Node)
if err := walkParallel(ctx, repo, sr.selectedSnapshots, uniqueBlobs, filesToDelete, printer); err != nil {
return err
}
printer.P("file list built")
printer.VV("time to build delete list %.1f seconds", time.Since(now).Seconds())
if sr.searchFiles {
// match pathnames from 'allOtherTrees' and remove from 'filesToDelete'
now = time.Now()
// match identical pathnames from 'allOtherTrees' and remove from 'filesToDelete'
if err := sr.processOtherPathnames(ctx, repo, filesToDelete, printer); err != nil {
return err
}
printer.VV("time to find identical pathnames %.1f seconds", time.Since(now).Seconds())
}
// convert 'filesToDelete' into deletedFilenamesJSON.DeletedFiles
now = time.Now()
deletedFilenamesJSON, err := sr.generateJSONData(filesToDelete)
if err != nil {
return err
}
printer.VV("time to generate output %.1f seconds", time.Since(now).Seconds())
if !gopts.JSON {
printer.P("\n*** files to be removed ***")
@ -642,7 +702,7 @@ func (sr *ShowRemoved) createDeletedFilenames(ctx context.Context, repo restic.R
// generateJSONData collects data blobs from 'filesToDelete'
// The structure for JSON is created and filled.
func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]subNodeSnap) (*DeletedFilenamesJSON, error) {
func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[*data.Snapshot]*data.Node) (*DeletedFilenamesJSON, error) {
resultJSON := &DeletedFilenamesJSON{
MessageType: "deleted_files",
@ -650,15 +710,16 @@ func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]sub
}
for _, name := range slices.Sorted(maps.Keys(filesToDelete)) {
oldest := slices.MinFunc(slices.Collect(maps.Values(filesToDelete[name])), func(a, b subNodeSnap) int {
return a.snapshot.Time.Compare(b.snapshot.Time)
oldest := slices.MinFunc(slices.Collect(maps.Keys(filesToDelete[name])), func(a, b *data.Snapshot) int {
return a.Time.Compare(b.Time)
})
node := filesToDelete[name][oldest]
newEntry := DeleteFileInfo{
Path: name,
Size: oldest.node.Size,
Mtime: oldest.node.ModTime.Truncate(time.Second),
SnapshotID: *(oldest.snapshot).ID(),
Size: node.Size,
Mtime: node.ModTime.Truncate(time.Second),
SnapshotID: *oldest.ID(),
}
resultJSON.DeletedFiles = append(resultJSON.DeletedFiles, newEntry)
}
@ -669,15 +730,14 @@ func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]sub
// showRemovedFiles prepares a list of files which are going to be removed
// when forget --prune is run for 'removeSnIDs'
// this function is the main driver
func showRemovedFiles(ctx context.Context, repo restic.Repository,
removeSnIDs restic.IDSet, opts ForgetOptions,
gopts global.Options, snapshotLister restic.Lister, printer progress.Printer,
func showRemovedFiles(ctx context.Context, repo restic.Repository, removeSnIDs restic.IDSet,
opts ForgetOptions, gopts global.Options, snapshotLister restic.Lister, printer progress.Printer,
) error {
if err := repo.LoadIndex(ctx, printer); err != nil {
return err
}
sr := makeShowRemoved(opts.SearchFiles, printer)
sr := makeShowRemoved(opts.SearchFiles)
for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &data.SnapshotFilter{}, nil, printer) {
if removeSnIDs.Has(*sn.ID()) {
sr.selectedTrees = append(sr.selectedTrees, *sn.Tree)
@ -690,18 +750,15 @@ func showRemovedFiles(ctx context.Context, repo restic.Repository,
return ctx.Err()
}
now := time.Now()
printer.P("find used blobs for selected snapshots ...")
uniqueBlobs := repo.NewAssociatedBlobSet()
if err := data.FindUsedBlobs(ctx, repo, sr.selectedTrees, uniqueBlobs, nil); err != nil {
return err
}
printer.VV("time to gather used blobs %.1f seconds", time.Since(now).Seconds())
now = time.Now()
if err := sr.removeStillUsedBlobs(ctx, repo, uniqueBlobs); err != nil {
if err := sr.removeStillUsedBlobs(ctx, repo, uniqueBlobs, printer); err != nil {
return err
}
printer.VV("time to remove still used blobs %.1f seconds", time.Since(now).Seconds())
return sr.createDeletedFilenames(ctx, repo, uniqueBlobs, gopts, printer)
}
@ -737,79 +794,3 @@ func makeDirectoryTree(treeRoots []restic.ID, parentToChild map[restic.ID][]subN
return directoryNames
}
// walkParallel walks all the snapshoots in selectedSnapshots in parallel
// it generates the delete file list from the blobs in 'uniqueBlobs'
func walkParallel(ctx context.Context, repo restic.Repository, selectedSnapshots []*data.Snapshot,
uniqueBlobs restic.AssociatedBlobSet, filesToDelete map[string]map[subNode]subNodeSnap,
printer progress.Printer,
) error {
var lock sync.Mutex
chanSnapshot := make(chan *data.Snapshot)
wg, wgCtx := errgroup.WithContext(ctx)
// go routine 1: dispense snapshots
wg.Go(func() error {
for _, sn := range selectedSnapshots {
chanSnapshot <- sn
}
close(chanSnapshot)
return nil
})
worker := func() error {
for sn := range chanSnapshot {
err := walker.Walk(wgCtx, repo, *sn.Tree, walker.WalkVisitor{
ProcessNode: func(parentTreeID restic.ID, pathname string, node *data.Node, nodeErr error) error {
if nodeErr != nil {
printer.E("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n",
parentTreeID.Str(), sn.ID().Str(), nodeErr)
return nodeErr
}
if node == nil {
return nil
}
if node.Type == data.NodeTypeFile {
fixedNode := subNode{ID: parentTreeID, node: node}
for _, blob := range node.Content {
if !uniqueBlobs.Has(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) {
continue
}
lock.Lock()
if _, ok := filesToDelete[pathname]; !ok {
filesToDelete[pathname] = make(map[subNode]subNodeSnap)
}
if _, ok := filesToDelete[pathname][fixedNode]; !ok {
filesToDelete[pathname][fixedNode] = subNodeSnap{
node: node,
snapshot: sn,
}
}
lock.Unlock()
// first blob is enough to construct a complete entry
break
}
}
return nil
}})
if err != nil {
return err
}
}
return nil
}
// go routine 2 .. n+1: workers
for i := 0; i < runtime.GOMAXPROCS(0); i++ {
wg.Go(worker)
}
return wg.Wait()
}