mirror of
https://github.com/restic/restic.git
synced 2026-05-28 04:35:41 -04:00
restic forget --show-removed-files - cleanup
General code cleanup: * removed debugging timing output * converted 'filesToDelete' to a map[string]map[*data.Snapshot]*data.Node, could be simplified since using walker.Walk() * changed generateJSONData() accordingly * ignoring Tree blobs in 'uniqueBlobs' completely, since directory removals are completely ignored
This commit is contained in:
parent
734f7cd2b1
commit
33e5579d22
1 changed files with 108 additions and 127 deletions
|
|
@ -460,11 +460,6 @@ type subNode struct {
|
|||
node *data.Node
|
||||
}
|
||||
|
||||
type subNodeSnap struct {
|
||||
node *data.Node
|
||||
snapshot *data.Snapshot
|
||||
}
|
||||
|
||||
type DeleteFileInfo struct {
|
||||
SnapshotID restic.ID `json:"snapshot"`
|
||||
Path string `json:"path"`
|
||||
|
|
@ -483,18 +478,16 @@ type ShowRemoved struct {
|
|||
allOtherTrees []restic.ID
|
||||
otherParentToChild map[restic.ID][]subNode
|
||||
searchFiles bool
|
||||
printer progress.Printer
|
||||
}
|
||||
|
||||
// makeShowRemoved: initializes &ShowRemoved
|
||||
func makeShowRemoved(searchFiles bool, printer progress.Printer) *ShowRemoved {
|
||||
func makeShowRemoved(searchFiles bool) *ShowRemoved {
|
||||
return &ShowRemoved{
|
||||
selectedSnapshots: []*data.Snapshot{},
|
||||
selectedTrees: []restic.ID{},
|
||||
allOtherTrees: []restic.ID{},
|
||||
otherParentToChild: make(map[restic.ID][]subNode),
|
||||
searchFiles: searchFiles,
|
||||
printer: printer,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -502,19 +495,18 @@ func makeShowRemoved(searchFiles bool, printer progress.Printer) *ShowRemoved {
|
|||
// in use and removes them from 'uniqueBlobs'
|
||||
// at the same time, the tree hierarchy is collected for the 'allOtherTrees'
|
||||
func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Repository,
|
||||
uniqueBlobs restic.AssociatedBlobSet,
|
||||
uniqueBlobs restic.AssociatedBlobSet, printer progress.Printer,
|
||||
) error {
|
||||
var lock sync.Mutex
|
||||
sr.printer.P("find still used blobs ...")
|
||||
bar := sr.printer.NewCounter("all other snapshots")
|
||||
printer.P("find used blobs in all other snapshots ...")
|
||||
bar := printer.NewCounter("all other snapshots")
|
||||
bar.SetMax(uint64(len(sr.allOtherTrees)))
|
||||
defer bar.Done()
|
||||
|
||||
seenTree := restic.NewIDSet()
|
||||
err := data.StreamTrees(ctx, repo, sr.allOtherTrees, bar, func(tree restic.ID) bool {
|
||||
lock.Lock()
|
||||
seen := seenTree.Has(tree)
|
||||
seenTree.Insert(tree)
|
||||
uniqueBlobs.Delete(restic.BlobHandle{ID: tree, Type: restic.TreeBlob})
|
||||
lock.Unlock()
|
||||
return seen
|
||||
}, func(id restic.ID, err error, nodes data.TreeNodeIterator) error {
|
||||
if err != nil {
|
||||
|
|
@ -522,11 +514,11 @@ func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Rep
|
|||
}
|
||||
|
||||
children := []subNode{}
|
||||
for tree := range nodes {
|
||||
if tree.Error != nil {
|
||||
return fmt.Errorf("LoadTree returned error %v", tree.Error)
|
||||
for nodeIter := range nodes {
|
||||
if nodeIter.Error != nil {
|
||||
return fmt.Errorf("LoadTree returned error %v", nodeIter.Error)
|
||||
}
|
||||
node := tree.Node
|
||||
node := nodeIter.Node
|
||||
switch node.Type {
|
||||
case data.NodeTypeFile:
|
||||
for _, blob := range node.Content {
|
||||
|
|
@ -554,16 +546,18 @@ func (sr *ShowRemoved) removeStillUsedBlobs(ctx context.Context, repo restic.Rep
|
|||
// processOtherPathnames is activated when option --search-files is called for
|
||||
// search through all the trees attached to 'sr.allOtherTrees'
|
||||
func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Repository,
|
||||
filesToDelete map[string]map[subNode]subNodeSnap, printer progress.Printer,
|
||||
filesToDelete map[string]map[*data.Snapshot]*data.Node, printer progress.Printer,
|
||||
) error {
|
||||
// build tree topology for all other snapshots
|
||||
otherDirectoryTimes := makeDirectoryTree(sr.allOtherTrees, sr.otherParentToChild)
|
||||
|
||||
printer.P("look for identical pathnames ...")
|
||||
seenTrees := restic.NewIDSet()
|
||||
printer.P("look for identical pathnames in all other snapshots ...")
|
||||
var lock sync.Mutex
|
||||
bar := sr.printer.NewCounter("all other snapshots")
|
||||
bar := printer.NewCounter("all other snapshots")
|
||||
bar.SetMax(uint64(len(sr.allOtherTrees)))
|
||||
defer bar.Done()
|
||||
|
||||
seenTrees := restic.NewIDSet()
|
||||
err := data.StreamTrees(ctx, repo, sr.allOtherTrees, bar, func(tree restic.ID) bool {
|
||||
seen := seenTrees.Has(tree)
|
||||
seenTrees.Insert(tree)
|
||||
|
|
@ -578,15 +572,16 @@ func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Re
|
|||
return nil
|
||||
}
|
||||
|
||||
for tree := range nodes {
|
||||
if tree.Error != nil {
|
||||
return fmt.Errorf("LoadTree returned error %v", tree.Error)
|
||||
for nodeIter := range nodes {
|
||||
if nodeIter.Error != nil {
|
||||
return fmt.Errorf("LoadTree returned error %v", nodeIter.Error)
|
||||
}
|
||||
if tree.Node.Type != data.NodeTypeFile {
|
||||
if nodeIter.Node.Type != data.NodeTypeFile {
|
||||
continue
|
||||
}
|
||||
|
||||
lock.Lock()
|
||||
delete(filesToDelete, filepath.Join(otherPath, tree.Node.Name))
|
||||
delete(filesToDelete, filepath.Join(otherPath, nodeIter.Node.Name))
|
||||
lock.Unlock()
|
||||
}
|
||||
return nil
|
||||
|
|
@ -595,6 +590,78 @@ func (sr *ShowRemoved) processOtherPathnames(ctx context.Context, repo restic.Re
|
|||
return err
|
||||
}
|
||||
|
||||
// walkParallel walks all the snapshoots in selectedSnapshots in parallel
|
||||
// it generates the delete file list from the blobs in 'uniqueBlobs'
|
||||
func walkParallel(ctx context.Context, repo restic.Repository, selectedSnapshots []*data.Snapshot,
|
||||
uniqueBlobs restic.AssociatedBlobSet, filesToDelete map[string]map[*data.Snapshot]*data.Node,
|
||||
printer progress.Printer,
|
||||
) error {
|
||||
|
||||
var lock sync.Mutex
|
||||
chanSnapshot := make(chan *data.Snapshot)
|
||||
wg, wgCtx := errgroup.WithContext(ctx)
|
||||
bar := printer.NewCounter("walk selected snapshots")
|
||||
bar.SetMax(uint64(len(selectedSnapshots)))
|
||||
defer bar.Done()
|
||||
|
||||
// go routine 1: dispense snapshots
|
||||
wg.Go(func() error {
|
||||
for _, sn := range selectedSnapshots {
|
||||
chanSnapshot <- sn
|
||||
}
|
||||
|
||||
close(chanSnapshot)
|
||||
return nil
|
||||
})
|
||||
|
||||
worker := func() error {
|
||||
for sn := range chanSnapshot {
|
||||
err := walker.Walk(wgCtx, repo, *sn.Tree, walker.WalkVisitor{
|
||||
ProcessNode: func(parentTreeID restic.ID, pathname string, node *data.Node, nodeErr error) error {
|
||||
if nodeErr != nil {
|
||||
printer.E("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n",
|
||||
parentTreeID.Str(), sn.ID().Str(), nodeErr)
|
||||
return nodeErr
|
||||
}
|
||||
if node == nil || node.Type != data.NodeTypeFile {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, blob := range node.Content {
|
||||
if !uniqueBlobs.Has(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) {
|
||||
continue
|
||||
}
|
||||
|
||||
lock.Lock()
|
||||
if _, ok := filesToDelete[pathname]; !ok {
|
||||
filesToDelete[pathname] = make(map[*data.Snapshot]*data.Node)
|
||||
}
|
||||
filesToDelete[pathname][sn] = node
|
||||
lock.Unlock()
|
||||
|
||||
// first blob is enough to construct a complete entry
|
||||
break
|
||||
}
|
||||
|
||||
return nil
|
||||
}})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
bar.Add(1)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// go routine 2 .. n+1: workers
|
||||
for i := 0; i < runtime.GOMAXPROCS(0); i++ {
|
||||
wg.Go(worker)
|
||||
}
|
||||
|
||||
return wg.Wait()
|
||||
}
|
||||
|
||||
// createDeletedFilenames walks through the selected snapshots (treeList)
|
||||
// and takes note of the blobs in 'uniqueBlobs'
|
||||
// the tree IDs related to these blobs are collected for naming and finding the
|
||||
|
|
@ -604,30 +671,23 @@ func (sr *ShowRemoved) createDeletedFilenames(ctx context.Context, repo restic.R
|
|||
) error {
|
||||
|
||||
printer.P("build file list to be deleted ...")
|
||||
filesToDelete := make(map[string]map[subNode]subNodeSnap)
|
||||
now := time.Now()
|
||||
filesToDelete := make(map[string]map[*data.Snapshot]*data.Node)
|
||||
if err := walkParallel(ctx, repo, sr.selectedSnapshots, uniqueBlobs, filesToDelete, printer); err != nil {
|
||||
return err
|
||||
}
|
||||
printer.P("file list built")
|
||||
printer.VV("time to build delete list %.1f seconds", time.Since(now).Seconds())
|
||||
|
||||
if sr.searchFiles {
|
||||
// match pathnames from 'allOtherTrees' and remove from 'filesToDelete'
|
||||
now = time.Now()
|
||||
// match identical pathnames from 'allOtherTrees' and remove from 'filesToDelete'
|
||||
if err := sr.processOtherPathnames(ctx, repo, filesToDelete, printer); err != nil {
|
||||
return err
|
||||
}
|
||||
printer.VV("time to find identical pathnames %.1f seconds", time.Since(now).Seconds())
|
||||
}
|
||||
|
||||
// convert 'filesToDelete' into deletedFilenamesJSON.DeletedFiles
|
||||
now = time.Now()
|
||||
deletedFilenamesJSON, err := sr.generateJSONData(filesToDelete)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
printer.VV("time to generate output %.1f seconds", time.Since(now).Seconds())
|
||||
|
||||
if !gopts.JSON {
|
||||
printer.P("\n*** files to be removed ***")
|
||||
|
|
@ -642,7 +702,7 @@ func (sr *ShowRemoved) createDeletedFilenames(ctx context.Context, repo restic.R
|
|||
|
||||
// generateJSONData collects data blobs from 'filesToDelete'
|
||||
// The structure for JSON is created and filled.
|
||||
func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]subNodeSnap) (*DeletedFilenamesJSON, error) {
|
||||
func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[*data.Snapshot]*data.Node) (*DeletedFilenamesJSON, error) {
|
||||
|
||||
resultJSON := &DeletedFilenamesJSON{
|
||||
MessageType: "deleted_files",
|
||||
|
|
@ -650,15 +710,16 @@ func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]sub
|
|||
}
|
||||
|
||||
for _, name := range slices.Sorted(maps.Keys(filesToDelete)) {
|
||||
oldest := slices.MinFunc(slices.Collect(maps.Values(filesToDelete[name])), func(a, b subNodeSnap) int {
|
||||
return a.snapshot.Time.Compare(b.snapshot.Time)
|
||||
oldest := slices.MinFunc(slices.Collect(maps.Keys(filesToDelete[name])), func(a, b *data.Snapshot) int {
|
||||
return a.Time.Compare(b.Time)
|
||||
})
|
||||
|
||||
node := filesToDelete[name][oldest]
|
||||
newEntry := DeleteFileInfo{
|
||||
Path: name,
|
||||
Size: oldest.node.Size,
|
||||
Mtime: oldest.node.ModTime.Truncate(time.Second),
|
||||
SnapshotID: *(oldest.snapshot).ID(),
|
||||
Size: node.Size,
|
||||
Mtime: node.ModTime.Truncate(time.Second),
|
||||
SnapshotID: *oldest.ID(),
|
||||
}
|
||||
resultJSON.DeletedFiles = append(resultJSON.DeletedFiles, newEntry)
|
||||
}
|
||||
|
|
@ -669,15 +730,14 @@ func (sr *ShowRemoved) generateJSONData(filesToDelete map[string]map[subNode]sub
|
|||
// showRemovedFiles prepares a list of files which are going to be removed
|
||||
// when forget --prune is run for 'removeSnIDs'
|
||||
// this function is the main driver
|
||||
func showRemovedFiles(ctx context.Context, repo restic.Repository,
|
||||
removeSnIDs restic.IDSet, opts ForgetOptions,
|
||||
gopts global.Options, snapshotLister restic.Lister, printer progress.Printer,
|
||||
func showRemovedFiles(ctx context.Context, repo restic.Repository, removeSnIDs restic.IDSet,
|
||||
opts ForgetOptions, gopts global.Options, snapshotLister restic.Lister, printer progress.Printer,
|
||||
) error {
|
||||
if err := repo.LoadIndex(ctx, printer); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sr := makeShowRemoved(opts.SearchFiles, printer)
|
||||
sr := makeShowRemoved(opts.SearchFiles)
|
||||
for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &data.SnapshotFilter{}, nil, printer) {
|
||||
if removeSnIDs.Has(*sn.ID()) {
|
||||
sr.selectedTrees = append(sr.selectedTrees, *sn.Tree)
|
||||
|
|
@ -690,18 +750,15 @@ func showRemovedFiles(ctx context.Context, repo restic.Repository,
|
|||
return ctx.Err()
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
printer.P("find used blobs for selected snapshots ...")
|
||||
uniqueBlobs := repo.NewAssociatedBlobSet()
|
||||
if err := data.FindUsedBlobs(ctx, repo, sr.selectedTrees, uniqueBlobs, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
printer.VV("time to gather used blobs %.1f seconds", time.Since(now).Seconds())
|
||||
|
||||
now = time.Now()
|
||||
if err := sr.removeStillUsedBlobs(ctx, repo, uniqueBlobs); err != nil {
|
||||
if err := sr.removeStillUsedBlobs(ctx, repo, uniqueBlobs, printer); err != nil {
|
||||
return err
|
||||
}
|
||||
printer.VV("time to remove still used blobs %.1f seconds", time.Since(now).Seconds())
|
||||
return sr.createDeletedFilenames(ctx, repo, uniqueBlobs, gopts, printer)
|
||||
}
|
||||
|
||||
|
|
@ -737,79 +794,3 @@ func makeDirectoryTree(treeRoots []restic.ID, parentToChild map[restic.ID][]subN
|
|||
|
||||
return directoryNames
|
||||
}
|
||||
|
||||
// walkParallel walks all the snapshoots in selectedSnapshots in parallel
|
||||
// it generates the delete file list from the blobs in 'uniqueBlobs'
|
||||
func walkParallel(ctx context.Context, repo restic.Repository, selectedSnapshots []*data.Snapshot,
|
||||
uniqueBlobs restic.AssociatedBlobSet, filesToDelete map[string]map[subNode]subNodeSnap,
|
||||
printer progress.Printer,
|
||||
) error {
|
||||
|
||||
var lock sync.Mutex
|
||||
chanSnapshot := make(chan *data.Snapshot)
|
||||
wg, wgCtx := errgroup.WithContext(ctx)
|
||||
|
||||
// go routine 1: dispense snapshots
|
||||
wg.Go(func() error {
|
||||
for _, sn := range selectedSnapshots {
|
||||
chanSnapshot <- sn
|
||||
}
|
||||
|
||||
close(chanSnapshot)
|
||||
return nil
|
||||
})
|
||||
|
||||
worker := func() error {
|
||||
for sn := range chanSnapshot {
|
||||
err := walker.Walk(wgCtx, repo, *sn.Tree, walker.WalkVisitor{
|
||||
ProcessNode: func(parentTreeID restic.ID, pathname string, node *data.Node, nodeErr error) error {
|
||||
if nodeErr != nil {
|
||||
printer.E("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n",
|
||||
parentTreeID.Str(), sn.ID().Str(), nodeErr)
|
||||
return nodeErr
|
||||
}
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if node.Type == data.NodeTypeFile {
|
||||
fixedNode := subNode{ID: parentTreeID, node: node}
|
||||
for _, blob := range node.Content {
|
||||
if !uniqueBlobs.Has(restic.BlobHandle{ID: blob, Type: restic.DataBlob}) {
|
||||
continue
|
||||
}
|
||||
|
||||
lock.Lock()
|
||||
if _, ok := filesToDelete[pathname]; !ok {
|
||||
filesToDelete[pathname] = make(map[subNode]subNodeSnap)
|
||||
}
|
||||
if _, ok := filesToDelete[pathname][fixedNode]; !ok {
|
||||
filesToDelete[pathname][fixedNode] = subNodeSnap{
|
||||
node: node,
|
||||
snapshot: sn,
|
||||
}
|
||||
}
|
||||
lock.Unlock()
|
||||
|
||||
// first blob is enough to construct a complete entry
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// go routine 2 .. n+1: workers
|
||||
for i := 0; i < runtime.GOMAXPROCS(0); i++ {
|
||||
wg.Go(worker)
|
||||
}
|
||||
|
||||
return wg.Wait()
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue