mirror of
https://github.com/restic/restic.git
synced 2026-02-03 12:29:37 -05:00
The TreeNodeIterator decodes nodes while iterating over a tree blob. This should reduce peak memory usage, as now only the serialized tree blob and a single node have to be alive at the same time. Using the iterator has implications for the error handling, however. Now it is necessary that all loops that iterate through a tree check for errors before using the node returned by the iterator. The other change is that it is no longer possible to iterate over a tree multiple times. Instead, it must be loaded a second time. This only affects the tree rewriting code.
45 lines
1.2 KiB
Go
45 lines
1.2 KiB
Go
package data
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
|
|
"github.com/restic/restic/internal/restic"
|
|
"github.com/restic/restic/internal/ui/progress"
|
|
)
|
|
|
|
// FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data
|
|
// blobs) to the set blobs. Already seen tree blobs will not be visited again.
|
|
func FindUsedBlobs(ctx context.Context, repo restic.Loader, treeIDs restic.IDs, blobs restic.FindBlobSet, p *progress.Counter) error {
|
|
var lock sync.Mutex
|
|
|
|
return StreamTrees(ctx, repo, treeIDs, p, func(treeID restic.ID) bool {
|
|
// locking is necessary the goroutine below concurrently adds data blobs
|
|
lock.Lock()
|
|
h := restic.BlobHandle{ID: treeID, Type: restic.TreeBlob}
|
|
blobReferenced := blobs.Has(h)
|
|
// noop if already referenced
|
|
blobs.Insert(h)
|
|
lock.Unlock()
|
|
return blobReferenced
|
|
}, func(_ restic.ID, err error, nodes TreeNodeIterator) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for item := range nodes {
|
|
if item.Error != nil {
|
|
return item.Error
|
|
}
|
|
lock.Lock()
|
|
switch item.Node.Type {
|
|
case NodeTypeFile:
|
|
for _, blob := range item.Node.Content {
|
|
blobs.Insert(restic.BlobHandle{ID: blob, Type: restic.DataBlob})
|
|
}
|
|
}
|
|
lock.Unlock()
|
|
}
|
|
return nil
|
|
})
|
|
}
|