mirror of
https://github.com/restic/restic.git
synced 2026-02-03 04:20:45 -05:00
The tree.Nodes field will be replaced by an iterator that loads and serializes tree nodes on demand. Thus, the processing moves from StreamTrees into the callback. Schedule the callbacks onto the workers used by StreamTrees for proper load distribution.
200 lines
5.3 KiB
Go
200 lines
5.3 KiB
Go
package data
|
|
|
|
import (
|
|
"context"
|
|
"runtime"
|
|
"sync"
|
|
|
|
"github.com/restic/restic/internal/debug"
|
|
"github.com/restic/restic/internal/errors"
|
|
"github.com/restic/restic/internal/restic"
|
|
"github.com/restic/restic/internal/ui/progress"
|
|
"golang.org/x/sync/errgroup"
|
|
)
|
|
|
|
// trackedTreeItem is the result of loading a single tree: the tree's ID, the
// IDs of its subtrees that still need to be visited, and the index of the
// root tree it descends from.
type trackedTreeItem struct {
	restic.ID
	// Subtrees lists the child tree IDs extracted from the loaded tree;
	// nil if the tree failed to load.
	Subtrees restic.IDs
	// rootIdx is the index of this tree's root in the ID list passed to
	// StreamTrees; used for per-root progress accounting.
	rootIdx int
}
|
|
|
|
// trackedID is a tree ID queued for loading, tagged with the index of the
// root tree it descends from so that per-root progress can be tracked.
type trackedID struct {
	restic.ID
	// rootIdx is the index of this tree's root in the ID list passed to
	// StreamTrees.
	rootIdx int
}
|
|
|
|
// loadTreeWorker loads trees from repo and sends them to out.
|
|
func loadTreeWorker(
|
|
ctx context.Context,
|
|
repo restic.Loader,
|
|
in <-chan trackedID,
|
|
process func(id restic.ID, error error, tree *Tree) error,
|
|
out chan<- trackedTreeItem,
|
|
) error {
|
|
|
|
for treeID := range in {
|
|
tree, err := LoadTree(ctx, repo, treeID.ID)
|
|
if tree == nil && err == nil {
|
|
err = errors.New("tree is nil and error is nil")
|
|
}
|
|
debug.Log("load tree %v (%v) returned err: %v", tree, treeID, err)
|
|
|
|
err = process(treeID.ID, err, tree)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var subtrees restic.IDs
|
|
if tree != nil {
|
|
subtrees = tree.Subtrees()
|
|
}
|
|
|
|
job := trackedTreeItem{ID: treeID.ID, Subtrees: subtrees, rootIdx: treeID.rootIdx}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return nil
|
|
case out <- job:
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// filterTrees receives the results of tree loads and queues new trees for
// loading and processing. It is the single scheduler goroutine: it owns the
// backlog, is the only caller of skip, and updates the progress counter. It
// returns once the backlog is empty and no load jobs are outstanding, or when
// ctx is cancelled.
func filterTrees(ctx context.Context, repo restic.Loader, trees restic.IDs, loaderChan chan<- trackedID, hugeTreeLoaderChan chan<- trackedID,
	in <-chan trackedTreeItem, skip func(tree restic.ID) bool, p *progress.Counter) {

	var (
		inCh = in
		// loadCh is nil while no tree is waiting to be sent; a nil channel
		// disables its send case in the select below
		loadCh                  chan<- trackedID
		nextTreeID              trackedID
		outstandingLoadTreeJobs = 0
	)
	// tracks how many trees are currently waiting to be processed for a given root tree
	rootCounter := make([]int, len(trees))
	// build initial backlog
	backlog := make([]trackedID, 0, len(trees))
	for idx, id := range trees {
		backlog = append(backlog, trackedID{ID: id, rootIdx: idx})
		rootCounter[idx] = 1
	}

	for {
		// if no tree is waiting to be sent, pick the next one
		if loadCh == nil && len(backlog) > 0 {
			// process last added ids first, that is traverse the tree in depth-first order
			ln := len(backlog) - 1
			nextTreeID, backlog = backlog[ln], backlog[:ln]

			if skip(nextTreeID.ID) {
				rootCounter[nextTreeID.rootIdx]--
				// a root is finished once its pending-tree counter reaches zero
				if p != nil && rootCounter[nextTreeID.rootIdx] == 0 {
					p.Add(1)
				}
				continue
			}

			// route very large tree blobs to the dedicated huge-tree queue so
			// they do not stall the regular workers
			treeSize, found := repo.LookupBlobSize(restic.TreeBlob, nextTreeID.ID)
			if found && treeSize > 50*1024*1024 {
				loadCh = hugeTreeLoaderChan
			} else {
				loadCh = loaderChan
			}
		}

		// loadCh is only nil at this point if the backlog is empty
		if loadCh == nil && outstandingLoadTreeJobs == 0 {
			debug.Log("backlog is empty, all channels nil, exiting")
			return
		}

		select {
		case <-ctx.Done():
			return

		case loadCh <- nextTreeID:
			outstandingLoadTreeJobs++
			// clear loadCh so the next loop iteration picks a new tree
			loadCh = nil

		case j, ok := <-inCh:
			if !ok {
				debug.Log("input channel closed")
				// disable this select case once the workers are done
				inCh = nil
				continue
			}

			outstandingLoadTreeJobs--
			rootCounter[j.rootIdx]--

			debug.Log("input job tree %v", j.ID)
			// iterate backwards over subtree to compensate backwards traversal order of nextTreeID selection
			for i := len(j.Subtrees) - 1; i >= 0; i-- {
				id := j.Subtrees[i]
				if id.IsNull() {
					// We do not need to raise this error here, it is
					// checked when the tree is checked. Just make sure
					// that we do not add any null IDs to the backlog.
					debug.Log("tree %v has nil subtree", j.ID)
					continue
				}
				backlog = append(backlog, trackedID{ID: id, rootIdx: j.rootIdx})
				rootCounter[j.rootIdx]++
			}
			// the progress check must happen after j.Subtrees was added to the backlog
			if p != nil && rootCounter[j.rootIdx] == 0 {
				p.Add(1)
			}
		}
	}
}
|
|
|
|
// StreamTrees iteratively loads the given trees and their subtrees. The skip method
|
|
// is guaranteed to always be called from the same goroutine. The process function is
|
|
// directly called from the worker goroutines. It MUST read `nodes` until it returns an
|
|
// error or completes. If the process function returns an error, then StreamTrees will
|
|
// abort and return the error.
|
|
func StreamTrees(
|
|
ctx context.Context,
|
|
repo restic.Loader,
|
|
trees restic.IDs,
|
|
p *progress.Counter,
|
|
skip func(tree restic.ID) bool,
|
|
process func(id restic.ID, error error, tree *Tree) error,
|
|
) error {
|
|
loaderChan := make(chan trackedID)
|
|
hugeTreeChan := make(chan trackedID, 10)
|
|
loadedTreeChan := make(chan trackedTreeItem)
|
|
|
|
var loadTreeWg sync.WaitGroup
|
|
|
|
wg, ctx := errgroup.WithContext(ctx)
|
|
// decoding a tree can take quite some time such that this can be both CPU- or IO-bound
|
|
// one extra worker to handle huge tree blobs
|
|
workerCount := int(repo.Connections()) + runtime.GOMAXPROCS(0) + 1
|
|
for i := 0; i < workerCount; i++ {
|
|
workerLoaderChan := loaderChan
|
|
if i == 0 {
|
|
workerLoaderChan = hugeTreeChan
|
|
}
|
|
loadTreeWg.Add(1)
|
|
wg.Go(func() error {
|
|
defer loadTreeWg.Done()
|
|
return loadTreeWorker(ctx, repo, workerLoaderChan, process, loadedTreeChan)
|
|
})
|
|
}
|
|
|
|
// close once all loadTreeWorkers have completed
|
|
wg.Go(func() error {
|
|
loadTreeWg.Wait()
|
|
close(loadedTreeChan)
|
|
return nil
|
|
})
|
|
|
|
wg.Go(func() error {
|
|
defer close(loaderChan)
|
|
defer close(hugeTreeChan)
|
|
filterTrees(ctx, repo, trees, loaderChan, hugeTreeChan, loadedTreeChan, skip, p)
|
|
return nil
|
|
})
|
|
return wg.Wait()
|
|
}
|