mirror of
https://github.com/restic/restic.git
synced 2026-02-03 04:20:45 -05:00
The TreeNodeIterator decodes nodes while iterating over a tree blob. This should reduce peak memory usage as now only the serialized tree blob and a single node have to be alive at the same time. Using the iterator has implications for the error handling however. Now it is necessary that all loops that iterate through a tree check for errors before using the node returned by the iterator. The other change is that it is no longer possible to iterate over a tree multiple times. Instead it must be loaded a second time. This only affects the tree rewriting code.
275 lines
5.9 KiB
Go
275 lines
5.9 KiB
Go
package data
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
|
|
"io"
|
|
"iter"
|
|
"path"
|
|
"strings"
|
|
|
|
"github.com/restic/restic/internal/errors"
|
|
"github.com/restic/restic/internal/restic"
|
|
)
|
|
|
|
// For documentation purposes only:
|
|
// // Tree is an ordered list of nodes.
|
|
// type Tree struct {
|
|
// Nodes []*Node `json:"nodes"`
|
|
// }
|
|
|
|
// ErrTreeNotOrdered is the sentinel error reported when a node is added to a
// tree whose name is not strictly greater than the name of the node added
// before it.
var ErrTreeNotOrdered = errors.New("nodes are not ordered or duplicate")
|
|
|
|
// treeIterator carries the state needed to decode the nodes of a serialized
// tree one at a time.
type treeIterator struct {
	// dec is the streaming JSON decoder reading the serialized tree.
	dec json.Decoder
	// started records that iteration has begun; an iterator is single use.
	started bool
}
|
|
|
|
type NodeOrError struct {
|
|
Node *Node
|
|
Error error
|
|
}
|
|
|
|
// TreeNodeIterator is a sequence of tree nodes in which every element may
// carry an error instead of a node.
type TreeNodeIterator = iter.Seq[NodeOrError]
|
|
|
|
func NewTreeNodeIterator(rd io.Reader) (TreeNodeIterator, error) {
|
|
t := &treeIterator{
|
|
dec: *json.NewDecoder(rd),
|
|
}
|
|
|
|
err := t.init()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return func(yield func(NodeOrError) bool) {
|
|
if t.started {
|
|
panic("tree iterator is single use only")
|
|
}
|
|
t.started = true
|
|
for {
|
|
n, err := t.next()
|
|
if err != nil && errors.Is(err, io.EOF) {
|
|
return
|
|
}
|
|
if !yield(NodeOrError{Node: n, Error: err}) {
|
|
return
|
|
}
|
|
// errors are final
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
}, nil
|
|
}
|
|
|
|
func (t *treeIterator) init() error {
|
|
// `{"nodes":[` `]}`
|
|
|
|
if err := t.assertToken(json.Delim('{')); err != nil {
|
|
return err
|
|
}
|
|
if err := t.assertToken("nodes"); err != nil {
|
|
return err
|
|
}
|
|
if err := t.assertToken(json.Delim('[')); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (t *treeIterator) next() (*Node, error) {
|
|
if t.dec.More() {
|
|
var n Node
|
|
err := t.dec.Decode(&n)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &n, nil
|
|
}
|
|
|
|
if err := t.assertToken(json.Delim(']')); err != nil {
|
|
return nil, err
|
|
}
|
|
if err := t.assertToken(json.Delim('}')); err != nil {
|
|
return nil, err
|
|
}
|
|
return nil, io.EOF
|
|
}
|
|
|
|
func (t *treeIterator) assertToken(token json.Token) error {
|
|
to, err := t.dec.Token()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if to != token {
|
|
return errors.Errorf("error decoding tree: expected %v, got %v", token, to)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func LoadTree(ctx context.Context, loader restic.BlobLoader, content restic.ID) (TreeNodeIterator, error) {
|
|
rd, err := loader.LoadBlob(ctx, restic.TreeBlob, content, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return NewTreeNodeIterator(bytes.NewReader(rd))
|
|
}
|
|
|
|
type TreeFinder struct {
|
|
next func() (NodeOrError, bool)
|
|
stop func()
|
|
current *Node
|
|
}
|
|
|
|
func NewTreeFinder(tree TreeNodeIterator) *TreeFinder {
|
|
if tree == nil {
|
|
return &TreeFinder{stop: func() {}}
|
|
}
|
|
next, stop := iter.Pull(tree)
|
|
return &TreeFinder{next: next, stop: stop}
|
|
}
|
|
|
|
// Find finds the node with the given name. If the node is not found, it returns nil.
|
|
// If Find was called before, the new name must be strictly greater than the last name.
|
|
func (t *TreeFinder) Find(name string) (*Node, error) {
|
|
if t.next == nil {
|
|
return nil, nil
|
|
}
|
|
// loop until `t.current.Name` is >= name
|
|
for t.current == nil || t.current.Name < name {
|
|
current, ok := t.next()
|
|
if current.Error != nil {
|
|
return nil, current.Error
|
|
}
|
|
if !ok {
|
|
return nil, nil
|
|
}
|
|
t.current = current.Node
|
|
}
|
|
|
|
if t.current.Name == name {
|
|
// forget the current node to free memory as early as possible
|
|
current := t.current
|
|
t.current = nil
|
|
return current, nil
|
|
}
|
|
// we have already passed the name
|
|
return nil, nil
|
|
}
|
|
|
|
func (t *TreeFinder) Close() {
|
|
t.stop()
|
|
}
|
|
|
|
type TreeWriter struct {
|
|
builder *TreeJSONBuilder
|
|
saver restic.BlobSaver
|
|
}
|
|
|
|
func NewTreeWriter(saver restic.BlobSaver) *TreeWriter {
|
|
builder := NewTreeJSONBuilder()
|
|
return &TreeWriter{builder: builder, saver: saver}
|
|
}
|
|
|
|
func (t *TreeWriter) AddNode(node *Node) error {
|
|
return t.builder.AddNode(node)
|
|
}
|
|
|
|
func (t *TreeWriter) Finalize(ctx context.Context) (restic.ID, error) {
|
|
buf, err := t.builder.Finalize()
|
|
if err != nil {
|
|
return restic.ID{}, err
|
|
}
|
|
id, _, _, err := t.saver.SaveBlob(ctx, restic.TreeBlob, buf, restic.ID{}, false)
|
|
return id, err
|
|
}
|
|
|
|
func SaveTree(ctx context.Context, saver restic.BlobSaver, nodes TreeNodeIterator) (restic.ID, error) {
|
|
treeWriter := NewTreeWriter(saver)
|
|
for item := range nodes {
|
|
if item.Error != nil {
|
|
return restic.ID{}, item.Error
|
|
}
|
|
err := treeWriter.AddNode(item.Node)
|
|
if err != nil {
|
|
return restic.ID{}, err
|
|
}
|
|
}
|
|
return treeWriter.Finalize(ctx)
|
|
}
|
|
|
|
// TreeJSONBuilder incrementally assembles the JSON serialization of a tree.
type TreeJSONBuilder struct {
	// buf holds the JSON written so far.
	buf bytes.Buffer
	// lastName is the name of the most recently added node; it is used to
	// enforce strictly ascending names.
	lastName string
}
|
|
|
|
func NewTreeJSONBuilder() *TreeJSONBuilder {
|
|
tb := &TreeJSONBuilder{}
|
|
_, _ = tb.buf.WriteString(`{"nodes":[`)
|
|
return tb
|
|
}
|
|
|
|
func (builder *TreeJSONBuilder) AddNode(node *Node) error {
|
|
if node.Name <= builder.lastName {
|
|
return fmt.Errorf("node %q, last %q: %w", node.Name, builder.lastName, ErrTreeNotOrdered)
|
|
}
|
|
if builder.lastName != "" {
|
|
_ = builder.buf.WriteByte(',')
|
|
}
|
|
builder.lastName = node.Name
|
|
|
|
val, err := json.Marshal(node)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, _ = builder.buf.Write(val)
|
|
return nil
|
|
}
|
|
|
|
func (builder *TreeJSONBuilder) Finalize() ([]byte, error) {
|
|
// append a newline so that the data is always consistent (json.Encoder
|
|
// adds a newline after each object)
|
|
_, _ = builder.buf.WriteString("]}\n")
|
|
buf := builder.buf.Bytes()
|
|
// drop reference to buffer
|
|
builder.buf = bytes.Buffer{}
|
|
return buf, nil
|
|
}
|
|
|
|
func FindTreeDirectory(ctx context.Context, repo restic.BlobLoader, id *restic.ID, dir string) (*restic.ID, error) {
|
|
if id == nil {
|
|
return nil, errors.New("tree id is null")
|
|
}
|
|
|
|
dirs := strings.Split(path.Clean(dir), "/")
|
|
subfolder := ""
|
|
|
|
for _, name := range dirs {
|
|
if name == "" || name == "." {
|
|
continue
|
|
}
|
|
subfolder = path.Join(subfolder, name)
|
|
tree, err := LoadTree(ctx, repo, *id)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("path %s: %w", subfolder, err)
|
|
}
|
|
finder := NewTreeFinder(tree)
|
|
node, err := finder.Find(name)
|
|
finder.Close()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("path %s: %w", subfolder, err)
|
|
}
|
|
if node == nil {
|
|
return nil, fmt.Errorf("path %s: not found", subfolder)
|
|
}
|
|
if node.Type != NodeTypeDir || node.Subtree == nil {
|
|
return nil, fmt.Errorf("path %s: not a directory", subfolder)
|
|
}
|
|
id = node.Subtree
|
|
}
|
|
return id, nil
|
|
}
|