restic/internal/data/tree.go
Michael Eischer 350f29d921 data: replace Tree with TreeNodeIterator
The TreeNodeIterator decodes nodes while iterating over a tree blob.
This should reduce peak memory usage as now only the serialized tree
blob and a single node have to be alive at the same time. Using the
iterator has implications for the error handling however. Now it is
necessary that all loops that iterate through a tree check for errors
before using the node returned by the iterator.

The other change is that it is no longer possible to iterate over a tree
multiple times. Instead it must be loaded a second time. This only
affects the tree rewriting code.
2026-01-31 20:03:38 +01:00

275 lines
5.9 KiB
Go

package data
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"iter"
"path"
"strings"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
)
// For documentation purposes only:
// // Tree is an ordered list of nodes.
// type Tree struct {
// Nodes []*Node `json:"nodes"`
// }
// ErrTreeNotOrdered is the error wrapped by TreeJSONBuilder.AddNode when a
// node's name is not strictly greater than the name of the previously added
// node, i.e. the nodes are out of order or a name occurs twice.
var ErrTreeNotOrdered = errors.New("nodes are not ordered or duplicate")
// treeIterator holds the state needed to decode the nodes of a serialized
// tree (`{"nodes":[...]}`) one at a time.
type treeIterator struct {
// dec is the JSON decoder reading the serialized tree. It is stored by
// value, copied from the decoder created in NewTreeNodeIterator.
dec json.Decoder
// started is set once iteration has begun; NewTreeNodeIterator's sequence
// panics if it is started a second time.
started bool
}
// NodeOrError is the element type of a TreeNodeIterator. Consumers must check
// Error before using Node: the iterator built by NewTreeNodeIterator yields an
// element with Error set (and a nil Node) when decoding fails, and stops
// iterating afterwards.
type NodeOrError struct {
// Node is the decoded node; only meaningful when Error is nil.
Node *Node
// Error is the decoding error, or nil if Node was decoded successfully.
Error error
}
// TreeNodeIterator is a sequence of the nodes of a tree, each delivered as a
// NodeOrError. Iterators created by NewTreeNodeIterator are single use: they
// panic when iterated a second time.
type TreeNodeIterator = iter.Seq[NodeOrError]
// NewTreeNodeIterator returns an iterator over the nodes of the serialized
// tree read from rd. The opening `{"nodes":[` is consumed immediately and an
// error is returned if it is missing or malformed; the nodes themselves are
// decoded lazily while iterating.
//
// The returned iterator is single use and panics when started a second time.
// A decoding error is delivered as the final element of the sequence, so
// consumers must check NodeOrError.Error before touching the node.
func NewTreeNodeIterator(rd io.Reader) (TreeNodeIterator, error) {
	it := &treeIterator{dec: *json.NewDecoder(rd)}
	if err := it.init(); err != nil {
		return nil, err
	}

	seq := func(yield func(NodeOrError) bool) {
		if it.started {
			panic("tree iterator is single use only")
		}
		it.started = true

		for {
			node, err := it.next()
			switch {
			case err == nil:
				// regular node: continue unless the consumer asked to stop
				if !yield(NodeOrError{Node: node}) {
					return
				}
			case errors.Is(err, io.EOF):
				// end of the node list
				return
			default:
				// errors are final: report it once and stop
				yield(NodeOrError{Node: node, Error: err})
				return
			}
		}
	}
	return seq, nil
}
// init consumes the fixed prefix of a serialized tree, `{"nodes":[`, so that
// the decoder is positioned at the first node. It returns the first error
// encountered, either from the decoder or because an unexpected token was read.
func (t *treeIterator) init() error {
	// `{"nodes":[` `]}`
	prefix := [...]json.Token{json.Delim('{'), "nodes", json.Delim('[')}
	for _, want := range prefix {
		if err := t.assertToken(want); err != nil {
			return err
		}
	}
	return nil
}
// next decodes and returns the next node of the tree. Once the node list is
// exhausted it consumes the closing `]}` and returns io.EOF to signal the
// well-formed end of the tree.
//
// io.EOF is reserved for that case. If the input ends while a node or one of
// the closing delimiters is still expected, the decoder's io.EOF is converted
// to io.ErrUnexpectedEOF; otherwise a truncated tree would be
// indistinguishable from a complete one for callers that treat io.EOF as the
// regular end of iteration.
func (t *treeIterator) next() (*Node, error) {
	if t.dec.More() {
		var n Node
		if err := t.dec.Decode(&n); err != nil {
			return nil, treeUnexpectedEOF(err)
		}
		return &n, nil
	}

	// More also reports false when the input is exhausted, so the closing
	// delimiters have to be verified explicitly.
	for _, delim := range [...]json.Delim{']', '}'} {
		if err := t.assertToken(delim); err != nil {
			return nil, treeUnexpectedEOF(err)
		}
	}
	return nil, io.EOF
}

// treeUnexpectedEOF maps io.EOF to io.ErrUnexpectedEOF and returns every other
// error unchanged.
func treeUnexpectedEOF(err error) error {
	if errors.Is(err, io.EOF) {
		return io.ErrUnexpectedEOF
	}
	return err
}
// assertToken reads the next JSON token from the decoder and checks that it
// equals token. It returns the decoder's error if reading fails, and a
// descriptive error if a different token was read.
func (t *treeIterator) assertToken(token json.Token) error {
	got, err := t.dec.Token()
	switch {
	case err != nil:
		return err
	case got != token:
		return errors.Errorf("error decoding tree: expected %v, got %v", token, got)
	default:
		return nil
	}
}
// LoadTree loads the tree blob with the ID content using loader and returns an
// iterator over its nodes. The serialized blob is loaded as a whole; the nodes
// are decoded while iterating (see NewTreeNodeIterator).
func LoadTree(ctx context.Context, loader restic.BlobLoader, content restic.ID) (TreeNodeIterator, error) {
	blob, loadErr := loader.LoadBlob(ctx, restic.TreeBlob, content, nil)
	if loadErr != nil {
		return nil, loadErr
	}

	serialized := bytes.NewReader(blob)
	return NewTreeNodeIterator(serialized)
}
// TreeFinder looks up nodes by name in a TreeNodeIterator while advancing
// through the iterator only once. Create it with NewTreeFinder and call Close
// when done.
type TreeFinder struct {
// next pulls the next element from the iterator; it is nil if the finder
// was created without a tree.
next func() (NodeOrError, bool)
// stop ends the iteration; it is called by Close.
stop func()
// current is the node most recently pulled from the iterator that has not
// been returned by Find yet, or nil.
current *Node
}
// NewTreeFinder returns a TreeFinder that searches the nodes of tree. A nil
// tree is allowed and results in a finder that never finds any node. The
// finder pulls from the iterator via iter.Pull, so Close must be called to
// release it.
func NewTreeFinder(tree TreeNodeIterator) *TreeFinder {
	// start out as a finder without iterator whose Close is a no-op
	finder := &TreeFinder{stop: func() {}}
	if tree != nil {
		finder.next, finder.stop = iter.Pull(tree)
	}
	return finder
}
// Find returns the node called name, or nil if the tree contains no such node.
// Successive calls must pass strictly increasing names, as the finder only
// moves forward through the tree. An error reported by the iterator is
// returned as is.
func (t *TreeFinder) Find(name string) (*Node, error) {
	// finder without a tree: nothing can be found
	if t.next == nil {
		return nil, nil
	}

	// advance until the buffered node's name is no longer smaller than name
	for t.current == nil || t.current.Name < name {
		item, more := t.next()
		switch {
		case item.Error != nil:
			return nil, item.Error
		case !more:
			// reached the end of the tree without a match
			return nil, nil
		}
		t.current = item.Node
	}

	if t.current.Name != name {
		// the buffered node already sorts after name; keep it for later calls
		return nil, nil
	}

	// hand out the match and drop our reference so that it can be freed as
	// early as possible
	match := t.current
	t.current = nil
	return match, nil
}
// Close stops the underlying iterator by calling the stop function set up in
// NewTreeFinder. It is a no-op for a TreeFinder that has no stop function,
// such as the zero value, which Find already tolerates.
func (t *TreeFinder) Close() {
	if t.stop != nil {
		t.stop()
	}
}
// TreeWriter serializes nodes into a tree and stores the result as a tree blob
// through a restic.BlobSaver. Create it with NewTreeWriter.
type TreeWriter struct {
// builder accumulates the JSON representation of the tree.
builder *TreeJSONBuilder
// saver receives the finished tree blob in Finalize.
saver restic.BlobSaver
}
// NewTreeWriter returns a TreeWriter with a fresh TreeJSONBuilder that stores
// the finished tree using saver.
func NewTreeWriter(saver restic.BlobSaver) *TreeWriter {
	return &TreeWriter{
		builder: NewTreeJSONBuilder(),
		saver:   saver,
	}
}
// AddNode appends node to the tree being written by passing it to the
// underlying TreeJSONBuilder, and returns that builder's error, if any.
func (t *TreeWriter) AddNode(node *Node) error {
return t.builder.AddNode(node)
}
// Finalize completes the serialized tree and stores it as a tree blob via the
// writer's BlobSaver. It returns the ID reported by SaveBlob together with
// SaveBlob's error; if finishing the serialization fails, the zero ID and that
// error are returned instead.
func (t *TreeWriter) Finalize(ctx context.Context) (restic.ID, error) {
	serialized, buildErr := t.builder.Finalize()
	if buildErr != nil {
		return restic.ID{}, buildErr
	}

	// NOTE(review): the zero ID and `false` are passed through unchanged from
	// the original call; confirm their meaning against restic.BlobSaver.
	treeID, _, _, saveErr := t.saver.SaveBlob(ctx, restic.TreeBlob, serialized, restic.ID{}, false)
	return treeID, saveErr
}
// SaveTree serializes all nodes delivered by the iterator into a single tree
// and stores it using saver. It aborts with the zero ID on the first error
// reported by the iterator or by adding a node, and otherwise returns the
// result of finalizing the tree.
func SaveTree(ctx context.Context, saver restic.BlobSaver, nodes TreeNodeIterator) (restic.ID, error) {
	w := NewTreeWriter(saver)
	for entry := range nodes {
		// the iterator reports decoding problems in-band
		if entry.Error != nil {
			return restic.ID{}, entry.Error
		}
		if addErr := w.AddNode(entry.Node); addErr != nil {
			return restic.ID{}, addErr
		}
	}
	return w.Finalize(ctx)
}
// TreeJSONBuilder incrementally builds the JSON serialization of a tree,
// `{"nodes":[...]}`, from nodes added in ascending name order. Create it with
// NewTreeJSONBuilder.
type TreeJSONBuilder struct {
// buf holds the JSON written so far.
buf bytes.Buffer
// lastName is the name of the most recently added node, or "" if no node
// has been added yet.
lastName string
}
// NewTreeJSONBuilder returns a builder whose buffer already contains the
// opening `{"nodes":[` of a serialized tree.
func NewTreeJSONBuilder() *TreeJSONBuilder {
	builder := new(TreeJSONBuilder)
	_, _ = builder.buf.WriteString(`{"nodes":[`)
	return builder
}
// AddNode appends the JSON encoding of node to the tree under construction.
//
// Nodes must be added in strictly ascending order of their names. A node whose
// name is not greater than that of the previously added node is rejected with
// an error wrapping ErrTreeNotOrdered. As the builder starts out with an empty
// last name, this also rejects a node with an empty name.
//
// The node is marshaled before the builder is modified, so a call that fails
// leaves the builder unchanged instead of leaving a dangling separator in the
// buffer.
func (builder *TreeJSONBuilder) AddNode(node *Node) error {
	if node.Name <= builder.lastName {
		return fmt.Errorf("node %q, last %q: %w", node.Name, builder.lastName, ErrTreeNotOrdered)
	}

	val, err := json.Marshal(node)
	if err != nil {
		return err
	}

	// lastName is only empty as long as no node was added, thus every node
	// except the first one is preceded by a separator
	if builder.lastName != "" {
		_ = builder.buf.WriteByte(',')
	}
	_, _ = builder.buf.Write(val)
	builder.lastName = node.Name
	return nil
}
// Finalize closes the node list and returns the serialized tree. The builder
// gives up its buffer in the process. The returned error is always nil.
func (builder *TreeJSONBuilder) Finalize() ([]byte, error) {
	// terminate with a newline so that the output is consistent with
	// json.Encoder, which adds a newline after each object
	_, _ = builder.buf.WriteString("]}\n")

	serialized := builder.buf.Bytes()
	// replace the buffer so that the builder no longer references the result
	builder.buf = bytes.Buffer{}
	return serialized, nil
}
// FindTreeDirectory descends from the tree with the given id along the
// slash-separated path dir and returns the ID of the subtree found there.
//
// dir is cleaned with path.Clean first; empty and "." components are skipped,
// so an empty or root path yields id itself. Each remaining component must
// name a directory node with a subtree in the tree loaded for the previous
// component. Errors are prefixed with the part of the path processed so far.
// A nil id is rejected.
func FindTreeDirectory(ctx context.Context, repo restic.BlobLoader, id *restic.ID, dir string) (*restic.ID, error) {
	if id == nil {
		return nil, errors.New("tree id is null")
	}

	walked := ""
	for _, component := range strings.Split(path.Clean(dir), "/") {
		if component == "" || component == "." {
			continue
		}
		walked = path.Join(walked, component)

		nodes, err := LoadTree(ctx, repo, *id)
		if err != nil {
			return nil, fmt.Errorf("path %s: %w", walked, err)
		}

		// only a single lookup is needed per tree, so release the iterator
		// right away instead of deferring it until the function returns
		finder := NewTreeFinder(nodes)
		entry, err := finder.Find(component)
		finder.Close()

		switch {
		case err != nil:
			return nil, fmt.Errorf("path %s: %w", walked, err)
		case entry == nil:
			return nil, fmt.Errorf("path %s: not found", walked)
		case entry.Type != NodeTypeDir || entry.Subtree == nil:
			return nil, fmt.Errorf("path %s: not a directory", walked)
		}
		id = entry.Subtree
	}
	return id, nil
}