restic/internal/walker/rewriter.go
Winfried Plappert 5148608c39 restic rewrite include - based on restic 0.18.1
cmd/restic/cmd_rewrite.go:
introduction of include filters for this command:
- add include filters, add error checking code
- add new parameter 'keepEmptyDirectoryFunc' to 'walker.NewSnapshotSizeRewriter()',
  so empty directories have to be kept to keep the directory structure intact
- add parameter 'keepEmptySnapshot' to 'filterAndReplaceSnapshot()' to keep snapshots
  intact when nothing is to be included
- introduce helper function 'gatherIncludeFilters()' and 'gatherExcludeFilters()' to
  keep code flow clean

cmd/restic/cmd_rewrite_integration_test.go:
add several new tests around the 'include' functionality

internal/filter/include.go:
this is where is include filter is defined

internal/walker/rewriter.go:
- struct RewriteOpts gains field 'KeepEmtpyDirectory', which is a 'NodeKeepEmptyDirectoryFunc()'
  which defaults to nil, so that al subdirectories are kept
- function 'NewSnapshotSizeRewriter()' gains the parameter 'keepEmptyDirecoryFilter' which
  controls the management of empty subdirectories in case of include filters active

internal/data/tree.go:
gains a function Count() for checking the number if node elements in a newly built tree

internal/walker/rewriter_test.go:
function 'NewSnapshotSizeRewriter()' gets an additional parameter nil to keeps things happy

cmd/restic/cmd_repair_snapshots.go:
function 'filterAndReplaceSnapshot()' gets an additional parameter 'keepEmptySnapshot=nil'

doc/045_working_with_repos.rst:
gets to mention include filters

changelog/unreleased/issue-4278:
the usual announcement file

git rebase master -i produced this

restic rewrite include - keep linter happy

cmd/restic/cmd_rewrite_integration_test.go:
linter likes strings.Contain() better than my strings.Index() >= 0
2026-01-31 19:42:56 +00:00

195 lines
4.9 KiB
Go

package walker
import (
"context"
"fmt"
"path"
"github.com/restic/restic/internal/data"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/restic"
)
type NodeRewriteFunc func(node *data.Node, path string) *data.Node
type FailedTreeRewriteFunc func(nodeID restic.ID, path string, err error) (data.TreeNodeIterator, error)
type QueryRewrittenSizeFunc func() SnapshotSize
type NodeKeepEmptyDirectoryFunc func(path string) bool
type SnapshotSize struct {
FileCount uint
FileSize uint64
}
type RewriteOpts struct {
// return nil to remove the node
RewriteNode NodeRewriteFunc
KeepEmtpyDirectory NodeKeepEmptyDirectoryFunc
// decide what to do with a tree that could not be loaded. Return nil to remove the node. By default the load error is returned which causes the operation to fail.
RewriteFailedTree FailedTreeRewriteFunc
AllowUnstableSerialization bool
DisableNodeCache bool
}
type idMap map[restic.ID]restic.ID
type TreeRewriter struct {
opts RewriteOpts
replaces idMap
}
func NewTreeRewriter(opts RewriteOpts) *TreeRewriter {
rw := &TreeRewriter{
opts: opts,
}
if !opts.DisableNodeCache {
rw.replaces = make(idMap)
}
// setup default implementations
if rw.opts.RewriteNode == nil {
rw.opts.RewriteNode = func(node *data.Node, _ string) *data.Node {
return node
}
}
if rw.opts.RewriteFailedTree == nil {
// fail with error by default
rw.opts.RewriteFailedTree = func(_ restic.ID, _ string, err error) (data.TreeNodeIterator, error) {
return nil, err
}
}
if rw.opts.KeepEmtpyDirectory == nil {
rw.opts.KeepEmtpyDirectory = func(_ string) bool {
return true
}
}
return rw
}
func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc, keepEmptyDirecoryFilter NodeKeepEmptyDirectoryFunc) (*TreeRewriter, QueryRewrittenSizeFunc) {
var count uint
var size uint64
t := NewTreeRewriter(RewriteOpts{
RewriteNode: func(node *data.Node, path string) *data.Node {
node = rewriteNode(node, path)
if node != nil && node.Type == data.NodeTypeFile {
count++
size += node.Size
}
return node
},
DisableNodeCache: true,
KeepEmtpyDirectory: keepEmptyDirecoryFilter,
})
ss := func() SnapshotSize {
return SnapshotSize{count, size}
}
return t, ss
}
func (t *TreeRewriter) RewriteTree(ctx context.Context, loader restic.BlobLoader, saver restic.BlobSaver, nodepath string, nodeID restic.ID) (newNodeID restic.ID, err error) {
// check if tree was already changed
newID, ok := t.replaces[nodeID]
if ok {
return newID, nil
}
// a nil nodeID will lead to a load error
curTree, err := data.LoadTree(ctx, loader, nodeID)
if err != nil {
replacement, err := t.opts.RewriteFailedTree(nodeID, nodepath, err)
if err != nil {
return restic.ID{}, err
}
if replacement != nil {
replacementID, err := data.SaveTree(ctx, saver, replacement)
if err != nil {
return restic.ID{}, err
}
return replacementID, nil
}
return restic.ID{}, nil
}
if !t.opts.AllowUnstableSerialization {
// check that we can properly encode this tree without losing information
// The alternative of using json/Decoder.DisallowUnknownFields() doesn't work as we use
// a custom UnmarshalJSON to decode trees, see also https://github.com/golang/go/issues/41144
testID, err := data.SaveTree(ctx, saver, curTree)
if err != nil {
return restic.ID{}, err
}
if nodeID != testID {
return restic.ID{}, fmt.Errorf("cannot encode tree at %q without losing information", nodepath)
}
// reload the tree to get a new iterator
curTree, err = data.LoadTree(ctx, loader, nodeID)
if err != nil {
// shouldn't fail as the first load was successful
return restic.ID{}, fmt.Errorf("failed to reload tree %v: %w", nodeID, err)
}
}
debug.Log("filterTree: %s, nodeId: %s\n", nodepath, nodeID.Str())
tb := data.NewTreeWriter(saver)
for item := range curTree {
if ctx.Err() != nil {
return restic.ID{}, ctx.Err()
}
if item.Error != nil {
return restic.ID{}, err
}
node := item.Node
path := path.Join(nodepath, node.Name)
node = t.opts.RewriteNode(node, path)
if node == nil {
continue
}
if node.Type != data.NodeTypeDir {
err = tb.AddNode(node)
if err != nil {
return restic.ID{}, err
}
continue
}
// treat nil as null id
var subtree restic.ID
if node.Subtree != nil {
subtree = *node.Subtree
}
newID, err := t.RewriteTree(ctx, loader, saver, path, subtree)
if err != nil {
return restic.ID{}, err
} else if err == nil && newID.IsNull() {
continue
}
node.Subtree = &newID
err = tb.AddNode(node)
if err != nil {
return restic.ID{}, err
}
}
newTreeID, err := tb.Finalize(ctx)
if err != nil {
return restic.ID{}, err
}
if tb.Count() == 0 && !t.opts.KeepEmtpyDirectory(nodepath) {
return restic.ID{}, nil
}
if t.replaces != nil {
t.replaces[nodeID] = newTreeID
}
if !newTreeID.Equal(nodeID) {
debug.Log("filterTree: save new tree for %s as %v\n", nodepath, newTreeID)
}
return newTreeID, err
}