restic rewrite include - based on restic 0.18.1

cmd/restic/cmd_rewrite.go:
introduction of include filters for this command:
- add include filters, add error checking code
- add new parameter 'keepEmptyDirectoryFunc' to 'walker.NewSnapshotSizeRewriter()',
  so empty directories have to be kept to keep the directory structure intact
- add parameter 'keepEmptySnapshot' to 'filterAndReplaceSnapshot()' to keep snapshots
  intact when nothing is to be included
- introduce helper function 'gatherIncludeFilters()' and 'gatherExcludeFilters()' to
  keep code flow clean

cmd/restic/cmd_rewrite_integration_test.go:
add several new tests around the 'include' functionality

internal/filter/include.go:
this is where the include filter is defined

internal/walker/rewriter.go:
- struct RewriteOpts gains field 'KeepEmtpyDirectory', which is a 'NodeKeepEmptyDirectoryFunc()'
  which defaults to nil, so that all subdirectories are kept
- function 'NewSnapshotSizeRewriter()' gains the parameter 'keepEmptyDirecoryFilter' which
  controls the management of empty subdirectories in case of include filters active

internal/data/tree.go:
gains a function Count() for checking the number of node elements in a newly built tree

internal/walker/rewriter_test.go:
function 'NewSnapshotSizeRewriter()' gets an additional parameter nil to keep things happy

cmd/restic/cmd_repair_snapshots.go:
function 'filterAndReplaceSnapshot()' gets an additional parameter 'keepEmptySnapshot=false'

doc/045_working_with_repos.rst:
gets to mention include filters

changelog/unreleased/issue-4278:
the usual announcement file

git rebase master -i produced this

restic rewrite include - keep linter happy

cmd/restic/cmd_rewrite_integration_test.go:
linter likes strings.Contains() better than my strings.Index() >= 0
This commit is contained in:
Winfried Plappert 2025-11-21 14:17:41 +00:00
parent 083cdf0675
commit 5148608c39
9 changed files with 343 additions and 30 deletions

View file

@ -0,0 +1,12 @@
Enhancement: Support include filters in `rewrite` command
The enhancement enables the standard include filter options
--iinclude pattern same as --include pattern but ignores the casing of filenames
--iinclude-file file same as --include-file but ignores casing of filenames in patterns
-i, --include pattern include a pattern (can be specified multiple times)
--include-file file read include patterns from a file (can be specified multiple times)
Exclude and include filter options are mutually exclusive, as in other commands.
https://github.com/restic/restic/issues/4278
https://github.com/restic/restic/pull/5191

View file

@ -151,7 +151,7 @@ func runRepairSnapshots(ctx context.Context, gopts global.Options, opts RepairOp
func(ctx context.Context, sn *data.Snapshot, uploader restic.BlobSaver) (restic.ID, *data.SnapshotSummary, error) {
id, err := rewriter.RewriteTree(ctx, repo, uploader, "/", *sn.Tree)
return id, nil, err
}, opts.DryRun, opts.Forget, nil, "repaired", printer)
}, opts.DryRun, opts.Forget, nil, "repaired", printer, false)
if err != nil {
return errors.Fatalf("unable to rewrite snapshot ID %q: %v", sn.ID().Str(), err)
}

View file

@ -30,6 +30,9 @@ The "rewrite" command excludes files from existing snapshots. It creates new
snapshots containing the same data as the original ones, but without the files
you specify to exclude. All metadata (time, host, tags) will be preserved.
Alternatively you can use one of the --include variants to only include files
in the new snapshot which you want to preserve.
The snapshots to rewrite are specified using the --host, --tag and --path options,
or by providing a list of snapshot IDs. Please note that specifying neither any of
these options nor a snapshot ID will cause the command to rewrite all snapshots.
@ -46,8 +49,8 @@ When rewrite is used with the --snapshot-summary option, a new snapshot is
created containing statistics summary data. Only two fields in the summary will
be non-zero: TotalFilesProcessed and TotalBytesProcessed.
When rewrite is called with one of the --exclude options, TotalFilesProcessed
and TotalBytesProcessed will be updated in the snapshot summary.
When rewrite is called with one of the --exclude or --include options,
TotalFilesProcessed and TotalBytesProcessed will be updated in the snapshot summary.
EXIT STATUS
===========
@ -109,6 +112,7 @@ type RewriteOptions struct {
Metadata snapshotMetadataArgs
data.SnapshotFilter
filter.ExcludePatternOptions
filter.IncludePatternOptions
}
func (opts *RewriteOptions) AddFlags(f *pflag.FlagSet) {
@ -120,6 +124,7 @@ func (opts *RewriteOptions) AddFlags(f *pflag.FlagSet) {
initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
opts.ExcludePatternOptions.Add(f)
opts.IncludePatternOptions.Add(f)
}
// rewriteFilterFunc returns the filtered tree ID or an error. If a snapshot summary is returned, the snapshot will
@ -136,33 +141,32 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *data.
return false, err
}
includeByNameFuncs, err := opts.IncludePatternOptions.CollectPatterns(printer.E)
if err != nil {
return false, err
}
metadata, err := opts.Metadata.convert()
if err != nil {
return false, err
}
condInclude := len(includeByNameFuncs) > 0
condExclude := len(rejectByNameFuncs) > 0 || opts.SnapshotSummary
var filter rewriteFilterFunc
var rewriteNode walker.NodeRewriteFunc
var keepEmptyDirectoryFunc walker.NodeKeepEmptyDirectoryFunc
if len(rejectByNameFuncs) > 0 || opts.SnapshotSummary {
selectByName := func(nodepath string) bool {
for _, reject := range rejectByNameFuncs {
if reject(nodepath) {
return false
}
}
return true
if condInclude || condExclude {
if condInclude {
rewriteNode, keepEmptyDirectoryFunc = gatherIncludeFilters(includeByNameFuncs, printer)
} else {
rewriteNode = gatherExcludeFilters(rejectByNameFuncs, printer)
keepEmptyDirectoryFunc = nil
}
rewriteNode := func(node *data.Node, path string) *data.Node {
if selectByName(path) {
return node
}
printer.P("excluding %s", path)
return nil
}
rewriter, querySize := walker.NewSnapshotSizeRewriter(rewriteNode)
rewriter, querySize := walker.NewSnapshotSizeRewriter(rewriteNode, keepEmptyDirectoryFunc)
filter = func(ctx context.Context, sn *data.Snapshot, uploader restic.BlobSaver) (restic.ID, *data.SnapshotSummary, error) {
id, err := rewriter.RewriteTree(ctx, repo, uploader, "/", *sn.Tree)
@ -186,11 +190,12 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *data.
}
return filterAndReplaceSnapshot(ctx, repo, sn,
filter, opts.DryRun, opts.Forget, metadata, "rewrite", printer)
filter, opts.DryRun, opts.Forget, metadata, "rewrite", printer, len(includeByNameFuncs) > 0)
}
func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *data.Snapshot,
filter rewriteFilterFunc, dryRun bool, forget bool, newMetadata *snapshotMetadata, addTag string, printer progress.Printer) (bool, error) {
filter rewriteFilterFunc, dryRun bool, forget bool, newMetadata *snapshotMetadata, addTag string, printer progress.Printer,
keepEmptySnapshot bool) (bool, error) {
var filteredTree restic.ID
var summary *data.SnapshotSummary
@ -204,6 +209,10 @@ func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *d
}
if filteredTree.IsNull() {
if keepEmptySnapshot {
debug.Log("Snapshot %v not modified", sn)
return false, nil
}
if dryRun {
printer.P("would delete empty snapshot")
} else {
@ -284,8 +293,12 @@ func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *d
}
func runRewrite(ctx context.Context, opts RewriteOptions, gopts global.Options, args []string, term ui.Terminal) error {
if !opts.SnapshotSummary && opts.ExcludePatternOptions.Empty() && opts.Metadata.empty() {
return errors.Fatal("Nothing to do: no excludes provided and no new metadata provided")
hasExcludes := !opts.ExcludePatternOptions.Empty()
hasIncludes := !opts.IncludePatternOptions.Empty()
if !opts.SnapshotSummary && !hasExcludes && !hasIncludes && opts.Metadata.empty() {
return errors.Fatal("Nothing to do: no excludes/includes provided and no new metadata provided")
} else if hasExcludes && hasIncludes {
return errors.Fatal("exclude and include patterns are mutually exclusive")
}
printer := ui.NewProgressPrinter(false, gopts.Verbosity, term)
@ -348,3 +361,71 @@ func runRewrite(ctx context.Context, opts RewriteOptions, gopts global.Options,
return nil
}
// gatherIncludeFilters builds the two callbacks needed to rewrite a snapshot
// with include patterns.
//
// rewriteNode keeps every directory node and keeps any other node only if at
// least one include function reports both matched and childMayMatch for its
// path; all remaining nodes are dropped by returning nil. Kept non-directory
// nodes are reported through printer.VV.
//
// keepEmptyDirectory reports whether the given directory path itself is
// matched by one of the include functions; matched directories are reported
// through printer.VV.
func gatherIncludeFilters(includeByNameFuncs []filter.IncludeByNameFunc, printer progress.Printer) (rewriteNode walker.NodeRewriteFunc, keepEmptyDirectory walker.NodeKeepEmptyDirectoryFunc) {
	inSelectByName := func(nodepath string, node *data.Node) bool {
		// without any include function nothing is selected, not even directories
		if len(includeByNameFuncs) == 0 {
			return false
		}
		// always include directories; the check does not depend on the
		// individual include function, so it is done once up front
		if node.Type == data.NodeTypeDir {
			return true
		}
		for _, include := range includeByNameFuncs {
			if matched, childMayMatch := include(nodepath); matched && childMayMatch {
				return true
			}
		}
		return false
	}

	rewriteNode = func(node *data.Node, path string) *data.Node {
		if !inSelectByName(path, node) {
			return nil
		}
		if node.Type != data.NodeTypeDir {
			printer.VV("including %q\n", path)
		}
		return node
	}

	inSelectByNameDir := func(nodepath string) bool {
		for _, include := range includeByNameFuncs {
			if matched, _ := include(nodepath); matched {
				return true
			}
		}
		return false
	}

	keepEmptyDirectory = func(path string) bool {
		keep := inSelectByNameDir(path)
		if keep {
			printer.VV("including directory %q\n", path)
		}
		return keep
	}

	return rewriteNode, keepEmptyDirectory
}
// gatherExcludeFilters returns a node rewrite callback for exclude patterns.
// A node is dropped (nil is returned) and reported through printer.VV as soon
// as any of the reject functions matches its path; otherwise the node is
// passed through unchanged.
func gatherExcludeFilters(excludeByNameFuncs []filter.RejectByNameFunc, printer progress.Printer) (rewriteNode walker.NodeRewriteFunc) {
	isRejected := func(nodepath string) bool {
		for _, rejectFn := range excludeByNameFuncs {
			if rejectFn(nodepath) {
				return true
			}
		}
		return false
	}

	return func(node *data.Node, path string) *data.Node {
		if isRejected(path) {
			printer.VV("excluding %q\n", path)
			return nil
		}
		return node
	}
}

View file

@ -2,7 +2,9 @@ package main
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
"github.com/restic/restic/internal/data"
@ -27,6 +29,13 @@ func testRunRewriteExclude(t testing.TB, gopts global.Options, excludes []string
}))
}
// testRunRewriteWithOpts runs the rewrite command with the given options and
// arguments inside withTermStatus. Any error returned by the command fails the
// test through rtest.OK, so the returned error is always nil.
func testRunRewriteWithOpts(t testing.TB, opts RewriteOptions, gopts global.Options, args []string) error {
	rtest.OK(t, withTermStatus(t, gopts, func(ctx context.Context, gopts global.Options) error {
		// use the context supplied to the callback rather than a detached context.TODO()
		return runRewrite(ctx, opts, gopts, args, gopts.Term)
	}))
	return nil
}
func createBasicRewriteRepo(t testing.TB, env *testEnvironment) restic.ID {
testSetupBackupData(t, env)
@ -39,6 +48,20 @@ func createBasicRewriteRepo(t testing.TB, env *testEnvironment) restic.ID {
return snapshotIDs[0]
}
// createBasicRewriteRepoWithEmptyDirectory sets up the backup test data, adds
// an empty directory named "empty-directory" below 0/tests, creates a single
// backup of it and returns the ID of the resulting snapshot.
func createBasicRewriteRepoWithEmptyDirectory(t testing.TB, env *testEnvironment) restic.ID {
	testSetupBackupData(t, env)

	// make an empty directory named "empty-directory"; the path components are
	// passed separately so that no element looks like a rooted path
	rtest.OK(t, os.Mkdir(filepath.Join(env.testdata, "0", "tests", "empty-directory"), 0755))

	// create backup
	testRunBackup(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{}, env.gopts)
	snapshotIDs := testRunList(t, env.gopts, "snapshots")
	rtest.Assert(t, len(snapshotIDs) == 1, "expected one snapshot, got %v", snapshotIDs)

	return snapshotIDs[0]
}
func getSnapshot(t testing.TB, snapshotID restic.ID, env *testEnvironment) *data.Snapshot {
t.Helper()
@ -195,3 +218,169 @@ func TestRewriteSnaphotSummary(t *testing.T) {
rtest.Equals(t, oldSummary.TotalBytesProcessed, newSn.Summary.TotalBytesProcessed, "unexpected TotalBytesProcessed value")
rtest.Equals(t, oldSummary.TotalFilesProcessed, newSn.Summary.TotalFilesProcessed, "unexpected TotalFilesProcessed value")
}
// TestRewriteIncludeFiles rewrites the only snapshot with the include pattern
// "*.txt" and verifies that the snapshot ID changed, that the ls output of the
// rewritten snapshot contains exactly two lines mentioning ".txt", and that the
// snapshot summary reports two processed files.
func TestRewriteIncludeFiles(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()

	// opens repo, creates one backup of the whole lot of 'testdata'
	createBasicRewriteRepo(t, env)
	snapshots := testListSnapshots(t, env.gopts, 1)

	// include txt files
	err := testRunRewriteWithOpts(t,
		RewriteOptions{
			Forget:                true,
			IncludePatternOptions: filter.IncludePatternOptions{Includes: []string{"*.txt"}},
		},
		env.gopts,
		[]string{"latest"})
	rtest.OK(t, err)
	newSnapshots := testListSnapshots(t, env.gopts, 1)
	rtest.Assert(t, snapshots[0] != newSnapshots[0], "snapshot id should have changed")

	// read restic ls output and count .txt files
	count := 0
	out := testRunLsWithOpts(t, env.gopts, LsOptions{}, []string{"latest"})
	for _, line := range strings.Split(string(out), "\n") {
		if strings.Contains(line, ".txt") {
			count++
		}
	}
	rtest.Assert(t, count == 2, "expected two txt files, but got %d files", count)

	err = withTermStatus(t, env.gopts, func(ctx context.Context, gopts global.Options) error {
		printer := ui.NewProgressPrinter(gopts.JSON, gopts.Verbosity, gopts.Term)
		_, repo, unlock, err := openWithExclusiveLock(ctx, gopts, false, printer)
		rtest.OK(t, err)
		defer unlock()

		// load the snapshot with the callback's context instead of a detached context.TODO()
		sn, err := data.LoadSnapshot(ctx, repo, newSnapshots[0])
		rtest.OK(t, err)

		rtest.Assert(t, sn.Summary != nil, "snapshot should have a summary attached")
		rtest.Assert(t, sn.Summary.TotalFilesProcessed == 2,
			"there should be 2 files in the snapshot, but there are %d files", sn.Summary.TotalFilesProcessed)
		return nil
	})
	rtest.OK(t, err)
}
// TestRewriteExcludeFiles rewrites the only snapshot with the exclude pattern
// "*.txt" and verifies that the snapshot ID changed and that the ls output of
// the rewritten snapshot no longer contains any line mentioning ".txt".
func TestRewriteExcludeFiles(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()

	createBasicRewriteRepo(t, env)
	before := testListSnapshots(t, env.gopts, 1)

	// drop every txt file from the snapshot
	rewriteOpts := RewriteOptions{
		Forget:                true,
		ExcludePatternOptions: filter.ExcludePatternOptions{Excludes: []string{"*.txt"}},
	}
	rtest.OK(t, testRunRewriteWithOpts(t, rewriteOpts, env.gopts, []string{"latest"}))

	after := testListSnapshots(t, env.gopts, 1)
	rtest.Assert(t, before[0] != after[0], "snapshot id should have changed")

	// scan the restic ls output for remaining .txt entries
	listing := string(testRunLsWithOpts(t, env.gopts, LsOptions{}, []string{"latest"}))
	txtLines := 0
	for _, entry := range strings.Split(listing, "\n") {
		if !strings.Contains(entry, ".txt") {
			continue
		}
		txtLines++
	}
	rtest.Assert(t, txtLines == 0, "expected 0 txt files, but got %d files", txtLines)
}
// TestRewriteExcludeIncludeContradiction verifies that the rewrite command
// refuses to run when exclude and include patterns are given at the same time,
// and that the returned error names the conflict.
func TestRewriteExcludeIncludeContradiction(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()

	createBasicRewriteRepo(t, env)
	testListSnapshots(t, env.gopts, 1)

	// test contradiction
	err := withTermStatus(t, env.gopts, func(ctx context.Context, gopts global.Options) error {
		// use the terminal of the options handed to the callback, matching
		// the gopts value passed alongside it
		return runRewrite(ctx,
			RewriteOptions{
				ExcludePatternOptions: filter.ExcludePatternOptions{Excludes: []string{"nonsense"}},
				IncludePatternOptions: filter.IncludePatternOptions{Includes: []string{"not allowed"}},
			},
			gopts, []string{"quack"}, gopts.Term)
	})

	const wantMsg = "exclude and include patterns are mutually exclusive"
	rtest.Assert(t, err != nil, `expected to fail command with message "exclude and include patterns are mutually exclusive"`)
	// make sure the command failed for the expected reason and not for another one
	rtest.Assert(t, strings.Contains(err.Error(), wantMsg), "expected error containing %q, got %q", wantMsg, err.Error())
}
// TestRewriteIncludeEmptyDirectory rewrites a snapshot containing an empty
// directory with the include pattern "empty-directory" and verifies that the
// snapshot ID changed and that the ls output of the rewritten snapshot
// mentions "empty-directory" on exactly one line.
func TestRewriteIncludeEmptyDirectory(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()

	originalID := createBasicRewriteRepoWithEmptyDirectory(t, env)

	// equivalent to: restic rewrite latest -i empty-directory --forget
	rewriteOpts := RewriteOptions{
		Forget:                true,
		IncludePatternOptions: filter.IncludePatternOptions{Includes: []string{"empty-directory"}},
	}
	rtest.OK(t, testRunRewriteWithOpts(t, rewriteOpts, env.gopts, []string{"latest"}))

	rewritten := testListSnapshots(t, env.gopts, 1)
	rtest.Assert(t, originalID != rewritten[0], "snapshot id should have changed")

	// count the lines of the restic ls output that mention "empty-directory"
	listing := string(testRunLsWithOpts(t, env.gopts, LsOptions{}, []string{"latest"}))
	hits := 0
	for _, entry := range strings.Split(listing, "\n") {
		if !strings.Contains(entry, "empty-directory") {
			continue
		}
		hits++
	}
	rtest.Assert(t, hits == 1, "expected 1 empty directory, but got %d entries", hits)
}
// TestRewriteIncludeNothing makes sure when nothing is included, the original snapshot stays untouched
func TestRewriteIncludeNothing(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()

	createBasicRewriteRepo(t, env)
	snapsBefore := testListSnapshots(t, env.gopts, 1)

	// restic rewrite latest -i nothing-whatsoever --forget
	// The pattern must be passed as an include pattern: with an exclude
	// pattern this test would merely duplicate TestRewriteExcludeNothing and
	// never exercise the include code path.
	err := testRunRewriteWithOpts(t,
		RewriteOptions{
			Forget:                true,
			IncludePatternOptions: filter.IncludePatternOptions{Includes: []string{"nothing-whatsoever"}},
		},
		env.gopts,
		[]string{"latest"})
	rtest.OK(t, err)

	snapsAfter := testListSnapshots(t, env.gopts, 1)
	rtest.Assert(t, snapsBefore[0] == snapsAfter[0], "snapshots should be identical but are %s and %s",
		snapsBefore[0].Str(), snapsAfter[0].Str())
}
// TestRewriteExcludeNothing rewrites the only snapshot with an exclude pattern
// that matches no file and verifies that the snapshot list is unchanged
// afterwards, i.e. the original snapshot is kept and no new one is created.
func TestRewriteExcludeNothing(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()

	createBasicRewriteRepo(t, env)
	before := testListSnapshots(t, env.gopts, 1)

	// equivalent to: restic rewrite latest -e 'nothing-whatsoever' --forget
	rewriteOpts := RewriteOptions{
		Forget:                true,
		ExcludePatternOptions: filter.ExcludePatternOptions{Excludes: []string{"nothing-whatsoever"}},
	}
	rtest.OK(t, testRunRewriteWithOpts(t, rewriteOpts, env.gopts, []string{"latest"}))

	after := testListSnapshots(t, env.gopts, 1)
	unchanged := before[0] == after[0]
	rtest.Assert(t, unchanged, "snapshots should be identical but are %s and %s",
		before[0].String(), after[0].String())
}

View file

@ -336,6 +336,13 @@ The options ``--exclude``, ``--exclude-file``, ``--iexclude`` and
``--iexclude-file`` are supported. They behave the same way as for the backup
command, see :ref:`backup-excluding-files` for details.
The options ``--include``, ``--include-file``, ``--iinclude`` and
``--iinclude-file`` are supported as well.
The ``--include`` variants allow you to reduce an existing snapshot or a set of snapshots
to those files that you are really interested in. An example could be all pictures
from a snapshot:
``restic rewrite -r ... --iinclude "*.jpg" --iinclude "*.jpeg" --iinclude "*.png"``.
It is possible to rewrite only a subset of snapshots by filtering them the same
way as for the ``copy`` command, see :ref:`copy-filtering-snapshots`.

View file

@ -238,8 +238,9 @@ func SaveTree(ctx context.Context, saver restic.BlobSaver, nodes TreeNodeIterato
}
type TreeJSONBuilder struct {
buf bytes.Buffer
lastName string
buf bytes.Buffer
lastName string
countNodes int
}
func NewTreeJSONBuilder() *TreeJSONBuilder {
@ -262,6 +263,7 @@ func (builder *TreeJSONBuilder) AddNode(node *Node) error {
return err
}
_, _ = builder.buf.Write(val)
builder.countNodes++
return nil
}
@ -275,6 +277,11 @@ func (builder *TreeJSONBuilder) Finalize() ([]byte, error) {
return buf, nil
}
// Count returns the number of nodes that have been added to the builder so
// far; the counter is incremented by AddNode after a node has been written.
func (builder *TreeJSONBuilder) Count() int {
return builder.countNodes
}
func FindTreeDirectory(ctx context.Context, repo restic.BlobLoader, id *restic.ID, dir string) (*restic.ID, error) {
if id == nil {
return nil, errors.New("tree id is null")

View file

@ -25,6 +25,10 @@ func (opts *IncludePatternOptions) Add(f *pflag.FlagSet) {
f.StringArrayVar(&opts.InsensitiveIncludeFiles, "iinclude-file", nil, "same as --include-file but ignores casing of `file`names in patterns")
}
// Empty reports whether no include option is set at all: neither patterns nor
// pattern files, in both their case-sensitive and case-insensitive variants.
func (opts *IncludePatternOptions) Empty() bool {
	total := len(opts.Includes) + len(opts.InsensitiveIncludes) +
		len(opts.IncludeFiles) + len(opts.InsensitiveIncludeFiles)
	return total == 0
}
func (opts IncludePatternOptions) CollectPatterns(warnf func(msg string, args ...interface{})) ([]IncludeByNameFunc, error) {
var fs []IncludeByNameFunc
if len(opts.IncludeFiles) > 0 {

View file

@ -13,6 +13,7 @@ import (
type NodeRewriteFunc func(node *data.Node, path string) *data.Node
type FailedTreeRewriteFunc func(nodeID restic.ID, path string, err error) (data.TreeNodeIterator, error)
type QueryRewrittenSizeFunc func() SnapshotSize
type NodeKeepEmptyDirectoryFunc func(path string) bool
type SnapshotSize struct {
FileCount uint
@ -21,7 +22,8 @@ type SnapshotSize struct {
type RewriteOpts struct {
// return nil to remove the node
RewriteNode NodeRewriteFunc
RewriteNode NodeRewriteFunc
KeepEmtpyDirectory NodeKeepEmptyDirectoryFunc
// decide what to do with a tree that could not be loaded. Return nil to remove the node. By default the load error is returned which causes the operation to fail.
RewriteFailedTree FailedTreeRewriteFunc
@ -56,10 +58,15 @@ func NewTreeRewriter(opts RewriteOpts) *TreeRewriter {
return nil, err
}
}
if rw.opts.KeepEmtpyDirectory == nil {
rw.opts.KeepEmtpyDirectory = func(_ string) bool {
return true
}
}
return rw
}
func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc) (*TreeRewriter, QueryRewrittenSizeFunc) {
func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc, keepEmptyDirecoryFilter NodeKeepEmptyDirectoryFunc) (*TreeRewriter, QueryRewrittenSizeFunc) {
var count uint
var size uint64
@ -72,7 +79,8 @@ func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc) (*TreeRewriter, QueryR
}
return node
},
DisableNodeCache: true,
DisableNodeCache: true,
KeepEmtpyDirectory: keepEmptyDirecoryFilter,
})
ss := func() SnapshotSize {
@ -159,6 +167,8 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, loader restic.BlobLoader
newID, err := t.RewriteTree(ctx, loader, saver, path, subtree)
if err != nil {
return restic.ID{}, err
} else if err == nil && newID.IsNull() {
continue
}
node.Subtree = &newID
err = tb.AddNode(node)
@ -171,6 +181,9 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, loader restic.BlobLoader
if err != nil {
return restic.ID{}, err
}
if tb.Count() == 0 && !t.opts.KeepEmtpyDirectory(nodepath) {
return restic.ID{}, nil
}
if t.replaces != nil {
t.replaces[nodeID] = newTreeID

View file

@ -306,7 +306,7 @@ func TestSnapshotSizeQuery(t *testing.T) {
}
return node
}
rewriter, querySize := NewSnapshotSizeRewriter(rewriteNode)
rewriter, querySize := NewSnapshotSizeRewriter(rewriteNode, nil)
newRoot, err := rewriter.RewriteTree(ctx, modrepo, modrepo, "/", root)
if err != nil {
t.Error(err)