diff --git a/changelog/unreleased/issue-4278 b/changelog/unreleased/issue-4278 new file mode 100644 index 000000000..5a88dbed9 --- /dev/null +++ b/changelog/unreleased/issue-4278 @@ -0,0 +1,12 @@ +Enhancement: Support include filters in `rewrite` command + +The `rewrite` command now supports the standard include filter options: + --iinclude pattern same as --include pattern but ignores the casing of filenames + --iinclude-file file same as --include-file but ignores casing of filenames in patterns + -i, --include pattern include a pattern (can be specified multiple times) + --include-file file read include patterns from a file (can be specified multiple times) + +Include and exclude filters are mutually exclusive, as in other commands. + +https://github.com/restic/restic/issues/4278 +https://github.com/restic/restic/pull/5191 diff --git a/cmd/restic/cmd_repair_snapshots.go b/cmd/restic/cmd_repair_snapshots.go index 104d3dff3..b392bf017 100644 --- a/cmd/restic/cmd_repair_snapshots.go +++ b/cmd/restic/cmd_repair_snapshots.go @@ -151,7 +151,7 @@ func runRepairSnapshots(ctx context.Context, gopts global.Options, opts RepairOp func(ctx context.Context, sn *data.Snapshot, uploader restic.BlobSaver) (restic.ID, *data.SnapshotSummary, error) { id, err := rewriter.RewriteTree(ctx, repo, uploader, "/", *sn.Tree) return id, nil, err - }, opts.DryRun, opts.Forget, nil, "repaired", printer) + }, opts.DryRun, opts.Forget, nil, "repaired", printer, false) if err != nil { return errors.Fatalf("unable to rewrite snapshot ID %q: %v", sn.ID().Str(), err) } diff --git a/cmd/restic/cmd_rewrite.go b/cmd/restic/cmd_rewrite.go index 9c53dcae6..acd407b52 100644 --- a/cmd/restic/cmd_rewrite.go +++ b/cmd/restic/cmd_rewrite.go @@ -30,6 +30,9 @@ The "rewrite" command excludes files from existing snapshots. It creates new snapshots containing the same data as the original ones, but without the files you specify to exclude. All metadata (time, host, tags) will be preserved. 
+Alternatively you can use one of the --include variants to only include files +in the new snapshot which you want to preserve. + The snapshots to rewrite are specified using the --host, --tag and --path options, or by providing a list of snapshot IDs. Please note that specifying neither any of these options nor a snapshot ID will cause the command to rewrite all snapshots. @@ -46,8 +49,8 @@ When rewrite is used with the --snapshot-summary option, a new snapshot is created containing statistics summary data. Only two fields in the summary will be non-zero: TotalFilesProcessed and TotalBytesProcessed. -When rewrite is called with one of the --exclude options, TotalFilesProcessed -and TotalBytesProcessed will be updated in the snapshot summary. +When rewrite is called with one of the --exclude or --include options, +TotalFilesProcessed and TotalBytesProcessed will be updated in the snapshot summary. EXIT STATUS =========== @@ -109,6 +112,7 @@ type RewriteOptions struct { Metadata snapshotMetadataArgs data.SnapshotFilter filter.ExcludePatternOptions + filter.IncludePatternOptions } func (opts *RewriteOptions) AddFlags(f *pflag.FlagSet) { @@ -120,6 +124,7 @@ func (opts *RewriteOptions) AddFlags(f *pflag.FlagSet) { initMultiSnapshotFilter(f, &opts.SnapshotFilter, true) opts.ExcludePatternOptions.Add(f) + opts.IncludePatternOptions.Add(f) } // rewriteFilterFunc returns the filtered tree ID or an error. If a snapshot summary is returned, the snapshot will @@ -136,33 +141,32 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *data. 
return false, err } + includeByNameFuncs, err := opts.IncludePatternOptions.CollectPatterns(printer.E) + if err != nil { + return false, err + } + metadata, err := opts.Metadata.convert() if err != nil { return false, err } + condInclude := len(includeByNameFuncs) > 0 + condExclude := len(rejectByNameFuncs) > 0 || opts.SnapshotSummary var filter rewriteFilterFunc + var rewriteNode walker.NodeRewriteFunc + var keepEmptyDirectoryFunc walker.NodeKeepEmptyDirectoryFunc - if len(rejectByNameFuncs) > 0 || opts.SnapshotSummary { - selectByName := func(nodepath string) bool { - for _, reject := range rejectByNameFuncs { - if reject(nodepath) { - return false - } - } - return true + if condInclude || condExclude { + if condInclude { + rewriteNode, keepEmptyDirectoryFunc = gatherIncludeFilters(includeByNameFuncs, printer) + } else { + rewriteNode = gatherExcludeFilters(rejectByNameFuncs, printer) + keepEmptyDirectoryFunc = nil } - rewriteNode := func(node *data.Node, path string) *data.Node { - if selectByName(path) { - return node - } - printer.P("excluding %s", path) - return nil - } - - rewriter, querySize := walker.NewSnapshotSizeRewriter(rewriteNode) + rewriter, querySize := walker.NewSnapshotSizeRewriter(rewriteNode, keepEmptyDirectoryFunc) filter = func(ctx context.Context, sn *data.Snapshot, uploader restic.BlobSaver) (restic.ID, *data.SnapshotSummary, error) { id, err := rewriter.RewriteTree(ctx, repo, uploader, "/", *sn.Tree) @@ -186,11 +190,12 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *data. 
} return filterAndReplaceSnapshot(ctx, repo, sn, - filter, opts.DryRun, opts.Forget, metadata, "rewrite", printer) + filter, opts.DryRun, opts.Forget, metadata, "rewrite", printer, len(includeByNameFuncs) > 0) } func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *data.Snapshot, - filter rewriteFilterFunc, dryRun bool, forget bool, newMetadata *snapshotMetadata, addTag string, printer progress.Printer) (bool, error) { + filter rewriteFilterFunc, dryRun bool, forget bool, newMetadata *snapshotMetadata, addTag string, printer progress.Printer, + keepEmptySnapshot bool) (bool, error) { var filteredTree restic.ID var summary *data.SnapshotSummary @@ -204,6 +209,10 @@ func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *d } if filteredTree.IsNull() { + if keepEmptySnapshot { + debug.Log("Snapshot %v not modified", sn) + return false, nil + } if dryRun { printer.P("would delete empty snapshot") } else { @@ -284,8 +293,12 @@ func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *d } func runRewrite(ctx context.Context, opts RewriteOptions, gopts global.Options, args []string, term ui.Terminal) error { - if !opts.SnapshotSummary && opts.ExcludePatternOptions.Empty() && opts.Metadata.empty() { - return errors.Fatal("Nothing to do: no excludes provided and no new metadata provided") + hasExcludes := !opts.ExcludePatternOptions.Empty() + hasIncludes := !opts.IncludePatternOptions.Empty() + if !opts.SnapshotSummary && !hasExcludes && !hasIncludes && opts.Metadata.empty() { + return errors.Fatal("Nothing to do: no excludes/includes provided and no new metadata provided") + } else if hasExcludes && hasIncludes { + return errors.Fatal("exclude and include patterns are mutually exclusive") } printer := ui.NewProgressPrinter(false, gopts.Verbosity, term) @@ -348,3 +361,71 @@ func runRewrite(ctx context.Context, opts RewriteOptions, gopts global.Options, return nil } + +func 
gatherIncludeFilters(includeByNameFuncs []filter.IncludeByNameFunc, printer progress.Printer) (rewriteNode walker.NodeRewriteFunc, keepEmptyDirectory walker.NodeKeepEmptyDirectoryFunc) { + inSelectByName := func(nodepath string, node *data.Node) bool { + for _, include := range includeByNameFuncs { + if node.Type == data.NodeTypeDir { + // always include directories + return true + } + matched, childMayMatch := include(nodepath) + if matched && childMayMatch { + return matched && childMayMatch + } + } + return false + } + + rewriteNode = func(node *data.Node, path string) *data.Node { + if inSelectByName(path, node) { + if node.Type != data.NodeTypeDir { + printer.VV("including %q\n", path) + } + return node + } + return nil + } + + inSelectByNameDir := func(nodepath string) bool { + for _, include := range includeByNameFuncs { + matched, _ := include(nodepath) + if matched { + return matched + } + } + return false + } + + keepEmptyDirectory = func(path string) bool { + keep := inSelectByNameDir(path) + if keep { + printer.VV("including directoty %q\n", path) + } + return keep + } + + return rewriteNode, keepEmptyDirectory +} + +func gatherExcludeFilters(excludeByNameFuncs []filter.RejectByNameFunc, printer progress.Printer) (rewriteNode walker.NodeRewriteFunc) { + exSelectByName := func(nodepath string) bool { + for _, reject := range excludeByNameFuncs { + if reject(nodepath) { + return false + } + } + return true + } + + rewriteNode = func(node *data.Node, path string) *data.Node { + if exSelectByName(path) { + return node + } + + printer.VV("excluding %q\n", path) + return nil + } + + return rewriteNode +} diff --git a/cmd/restic/cmd_rewrite_integration_test.go b/cmd/restic/cmd_rewrite_integration_test.go index 35f5f4d01..dc5fd9a85 100644 --- a/cmd/restic/cmd_rewrite_integration_test.go +++ b/cmd/restic/cmd_rewrite_integration_test.go @@ -2,7 +2,9 @@ package main import ( "context" + "os" "path/filepath" + "strings" "testing" 
"github.com/restic/restic/internal/data" @@ -27,6 +29,13 @@ func testRunRewriteExclude(t testing.TB, gopts global.Options, excludes []string })) } +func testRunRewriteWithOpts(t testing.TB, opts RewriteOptions, gopts global.Options, args []string) error { + rtest.OK(t, withTermStatus(t, gopts, func(ctx context.Context, gopts global.Options) error { + return runRewrite(context.TODO(), opts, gopts, args, gopts.Term) + })) + return nil +} + func createBasicRewriteRepo(t testing.TB, env *testEnvironment) restic.ID { testSetupBackupData(t, env) @@ -39,6 +48,20 @@ func createBasicRewriteRepo(t testing.TB, env *testEnvironment) restic.ID { return snapshotIDs[0] } +func createBasicRewriteRepoWithEmptyDirectory(t testing.TB, env *testEnvironment) restic.ID { + testSetupBackupData(t, env) + + // make an empty directory named "empty-directory" + rtest.OK(t, os.Mkdir(filepath.Join(env.testdata, "/0/tests", "empty-directory"), 0755)) + + // create backup + testRunBackup(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{}, env.gopts) + snapshotIDs := testRunList(t, env.gopts, "snapshots") + rtest.Assert(t, len(snapshotIDs) == 1, "expected one snapshot, got %v", snapshotIDs) + + return snapshotIDs[0] +} + func getSnapshot(t testing.TB, snapshotID restic.ID, env *testEnvironment) *data.Snapshot { t.Helper() @@ -195,3 +218,169 @@ func TestRewriteSnaphotSummary(t *testing.T) { rtest.Equals(t, oldSummary.TotalBytesProcessed, newSn.Summary.TotalBytesProcessed, "unexpected TotalBytesProcessed value") rtest.Equals(t, oldSummary.TotalFilesProcessed, newSn.Summary.TotalFilesProcessed, "unexpected TotalFilesProcessed value") } + +func TestRewriteIncludeFiles(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + // opens repo, creates one backup of the whole lot of 'testdata' + createBasicRewriteRepo(t, env) + snapshots := testListSnapshots(t, env.gopts, 1) + + // include txt files + err := testRunRewriteWithOpts(t, + RewriteOptions{ + Forget: true, + 
IncludePatternOptions: filter.IncludePatternOptions{Includes: []string{"*.txt"}}, + }, + env.gopts, + []string{"latest"}) + rtest.OK(t, err) + newSnapshots := testListSnapshots(t, env.gopts, 1) + rtest.Assert(t, snapshots[0] != newSnapshots[0], "snapshot id should have changed") + + // read restic ls output and count .txt files + count := 0 + out := testRunLsWithOpts(t, env.gopts, LsOptions{}, []string{"latest"}) + for _, line := range strings.Split(string(out), "\n") { + if strings.Contains(line, ".txt") { + count++ + } + } + rtest.Assert(t, count == 2, "expected two txt files, but got %d files", count) + + err = withTermStatus(t, env.gopts, func(ctx context.Context, gopts global.Options) error { + printer := ui.NewProgressPrinter(gopts.JSON, gopts.Verbosity, gopts.Term) + _, repo, unlock, err := openWithExclusiveLock(ctx, gopts, false, printer) + rtest.OK(t, err) + defer unlock() + + sn, err := data.LoadSnapshot(context.TODO(), repo, newSnapshots[0]) + rtest.OK(t, err) + + rtest.Assert(t, sn.Summary != nil, "snapshot should have a summary attached") + rtest.Assert(t, sn.Summary.TotalFilesProcessed == 2, + "there should be 2 files in the snapshot, but there are %d files", sn.Summary.TotalFilesProcessed) + return nil + }) + rtest.OK(t, err) +} + +func TestRewriteExcludeFiles(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + createBasicRewriteRepo(t, env) + snapshots := testListSnapshots(t, env.gopts, 1) + + // exclude txt files + err := testRunRewriteWithOpts(t, + RewriteOptions{ + Forget: true, + ExcludePatternOptions: filter.ExcludePatternOptions{Excludes: []string{"*.txt"}}, + }, + env.gopts, + []string{"latest"}) + rtest.OK(t, err) + newSnapshots := testListSnapshots(t, env.gopts, 1) + rtest.Assert(t, snapshots[0] != newSnapshots[0], "snapshot id should have changed") + + // read restic ls output and count .txt files + count := 0 + out := testRunLsWithOpts(t, env.gopts, LsOptions{}, []string{"latest"}) + for _, line := range 
strings.Split(string(out), "\n") { + if strings.Contains(line, ".txt") { + count++ + } + } + rtest.Assert(t, count == 0, "expected 0 txt files, but got %d files", count) +} + +func TestRewriteExcludeIncludeContradiction(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + createBasicRewriteRepo(t, env) + testListSnapshots(t, env.gopts, 1) + + // test contradiction + err := withTermStatus(t, env.gopts, func(ctx context.Context, gopts global.Options) error { + return runRewrite(ctx, + RewriteOptions{ + ExcludePatternOptions: filter.ExcludePatternOptions{Excludes: []string{"nonsense"}}, + IncludePatternOptions: filter.IncludePatternOptions{Includes: []string{"not allowed"}}, + }, + gopts, []string{"quack"}, env.gopts.Term) + }) + rtest.Assert(t, err != nil, `expected to fail command with message "exclude and include patterns are mutually exclusive"`) +} + +func TestRewriteIncludeEmptyDirectory(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + snapIDEmpty := createBasicRewriteRepoWithEmptyDirectory(t, env) + + // restic rewrite -i empty-directory --forget + // exclude txt files + err := testRunRewriteWithOpts(t, + RewriteOptions{ + Forget: true, + IncludePatternOptions: filter.IncludePatternOptions{Includes: []string{"empty-directory"}}, + }, + env.gopts, + []string{"latest"}) + rtest.OK(t, err) + newSnapshots := testListSnapshots(t, env.gopts, 1) + rtest.Assert(t, snapIDEmpty != newSnapshots[0], "snapshot id should have changed") + + // read restic ls output and count lines with "empty-directory" + count := 0 + out := testRunLsWithOpts(t, env.gopts, LsOptions{}, []string{"latest"}) + for _, line := range strings.Split(string(out), "\n") { + if strings.Contains(line, "empty-directory") { + count++ + } + } + rtest.Assert(t, count == 1, "expected 1 empty directory, but got %d entries", count) +} + +// TestRewriteIncludeNothing makes sure when nothing is included, the original snapshot stays untouched +func 
TestRewriteIncludeNothing(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + createBasicRewriteRepo(t, env) + snapsBefore := testListSnapshots(t, env.gopts, 1) + + // restic rewrite latest -i nothing-whatsoever --forget + err := testRunRewriteWithOpts(t, + RewriteOptions{ + Forget: true, + ExcludePatternOptions: filter.ExcludePatternOptions{Excludes: []string{"nothing-whatsoever"}}, + }, + env.gopts, + []string{"latest"}) + rtest.OK(t, err) + + snapsAfter := testListSnapshots(t, env.gopts, 1) + rtest.Assert(t, snapsBefore[0] == snapsAfter[0], "snapshots should be identical but are %s and %s", + snapsBefore[0].Str(), snapsAfter[0].Str()) +} + +// TestRewriteExcludeNothing makes sure when nothing is excluded, the original snapshot stays untouched +// and no new (unchanged) snapshot would be created +func TestRewriteExcludeNothing(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + createBasicRewriteRepo(t, env) + snapsBefore := testListSnapshots(t, env.gopts, 1) + + // restic rewrite latest -e 'nothing-whatsoever' --forget + err := testRunRewriteWithOpts(t, + RewriteOptions{ + Forget: true, + ExcludePatternOptions: filter.ExcludePatternOptions{Excludes: []string{"nothing-whatsoever"}}, + }, + env.gopts, + []string{"latest"}) + rtest.OK(t, err) + snapsAfter := testListSnapshots(t, env.gopts, 1) + rtest.Assert(t, snapsBefore[0] == snapsAfter[0], "snapshots should be identical but are %s and %s", + snapsBefore[0].String(), snapsAfter[0].String()) +} diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst index 797ea9f9d..ba263fd31 100644 --- a/doc/045_working_with_repos.rst +++ b/doc/045_working_with_repos.rst @@ -336,6 +336,13 @@ The options ``--exclude``, ``--exclude-file``, ``--iexclude`` and ``--iexclude-file`` are supported. They behave the same way as for the backup command, see :ref:`backup-excluding-files` for details. 
+The options ``--include``, ``--include-file``, ``--iinclude`` and +``--iinclude-file`` are supported as well. +The ``--include`` variants allow you to reduce an existing snapshot or a set of snapshots +to those files that you are really interested in. An example could be all pictures +from a snapshot: +``restic rewrite -r ... --iinclude "*.jpg" --iinclude "*.jpeg" --iinclude "*.png"``. + It is possible to rewrite only a subset of snapshots by filtering them the same way as for the ``copy`` command, see :ref:`copy-filtering-snapshots`. diff --git a/internal/data/tree.go b/internal/data/tree.go index 1bfcbf660..54f2cb131 100644 --- a/internal/data/tree.go +++ b/internal/data/tree.go @@ -238,8 +238,9 @@ func SaveTree(ctx context.Context, saver restic.BlobSaver, nodes TreeNodeIterato } type TreeJSONBuilder struct { - buf bytes.Buffer - lastName string + buf bytes.Buffer + lastName string + countNodes int } func NewTreeJSONBuilder() *TreeJSONBuilder { @@ -262,6 +263,7 @@ func (builder *TreeJSONBuilder) AddNode(node *Node) error { return err } _, _ = builder.buf.Write(val) + builder.countNodes++ return nil } @@ -275,6 +277,11 @@ func (builder *TreeJSONBuilder) Finalize() ([]byte, error) { return buf, nil } +// Count returns the number of nodes in the tree +func (builder *TreeJSONBuilder) Count() int { + return builder.countNodes +} + func FindTreeDirectory(ctx context.Context, repo restic.BlobLoader, id *restic.ID, dir string) (*restic.ID, error) { if id == nil { return nil, errors.New("tree id is null") diff --git a/internal/filter/include.go b/internal/filter/include.go index 87d5f1207..e6eefe6b9 100644 --- a/internal/filter/include.go +++ b/internal/filter/include.go @@ -25,6 +25,10 @@ func (opts *IncludePatternOptions) Add(f *pflag.FlagSet) { f.StringArrayVar(&opts.InsensitiveIncludeFiles, "iinclude-file", nil, "same as --include-file but ignores casing of `file`names in patterns") } +func (opts *IncludePatternOptions) Empty() bool { + return len(opts.Includes) == 0 
&& len(opts.InsensitiveIncludes) == 0 && len(opts.IncludeFiles) == 0 && len(opts.InsensitiveIncludeFiles) == 0 +} + func (opts IncludePatternOptions) CollectPatterns(warnf func(msg string, args ...interface{})) ([]IncludeByNameFunc, error) { var fs []IncludeByNameFunc if len(opts.IncludeFiles) > 0 { diff --git a/internal/walker/rewriter.go b/internal/walker/rewriter.go index f53577e5a..6cd228733 100644 --- a/internal/walker/rewriter.go +++ b/internal/walker/rewriter.go @@ -13,6 +13,7 @@ import ( type NodeRewriteFunc func(node *data.Node, path string) *data.Node type FailedTreeRewriteFunc func(nodeID restic.ID, path string, err error) (data.TreeNodeIterator, error) type QueryRewrittenSizeFunc func() SnapshotSize +type NodeKeepEmptyDirectoryFunc func(path string) bool type SnapshotSize struct { FileCount uint @@ -21,7 +22,8 @@ type SnapshotSize struct { type RewriteOpts struct { // return nil to remove the node - RewriteNode NodeRewriteFunc + RewriteNode NodeRewriteFunc + KeepEmtpyDirectory NodeKeepEmptyDirectoryFunc // decide what to do with a tree that could not be loaded. Return nil to remove the node. By default the load error is returned which causes the operation to fail. 
RewriteFailedTree FailedTreeRewriteFunc @@ -56,10 +58,15 @@ func NewTreeRewriter(opts RewriteOpts) *TreeRewriter { return nil, err } } + if rw.opts.KeepEmtpyDirectory == nil { + rw.opts.KeepEmtpyDirectory = func(_ string) bool { + return true + } + } return rw } -func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc) (*TreeRewriter, QueryRewrittenSizeFunc) { +func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc, keepEmptyDirecoryFilter NodeKeepEmptyDirectoryFunc) (*TreeRewriter, QueryRewrittenSizeFunc) { var count uint var size uint64 @@ -72,7 +79,8 @@ func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc) (*TreeRewriter, QueryR } return node }, - DisableNodeCache: true, + DisableNodeCache: true, + KeepEmtpyDirectory: keepEmptyDirecoryFilter, }) ss := func() SnapshotSize { @@ -159,6 +167,8 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, loader restic.BlobLoader newID, err := t.RewriteTree(ctx, loader, saver, path, subtree) if err != nil { return restic.ID{}, err + } else if err == nil && newID.IsNull() { + continue } node.Subtree = &newID err = tb.AddNode(node) @@ -171,6 +181,9 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, loader restic.BlobLoader if err != nil { return restic.ID{}, err } + if tb.Count() == 0 && !t.opts.KeepEmtpyDirectory(nodepath) { + return restic.ID{}, nil + } if t.replaces != nil { t.replaces[nodeID] = newTreeID diff --git a/internal/walker/rewriter_test.go b/internal/walker/rewriter_test.go index edc3685dc..de6764bab 100644 --- a/internal/walker/rewriter_test.go +++ b/internal/walker/rewriter_test.go @@ -306,7 +306,7 @@ func TestSnapshotSizeQuery(t *testing.T) { } return node } - rewriter, querySize := NewSnapshotSizeRewriter(rewriteNode) + rewriter, querySize := NewSnapshotSizeRewriter(rewriteNode, nil) newRoot, err := rewriter.RewriteTree(ctx, modrepo, modrepo, "/", root) if err != nil { t.Error(err)