diff --git a/changelog/unreleased/issue-5372 b/changelog/unreleased/issue-5372 new file mode 100644 index 000000000..1a7c53bf8 --- /dev/null +++ b/changelog/unreleased/issue-5372 @@ -0,0 +1,8 @@ +Enhancement: command to list packfiles belonging to a snapshot + +This enhancement creates a new option `packfiles` for command `restore`. It shows the +packfiles which are related to a snapshot or a set of snapshots. Output is either +plain text or JSON. Different levels of details can be activated. + +https://github.com/restic/restic/issues/5372 +https://github.com/restic/restic/pull/5396 diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index d797d1b68..4ece21305 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -1,9 +1,14 @@ package main import ( + "bytes" "context" + "encoding/json" + "io" + "maps" "path/filepath" "runtime" + "slices" "time" "github.com/restic/restic/internal/data" @@ -11,6 +16,9 @@ import ( "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/filter" "github.com/restic/restic/internal/global" + "github.com/restic/restic/internal/repository" + "github.com/restic/restic/internal/repository/pack" + "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restorer" "github.com/restic/restic/internal/ui" "github.com/restic/restic/internal/ui/progress" @@ -38,6 +46,28 @@ syntax, where "subfolder" is a path within the snapshot tree as shown by "restic ls". POSIX ACLs are always restored by their numeric value, while file ownership can optionally be restored by name instead of numeric value. +Options: + The option "--packfiles" creates a list of participating packfiles belonging + to a snapshot in ID ascending order. + "--packfiles" and "--json" together: the output produces the JSON output: + { + "packfiles": [ + { + "id": "dc25893d422a71af41aaaa843cd7121708cd88679bf3885f94a32900ac068e84", + "type": "data", + "size": 11429556 + }, + ... 
+ ] + } + + "--packfiles" alone will produce text output. "--verbose" will control the + level of output detail which will be printed. + If "--verbose" is not specified you will receive the list of packfiles for the + specified snapshot, or the list of all packfiles in the repository if there is + no snapshot given. + With option "--verbose" you will see the packfile ID, its type and its length on disk, + in this order. EXIT STATUS =========== @@ -71,6 +101,7 @@ type RestoreOptions struct { Verify bool Overwrite restorer.OverwriteBehavior Delete bool + PackfileList bool ExcludeXattrPattern []string IncludeXattrPattern []string OwnershipByName bool @@ -94,6 +125,7 @@ func (opts *RestoreOptions) AddFlags(f *pflag.FlagSet) { if runtime.GOOS != "windows" { f.BoolVar(&opts.OwnershipByName, "ownership-by-name", false, "restore file ownership by user name and group name (except POSIX ACLs)") } + f.BoolVar(&opts.PackfileList, "packfiles", false, "create packfile list for selected snapshot") } func runRestore(ctx context.Context, opts RestoreOptions, gopts global.Options, @@ -106,6 +138,10 @@ func runRestore(ctx context.Context, opts RestoreOptions, gopts global.Options, printer = restoreui.NewTextProgress(term, gopts.Verbosity) } + if opts.PackfileList { + return packfileLIst(ctx, opts, gopts, args, printer) + } + excludePatternFns, err := opts.ExcludePatternOptions.CollectPatterns(printer.E) if err != nil { return err @@ -315,3 +351,129 @@ func getXattrSelectFilter(opts RestoreOptions, printer progress.Printer) (func(x // default to including all xattrs return func(_ string) bool { return true }, nil } + +// this section deals with generating a packfile list from a given set of snapshots +// or all snapshots if no filtering has been specified +// on purpose it relies on (selected) used blobs in the repository, so it can work +// on cached metadata. 
+ +// PacklistInfo defines one entry per packfile +type PacklistInfo struct { + ID restic.ID `json:"id"` + Type string `json:"type"` + Size int64 `json:"size"` +} + +// output definition for JSON +type outputStruct struct { + PackfileList []PacklistInfo `json:"packfiles"` +} + +// CheckWithSnapshots will process snapshot IDs from 'selectedTrees' and +// will create a mapping between the packfile with information about size and type +func CheckWithSnapshots(ctx context.Context, repo *repository.Repository, + selectedTrees []restic.ID, +) (map[restic.ID]PacklistInfo, error) { + + // gather used blobs from all trees for 'selectedTrees' + usedBlobs := repo.NewAssociatedBlobSet() + if err := data.FindUsedBlobs(ctx, repo, selectedTrees, usedBlobs, nil); err != nil { + return nil, err + } + + // get length of packfiles from repository via index + repoPacks, err := pack.Size(ctx, repo, false) + if err != nil { + return nil, err + } + + // convert used blobs to packfile IDs and collect statistics + snapPacks := make(map[restic.ID]PacklistInfo) + for bh := range usedBlobs.Keys() { + for _, blob := range repo.LookupBlob(bh.Type, bh.ID) { + if _, ok := snapPacks[blob.PackID]; !ok { + snapPacks[blob.PackID] = PacklistInfo{ + ID: blob.PackID, + Type: blob.Type.String(), + Size: repoPacks[blob.PackID], + } + } + } + } + + return snapPacks, nil +} + +// packfileLIst runs the sub-command '--packfiles' +func packfileLIst(ctx context.Context, opts RestoreOptions, gopts global.Options, args []string, + printer progress.Printer, +) error { + ctx, repo, unlock, err := openWithReadLock(ctx, gopts, true, printer) + if err != nil { + return err + } + defer unlock() + + snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) + if err != nil { + return err + } + + // index needs to be loaded + if err = repo.LoadIndex(ctx, printer); err != nil { + return err + } + + // find selected snapshots + selectedTrees := make([]restic.ID, 0, 128) + for sn := range 
FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args, printer) { + selectedTrees = append(selectedTrees, *sn.Tree) + } + if ctx.Err() != nil { + return ctx.Err() + } + + // gather packfiles list + snapPacks, err := CheckWithSnapshots(ctx, repo, selectedTrees) + if err != nil { + return err + } + + // sort packfile IDs + + packfilesSort := slices.SortedFunc(maps.Keys(snapPacks), func(a, b restic.ID) int { + return bytes.Compare(a[:], b[:]) + }) + + if gopts.JSON { + return produceJSONOutput(packfilesSort, snapPacks, gopts.Term.OutputWriter()) + } + + produceTextOutput(packfilesSort, snapPacks, gopts, printer) + return nil +} + +// produceJSONOutput generates JSON output +func produceJSONOutput(packfilesSort []restic.ID, snapPacks map[restic.ID]PacklistInfo, stdout io.Writer) error { + var output outputStruct + output.PackfileList = make([]PacklistInfo, 0, len(packfilesSort)) + for _, packfileID := range packfilesSort { + output.PackfileList = append(output.PackfileList, snapPacks[packfileID]) + } + + return json.NewEncoder(stdout).Encode(output) +} + +func produceTextOutput(packfilesSort []restic.ID, snapPacks map[restic.ID]PacklistInfo, + gopts global.Options, printer progress.Printer, +) { + for _, packfileID := range packfilesSort { + d := snapPacks[packfileID] + + if gopts.Verbosity >= 2 { + printer.P("%s %s %10d", packfileID.String(), d.Type, d.Size) + } else { + printer.P("%s", packfileID.String()) + } + } +} diff --git a/cmd/restic/cmd_restore_integration_test.go b/cmd/restic/cmd_restore_integration_test.go index 47d611d8b..0c88ab479 100644 --- a/cmd/restic/cmd_restore_integration_test.go +++ b/cmd/restic/cmd_restore_integration_test.go @@ -2,6 +2,7 @@ package main import ( "context" + "encoding/json" "fmt" "math/rand" "os" @@ -416,3 +417,82 @@ func TestRestoreDefaultLayout(t *testing.T) { rtest.RemoveAll(t, filepath.Join(env.base, "repo")) rtest.RemoveAll(t, target) } + +func TestPackfileListJSON(t *testing.T) { + // setup + env, 
cleanup := withTestEnvironment(t) + defer cleanup() + testSetupBackupData(t, env) + + // backup + opts := BackupOptions{} + testRunBackup(t, env.testdata+"/0/0/9", []string{"."}, opts, env.gopts) + testListSnapshots(t, env.gopts, 1) + + // run restore --packfiles + env.gopts.JSON = true + buf := testRunPackfiles(t, env.gopts, RestoreOptions{PackfileList: true}, []string{"latest"}) + t.Logf("buf='%v'", buf) + + // unmarshal output + output := &outputStruct{} + rtest.OK(t, json.Unmarshal(buf, output)) + + countTree := 0 + countData := 0 + for _, item := range output.PackfileList { + switch item.Type { + case "tree": + countTree++ + case "data": + countData++ + } + } + rtest.Assert(t, countTree == 1 && countData == 1, "expected tree count == 1 and data count == 1 but got %d trees and %d data packfiles", + countTree, countData) +} + +func TestPackfileListText(t *testing.T) { + // setup + env, cleanup := withTestEnvironment(t) + defer cleanup() + testSetupBackupData(t, env) + + // backup + opts := BackupOptions{} + testRunBackup(t, filepath.Join(env.testdata, "0", "0", "9"), []string{"."}, opts, env.gopts) + testListSnapshots(t, env.gopts, 1) + + // run packfilelist + env.gopts.JSON = false + env.gopts.Verbosity = 2 + buf := testRunPackfiles(t, env.gopts, RestoreOptions{PackfileList: true}, []string{"latest"}) + + countTree := 0 + countData := 0 + for _, line := range strings.Split(string(buf), "\n") { + if len(line) < 64 { + continue + } + parts := strings.Split(line, " ") + switch parts[1] { + case "tree": + countTree++ + case "data": + countData++ + } + } + rtest.Assert(t, countTree == 1 && countData == 1, "expected tree count == 1 and data count == 1 but got %d trees and %d data packfiles", + countTree, countData) +} + +func testRunPackfiles(t testing.TB, gopts global.Options, opts RestoreOptions, args []string) []byte { + buf, err := withCaptureStdout(t, gopts, func(ctx context.Context, g global.Options) error { + err2 := runRestore(ctx, opts, g, g.Term, args) + 
rtest.OK(t, err2) + return err2 + }) + rtest.OK(t, err) + + return buf.Bytes() +} diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst index 9aff3ab0f..f40e89753 100644 --- a/doc/045_working_with_repos.rst +++ b/doc/045_working_with_repos.rst @@ -199,6 +199,51 @@ It works also without specifying the option ``--long``. /tmp/restic/010_introduction.rst +Listing packfiles in a snapshot +=============================== + +If you want to list the packfiles which participate in a snapshot, use the command +``restore`` with the option ``--packfiles``. + +.. code-block:: console + + $ restic -r /srv/restic-repo restore --packfiles latest + repository ec50fbf6 opened (version 2, compression level auto) + [0:00] 100.00% 1 / 1 index files loaded + ae92415f79c281330159ed590db2a973048c886aacfa4fabfa0eaac10b396b8f + dc25893d422a71af41aaaa843cd7121708cd88679bf3885f94a32900ac068e84 + +This will give you the list of all packfiles, sorted in ID ascending order. + +In case you want to know the type and size of each packfile, use option ``--verbose``. + +.. code-block:: console + + $ restic -r /srv/restic-repo restore --packfiles latest -v + ... + repository ec50fbf6 opened (version 2, compression level auto) + [0:00] 100.00% 1 / 1 index files loaded + ae92415f79c281330159ed590db2a973048c886aacfa4fabfa0eaac10b396b8f tree 422728 + dc25893d422a71af41aaaa843cd7121708cd88679bf3885f94a32900ac068e84 data 11429556 + ... + +This command supports the option ``--json``. The formatted output looks like: + +.. code-block:: console + + $ restic -r /srv/restic-repo restore --packfiles latest --json | jq + { + "packfiles": [ + { + "id": "ae92415f79c281330159ed590db2a973048c886aacfa4fabfa0eaac10b396b8f", + "type": "tree", + "size": 422728 + }, + ... + ] + } + + Copying snapshots between repositories ====================================== @@ -376,8 +421,8 @@ modifying the repository. Instead restic will only print the actions it would perform. .. 
note:: The ``rewrite`` command verifies that it does not modify snapshots in - unexpected ways and otherwise fails with the error ``cannot encode tree at "[...]" without losing information``. - This can occur when rewriting a snapshot created by a newer + unexpected ways and fails with a ``cannot encode tree at "[...]" without losing information`` + error otherwise. This can occur when rewriting a snapshot created by a newer version of restic or some third-party implementation. To convert a snapshot into the format expected by the ``rewrite`` command