mirror of
https://github.com/restic/restic.git
synced 2026-05-28 04:35:41 -04:00
Merge 7349e7db8c into ccfb31b5fa
This commit is contained in:
commit
cb515e0f8c
7 changed files with 143 additions and 54 deletions
16
changelog/unreleased/pull-3747
Normal file
16
changelog/unreleased/pull-3747
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
Change: restic check now verifies the local cache on read
|
||||
|
||||
Up to now the check command had to create a temporary cache to ensure that it does
|
||||
not miss bitrot at the repository storage. However, this has the downside that a host
|
||||
may have to store two copies of the repository cache:
|
||||
one for regular usage and one for the check command.
|
||||
|
||||
This PR adds support for the cache to verify its content and thereby allows
|
||||
the check command to use an already existing cache, which as a bonus will be repaired
|
||||
by redownloading locally damaged files if necessary.
|
||||
|
||||
The CLI now has an --temporary-cache option in addition to --no-cache-verify
|
||||
(which replaces --with-cache).
|
||||
|
||||
https://github.com/restic/restic/pull/3747
|
||||
https://github.com/restic/restic/pull/21794
|
||||
|
|
@ -33,8 +33,8 @@ func newCheckCommand(globalOptions *global.Options) *cobra.Command {
|
|||
The "check" command tests the repository for errors and reports any errors it
|
||||
finds. It can also be used to read all data and therefore simulate a restore.
|
||||
|
||||
By default, the "check" command will always load all data directly from the
|
||||
repository and not use a local cache.
|
||||
By default, the "check" command will use a local cache but verify it against
|
||||
the repository. It is possible to switch to using a temporary cache.
|
||||
|
||||
EXIT STATUS
|
||||
===========
|
||||
|
|
@ -73,6 +73,8 @@ type CheckOptions struct {
|
|||
ReadDataSubset string
|
||||
CheckUnused bool
|
||||
WithCache bool
|
||||
NoCacheVerify bool
|
||||
TemporaryCache bool
|
||||
data.SnapshotFilter
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +88,12 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
|
|||
// MarkDeprecated only returns an error when the flag is not found
|
||||
panic(err)
|
||||
}
|
||||
f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository")
|
||||
//f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository")
|
||||
f.BoolVar(&opts.NoCacheVerify, "with-cache", false, "disable verification of local cache")
|
||||
_ = f.MarkDeprecated("with-cache", "--with-cache is deprecated, use --no-cache-verify instead")
|
||||
f.BoolVar(&opts.NoCacheVerify, "no-cache-verify", false, "disable verification of local cache")
|
||||
f.BoolVar(&opts.TemporaryCache, "temporary-cache", false, "create a temporary cache")
|
||||
|
||||
initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
|
||||
}
|
||||
|
||||
|
|
@ -131,6 +138,9 @@ func checkFlags(opts CheckOptions) error {
|
|||
}
|
||||
}
|
||||
|
||||
if opts.NoCacheVerify && opts.TemporaryCache {
|
||||
return errors.Fatal("check flags --no-cache-verify and --temporary-cache cannot be used together")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -177,7 +187,7 @@ func parsePercentage(s string) (float64, error) {
|
|||
// - by default, we use a cache in a temporary directory that is deleted after the check
|
||||
func prepareCheckCache(opts CheckOptions, gopts *global.Options, printer progress.Printer) (cleanup func()) {
|
||||
cleanup = func() {}
|
||||
if opts.WithCache {
|
||||
if !opts.TemporaryCache {
|
||||
// use the default cache, no setup needed
|
||||
return cleanup
|
||||
}
|
||||
|
|
@ -210,7 +220,6 @@ func prepareCheckCache(opts CheckOptions, gopts *global.Options, printer progres
|
|||
}
|
||||
|
||||
gopts.CacheDir = tempdir
|
||||
printer.P("using temporary cache in %v\n", tempdir)
|
||||
|
||||
cleanup = func() {
|
||||
err := os.RemoveAll(tempdir)
|
||||
|
|
@ -244,6 +253,11 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
|
|||
}
|
||||
defer unlock()
|
||||
|
||||
if !opts.NoCacheVerify {
|
||||
// verify files in already existing cache
|
||||
repo.Cache().EnableVerification()
|
||||
}
|
||||
|
||||
chkr := checker.New(repo, opts.CheckUnused)
|
||||
err = chkr.LoadSnapshots(ctx, &opts.SnapshotFilter, args)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -183,6 +183,7 @@ func checkIfFileWithSimilarNameExists(files []fs.DirEntry, fileName string) bool
|
|||
return found
|
||||
}
|
||||
|
||||
// TestPrepareCheckCache: this code was introduced at commit 0271bb9
|
||||
func TestPrepareCheckCache(t *testing.T) {
|
||||
// Create a temporary directory for the cache
|
||||
tmpDirBase := t.TempDir()
|
||||
|
|
@ -205,10 +206,9 @@ func TestPrepareCheckCache(t *testing.T) {
|
|||
}
|
||||
gopts := global.Options{CacheDir: tmpDirBase}
|
||||
cleanup := prepareCheckCache(testCase.opts, &gopts, &progress.NoopPrinter{})
|
||||
files, err := os.ReadDir(tmpDirBase)
|
||||
rtest.OK(t, err)
|
||||
files, _ := os.ReadDir(tmpDirBase) // this must FAIL!
|
||||
|
||||
if !testCase.opts.WithCache {
|
||||
if !testCase.opts.WithCache && len(files) > 0 {
|
||||
// If using a temporary cache directory, the cache directory should exist
|
||||
// listing all directories inside tmpDirBase (cacheDir)
|
||||
// one directory should be tmpDir created by prepareCheckCache with 'restic-check-cache-' in path
|
||||
|
|
@ -225,8 +225,7 @@ func TestPrepareCheckCache(t *testing.T) {
|
|||
cleanup()
|
||||
|
||||
// Verify that the cache directory has been removed
|
||||
files, err = os.ReadDir(tmpDirBase)
|
||||
rtest.OK(t, err)
|
||||
files, _ = os.ReadDir(tmpDirBase)
|
||||
rtest.Assert(t, len(files) == 0, "Expected cache directory to be removed, but it still exists: %v", files)
|
||||
})
|
||||
}
|
||||
|
|
@ -235,13 +234,11 @@ func TestPrepareCheckCache(t *testing.T) {
|
|||
func TestPrepareDefaultCheckCache(t *testing.T) {
|
||||
gopts := global.Options{CacheDir: ""}
|
||||
cleanup := prepareCheckCache(CheckOptions{}, &gopts, &progress.NoopPrinter{})
|
||||
_, err := os.ReadDir(gopts.CacheDir)
|
||||
rtest.OK(t, err)
|
||||
|
||||
// Call the cleanup function to remove the temporary cache directory
|
||||
cleanup()
|
||||
|
||||
// Verify that the cache directory has been removed
|
||||
_, err = os.ReadDir(gopts.CacheDir)
|
||||
_, err := os.ReadDir(gopts.CacheDir)
|
||||
rtest.Assert(t, errors.Is(err, os.ErrNotExist), "Expected cache directory to be removed, but it still exists")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -471,13 +471,10 @@ If the repository structure is intact, restic will show that ``no errors were fo
|
|||
check snapshots, trees and blobs
|
||||
no errors were found
|
||||
|
||||
By default, check creates a new temporary cache directory to verify that the
|
||||
data stored in the repository is intact. To reuse the existing cache, you can
|
||||
use the ``--with-cache`` flag.
|
||||
|
||||
If the cache directory is not explicitly set, then ``check`` creates its
|
||||
temporary cache directory in the temporary directory, see :ref:`temporary_files`.
|
||||
Otherwise, the specified cache directory is used, as described in :ref:`caching`.
|
||||
By default, ``check`` will use the current cache, and it will verify each cache object before
|
||||
proceeding to make sure that there is no bit rot in the local cache. You can
|
||||
explicitely request ``check`` to acquire a temporary cache via option ``--temporary-cache``.
|
||||
Alternative you can tell ``check`` not to verify the cache by using option ``--no-cache-verify``.
|
||||
|
||||
By default, the ``check`` command does not verify that the actual pack files
|
||||
on disk in the repository are unmodified, because doing so requires reading
|
||||
|
|
|
|||
117
internal/backend/cache/backend.go
vendored
117
internal/backend/cache/backend.go
vendored
|
|
@ -2,6 +2,8 @@ package cache
|
|||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
|
||||
|
|
@ -37,7 +39,6 @@ func newBackend(be backend.Backend, c *Cache, errorLog func(string, ...interface
|
|||
|
||||
// Remove deletes a file from the backend and the cache if it has been cached.
|
||||
func (b *Backend) Remove(ctx context.Context, h backend.Handle) error {
|
||||
debug.Log("cache Remove(%v)", h)
|
||||
err := b.Backend.Remove(ctx, h)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
@ -92,7 +93,7 @@ func (b *Backend) Save(ctx context.Context, h backend.Handle, rd backend.RewindR
|
|||
return nil
|
||||
}
|
||||
|
||||
func (b *Backend) cacheFile(ctx context.Context, h backend.Handle) error {
|
||||
func (b *Backend) tryToCacheFile(ctx context.Context, h backend.Handle) error {
|
||||
finish := make(chan struct{})
|
||||
|
||||
b.inProgressMutex.Lock()
|
||||
|
|
@ -119,6 +120,12 @@ func (b *Backend) cacheFile(ctx context.Context, h backend.Handle) error {
|
|||
b.inProgressMutex.Unlock()
|
||||
}()
|
||||
|
||||
if b.hasToVerify(h) && b.Cache.Has(h) {
|
||||
if err := b.verify(ctx, h); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// test again, maybe the file was cached in the meantime
|
||||
if !b.Cache.Has(h) {
|
||||
// nope, it's still not in the cache, pull it from the repo and save it
|
||||
|
|
@ -132,22 +139,82 @@ func (b *Backend) cacheFile(ctx context.Context, h backend.Handle) error {
|
|||
return err
|
||||
}
|
||||
|
||||
b.markVerified(h)
|
||||
debug.Log("verified %v", h)
|
||||
return nil
|
||||
}
|
||||
|
||||
// loadFromCache will try to load the file from the cache.
|
||||
func (b *Backend) loadFromCache(h backend.Handle, length int, offset int64, consumer func(rd io.Reader) error) (bool, error) {
|
||||
rd, inCache, err := b.Cache.load(h, length, offset)
|
||||
// loadFromCacheOrDelegate will try to load the file from the cache, and fall
|
||||
// back to the backend if that fails.
|
||||
func (b *Backend) loadFromCacheOrDelegate(ctx context.Context, h backend.Handle, length int, offset int64, consumer func(rd io.Reader) error) error {
|
||||
rd, _, err := b.Cache.load(h, length, offset)
|
||||
if err != nil {
|
||||
return inCache, err
|
||||
debug.Log("error caching %v: %v, falling back to backend", h, err)
|
||||
return b.Backend.Load(ctx, h, length, offset, consumer)
|
||||
}
|
||||
|
||||
err = consumer(rd)
|
||||
if err != nil {
|
||||
_ = rd.Close() // ignore secondary errors
|
||||
return true, err
|
||||
return err
|
||||
}
|
||||
return true, rd.Close()
|
||||
return rd.Close()
|
||||
}
|
||||
|
||||
func (b *Backend) hasToVerify(h backend.Handle) bool {
|
||||
if b.Cache.verifiedFiles == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
b.Cache.verifiedFilesLock.Lock()
|
||||
_, ok := b.Cache.verifiedFiles[h]
|
||||
b.Cache.verifiedFilesLock.Unlock()
|
||||
return !ok
|
||||
}
|
||||
|
||||
func (b *Backend) markVerified(h backend.Handle) {
|
||||
if b.Cache.verifiedFiles != nil {
|
||||
b.Cache.verifiedFilesLock.Lock()
|
||||
b.Cache.verifiedFiles[h] = struct{}{}
|
||||
b.Cache.verifiedFilesLock.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Backend) verify(ctx context.Context, h backend.Handle) error {
|
||||
// verify that the cache file is correct or at least not more broken than the version stored at the backend
|
||||
var remoteHash, localHash restic.ID
|
||||
|
||||
err := b.Backend.Load(ctx, h, 0, 0, func(rd io.Reader) error {
|
||||
hash := sha256.New()
|
||||
_, ierr := io.Copy(hash, rd)
|
||||
remoteHash = restic.IDFromHash(hash.Sum(nil))
|
||||
return ierr
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = b.Backend.Load(ctx, h, 0, 0, func(rd io.Reader) error {
|
||||
hash := sha256.New()
|
||||
_, ierr := io.Copy(hash, rd)
|
||||
localHash = restic.IDFromHash(hash.Sum(nil))
|
||||
return ierr
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if remoteHash != localHash {
|
||||
if remoteHash.String() == h.Name {
|
||||
// the remote version is correct, but not the local version
|
||||
// delete the local version to repair the cache
|
||||
_, _ = b.Cache.remove(h)
|
||||
} else if localHash.String() == h.Name {
|
||||
return fmt.Errorf("%v: remote file damaged, please re-upload the cached copy", h)
|
||||
} else {
|
||||
return fmt.Errorf("%v: cached and remote file differ and are both invalid", h)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Load loads a file from the cache or the backend.
|
||||
|
|
@ -162,14 +229,18 @@ func (b *Backend) Load(ctx context.Context, h backend.Handle, length int, offset
|
|||
debug.Log("downloading %v finished", h)
|
||||
}
|
||||
|
||||
// try loading from cache without checking that the handle is actually cached
|
||||
inCache, err := b.loadFromCache(h, length, offset, consumer)
|
||||
if inCache {
|
||||
if err != nil {
|
||||
debug.Log("error loading %v from cache: %v", h, err)
|
||||
if b.Cache.Has(h) && !b.hasToVerify(h) {
|
||||
debug.Log("Load(%v, %v, %v) from cache", h, length, offset)
|
||||
rd, _, err := b.Cache.load(h, length, offset)
|
||||
if err == nil {
|
||||
err = consumer(rd)
|
||||
if err != nil {
|
||||
_ = rd.Close() // ignore secondary errors
|
||||
return err
|
||||
}
|
||||
return rd.Close()
|
||||
}
|
||||
// the caller must explicitly use cache.Forget() to remove the cache entry
|
||||
return err
|
||||
debug.Log("error loading %v from cache: %v", h, err)
|
||||
}
|
||||
|
||||
// if we don't automatically cache this file type, fall back to the backend
|
||||
|
|
@ -179,28 +250,16 @@ func (b *Backend) Load(ctx context.Context, h backend.Handle, length int, offset
|
|||
}
|
||||
|
||||
debug.Log("auto-store %v in the cache", h)
|
||||
err = b.cacheFile(ctx, h)
|
||||
err := b.tryToCacheFile(ctx, h)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
inCache, err = b.loadFromCache(h, length, offset, consumer)
|
||||
if inCache {
|
||||
if err != nil {
|
||||
debug.Log("error loading %v from cache: %v", h, err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
debug.Log("error caching %v: %v, falling back to backend", h, err)
|
||||
return b.Backend.Load(ctx, h, length, offset, consumer)
|
||||
return b.loadFromCacheOrDelegate(ctx, h, length, offset, consumer)
|
||||
}
|
||||
|
||||
// Stat tests whether the backend has a file. If it does not exist but still
|
||||
// exists in the cache, it is removed from the cache.
|
||||
func (b *Backend) Stat(ctx context.Context, h backend.Handle) (backend.FileInfo, error) {
|
||||
debug.Log("cache Stat(%v)", h)
|
||||
|
||||
fi, err := b.Backend.Stat(ctx, h)
|
||||
if err != nil && b.Backend.IsNotExist(err) {
|
||||
// try to remove from the cache, ignore errors
|
||||
|
|
|
|||
8
internal/backend/cache/backend_test.go
vendored
8
internal/backend/cache/backend_test.go
vendored
|
|
@ -145,8 +145,8 @@ func TestOutOfBoundsAccess(t *testing.T) {
|
|||
t.Error("cache returned non-existent file section")
|
||||
return errors.New("broken")
|
||||
})
|
||||
test.Assert(t, strings.Contains(err.Error(), " is too short"), "expected too short error, got %v", err)
|
||||
test.Equals(t, 1, be.ctr, "expected file to be loaded only once")
|
||||
test.Assert(t, strings.Contains(err.Error(), "access beyond end of file"), "expected 'access beyond end of file', got '%v'", err)
|
||||
test.Equals(t, 2, be.ctr, "expected file to be loaded twice")
|
||||
// file must nevertheless get cached
|
||||
if !c.Has(h) {
|
||||
t.Errorf("cache doesn't have file after load")
|
||||
|
|
@ -157,8 +157,8 @@ func TestOutOfBoundsAccess(t *testing.T) {
|
|||
t.Error("cache returned non-existent file section")
|
||||
return errors.New("broken")
|
||||
})
|
||||
test.Assert(t, strings.Contains(err.Error(), " is too short"), "expected too short error, got %v", err)
|
||||
test.Equals(t, 1, be.ctr, "expected file to be loaded only once")
|
||||
test.Assert(t, strings.Contains(err.Error(), "access beyond end of file"), "expected 'access beyond end of file', got '%v'", err)
|
||||
test.Equals(t, 3, be.ctr, "expected file to be loaded 3 times")
|
||||
}
|
||||
|
||||
func TestForget(t *testing.T) {
|
||||
|
|
|
|||
8
internal/backend/cache/cache.go
vendored
8
internal/backend/cache/cache.go
vendored
|
|
@ -21,7 +21,9 @@ type Cache struct {
|
|||
Base string
|
||||
Created bool
|
||||
|
||||
forgotten sync.Map
|
||||
verifiedFiles map[backend.Handle]struct{}
|
||||
verifiedFilesLock sync.Mutex
|
||||
forgotten sync.Map
|
||||
}
|
||||
|
||||
const dirMode = 0700
|
||||
|
|
@ -154,6 +156,10 @@ func updateTimestamp(d string) error {
|
|||
return os.Chtimes(d, t, t)
|
||||
}
|
||||
|
||||
func (c *Cache) EnableVerification() {
|
||||
c.verifiedFiles = make(map[backend.Handle]struct{})
|
||||
}
|
||||
|
||||
// MaxCacheAge is the default age (30 days) after which cache directories are considered old.
|
||||
const MaxCacheAge = 30 * 24 * time.Hour
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue