This commit is contained in:
Winfried Plappert 2026-05-20 22:42:39 +02:00 committed by GitHub
commit cb515e0f8c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 143 additions and 54 deletions

View file

@ -0,0 +1,16 @@
Change: restic check now verifies the local cache on read
Up to now the check command had to create a temporary cache to ensure that it does
not miss bitrot at the repository storage. However, this has the downside that a host
may have to store two copies of the repository cache:
one for regular usage and one for the check command.
This PR adds support for the cache to verify its content and thereby allows
the check command to use an already existing cache, which as a bonus will be repaired
by redownloading locally damaged files if necessary.
The CLI now has an --temporary-cache option in addition to --no-cache-verify
(which replaces --with-cache).
https://github.com/restic/restic/pull/3747
https://github.com/restic/restic/pull/21794

View file

@ -33,8 +33,8 @@ func newCheckCommand(globalOptions *global.Options) *cobra.Command {
The "check" command tests the repository for errors and reports any errors it
finds. It can also be used to read all data and therefore simulate a restore.
By default, the "check" command will always load all data directly from the
repository and not use a local cache.
By default, the "check" command will use a local cache but verify it against
the repository. It is possible to switch to using a temporary cache.
EXIT STATUS
===========
@ -73,6 +73,8 @@ type CheckOptions struct {
ReadDataSubset string
CheckUnused bool
WithCache bool
NoCacheVerify bool
TemporaryCache bool
data.SnapshotFilter
}
@ -86,7 +88,12 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
// MarkDeprecated only returns an error when the flag is not found
panic(err)
}
f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository")
//f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository")
f.BoolVar(&opts.NoCacheVerify, "with-cache", false, "disable verification of local cache")
_ = f.MarkDeprecated("with-cache", "--with-cache is deprecated, use --no-cache-verify instead")
f.BoolVar(&opts.NoCacheVerify, "no-cache-verify", false, "disable verification of local cache")
f.BoolVar(&opts.TemporaryCache, "temporary-cache", false, "create a temporary cache")
initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
}
@ -131,6 +138,9 @@ func checkFlags(opts CheckOptions) error {
}
}
if opts.NoCacheVerify && opts.TemporaryCache {
return errors.Fatal("check flags --no-cache-verify and --temporary-cache cannot be used together")
}
return nil
}
@ -177,7 +187,7 @@ func parsePercentage(s string) (float64, error) {
// - by default, we use a cache in a temporary directory that is deleted after the check
func prepareCheckCache(opts CheckOptions, gopts *global.Options, printer progress.Printer) (cleanup func()) {
cleanup = func() {}
if opts.WithCache {
if !opts.TemporaryCache {
// use the default cache, no setup needed
return cleanup
}
@ -210,7 +220,6 @@ func prepareCheckCache(opts CheckOptions, gopts *global.Options, printer progres
}
gopts.CacheDir = tempdir
printer.P("using temporary cache in %v\n", tempdir)
cleanup = func() {
err := os.RemoveAll(tempdir)
@ -244,6 +253,11 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
}
defer unlock()
if !opts.NoCacheVerify {
// verify files in already existing cache
repo.Cache().EnableVerification()
}
chkr := checker.New(repo, opts.CheckUnused)
err = chkr.LoadSnapshots(ctx, &opts.SnapshotFilter, args)
if err != nil {

View file

@ -183,6 +183,7 @@ func checkIfFileWithSimilarNameExists(files []fs.DirEntry, fileName string) bool
return found
}
// TestPrepareCheckCache: this code was introduced at commit 0271bb9
func TestPrepareCheckCache(t *testing.T) {
// Create a temporary directory for the cache
tmpDirBase := t.TempDir()
@ -205,10 +206,9 @@ func TestPrepareCheckCache(t *testing.T) {
}
gopts := global.Options{CacheDir: tmpDirBase}
cleanup := prepareCheckCache(testCase.opts, &gopts, &progress.NoopPrinter{})
files, err := os.ReadDir(tmpDirBase)
rtest.OK(t, err)
files, _ := os.ReadDir(tmpDirBase) // this must FAIL!
if !testCase.opts.WithCache {
if !testCase.opts.WithCache && len(files) > 0 {
// If using a temporary cache directory, the cache directory should exist
// listing all directories inside tmpDirBase (cacheDir)
// one directory should be tmpDir created by prepareCheckCache with 'restic-check-cache-' in path
@ -225,8 +225,7 @@ func TestPrepareCheckCache(t *testing.T) {
cleanup()
// Verify that the cache directory has been removed
files, err = os.ReadDir(tmpDirBase)
rtest.OK(t, err)
files, _ = os.ReadDir(tmpDirBase)
rtest.Assert(t, len(files) == 0, "Expected cache directory to be removed, but it still exists: %v", files)
})
}
@ -235,13 +234,11 @@ func TestPrepareCheckCache(t *testing.T) {
func TestPrepareDefaultCheckCache(t *testing.T) {
gopts := global.Options{CacheDir: ""}
cleanup := prepareCheckCache(CheckOptions{}, &gopts, &progress.NoopPrinter{})
_, err := os.ReadDir(gopts.CacheDir)
rtest.OK(t, err)
// Call the cleanup function to remove the temporary cache directory
cleanup()
// Verify that the cache directory has been removed
_, err = os.ReadDir(gopts.CacheDir)
_, err := os.ReadDir(gopts.CacheDir)
rtest.Assert(t, errors.Is(err, os.ErrNotExist), "Expected cache directory to be removed, but it still exists")
}

View file

@ -471,13 +471,10 @@ If the repository structure is intact, restic will show that ``no errors were fo
check snapshots, trees and blobs
no errors were found
By default, check creates a new temporary cache directory to verify that the
data stored in the repository is intact. To reuse the existing cache, you can
use the ``--with-cache`` flag.
If the cache directory is not explicitly set, then ``check`` creates its
temporary cache directory in the temporary directory, see :ref:`temporary_files`.
Otherwise, the specified cache directory is used, as described in :ref:`caching`.
By default, ``check`` will use the current cache, and it will verify each cache object before
proceeding to make sure that there is no bit rot in the local cache. You can
explicitely request ``check`` to acquire a temporary cache via option ``--temporary-cache``.
Alternative you can tell ``check`` not to verify the cache by using option ``--no-cache-verify``.
By default, the ``check`` command does not verify that the actual pack files
on disk in the repository are unmodified, because doing so requires reading

View file

@ -2,6 +2,8 @@ package cache
import (
"context"
"crypto/sha256"
"fmt"
"io"
"sync"
@ -37,7 +39,6 @@ func newBackend(be backend.Backend, c *Cache, errorLog func(string, ...interface
// Remove deletes a file from the backend and the cache if it has been cached.
func (b *Backend) Remove(ctx context.Context, h backend.Handle) error {
debug.Log("cache Remove(%v)", h)
err := b.Backend.Remove(ctx, h)
if err != nil {
return err
@ -92,7 +93,7 @@ func (b *Backend) Save(ctx context.Context, h backend.Handle, rd backend.RewindR
return nil
}
func (b *Backend) cacheFile(ctx context.Context, h backend.Handle) error {
func (b *Backend) tryToCacheFile(ctx context.Context, h backend.Handle) error {
finish := make(chan struct{})
b.inProgressMutex.Lock()
@ -119,6 +120,12 @@ func (b *Backend) cacheFile(ctx context.Context, h backend.Handle) error {
b.inProgressMutex.Unlock()
}()
if b.hasToVerify(h) && b.Cache.Has(h) {
if err := b.verify(ctx, h); err != nil {
return err
}
}
// test again, maybe the file was cached in the meantime
if !b.Cache.Has(h) {
// nope, it's still not in the cache, pull it from the repo and save it
@ -132,22 +139,82 @@ func (b *Backend) cacheFile(ctx context.Context, h backend.Handle) error {
return err
}
b.markVerified(h)
debug.Log("verified %v", h)
return nil
}
// loadFromCache will try to load the file from the cache.
func (b *Backend) loadFromCache(h backend.Handle, length int, offset int64, consumer func(rd io.Reader) error) (bool, error) {
rd, inCache, err := b.Cache.load(h, length, offset)
// loadFromCacheOrDelegate will try to load the file from the cache, and fall
// back to the backend if that fails.
func (b *Backend) loadFromCacheOrDelegate(ctx context.Context, h backend.Handle, length int, offset int64, consumer func(rd io.Reader) error) error {
rd, _, err := b.Cache.load(h, length, offset)
if err != nil {
return inCache, err
debug.Log("error caching %v: %v, falling back to backend", h, err)
return b.Backend.Load(ctx, h, length, offset, consumer)
}
err = consumer(rd)
if err != nil {
_ = rd.Close() // ignore secondary errors
return true, err
return err
}
return true, rd.Close()
return rd.Close()
}
func (b *Backend) hasToVerify(h backend.Handle) bool {
if b.Cache.verifiedFiles == nil {
return false
}
b.Cache.verifiedFilesLock.Lock()
_, ok := b.Cache.verifiedFiles[h]
b.Cache.verifiedFilesLock.Unlock()
return !ok
}
func (b *Backend) markVerified(h backend.Handle) {
if b.Cache.verifiedFiles != nil {
b.Cache.verifiedFilesLock.Lock()
b.Cache.verifiedFiles[h] = struct{}{}
b.Cache.verifiedFilesLock.Unlock()
}
}
func (b *Backend) verify(ctx context.Context, h backend.Handle) error {
// verify that the cache file is correct or at least not more broken than the version stored at the backend
var remoteHash, localHash restic.ID
err := b.Backend.Load(ctx, h, 0, 0, func(rd io.Reader) error {
hash := sha256.New()
_, ierr := io.Copy(hash, rd)
remoteHash = restic.IDFromHash(hash.Sum(nil))
return ierr
})
if err != nil {
return err
}
err = b.Backend.Load(ctx, h, 0, 0, func(rd io.Reader) error {
hash := sha256.New()
_, ierr := io.Copy(hash, rd)
localHash = restic.IDFromHash(hash.Sum(nil))
return ierr
})
if err != nil {
return err
}
if remoteHash != localHash {
if remoteHash.String() == h.Name {
// the remote version is correct, but not the local version
// delete the local version to repair the cache
_, _ = b.Cache.remove(h)
} else if localHash.String() == h.Name {
return fmt.Errorf("%v: remote file damaged, please re-upload the cached copy", h)
} else {
return fmt.Errorf("%v: cached and remote file differ and are both invalid", h)
}
}
return nil
}
// Load loads a file from the cache or the backend.
@ -162,14 +229,18 @@ func (b *Backend) Load(ctx context.Context, h backend.Handle, length int, offset
debug.Log("downloading %v finished", h)
}
// try loading from cache without checking that the handle is actually cached
inCache, err := b.loadFromCache(h, length, offset, consumer)
if inCache {
if err != nil {
debug.Log("error loading %v from cache: %v", h, err)
if b.Cache.Has(h) && !b.hasToVerify(h) {
debug.Log("Load(%v, %v, %v) from cache", h, length, offset)
rd, _, err := b.Cache.load(h, length, offset)
if err == nil {
err = consumer(rd)
if err != nil {
_ = rd.Close() // ignore secondary errors
return err
}
return rd.Close()
}
// the caller must explicitly use cache.Forget() to remove the cache entry
return err
debug.Log("error loading %v from cache: %v", h, err)
}
// if we don't automatically cache this file type, fall back to the backend
@ -179,28 +250,16 @@ func (b *Backend) Load(ctx context.Context, h backend.Handle, length int, offset
}
debug.Log("auto-store %v in the cache", h)
err = b.cacheFile(ctx, h)
err := b.tryToCacheFile(ctx, h)
if err != nil {
return err
}
inCache, err = b.loadFromCache(h, length, offset, consumer)
if inCache {
if err != nil {
debug.Log("error loading %v from cache: %v", h, err)
}
return err
}
debug.Log("error caching %v: %v, falling back to backend", h, err)
return b.Backend.Load(ctx, h, length, offset, consumer)
return b.loadFromCacheOrDelegate(ctx, h, length, offset, consumer)
}
// Stat tests whether the backend has a file. If it does not exist but still
// exists in the cache, it is removed from the cache.
func (b *Backend) Stat(ctx context.Context, h backend.Handle) (backend.FileInfo, error) {
debug.Log("cache Stat(%v)", h)
fi, err := b.Backend.Stat(ctx, h)
if err != nil && b.Backend.IsNotExist(err) {
// try to remove from the cache, ignore errors

View file

@ -145,8 +145,8 @@ func TestOutOfBoundsAccess(t *testing.T) {
t.Error("cache returned non-existent file section")
return errors.New("broken")
})
test.Assert(t, strings.Contains(err.Error(), " is too short"), "expected too short error, got %v", err)
test.Equals(t, 1, be.ctr, "expected file to be loaded only once")
test.Assert(t, strings.Contains(err.Error(), "access beyond end of file"), "expected 'access beyond end of file', got '%v'", err)
test.Equals(t, 2, be.ctr, "expected file to be loaded twice")
// file must nevertheless get cached
if !c.Has(h) {
t.Errorf("cache doesn't have file after load")
@ -157,8 +157,8 @@ func TestOutOfBoundsAccess(t *testing.T) {
t.Error("cache returned non-existent file section")
return errors.New("broken")
})
test.Assert(t, strings.Contains(err.Error(), " is too short"), "expected too short error, got %v", err)
test.Equals(t, 1, be.ctr, "expected file to be loaded only once")
test.Assert(t, strings.Contains(err.Error(), "access beyond end of file"), "expected 'access beyond end of file', got '%v'", err)
test.Equals(t, 3, be.ctr, "expected file to be loaded 3 times")
}
func TestForget(t *testing.T) {

View file

@ -21,7 +21,9 @@ type Cache struct {
Base string
Created bool
forgotten sync.Map
verifiedFiles map[backend.Handle]struct{}
verifiedFilesLock sync.Mutex
forgotten sync.Map
}
const dirMode = 0700
@ -154,6 +156,10 @@ func updateTimestamp(d string) error {
return os.Chtimes(d, t, t)
}
func (c *Cache) EnableVerification() {
c.verifiedFiles = make(map[backend.Handle]struct{})
}
// MaxCacheAge is the default age (30 days) after which cache directories are considered old.
const MaxCacheAge = 30 * 24 * time.Hour