From eb42e1002e0024629aa04221633153a8bd074ade Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Thu, 4 Dec 2025 18:33:59 +0900
Subject: [PATCH 01/34] Squashed commit of the following:

commit 0e6e1d21526387212148c7cfdec8ddf2f3c3150a
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Thu Dec 4 18:25:03 2025 +0900

    WIP

commit e7a1b5c14d0a6214a6f9e76a396c5dcb932e6488
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Thu Dec 4 18:01:47 2025 +0900

    WIP

commit 4c803f9fabe112571642dd99cd89681662858918
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Thu Dec 4 17:24:49 2025 +0900

    WIP

commit 2779f005a955a26a8770caf32c6481ae23223126
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Dec 2 22:17:20 2025 +0900

    WIP: Remove ChunkDict

commit 2f0cdf16d149257d83be3d3ddfcb900edb684b59
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Dec 2 18:54:52 2025 +0900

    WIP

commit fb0580c80b88a20261be1411305d29823f283734
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Dec 2 18:34:11 2025 +0900

    WIP

commit 51168307869c3d62a7f27f13e3fd766ee962e12b
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Dec 2 18:03:49 2025 +0900

    WIP

commit 4a8ccb3e6e32d8feec38373f2406b2b2f84c0dda
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Dec 2 17:30:17 2025 +0900

    WIP

commit 36efa22c492bf4c4fcad3e47a6b1751c3c040d00
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Dec 2 13:38:12 2025 +0900

    WIP

commit 33e85c8ffea90318322b438e2fc81b449901ba9b
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Nov 28 19:01:44 2025 +0900

    WIP

commit 5c23c1d0628b1b612159c94446c1a0f2c0ce5902
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Nov 26 17:55:12 2025 +0900

    WIP

commit 661df4b48b35b252e0e41aaf8296b1aa70994923
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Nov 19 16:48:31 2025 +0900

    Fix: debugNote concurrency issue

    Fix debugNote concurrent r/w issue by implementing dump function

commit 8f28da4c742be41d5bb6af39578b5e760f7909cb
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Nov 18 21:12:59 2025 +0900

    Refactor

    Change code order for better readability
    Change debugNote to a separate struct

commit e9ba3ca89b26ee6eb7c69bc898d757136ee0af1b
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Nov 18 19:41:01 2025 +0900

    Refactor: rechunk_worker

    Integrate largely redundant functions into one
    Introduce chunkDictHelper for encapsulation
    Improve data structures to reduce redundancy and complexity

commit 034e1cb410995b4ea260fed2e66eabe45a42cd84
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Nov 18 01:51:36 2025 +0900

    Docs: Update docs on rechunk-copy

commit f159887d338e3c24ecdc1c1dc54b858d6bec37be
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Nov 18 00:31:33 2025 +0900

    Update: use WithBlobUploader

    Use WithBlobUploader introduced in latest restic update
    Make Progress to preserve final output
    Misc code cleanup (ctx names)

commit f12d17943b71de60a7ef5fbbc73fc3c8e1f23c75
Merge: bdf4737ac 382616747
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Nov 17 20:13:05 2025 +0900

    Merge remote-tracking branch 'origin/master' into feature/rechunker

commit bdf4737acd93ebf39d3243ea46dba363102f3d71
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Nov 17 20:10:13 2025 +0900

    Refactor: rechunk_cache

    Refactor rechunk_cache.go
    Add comments for rechunker package code

commit 66e0a0c0a86c2a2c5757b6060810740d20870e4e
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Nov 17 17:47:50 2025 +0900

    Debug: Add debug logs

    Add debug logs in rechunker_worker.go

commit 3d6ed46b673bd6be797be4b33f8de5410f05b3f1
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Nov 17 17:14:21 2025 +0900

    Feat: Progress bar

    Enrich information in the progress bar

commit 1db294dd3f9e07b0b9526e836bed772a6182689f
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Nov 12 18:08:26 2025 +0900

    Refactor: ChunkDict

    Refactor ChunkDict code

commit 41d4a9c6497ef6d906b3789ab63b1f5ae2d852d2
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Nov 12 17:36:38 2025 +0900

    Fix: Add condition test

    Add condition test for priorityFilesHandler.Push() on blob ready

commit 60b7be1e799db81c2ee173be102b287d8cf8e5e3
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Nov 12 16:36:34 2025 +0900

    Style: golangci-lint

    Comply with golangci-lint suggestion

commit be0a5934298fcd68d8c3cf6f47016450cfee5b2d
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Nov 12 15:56:30 2025 +0900

    Misc: Add RechunkRepo interfaces

    Add RechunkSrcRepo and RechunkDstRepo interfaces to make it compatible with rechunker_test

commit dcab416d682c7dc5aeb34b1debf0d05826abe99a
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Nov 12 15:23:22 2025 +0900

    Refactor

    Reorder functions from broader functions to finer functions
    Split into subfunctions for long function code

commit 1934b88ab3ddeeca3dcc4fadca8ed5b67b558f62
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Nov 11 17:54:49 2025 +0900

    Feat: Ignore no more needed blob after blob load

    Make blob cache ignore no longer needed blobs immediately after blob load
    Refactor code

commit fdc08c86d197088cfd8fb695c8c192c8835c320c
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Nov 10 18:14:04 2025 +0900

    Refactor: Enhance code readability and modularity

    Modularize subfunctions so that they are more readable
    Place 'big' functions first, to make it easy to get the overall logic
    Move types and structs to appropriate file
    Change numWorkers to `min(GOMAXPROCS(0), srcRepo.Connections())`

commit 0ff40fb9b39da5ac122738956be598a10f48514f
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Sun Nov 9 16:24:23 2025 +0900

    Misc: Change numDownloaders

    Change numDownloaders to `min(numWorkers, srcRepo.Connections())`
    Remove unnecessary PackedBlobLoader interface
    Change HashOfIDs to public function

commit 4cd013e4fa672bdcf5dbce59e2d7e2cd8aeb2bc4
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Nov 7 15:17:30 2025 +0900

    Feat: Split dispatcher channel

    Split chDispatch into chRegular and chPriority
    Introduce PriorityFilesHandler, replacing inline concurrency logic of priorityFilesList
    Refactor names and file orders
    Move rechunk worker code to a separate file

commit fa00c151acb7a690762367112799e5620f085294
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Oct 31 17:53:24 2025 +0900

    Misc: Change numWorkers

commit ddae794c4399f40102d542ac4724cde8e6c0e8f8
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Oct 31 17:34:53 2025 +0900

    Update: Align with global interface

commit 809d1c4aedebdd3080a4dda551846c02b2a02da3
Merge: 08b707b16 71432c7f4
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Oct 31 17:20:45 2025 +0900

    Merge remote-tracking branch 'origin/master' into feature/rechunker

commit 08b707b1628965872a97f58e25627a7d742bfce4
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Oct 29 16:31:54 2025 +0900

    Feat: Reimplement rechunk blob cache

    Rechunk blob cache change including:
      - Blobs loaded by the pack, evicted by the blob
      - Ignore no longer needed blobs
    Order files by their blob count, descending order
    Rechunk blob cache is enabled by default, and its size is customizable
    Refactor code

commit 224b5ca711d0143c61caab36c499179ba9ec0c2c
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Oct 28 00:04:43 2025 +0900

    Feat: Rechunk file order

    Change to process files by lexicographic pack order
    Refactor names

commit 4c18ebb2f9a225832cd56780689be39a702116f0
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Thu Oct 16 20:41:33 2025 +0900

    Refactor: Improve modularity of RechunkData

    Split RechunkData to smaller units
    Rename types and variables
    Remove unnecessary custom types

commit fe5ef9ba57229de33cd661c7629a70d8353d29bd
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Oct 10 20:43:31 2025 +0900

    Debug: cmd_rechunk_copy log

    Add debug.Log for cmd_rechunk_copy.go
    Ensure concurrency safety for rechunker.debugNote
    Fix debug.Log in rechunk_cache.go to log only real requests

commit 4064f7cbc03e1e9f7e98b2e2ce9a7917fa5f32c4
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Oct 10 18:52:34 2025 +0900

    Update: new restic version

    Update interfaces to match the new restic version's

commit cb1982a6eb6538199d87c998f41bd83a931dc8ad
Merge: ef6d24aaf 481fcb9ca
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Oct 10 18:16:31 2025 +0900

    Merge branch 'master' into feature/rechunker

commit ef6d24aaf697b7f43f05ddbf602685ec785e0296
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Oct 10 16:49:14 2025 +0900

    Debug: Add debug trace

    Add debug trace snippets for PackCache and ChunkDict
    Change back numWorkers to GOMAXPROCS(0)

commit 139eaf217acb660f86f13be12b821659b01d0ca6
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Thu Sep 25 14:57:03 2025 +0900

    Misc: Tune hyperparameters

    Tune numWorkers and numDownloaders
    Fix typo in rechunker_test

commit 0d7b12785bcefc0a41a73d698f9d18ab7f38ccc3
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Sep 24 22:10:46 2025 +0900

    Update: Terminal progress printer

    Update cmd_rechunk_copy code to use latest terminal progress printer (as in cmd_copy)
    Remove unnecessary code in cmd_rechunk_copy

commit 30dc52539b9b0e166f7dfb0c0bc6051f99e78d11
Merge: 9a152ae70 3e6b5c34c
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Sep 24 21:42:47 2025 +0900

    Merge remote-tracking branch 'origin/master' into feature/rechunker

commit 9a152ae70d641d9b667b9ffd82bf83c0e4bcb852
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Sep 24 21:32:32 2025 +0900

    Fix: buildIndex

    Fix issue that the needed metadata is not computed when not using PackCache.
    Refactor names and struct fields
    Change test cases to ensure testing ChunkDict

commit 78b9ae5f51c14e9ad736657fdbe4bf7ce2d400f8
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Sep 24 17:54:26 2025 +0900

    Misc: Update changelog

commit da37aa834ca9df21ce8323bdfec05534db122e45
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Thu Sep 18 18:30:11 2025 +0900

    Feat: Pack cache switch

    Add switch for enabling pack cache
    Refactor names
      RechunkBlobCache -> PackCache
      RechunkChainDict -> ChunkDict
    Tune hyperparameters

commit f7fe0952fdd8fa0c31faa8c0eb6bfa14461c7959
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Sep 17 13:47:52 2025 +0900

    Misc: changelog

commit 3bfe8b8b8e688a11841228a7a8ff15a429675ff0
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Sep 15 17:39:51 2025 +0900

    Test: rechunk-copy integration test

    Add cmd_rechunk_copy_integration_test
    Refine rechunker test cases
    Refactor minor names in rechunker and rechunker_test

commit e0635df2599e555d078f322fc43ade2f8ce124b5
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Sep 10 22:30:22 2025 +0900

    Test: rechunker_test

    Augment test cases for RechunkData
    Fix concurrency error in test code
    Fix cmd_rechunk_copy import package names
    Change hyperparameters
    Minor refactoring

commit d6034d4ef95c56b416a1b83adc862c9e7338575f
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Sep 10 21:42:06 2025 +0900

    Rename: Move rechunker to internal/rechunker

    Move rechunker module from internal/walker to internal/rechunker
    Change package name to rechunker

commit 067c423cf46e452df9afbfc7e6c711a324da0b5e
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Sep 10 19:35:04 2025 +0900

    Feat: rechunker_test

    Rewrite rechunker_test for current rechunker
    Refactor method signatures for modularity

commit 75522f91879df44a14f366263815e1b6efe65e12
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Sep 8 20:36:16 2025 +0900

    Style: golangci-lint

    Fix code style to comply with golangci-lint
    Add comments and refactor names
    Tune hyperparameters

commit 4180a847ccf4232994f265653faba33bd592c224
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Sep 8 16:09:43 2025 +0900

    Feat: Remove blob tracer

    Remove blob tracer, as no performance difference in ablation study

commit 10fc276a63cc5ed3c4798615ae60b3be709a92c9
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Sep 8 15:43:33 2025 +0900

    Feat: numDownloaders

    Implement multiple downloaders in blob cache

commit e73749cbb0644efd8622123c9dfa1f7d3d4a2a4a
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Sun Sep 7 23:38:35 2025 +0900

    Feat: Blob tracer

    Reintroduce blob tracer
    Refactor names and code structure

commit 3451587b521428f26b6cd3c9ab4ecf15d7ca04cb
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Sep 2 00:25:17 2025 +0900

    Feat: RechunkChainDict

    Reimplement Rechunker to accommodate RechunkChainDict
    Refactor names, types, and structures

commit b7c9be234e68d40c889c7041cf1612418ae35ac7
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Sun Aug 31 02:22:24 2025 +0900

    Misc: Rechunker jump feature plan

commit 2c1328fb10e2161de9171e5fd802802c2b48ffb3
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Sun Aug 31 00:21:22 2025 +0900

    Feat: RechunkChainDict

    Implement RechunkChainDict.Add
    Refactor names

commit a1101c0fb6e73ce5852f8cc48333735eb73d93a6
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Sat Aug 30 19:41:09 2025 +0900

    Feat: RechunkChainDict

    Implement RechunkChainDict
    Refactor names

commit 393603d9cf11fb27f251d67321724b814d0e7449
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Thu Aug 28 22:30:12 2025 +0900

    Feat: Optimize scheduling

    Add optimization, such as:
      Prioritize files whose blobs are all ready in BlobCache
      Don't save to BlobCache no longer needed blobs
    Refactor names and

commit 3b04337ebbdd7b7a2e99253be1e3ea7668e986ba
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Aug 27 20:39:46 2025 +0900

    Feat: Rechunk copy with caching and scheduling

    Implement PackLRU-based BlobCache for Rechunker
    Reimplement and refactor Rechunker

commit 2278a83c6007ef2888c5d4d92096606201e1fa3b
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Aug 26 20:49:14 2025 +0900

    misc

    Reimplementing rechunker

commit bf071940d05f0fe6ac298619d8af5c948190f12c
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Aug 25 16:58:16 2025 +0900

    misc

commit 1e14b3eaf8d035079ee0e7a354fac0c0927aee6f
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Aug 25 09:22:30 2025 +0900

    Fix: bufferPool implementation (#5473)

    Increase bufferPool capacity (2 -> 4) per worker
    Move bufferPool to global (shared among workers)

commit eab0e1d3ca3fb57249f7af3df902951768ad317c
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Sat Aug 23 00:43:16 2025 +0900

    Style: gofmt

commit ff70fb86081a9c953feef04798d54ef4af519a0b
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Aug 22 23:12:28 2025 +0900

    Fix: Potential memory leak

    Fix issues addressed in review by wplapper (#5473)
      Fix buffer not returning to bufferPool at the end of file
      Change numWorkers value to runtime.GOMAXPROCS(0) to utilize multicore

commit 92e37c1f8ae13cfedc4bfced82ef0dc988c5054b
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Aug 19 18:22:26 2025 +0900

    Style: Cleanup code

    Cleanup and organize rechunk code
    Add feature flag check in rechunk-copy command

commit 619968326a99e2ea588705d99f4a61b5d9ec3bca
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Aug 19 18:22:20 2025 +0900

    Feat: Add feature flag for rechunk-copy

commit c82f793b90b1365aeb1a9acec2980fd780480689
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Thu Aug 14 18:07:33 2025 +0900

    Misc: Minor refactoring

commit eaf6534487b354491132d9b217bba798757d57cb
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Aug 4 17:47:37 2025 +0900

    Misc: Change config value

    Change default numWorkers of RechunkData to 1
    Misc UI message change

commit 25e562b3a663894c8ac18f69612d1b75b1b540f1
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Aug 4 13:37:35 2025 +0900

    Fix: Map concurrency error of rechunkBlobsMap

    Fix map concurrency error by adding mutex to rechunkBlobsMap
    Remove bloblru from rechunker
    Add buffer to downloader worker of RechunkData
    Simplify rechunkBlobsMap's value
    Add tags feature to rechunk-copy

commit da48871d1aaa6a45b230a47af5f636388d791253
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Jul 30 11:40:10 2025 +0900

    Feat: Add progress bar

    Add progress bar to rechunk-copy
    Minor refactoring

commit 0d7da20db671e26e020a890cb4131ff1d3517cad
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Wed Jul 30 00:37:21 2025 +0900

    Feat: Integrate to cmd

    Implement cmd_rechunk_copy.go
    Check working on the built program with example repos

commit 0e472bcb5916f3201b3747990231e75cd1ca85ae
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Tue Jul 29 22:38:11 2025 +0900

    Feat: TestRechunkRewriter

    Implement TestRechunkRewriter
    Refactor rechunker.go

commit 75c5112caeda9c31105a2d5f7b0c31badd7af8bb
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Jul 28 23:55:03 2025 +0900

    Fix: Debug and code cleanup

commit 0a61d3c7bcbf1e6ca68c791f6ad203dd7eb8cf45
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Mon Jul 28 13:08:22 2025 +0900

    Feat: Implement TestRechunker

commit b38f4c06b8160e6b08509581620ba6160e277176
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Sun Jul 27 20:48:00 2025 +0900

    Misc: Write plan of rechunker_test.go

commit 0ce37d8d5c79de8455cf603f2db1024a4378c7c0
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Jul 25 23:39:21 2025 +0900

    Feat: Rechunker worker

    Implement rechunk worker

commit 9d1afe0373ba8a85f955017be393a3bec95f722d
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Fri Jul 25 17:23:29 2025 +0900

    Feat: rechunker tree traversal

    Implement traverser in FileRechunker that registers unique file contents to the rechunk job chan
    Implement tree rewriter that substitutes Content field to rechunked blobs

commit 8419d0991b5c552ac81d80b7584a28cd3a440c9f
Author: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date:   Thu Jul 24 19:29:00 2025 +0900

    misc
---
 changelog/unreleased/issue-5473               |  25 +
 cmd/restic/cmd_rechunk_copy.go                | 229 +++++++
 .../cmd_rechunk_copy_integration_test.go      | 124 ++++
 cmd/restic/main.go                            |   1 +
 doc/045_working_with_repos.rst                |  38 +-
 internal/feature/registry.go                  |   2 +
 internal/rechunker/blob_cache.go              | 335 ++++++++++
 internal/rechunker/debug.go                   | 103 +++
 internal/rechunker/dispatcher.go              | 261 ++++++++
 internal/rechunker/progress.go                |  95 +++
 internal/rechunker/rechunker.go               | 609 ++++++++++++++++++
 internal/rechunker/rechunker_test.go          | 416 ++++++++++++
 internal/rechunker/worker.go                  | 226 +++++++
 13 files changed, 2460 insertions(+), 4 deletions(-)
 create mode 100644 changelog/unreleased/issue-5473
 create mode 100644 cmd/restic/cmd_rechunk_copy.go
 create mode 100644 cmd/restic/cmd_rechunk_copy_integration_test.go
 create mode 100644 internal/rechunker/blob_cache.go
 create mode 100644 internal/rechunker/debug.go
 create mode 100644 internal/rechunker/dispatcher.go
 create mode 100644 internal/rechunker/progress.go
 create mode 100644 internal/rechunker/rechunker.go
 create mode 100644 internal/rechunker/rechunker_test.go
 create mode 100644 internal/rechunker/worker.go

diff --git a/changelog/unreleased/issue-5473 b/changelog/unreleased/issue-5473
new file mode 100644
index 000000000..12444d23a
--- /dev/null
+++ b/changelog/unreleased/issue-5473
@@ -0,0 +1,25 @@
+Enhancement: Add rechunk-copy feature
+
+Restic didn't rechunk data blobs when copying snapshots between repositories
+with different chunker parameters. Instead, it copied the blobs as-is,
+which impaired deduplication powered by [Content Defined Chunking](https://restic.net/blog/2015-09-12/restic-foundation1-cdc/).
+To work around this issue, users had to manually restore the snapshots somewhere
+and then back them up again to the new repository. This workaround was
+inefficient, prone to altering the original metadata, and bothersome.
+
+Restic now supports the `rechunk-copy` command, which rechunks the data while copying.
+Currently, it does not automatically skip previously copied snapshots. Also,
+it does not remember which files were rechunked in previous runs, so it will
+process every file again in the next run (though it will not add new
+data blobs to the repository in that case; this is what deduplication is for).
+Therefore, `rechunk-copy` is best suited for one-time migration between repositories.
+For incremental copy scenarios, `copy` between repositories with the same chunker
+parameters is recommended.
+
+`rechunk-copy` shares the same command interface as `copy`, with the exception
+that `rechunk-copy` additionally supports a `--rechunk-tag` option to add
+tags to all rechunk-copied snapshots in the destination repository.
+
+https://github.com/restic/restic/issues/5473
+https://forum.restic.net/t/is-it-possible-to-re-chunk-after-a-restic-copy/6072
+https://forum.restic.net/t/copy-snapshots-between-repositories-without-copy-chunker-params/7320
diff --git a/cmd/restic/cmd_rechunk_copy.go b/cmd/restic/cmd_rechunk_copy.go
new file mode 100644
index 000000000..e46751285
--- /dev/null
+++ b/cmd/restic/cmd_rechunk_copy.go
@@ -0,0 +1,229 @@
+package main
+
+import (
+	"context"
+
+	"github.com/restic/restic/internal/data"
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/errors"
+	"github.com/restic/restic/internal/feature"
+	"github.com/restic/restic/internal/global"
+	"github.com/restic/restic/internal/rechunker"
+	"github.com/restic/restic/internal/restic"
+	"github.com/restic/restic/internal/ui"
+	"github.com/restic/restic/internal/ui/progress"
+
+	"github.com/spf13/cobra"
+	"github.com/spf13/pflag"
+)
+
+// newRechunkCopyCommand builds the cobra command for "rechunk-copy", registers its flags via
+// opts.AddFlags, and wires RunE to runRechunkCopy. Reference: cmd_copy.go (v0.18.0).
+func newRechunkCopyCommand(globalOptions *global.Options) *cobra.Command {
+	var opts RechunkCopyOptions
+	cmd := &cobra.Command{
+		Use:   "rechunk-copy [flags] [snapshotID ...]",
+		Short: "Rechunk-copy snapshots from one repository to another",
+		Long: `
+The "rechunk-copy" command rechunk-copies one or more snapshots from one repository to another.
+
+Data blobs will be rechunked and stored in the destination repo. 
+Tree blobs in the destination repo are also updated to point to the rechunked data blobs, 
+but it does not modify any other metadata.
+
+NOTE: This command has largely different internal mechanism from "copy" command,
+due to restic's content defined chunking (CDC) algorithm. Note that "rechunk-copy"
+could consume significantly more bandwidth during the process compared to "copy", 
+and may also need significantly more time to finish.
+
+EXIT STATUS
+===========
+
+Exit status is 0 if the command was successful.
+Exit status is 1 if there was any error.
+Exit status is 10 if the repository does not exist.
+Exit status is 11 if the repository is already locked.
+Exit status is 12 if the password is incorrect.
+		`,
+		GroupID:           cmdGroupDefault,
+		DisableAutoGenTag: true,
+		RunE: func(cmd *cobra.Command, args []string) error {
+			finalizeSnapshotFilter(&opts.SnapshotFilter)
+			return runRechunkCopy(cmd.Context(), opts, *globalOptions, args, globalOptions.Term)
+		},
+	}
+
+	opts.AddFlags(cmd.Flags())
+	return cmd
+}
+
+// RechunkCopyOptions bundles all options for the rechunk-copy command.
+type RechunkCopyOptions struct {
+	global.SecondaryRepoOptions
+	data.SnapshotFilter
+	RechunkTags       data.TagLists // tags added to every copied snapshot (--rechunk-tag)
+	CacheSize         int           // in-memory blob cache size in MiB, 0 disables it (--cache-size)
+	isIntegrationTest bool          // skip check for RESTIC_FEATURES=rechunk-copy when integration test
+}
+
+func (opts *RechunkCopyOptions) AddFlags(f *pflag.FlagSet) { // AddFlags registers the secondary-repo, snapshot-filter, --rechunk-tag and --cache-size flags on f.
+	opts.SecondaryRepoOptions.AddFlags(f, "destination", "to copy snapshots from")
+	initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
+	f.Var(&opts.RechunkTags, "rechunk-tag", "add `tags` for the copied snapshots in the format `tag[,tag,...]` (can be specified multiple times)")
+	f.IntVar(&opts.CacheSize, "cache-size", 4096, "in-memory blob cache size in MiBs (0 to disable)")
+}
+
+// runRechunkCopy rechunk-copies the selected snapshots from the source repository to
+// the destination repository: it rechunks the file data, rewrites the trees to point
+// at the new data blobs, and saves each snapshot with its rewritten root tree.
+func runRechunkCopy(ctx context.Context, opts RechunkCopyOptions, gopts global.Options, args []string, term ui.Terminal) error {
+	if !feature.Flag.Enabled(feature.RechunkCopy) && !opts.isIntegrationTest {
+		return errors.Fatal("rechunk-copy feature flag is not set. Currently, rechunk-copy is alpha feature (disabled by default).")
+	}
+	if opts.CacheSize != 0 && opts.CacheSize < 100 {
+		return errors.Fatal("blob cache size must be at least 100 MiB")
+	}
+	// NOTE(review): this int product overflows for 2048 MiB or more (the default is 4096) where int is 32 bits — confirm
+	cacheBytes := opts.CacheSize * (1 << 20)
+
+	printer := ui.NewProgressPrinter(false, gopts.Verbosity, term)
+	secondaryGopts, isFromRepo, err := opts.SecondaryRepoOptions.FillGlobalOpts(ctx, gopts, "destination")
+	if err != nil {
+		return err
+	}
+	// the secondary repo is the source if it was set via from-repo, otherwise it is the destination
+	srcGopts, dstGopts := gopts, secondaryGopts
+	if isFromRepo {
+		srcGopts, dstGopts = secondaryGopts, gopts
+	}
+
+	ctx, srcRepo, unlockSrc, err := openWithReadLock(ctx, srcGopts, srcGopts.NoLock, printer)
+	if err != nil {
+		return err
+	}
+	defer unlockSrc()
+
+	ctx, dstRepo, unlockDst, err := openWithAppendLock(ctx, dstGopts, false, printer)
+	if err != nil {
+		return err
+	}
+	defer unlockDst()
+
+	if srcRepo.Config().ChunkerPolynomial == dstRepo.Config().ChunkerPolynomial {
+		return errors.Fatal("source repo and destination repo have same chunker polynomials; use `restic copy` instead")
+	}
+
+	snapshotLister, err := restic.MemorizeList(ctx, srcRepo, restic.SnapshotFile)
+	if err != nil {
+		return err
+	}
+
+	debug.Log("Loading source index")
+	if err := srcRepo.LoadIndex(ctx, printer); err != nil {
+		return err
+	}
+	debug.Log("Loading destination index")
+	if err := dstRepo.LoadIndex(ctx, printer); err != nil {
+		return err
+	}
+
+	debug.Log("Running NewRechunker()")
+	rc := rechunker.NewRechunker(rechunker.Config{
+		CacheSize:          cacheBytes,
+		SmallFileThreshold: 25,
+		Pol:                dstRepo.Config().ChunkerPolynomial,
+	})
+
+	// collect the root tree of every selected snapshot; these are the rechunk targets
+	debug.Log("Gathering root trees of target snapshots")
+	roots := restic.IDs{}
+	for sn := range FindFilteredSnapshots(ctx, snapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) {
+		roots = append(roots, *sn.Tree)
+	}
+
+	// rechunk the data reachable from the collected root trees
+	debug.Log("Running runRechunk()")
+	bar := rechunker.NewProgress(term, printer, ui.CalculateProgressInterval(!srcGopts.Quiet, srcGopts.JSON, term.CanUpdateStatus()))
+	if err = runRechunk(ctx, srcRepo, roots, dstRepo, rc, cacheBytes, printer, bar); err != nil {
+		return err
+	}
+
+	// rewrite the tree of every selected snapshot through a single blob uploader
+	printer.P("Rewriting trees...\n")
+	rewriteTrees := func(ctx context.Context, uploader restic.BlobSaver) error {
+		for sn := range FindFilteredSnapshots(ctx, snapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) {
+			debug.Log("Running RewriteTree() for tree ID %v", sn.Tree.Str())
+			if _, err := rc.RewriteTree(ctx, srcRepo, uploader, *sn.Tree); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+	if err = dstRepo.WithBlobUploader(ctx, rewriteTrees); err != nil {
+		return err
+	}
+	printer.V("Rewriting done.\n\n")
+
+	// save every selected snapshot to the destination repo with its rewritten root tree
+	debug.Log("Writing snapshots")
+	for sn := range FindFilteredSnapshots(ctx, snapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) {
+		sn.Parent = nil // the parent has no relevance in the new repo
+		// keep the source snapshot ID in Original so it persists across copies
+		if sn.Original == nil {
+			sn.Original = sn.ID()
+		}
+
+		rewrittenID, err := rc.GetRewrittenTree(*sn.Tree)
+		if err != nil {
+			return err
+		}
+		debug.Log("Snapshot %v: Original root tree %v is substituted with new %v", sn.ID().Str(), sn.Tree.Str(), rewrittenID.Str())
+		// point the snapshot at the rewritten root tree
+		sn.Tree = &rewrittenID
+		// append the user-supplied --rechunk-tag values, if any
+		sn.AddTags(opts.RechunkTags.Flatten())
+		newID, err := data.SaveSnapshot(ctx, dstRepo, sn)
+		if err != nil {
+			return err
+		}
+		debug.Log("Snapshot %v (src repo) is rechunk-copied to snapshot %v (dst repo)", sn.ID().Str(), newID.Str())
+		printer.P("snapshot %s saved\n", newID.Str())
+	}
+
+	// post-run summary
+	printer.V("\n[Post-run Summary]")
+	printer.V("Number of distinct files processed: %v", rc.NumFiles())
+	printer.V("  - Total size processed (including duplicate blobs): %v", ui.FormatBytes(rc.TotalSize()))
+	printer.P("Additional data stored to the repository: %v", ui.FormatBytes(rc.TotalAddedToDstRepo()))
+
+	return ctx.Err()
+}
+
+// runRechunk plans the rechunk job for the given snapshot root trees, prints a pre-run
+// summary at verbose level, and then rechunks the data from srcRepo into dstRepo.
+// NOTE(review): cacheSize is not referenced in the body — confirm whether it can be dropped.
+func runRechunk(ctx context.Context, srcRepo restic.Repository, roots restic.IDs, dstRepo restic.Repository, rechnker *rechunker.Rechunker, cacheSize int, printer progress.Printer, progress *rechunker.Progress) error {
+	printer.V("Planning rechunk...\n")
+	debug.Log("Running Plan()")
+	if err := rechnker.Plan(ctx, srcRepo, roots); err != nil {
+		return err
+	}
+	printer.V("Planning done.")
+
+	// pre-run summary: snapshots, distinct files, total size, packs
+	printer.V("\n[Pre-run Summary]")
+	printer.V("Number of snapshots: %v", len(roots))
+	printer.V("Number of distinct files to process: %v", rechnker.NumFiles())
+	printer.V("  - Total size (including duplicate blobs): %v", ui.FormatBytes(rechnker.TotalSize()))
+	printer.V("Number of packs to download: %v\n\n", rechnker.PackCount())
+
+	// NOTE(review): progress.Done() is skipped when Rechunk returns an error — confirm that is intended
+	debug.Log("Running RechunkData()")
+	progress.Start(rechnker.NumFiles(), rechnker.TotalSize())
+	if err := rechnker.Rechunk(ctx, srcRepo, dstRepo, progress); err != nil {
+		return err
+	}
+	progress.Done()
+	printer.V("Rechunking done.\n\n")
+	return nil
+}
diff --git a/cmd/restic/cmd_rechunk_copy_integration_test.go b/cmd/restic/cmd_rechunk_copy_integration_test.go
new file mode 100644
index 000000000..9fd7db548
--- /dev/null
+++ b/cmd/restic/cmd_rechunk_copy_integration_test.go
@@ -0,0 +1,124 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+	"testing"
+
+	"github.com/restic/restic/internal/global"
+	rtest "github.com/restic/restic/internal/test"
+)
+
+// Reference: cmd_copy_integration_test.go (v0.18.0)
+
+func testRunRechunkCopy(t testing.TB, srcGopts global.Options, dstGopts global.Options) {
+	gopts := srcGopts // start from the source options, then make the destination the primary repository
+	gopts.Repo = dstGopts.Repo
+	gopts.Password = dstGopts.Password
+	gopts.InsecureNoPassword = dstGopts.InsecureNoPassword
+	rechunkCopyOpts := RechunkCopyOptions{
+		SecondaryRepoOptions: global.SecondaryRepoOptions{ // the source repository is passed as the secondary repository
+			Repo:               srcGopts.Repo,
+			Password:           srcGopts.Password,
+			InsecureNoPassword: srcGopts.InsecureNoPassword,
+		},
+		isIntegrationTest: true,
+	}
+
+	rtest.OK(t, withTermStatus(t, gopts, func(ctx context.Context, gopts global.Options) error {
+		return runRechunkCopy(ctx, rechunkCopyOpts, gopts, nil, gopts.Term) // use the context supplied by withTermStatus
+	}))
+}
+
+func TestRechunkCopy(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+	env2, cleanup2 := withTestEnvironment(t)
+	defer cleanup2()
+
+	testSetupBackupData(t, env)
+	opts := BackupOptions{}
+	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9")}, opts, env.gopts)
+	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "2")}, opts, env.gopts)
+	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "3")}, opts, env.gopts)
+	testRunCheck(t, env.gopts)
+
+	testRunInit(t, env2.gopts)
+	testRunRechunkCopy(t, env.gopts, env2.gopts)
+
+	snapshotIDs := testListSnapshots(t, env.gopts, 3)
+	copiedSnapshotIDs := testListSnapshots(t, env2.gopts, 3)
+
+	// Check that the size of the copy seems reasonable
+	stat := dirStats(t, env.repo)
+	stat2 := dirStats(t, env2.repo)
+	sizeDiff := int64(stat.size) - int64(stat2.size)
+	if sizeDiff < 0 {
+		sizeDiff = -sizeDiff
+	}
+	rtest.Assert(t, sizeDiff < int64(stat.size)/50, "expected less than 2%% size difference: %v vs. %v",
+		stat.size, stat2.size)
+
+	// Check integrity of the copy
+	testRunCheck(t, env2.gopts)
+
+	// Check that the copied snapshots restore to the same contents as the old ones (compared via restored files)
+	origRestores := make(map[string]struct{})
+	for i, snapshotID := range snapshotIDs {
+		restoredir := filepath.Join(env.base, fmt.Sprintf("restore%d", i))
+		origRestores[restoredir] = struct{}{}
+		testRunRestore(t, env.gopts, restoredir, snapshotID.String())
+	}
+	for i, snapshotID := range copiedSnapshotIDs {
+		restoredir := filepath.Join(env2.base, fmt.Sprintf("restore%d", i))
+		testRunRestore(t, env2.gopts, restoredir, snapshotID.String())
+		foundMatch := false
+		for cmpdir := range origRestores {
+			if directoriesContentsDiff(t, restoredir, cmpdir) == "" {
+				delete(origRestores, cmpdir)
+				foundMatch = true
+				break // pair each copied snapshot with at most one original restore
+			}
+		}
+
+		rtest.Assert(t, foundMatch, "found no counterpart for snapshot %v", snapshotID)
+	}
+
+	rtest.Assert(t, len(origRestores) == 0, "found not copied snapshots")
+}
+
+func TestRechunkCopyUnstableJSON(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+	env2, cleanup2 := withTestEnvironment(t)
+	defer cleanup2()
+
+	// contains a symlink created using `ln -s '../i/'$'\355\246\361''d/samba' broken-symlink`
+	datafile := filepath.Join("testdata", "copy-unstable-json.tar.gz")
+	rtest.SetupTarTestFixture(t, env.base, datafile)
+
+	testRunInit(t, env2.gopts)
+	testRunRechunkCopy(t, env.gopts, env2.gopts) // exercise rechunk-copy, not the plain copy command
+	testRunCheck(t, env2.gopts)
+	testListSnapshots(t, env2.gopts, 1)
+}
+
+func TestRechunkCopyToEmptyPassword(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+	env2, cleanup2 := withTestEnvironment(t)
+	defer cleanup2()
+	env2.gopts.Password = "" // the destination repository is created without a password
+	env2.gopts.InsecureNoPassword = true
+
+	testSetupBackupData(t, env)
+	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9")}, BackupOptions{}, env.gopts)
+
+	testRunInit(t, env2.gopts)
+	testRunRechunkCopy(t, env.gopts, env2.gopts) // exercise rechunk-copy, not the plain copy command
+
+	testListSnapshots(t, env.gopts, 1)
+	testListSnapshots(t, env2.gopts, 1)
+	testRunCheck(t, env2.gopts)
+}
diff --git a/cmd/restic/main.go b/cmd/restic/main.go
index 3307d2787..c3a3e86a0 100644
--- a/cmd/restic/main.go
+++ b/cmd/restic/main.go
@@ -90,6 +90,7 @@ The full documentation can be found at https://restic.readthedocs.io/ .
 		newOptionsCommand(globalOptions),
 		newPruneCommand(globalOptions),
 		newRebuildIndexCommand(globalOptions),
+		newRechunkCopyCommand(globalOptions),
 		newRecoverCommand(globalOptions),
 		newRepairCommand(globalOptions),
 		newRestoreCommand(globalOptions),
diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst
index 797ea9f9d..e4e7b7147 100644
--- a/doc/045_working_with_repos.rst
+++ b/doc/045_working_with_repos.rst
@@ -233,7 +233,7 @@ messages can appear some time after the snapshot content was copied.
     source and destination repository. This *may incur higher bandwidth usage
     and costs* than expected during normal backup runs.
 
-.. important:: The copying process does not re-chunk files, which may break
+.. important:: The plain copy command does not re-chunk files, which may break
     deduplication between the files copied and files already stored in the
     destination repository. This means that copied files, which existed in
     both the source and destination repository, *may occupy up to twice their
@@ -283,9 +283,11 @@ Ensuring deduplication for copied snapshots
 -------------------------------------------
 
 Even though the copy command can transfer snapshots between arbitrary repositories,
-deduplication between snapshots from the source and destination repository may not work.
-To ensure proper deduplication, both repositories have to use the same parameters for
-splitting large files into smaller chunks, which requires additional setup steps. With
+deduplication between snapshots from the source and destination repository may not work
+with the plain copy command. There are two ways to ensure proper deduplication between
+repositories. The first is to use the rechunk-copy command described below. The second is
+to make both repositories use the same parameters for splitting large files into smaller
+chunks, which requires additional setup steps. With
 the same parameters restic will for both repositories split identical files into
 identical chunks and therefore deduplication also works for snapshots copied between
 these repositories.
@@ -300,6 +302,34 @@ using the same chunker parameters as the source repository:
 
 Note that it is not possible to change the chunker parameters of an existing repository.
 
+Rechunk-copy between repositories with different chunker parameters
+-------------------------------------------------------------------
+
+The rechunk-copy command re-chunks files with the destination repository's chunker parameters
+when copying snapshots. Its command-line options are compatible with the plain copy command,
+with two additions. The first is ``--rechunk-tag``, which specifies a tag added to rechunk-copied
+snapshots. The second is ``--cache-size``. The rechunk-copy command uses an in-memory cache for
+rechunking, whose default size is 4096 MiB. You can adjust the in-memory cache size to
+fit your available RAM and desired memory usage. Note that a small cache size will lead to
+frequent re-downloads of packs, which is especially undesirable for remote source repositories.
+
+All of the following commands are valid:
+
+.. code-block:: console
+
+    $ restic -r /srv/dst-repo rechunk-copy --from-repo /srv/src-repo
+    $ restic -r /srv/dst-repo rechunk-copy --from-repo /srv/src-repo --host luigi --path /srv/data --tag foo,bar
+    $ restic -r /srv/dst-repo rechunk-copy --rechunk-tag my-rechunk --from-repo /srv/src-repo 34c9e85f 2714b65a
+    $ restic -r /srv/dst-repo rechunk-copy --cache-size 8192 --from-repo /srv/src-repo # set cache size to 8192 MiB
+
+.. note:: Although the rechunk-copy command can provide on-demand deduplication between
+    repositories with different chunker parameters, it has a few disadvantages compared
+    to the plain copy command. The rechunk-copy command is slower because it re-assembles
+    all files and repeats all of the computations that are done during a backup. Also, as of now,
+    the rechunk-copy command does not support skipping redundant snapshots, so you should
+    manually designate the exact snapshots to copy. Therefore, it is recommended to use
+    repositories with the same chunker parameters if you plan to copy regularly between repositories.
+
 
 Removing files from snapshots
 =============================
diff --git a/internal/feature/registry.go b/internal/feature/registry.go
index a7368fa75..fb373ea6a 100644
--- a/internal/feature/registry.go
+++ b/internal/feature/registry.go
@@ -10,6 +10,7 @@ const (
 	DeprecateS3LegacyLayout FlagName = "deprecate-s3-legacy-layout"
 	DeviceIDForHardlinks    FlagName = "device-id-for-hardlinks"
 	ExplicitS3AnonymousAuth FlagName = "explicit-s3-anonymous-auth"
+	RechunkCopy             FlagName = "rechunk-copy"
 	SafeForgetKeepTags      FlagName = "safe-forget-keep-tags"
 	S3Restore               FlagName = "s3-restore"
 )
@@ -21,6 +22,7 @@ func init() {
 		DeprecateS3LegacyLayout: {Type: Stable, Description: "disable support for S3 legacy layout used up to restic 0.7.0. Use restic 0.17.3 to migrate if necessary."},
 		DeviceIDForHardlinks:    {Type: Alpha, Description: "store deviceID only for hardlinks to reduce metadata changes for example when using btrfs subvolumes. Will be removed in a future restic version after repository format 3 is available"},
 		ExplicitS3AnonymousAuth: {Type: Stable, Description: "forbid anonymous S3 authentication unless `-o s3.unsafe-anonymous-auth=true` is set"},
+		RechunkCopy:             {Type: Alpha, Description: "enable rechunk-copy command, where it rechunks data blobs while copying snapshots. This command is not stable yet, so use with caution."},
 		SafeForgetKeepTags:      {Type: Stable, Description: "prevent deleting all snapshots if the tag passed to `forget --keep-tags tagname` does not exist"},
 		S3Restore:               {Type: Alpha, Description: "restore S3 objects from cold storage classes when `-o s3.enable-restore=true` is set"},
 	})
diff --git a/internal/rechunker/blob_cache.go b/internal/rechunker/blob_cache.go
new file mode 100644
index 000000000..efdc2e1d9
--- /dev/null
+++ b/internal/rechunker/blob_cache.go
@@ -0,0 +1,335 @@
+package rechunker
+
+import (
+	"context"
+	"sync"
+
+	"github.com/hashicorp/golang-lru/v2/simplelru"
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/restic"
+	"golang.org/x/sync/errgroup"
+)
+
+type BlobCache struct {
+	mu sync.RWMutex
+	c  *simplelru.LRU[restic.ID, []byte]
+
+	idx *Index
+
+	free, size int
+
+	waitList   restic.IDSet                // set of packs waiting for download
+	inProgress map[restic.ID]chan struct{} // blob ready event channel; open by requestDownload(), closed by downloaders
+	downloadCh chan restic.ID              // pack download request channel; produced by requestDownload(), consumed by downloaders
+
+	ignored restic.IDSet // set of ignored blobs; blobs in this set are excluded from download
+
+	cancel func() // this function is called at Close(), cancelling cache context
+}
+
+const overhead = len(restic.ID{}) + 64
+
+func NewBlobCache(ctx context.Context, size int, numDownloaders int,
+	repo PackLoader, idx *Index,
+	onReady func(blobIDs restic.IDs), onEvict func(blobIDs restic.IDs)) *BlobCache {
+	if size < 32*(1<<20) {
+		panic("Blob cache size should be at least 32 MiB!!")
+	}
+	debug.Log("Creating blob cache of size %v", size)
+
+	ctx, cancel := context.WithCancel(ctx)
+
+	c := &BlobCache{
+		idx: idx,
+
+		size: size,
+		free: size,
+
+		waitList:   restic.NewIDSet(),
+		inProgress: map[restic.ID]chan struct{}{},
+		downloadCh: make(chan restic.ID),
+
+		ignored: restic.NewIDSet(),
+
+		cancel: cancel,
+	}
+
+	lru, err := simplelru.NewLRU(size, func(k restic.ID, v []byte) {
+		c.free += cap(v) + overhead
+	})
+	if err != nil {
+		panic(err)
+	}
+	c.c = lru
+
+	// create download function that uses repo's LoadBlobsFromPack
+	download := createDownloadFn(ctx, repo)
+
+	c.startDownloaders(ctx, numDownloaders, download, onReady, onEvict)
+
+	return c
+}
+
+type blobMap = map[restic.ID][]byte
+type downloadFn func(packID restic.ID, blobs []restic.Blob) (blobMap, error)
+
+func createDownloadFn(ctx context.Context, repo PackLoader) downloadFn {
+	return func(packID restic.ID, blobs []restic.Blob) (blobMap, error) {
+		bm := blobMap{}
+		err := repo.LoadBlobsFromPack(ctx, packID, blobs,
+			func(blob restic.BlobHandle, buf []byte, err error) error {
+				if err != nil {
+					return err
+				}
+				newBuf := make([]byte, len(buf))
+				copy(newBuf, buf)
+				bm[blob.ID] = newBuf
+
+				return nil
+			})
+		if err != nil {
+			return blobMap{}, err
+		}
+		return bm, nil
+	}
+}
+
+// startDownloaders launches numDownloaders goroutines; each receives pack IDs from c.downloadCh, downloads the blobs of that pack that are neither ignored nor cached, stores them in the LRU and then invokes onEvict/onReady.
+func (c *BlobCache) startDownloaders(ctx context.Context, numDownloaders int,
+	download downloadFn, onReady, onEvict func(blobIDs restic.IDs)) {
+	wg, ctx := errgroup.WithContext(ctx)
+	for range numDownloaders {
+		wg.Go(func() error {
+			debug.Log("Starting blob cache downloader")
+			defer debug.Log(("Stopping blob cache downloader"))
+
+			for {
+				// listen to pack download request
+				var packID restic.ID
+				select {
+				case <-ctx.Done():
+					return ctx.Err()
+				case packID = <-c.downloadCh:
+				}
+
+				// filter out ignored blobs
+				c.mu.RLock()
+				var filtered []restic.Blob
+				for _, blob := range c.idx.PackToBlobs[packID] {
+					ignored := c.ignored.Has(blob.ID)
+					ready := c.c.Contains(blob.ID)
+					if !ignored && !ready {
+						filtered = append(filtered, blob)
+					}
+				}
+				c.mu.RUnlock()
+
+				// skip if no blobs to download
+				if len(filtered) == 0 {
+					continue
+				}
+
+				// download blobs from the repo
+				debug.Log("Starting download of %v blobs in pack %v", len(filtered), packID.Str())
+				blobs, err := download(packID, filtered)
+				if err != nil {
+					// NOTE(review): nothing in this function calls wg.Wait(), so this error only cancels the other downloaders
+					return err
+				}
+
+				// pop the pack from the waitlist and store the downloaded blobs in the cache
+				var ready, evicted restic.IDs
+				c.mu.Lock()
+				delete(c.waitList, packID)
+				for id, data := range blobs {
+					size := cap(data) + overhead
+					for size > c.free {
+						oldest, _, ok := c.c.RemoveOldest()
+						if !ok {
+							break // cache is empty: stop instead of spinning forever while holding the lock
+						}
+						evicted = append(evicted, oldest)
+					}
+					c.c.Add(id, data)
+					c.free -= size
+					if done, ok := c.inProgress[id]; ok {
+						close(done)
+						delete(c.inProgress, id)
+					}
+					ready = append(ready, id)
+				}
+				usage := c.size - c.free // snapshot under the lock; c.free is mutated by other downloaders
+				c.mu.Unlock()
+
+				// execute callbacks
+				if len(evicted) > 0 {
+					if onEvict != nil {
+						onEvict(evicted)
+					}
+					debug.Log("%v blobs are evicted.", len(evicted))
+				}
+				if onReady != nil {
+					onReady(ready)
+				}
+
+				debug.Log("Pack %v loaded. Current cache usage: %v", packID.Str(), usage)
+				debug.Log("Pack %v includes the following blobs: \n%v", packID.Str(), ready.String())
+
+				// debugStats: track maximum memory usage
+				debugStats.UpdateMax("max_cache_usage", usage) // nil-safe: UpdateMax is a no-op on a nil *Stats
+			}
+		})
+	}
+}
+
+func (c *BlobCache) Get(ctx context.Context, id restic.ID, buf []byte) ([]byte, <-chan []byte) {
+	c.mu.Lock() // full lock rather than RLock: the lookup below updates LRU recency
+	blob, ok := c.c.Get(id) // try to retrieve blob, with recency update
+	c.mu.Unlock()
+
+	if ok { // case 1: when blob exists in cache: return that blob immediately
+		if cap(buf) < len(blob) {
+			debug.Log("Allocating new buf, as it has smaller capacity than chunk size.")
+			buf = make([]byte, len(blob))
+		} else {
+			buf = buf[:len(blob)]
+		}
+		copy(buf, blob) // hand out a copy so the caller never aliases the cached slice
+
+		debug.Log("Cache hit. Returning blob %v", id.Str())
+		return buf, nil
+	}
+
+	// case 2: when blob does not exist in cache: return chOut (where the downloaded blob will be delivered)
+	debug.Log("Cache miss. Requesting async get for blob %v", id.Str())
+	chOut := c.asyncGet(ctx, id, buf)
+
+	return nil, chOut
+}
+
+// asyncGet starts a goroutine that waits until blob id is in the cache (requesting its pack download if needed) and sends a copy on the returned channel.
+func (c *BlobCache) asyncGet(ctx context.Context, id restic.ID, buf []byte) <-chan []byte {
+	out := make(chan []byte, 1)
+
+	// a plain goroutine is used: an errgroup that is never waited on would never cancel (and so never release) its derived context
+	go func() {
+		for {
+			c.mu.RLock()
+			blob, ready := c.c.Peek(id)
+			finish, inProgress := c.inProgress[id]
+			c.mu.RUnlock()
+
+			if ready { // case A: blob is now ready in the cache
+				if cap(buf) < len(blob) {
+					debug.Log("Allocating new buf, as it has smaller capacity than chunk size.")
+					buf = make([]byte, len(blob))
+				} else {
+					buf = buf[:len(blob)]
+				}
+				copy(buf, blob)
+
+				debug.Log("Blob %v is now ready in the cache. Passing blob data to channel.", id.Str())
+				out <- buf
+				return
+			}
+			if inProgress { // case B: blob is queued, but not yet ready
+				debug.Log("Waiting for blob %v to be ready in the cache.", id.Str())
+				select {
+				case <-ctx.Done():
+					return // nothing is sent; the caller observes ctx itself
+				case <-finish: // wait until download complete
+					continue
+				}
+			}
+
+			// case C: blob is not queued
+			// add to the download queue
+			debug.Log("Requesting download of the pack containing blob %v", id.Str())
+			if err := c.requestDownload(ctx, id); err != nil {
+				return // requestDownload only fails when ctx is done
+			}
+		}
+	}()
+
+	return out
+}
+
+func (c *BlobCache) requestDownload(ctx context.Context, id restic.ID) error {
+	packID := c.idx.BlobToPack[id]
+
+	c.mu.Lock()
+	ok := c.waitList.Has(packID)
+	if !ok {
+		// queue pack download
+		c.waitList.Insert(packID)
+	}
+	if _, inProgress := c.inProgress[id]; !inProgress {
+		c.inProgress[id] = make(chan struct{})
+	}
+	c.mu.Unlock()
+
+	if ok { // somebody else has already queued pack download; it will handle download
+		return nil
+	}
+
+	// send packID to inform the downloader
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case c.downloadCh <- packID:
+		return nil
+	}
+}
+
+func (c *BlobCache) Ignore(ids restic.IDs) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	for _, id := range ids {
+		c.ignored.Insert(id)
+	}
+
+	if debugStats != nil {
+		debugStats.Add("ignored_blob_count", len(ids))
+	}
+}
+
+func (c *BlobCache) Close() {
+	if c == nil {
+		return
+	}
+
+	c.cancel()
+}
+
+type BlobLoaderWithCache struct {
+	repo  PackLoader
+	cache *BlobCache
+}
+
+func (l *BlobLoaderWithCache) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) {
+	blob, ch := l.cache.Get(ctx, id, buf)
+	if blob == nil { // wait for blob to be downloaded
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case blob = <-ch:
+		}
+	}
+	return blob, nil
+}
+
+type PackLoader interface {
+	LoadBlobsFromPack(context.Context, restic.ID, []restic.Blob, func(restic.BlobHandle, []byte, error) error) error
+}
+
+func WrapWithCache(ctx context.Context, repo PackLoader, cacheSize int, numDownloaders int, idx *Index,
+	onReady, onEvict func(restic.IDs)) (*BlobLoaderWithCache, *BlobCache) {
+	r := &BlobLoaderWithCache{
+		repo:  repo,
+		cache: NewBlobCache(ctx, cacheSize, numDownloaders, repo, idx, onReady, onEvict),
+	}
+
+	debug.Log("Wrapped the repository with blob cache.")
+	return r, r.cache
+}
diff --git a/internal/rechunker/debug.go b/internal/rechunker/debug.go
new file mode 100644
index 000000000..5a5e8b655
--- /dev/null
+++ b/internal/rechunker/debug.go
@@ -0,0 +1,103 @@
+package rechunker
+
+import (
+	"maps"
+	"strings"
+	"sync"
+
+	"github.com/restic/restic/internal/debug"
+)
+
+// global data structure for debug trace
+var debugStats = NewStats(true)
+
+type Stats struct {
+	d  map[string]int
+	mu sync.Mutex
+}
+
+func NewStats(enable bool) *Stats {
+	if enable {
+		return &Stats{
+			d: map[string]int{},
+		}
+	}
+	return nil
+}
+
+func (n *Stats) Add(k string, v int) {
+	if n == nil {
+		return
+	}
+
+	n.mu.Lock()
+	defer n.mu.Unlock()
+
+	n.d[k] += v
+}
+
+func (n *Stats) AddMap(m map[string]int) {
+	if n == nil {
+		return
+	}
+
+	n.mu.Lock()
+	defer n.mu.Unlock()
+
+	for k, v := range m {
+		n.d[k] += v
+	}
+}
+
+func (n *Stats) UpdateMax(k string, v int) {
+	if n == nil {
+		return
+	}
+
+	n.mu.Lock()
+	defer n.mu.Unlock()
+
+	if n.d[k] < v {
+		n.d[k] = v
+	}
+}
+
+func (n *Stats) Dump() (note map[string]int) {
+	if n == nil {
+		return
+	}
+
+	n.mu.Lock()
+	defer n.mu.Unlock()
+
+	note = map[string]int{}
+	maps.Copy(note, n.d)
+
+	return note
+}
+
+func debugPrintRechunkReport(rc *Rechunker) {
+	if debugStats == nil {
+		return
+	}
+
+	dNote := debugStats.Dump()
+
+	if rc.cfg.CacheSize > 0 {
+		debug.Log("List of blobs downloaded more than once:")
+		numBlobRedundant := 0
+		redundantDownloadCount := 0
+		for k := range dNote {
+			if strings.HasPrefix(k, "load:") && dNote[k] > 1 {
+				debug.Log("%v: Downloaded %d times", k[5:15], dNote[k])
+				numBlobRedundant++
+				redundantDownloadCount += dNote[k]
+			}
+		}
+		debug.Log("[summary_blobcache] Number of redundantly downloaded blobs is %d, whose overall download count is %d", numBlobRedundant, redundantDownloadCount)
+		debug.Log("[summary_blobcache] Peak memory usage by blob cache: %v/%v bytes", dNote["max_cache_usage"], rc.cfg.CacheSize)
+		if dNote["total_blob_count"] != dNote["ignored_blob_count"] {
+			debug.Log("[summary_blobcache] WARNING: Number of successfully ignored blob at the end: %v/%v", dNote["ignored_blob_count"], dNote["total_blob_count"])
+		}
+	}
+}
diff --git a/internal/rechunker/dispatcher.go b/internal/rechunker/dispatcher.go
new file mode 100644
index 000000000..0b20fa7dc
--- /dev/null
+++ b/internal/rechunker/dispatcher.go
@@ -0,0 +1,261 @@
+package rechunker
+
+import (
+	"context"
+	"sync"
+
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/restic"
+	"golang.org/x/sync/errgroup"
+)
+
+type Dispatcher struct {
+	mu sync.Mutex
+
+	// job dispatch channel to workers
+	regular  <-chan *ChunkedFile
+	priority <-chan *ChunkedFile
+
+	// files list for dispatch
+	regularList  []*ChunkedFile
+	priorityList []*ChunkedFile
+
+	push chan struct{} // priority file notification
+	done chan struct{}
+}
+
+func NewDispatcher(ctx context.Context, files []*ChunkedFile, usePriority bool) *Dispatcher {
+	debug.Log(("Running NewDispatcher()"))
+
+	wg, ctx := errgroup.WithContext(ctx)
+
+	if !usePriority {
+		// this will be a regular dispatcher without priority dispatch
+		d := &Dispatcher{
+			regularList: files,
+			done:        make(chan struct{}),
+		}
+		d.createRegularCh(ctx, wg, nil)
+		return d
+	}
+
+	// below is setup for priority-aware dispatcher
+
+	d := &Dispatcher{
+		regularList: files,
+		push:        make(chan struct{}, 1),
+		done:        make(chan struct{}),
+	}
+
+	set := restic.IDSet{}
+	mu := sync.Mutex{}
+	visited := func(id restic.ID) bool {
+		mu.Lock()
+		visited := set.Has(id)
+		if !visited {
+			set.Insert(id)
+		}
+		mu.Unlock()
+		return visited
+	}
+
+	d.createRegularCh(ctx, wg, visited)
+	d.createPriorityCh(ctx, wg, visited)
+
+	return d
+}
+
+func (d *Dispatcher) Next(ctx context.Context) (*ChunkedFile, bool, error) {
+	file, from, err := PrioritySelect(ctx, d.priority, d.regular)
+	return file, from != 0, err
+}
+
+func (d *Dispatcher) NextPriority(ctx context.Context) (*ChunkedFile, bool, error) {
+	if d.priority == nil {
+		return nil, false, nil
+	}
+	file, from, err := PrioritySelect(ctx, d.priority, nil)
+	return file, from != 0, err
+}
+
+func (d *Dispatcher) PushPriority(files []*ChunkedFile) bool {
+	if d.priority == nil {
+		return false
+	}
+
+	// check if dispatcher is closed; if closed, return without push
+	select {
+	case <-d.done:
+		return false
+	default:
+	}
+
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	d.priorityList = append(d.priorityList, files...)
+
+	// notify push channel
+	select {
+	case d.push <- struct{}{}:
+	default:
+	}
+
+	return true
+}
+
+func (d *Dispatcher) popPriority() []*ChunkedFile {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	l := d.priorityList
+	d.priorityList = nil
+
+	return l
+}
+
+func (d *Dispatcher) Close() {
+	if d == nil { // calling Close on a nil dispatcher is a no-op
+		return
+	}
+
+	select {
+	case <-d.done: // already closed; nothing to do
+	default:
+		close(d.done) // NOTE(review): this check-then-close is not guarded by d.mu, so two concurrent Close calls could both reach close()
+	}
+}
+
+func (d *Dispatcher) createRegularCh(ctx context.Context, wg *errgroup.Group, visited func(id restic.ID) bool) {
+	debug.Log("Running dispatcher for regular channel")
+	ch := make(chan *ChunkedFile)
+	wg.Go(func() error {
+		defer d.Close()
+		defer close(ch)
+
+		for _, file := range d.regularList {
+			// check if the file was visited by another dispatcher;
+			// if it was, skip the file.
+			if visited != nil && visited(file.hashval) {
+				debug.Log("File %v was visited by another dispatcher; skipping.", file.hashval.Str())
+				continue
+			}
+
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case <-d.done:
+				debug.Log("Closing dispatcher for regular channel")
+				return nil
+			case ch <- file:
+				debug.Log("Sent file %v through regular channel", file.hashval.Str())
+			}
+		}
+
+		return nil
+	})
+
+	d.regular = ch
+}
+
+func (d *Dispatcher) createPriorityCh(ctx context.Context, wg *errgroup.Group, visited func(id restic.ID) bool) {
+	debug.Log("Running dispatcher for priority channel")
+	ch := make(chan *ChunkedFile)
+	wg.Go(func() error {
+		defer close(ch)
+
+		var list []*ChunkedFile
+		for {
+			if len(list) == 0 {
+				// wait for priority files notification or done signal
+				select {
+				case <-ctx.Done():
+					return ctx.Err()
+				case <-d.push:
+					list = d.popPriority()
+					debug.Log("Detected priority files whose count is %v", len(list))
+					continue
+				case <-d.done:
+					debug.Log("Closing dispatcher for priority channel")
+					return nil
+				}
+			}
+
+			file := list[0]
+			list = list[1:]
+
+			// check if the file was handled by another channel;
+			// if it was, skip the file.
+			if visited != nil && visited(file.hashval) {
+				debug.Log("File %v was visited by another dispatcher; skipping.", file.hashval.Str())
+				continue
+			}
+
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case <-d.done:
+				return nil
+			case ch <- file:
+				debug.Log("Sent file %v through priority channel", file.hashval.Str())
+			}
+		}
+	})
+
+	d.priority = ch
+}
+
+// PrioritySelect selects from two channels with priority; first channel first.
+func PrioritySelect(ctx context.Context, first <-chan *ChunkedFile, second <-chan *ChunkedFile) (item *ChunkedFile, from int, err error) {
+	if first != nil && second != nil {
+		// First, try to pull from channel 'first' only. If 'first' is not ready now, try both channels.
+		select {
+		case <-ctx.Done():
+			return nil, 0, ctx.Err()
+		case i, ok := <-first:
+			if ok {
+				item = i
+				from = 1
+			}
+		default:
+			select {
+			case <-ctx.Done():
+				return nil, 0, ctx.Err()
+			case i, ok := <-first:
+				if ok {
+					item = i
+					from = 1
+				}
+			case i, ok := <-second:
+				if ok {
+					item = i
+					from = 2
+				}
+			}
+		}
+	} else if first != nil {
+		// only 'first' is not nil, so behave like a normal select of 'first'
+		select {
+		case <-ctx.Done():
+			return nil, 0, ctx.Err()
+		case i, ok := <-first:
+			if ok {
+				item = i
+				from = 1
+			}
+		}
+	} else if second != nil {
+		// only 'second' is not nil, so behave like a normal select of 'second'
+		select {
+		case <-ctx.Done():
+			return nil, 0, ctx.Err()
+		case i, ok := <-second:
+			if ok {
+				item = i
+				from = 2
+			}
+		}
+	}
+
+	return item, from, nil
+}
diff --git a/internal/rechunker/progress.go b/internal/rechunker/progress.go
new file mode 100644
index 000000000..ca99c998a
--- /dev/null
+++ b/internal/rechunker/progress.go
@@ -0,0 +1,95 @@
+package rechunker
+
+import (
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/restic/restic/internal/ui"
+	"github.com/restic/restic/internal/ui/progress"
+)
+
+type Progress struct {
+	updater progress.Updater
+	m       sync.Mutex
+
+	filesFinished  int
+	filesTotal     int
+	bytesProcessed uint64
+	bytesTotal     uint64
+
+	printer progress.Printer
+	term    ui.Terminal
+	show    bool
+}
+
+func NewProgress(term ui.Terminal, printer progress.Printer, interval time.Duration) *Progress {
+	p := &Progress{
+		term:    term,
+		printer: printer,
+	}
+	p.updater = *progress.NewUpdater(interval, p.update)
+
+	return p
+}
+
+func (p *Progress) Start(fileCount int, totalSize uint64) {
+	p.m.Lock()
+	defer p.m.Unlock()
+
+	p.filesTotal = fileCount
+	p.bytesTotal = totalSize
+	p.show = true
+}
+
+func (p *Progress) AddFile(count int) {
+	p.m.Lock()
+	defer p.m.Unlock()
+
+	p.filesFinished += count
+}
+
+func (p *Progress) AddBlob(size uint64) {
+	p.m.Lock()
+	defer p.m.Unlock()
+
+	p.bytesProcessed += size
+}
+
+func (p *Progress) update(duration time.Duration, final bool) {
+	p.m.Lock()
+	defer p.m.Unlock()
+
+	if p.show && !final {
+		formattedDuration := ui.FormatDuration(duration)
+		formattedBytesProcessed := ui.FormatBytes(p.bytesProcessed)
+		formattedBytesTotal := ui.FormatBytes(p.bytesTotal)
+		percent := ui.FormatPercent(p.bytesProcessed, p.bytesTotal)
+		progress := []string{
+			fmt.Sprintf("[%s] %v/%v distinct files processed",
+				formattedDuration, p.filesFinished, p.filesTotal),
+			fmt.Sprintf("%s %s/%s", percent, formattedBytesProcessed, formattedBytesTotal),
+		}
+		p.term.SetStatus(progress)
+	} else if p.show && final {
+		formattedDuration := ui.FormatDuration(duration)
+		formattedBytesProcessed := ui.FormatBytes(p.bytesProcessed)
+		formattedBytesTotal := ui.FormatBytes(p.bytesTotal)
+		percent := ui.FormatPercent(p.bytesProcessed, p.bytesTotal)
+
+		p.term.SetStatus(nil)
+		p.printer.P("[%s] %v/%v distinct files processed\n", formattedDuration, p.filesFinished, p.filesTotal)
+		p.printer.P("%s %s/%s\n", percent, formattedBytesProcessed, formattedBytesTotal)
+		p.show = false
+	} else {
+		p.term.SetStatus(nil)
+	}
+}
+
+func (p *Progress) Done() {
+	if p == nil {
+		return
+	}
+
+	p.updater.Done()
+}
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
new file mode 100644
index 000000000..2f9d8c97e
--- /dev/null
+++ b/internal/rechunker/rechunker.go
@@ -0,0 +1,609 @@
+package rechunker
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/json"
+	"fmt"
+	"runtime"
+	"slices"
+	"sync"
+	"sync/atomic"
+
+	"github.com/restic/chunker"
+	"github.com/restic/restic/internal/data"
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/restic"
+	"golang.org/x/sync/errgroup"
+)
+
+type Rechunker struct {
+	cfg     Config
+	tracker *eventTracker
+
+	filesList    []*ChunkedFile
+	totalSize    uint64
+	rechunkReady bool
+
+	idx *Index
+
+	rechunkMap          map[restic.ID]restic.IDs // hashOfIDs of srcBlobIDs -> dstBlobIDs
+	rechunkMapLock      sync.Mutex
+	totalAddedToDstRepo atomic.Uint64
+	rewriteTreeMap      map[restic.ID]restic.ID // original tree ID (in src repo) -> rewritten tree ID (in dst repo)
+}
+
+type Config struct {
+	CacheSize          int
+	SmallFileThreshold int // files less than the threshold will be prioritized when all blobs are ready in the cache
+	Pol                chunker.Pol
+}
+
+// Index is immutable after Plan() returns.
+type Index struct {
+	BlobSize    map[restic.ID]uint          // blob ID -> data length of the blob
+	BlobToPack  map[restic.ID]restic.ID     // blob ID -> ID of the pack the blob is loaded from
+	PackToBlobs map[restic.ID][]restic.Blob // pack ID -> list of blobs to be loaded from the pack
+}
+
+func NewRechunker(cfg Config) *Rechunker {
+	return &Rechunker{
+		cfg:            cfg,
+		rechunkMap:     map[restic.ID]restic.IDs{},
+		rewriteTreeMap: map[restic.ID]restic.ID{},
+	}
+}
+
+func (rc *Rechunker) reset() {
+	rc.tracker = nil
+
+	rc.filesList = nil
+	rc.rechunkReady = false
+
+	rc.idx = nil
+}
+
+func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTrees []restic.ID) error {
+	rc.reset()
+
+	visitedFiles := restic.IDSet{}
+	visitedTrees := restic.IDSet{}
+
+	// skip previously processed files and trees
+	for k := range rc.rechunkMap {
+		visitedFiles.Insert(k)
+	}
+	for k := range rc.rewriteTreeMap {
+		visitedTrees.Insert(k)
+	}
+
+	var err error
+	debug.Log("Gathering distinct file Contents from target snapshots")
+	rc.filesList, rc.totalSize, err = gatherFileContents(ctx, srcRepo, rootTrees, visitedFiles, visitedTrees)
+	if err != nil {
+		return err
+	}
+
+	debug.Log("Building the internal index for use in Rechunk()")
+	rc.idx, rc.tracker, err = createIndex(rc.filesList, srcRepo.LookupBlob, rc.cfg)
+	if err != nil {
+		return err
+	}
+
+	debug.Log("Sorting the file list by their chunk counts (descending order)")
+	slices.SortFunc(rc.filesList, func(a, b *ChunkedFile) int {
+		return len(b.IDs) - len(a.IDs) // descending order
+	})
+
+	rc.rechunkReady = true
+
+	return nil
+}
+
+func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees restic.IDs, visitedFiles restic.IDSet, visitedTrees restic.IDSet) (filesList []*ChunkedFile, totalSize uint64, err error) {
+	wg, ctx := errgroup.WithContext(ctx)
+
+	// create StreamTrees channel that streams through all subtrees in target snapshots
+	treeStream := data.StreamTrees(ctx, wg, repo, rootTrees, func(id restic.ID) bool {
+		visited := visitedTrees.Has(id)
+		visitedTrees.Insert(id)
+		return visited
+	}, nil)
+
+	// gather all distinct file Contents under trees
+	wg.Go(func() error {
+		for tree := range treeStream {
+			if tree.Error != nil {
+				return tree.Error
+			}
+
+			// check if the tree blob is unstable json
+			buf, err := json.Marshal(tree.Tree)
+			if err != nil {
+				return err
+			}
+			buf = append(buf, '\n')
+			if tree.ID != restic.Hash(buf) {
+				return fmt.Errorf("can't run rechunk-copy, because the following tree can't be rewritten without losing information:\n%v", tree.ID.String())
+			}
+
+			for _, node := range tree.Nodes {
+				// you only have to rechunk regular files; so skip other file types
+				if node.Type == data.NodeTypeFile {
+					hashval := HashOfIDs(node.Content)
+					if visitedFiles.Has(hashval) {
+						continue
+					}
+					visitedFiles.Insert(hashval)
+
+					filesList = append(filesList, &ChunkedFile{
+						node.Content,
+						hashval,
+					})
+					totalSize += node.Size
+				}
+			}
+		}
+		return nil
+	})
+	err = wg.Wait()
+	if err != nil {
+		return nil, 0, err
+	}
+	return filesList, totalSize, nil
+}
+
+func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id restic.ID) []restic.PackedBlob, cfg Config) (*Index, *eventTracker, error) {
+	// collect blob usage info
+	blobCount := map[restic.ID]int{}
+	for _, file := range filesList {
+		for _, blob := range file.IDs {
+			blobCount[blob]++
+		}
+	}
+
+	// debugStats: record the number of blobs used
+	if debugStats != nil {
+		debugStats.Add("total_blob_count", len(blobCount))
+	}
+
+	// build blob lookup info
+	blobSize := map[restic.ID]uint{}
+	blobToPack := map[restic.ID]restic.ID{}
+	packToBlobs := map[restic.ID][]restic.Blob{}
+	for blob := range blobCount {
+		packs := lookupBlob(restic.DataBlob, blob)
+		if len(packs) == 0 {
+			return nil, nil, fmt.Errorf("can't find blob from source repo: %v", blob)
+		}
+		pb := packs[0]
+
+		blobSize[pb.Blob.ID] = pb.DataLength()
+		blobToPack[pb.Blob.ID] = pb.PackID
+		packToBlobs[pb.PackID] = append(packToBlobs[pb.PackID], pb.Blob)
+	}
+
+	idx := &Index{
+		BlobSize:    blobSize,
+		BlobToPack:  blobToPack,
+		PackToBlobs: packToBlobs,
+	}
+
+	// build blob trace info for small files
+	// if blob cache is enabled, Rechunker tracks small files' remaining blob count
+	// until all blobs are available in the cache (rc.tracker.sfBlobRequires);
+	// when the file has all its blobs ready, it is prioritized to be processed first.
+	// this logic is handled by rc.priorityFilesHandler.
+	sfBlobRequires := map[restic.ID]int{}
+	sfBlobToFiles := map[restic.ID][]*ChunkedFile{}
+	for _, file := range filesList {
+		if file.Len() >= cfg.SmallFileThreshold {
+			continue
+		}
+		blobSet := restic.NewIDSet(file.IDs...)
+		sfBlobRequires[file.hashval] = len(blobSet)
+		for b := range blobSet {
+			sfBlobToFiles[b] = append(sfBlobToFiles[b], file)
+		}
+	}
+
+	tracker := &eventTracker{
+		idx:                idx,
+		filesContaining:    sfBlobToFiles,
+		blobsToPrepare:     sfBlobRequires,
+		remainingBlobNeeds: blobCount,
+	}
+
+	return idx, tracker, nil
+}
+
+type Loader interface {
+	restic.BlobLoader
+	LoadBlobsFromPack(context.Context, restic.ID, []restic.Blob, func(restic.BlobHandle, []byte, error) error) error
+	Connections() uint
+}
+
+func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo Loader, dstRepo restic.WithBlobUploader, p *Progress) error {
+	if !rc.rechunkReady {
+		return fmt.Errorf("Plan() must be run first before Rechunk()")
+	}
+	rc.rechunkReady = false
+
+	debug.Log("Rechunk start.")
+	defer debug.Log("Rechunk done.")
+
+	numWorkers := min(runtime.GOMAXPROCS(0), int(srcRepo.Connections()))
+	numDownloaders := numWorkers
+	debug.Log("srcRepo.Connections(): %v", srcRepo.Connections())
+
+	// Phase 1: Setup Infrastructure
+
+	// start blob cache
+	var downloader restic.BlobLoader
+	var cache *BlobCache
+	if rc.cfg.CacheSize > 0 {
+		downloader, cache = rc.setupCache(ctx, srcRepo, numDownloaders)
+		defer cache.Close()
+	} else {
+		downloader = srcRepo
+	}
+
+	// start dispatcher
+	dispatcher := rc.setupDispatcher(ctx)
+	defer dispatcher.Close()
+
+	// Phase 2: Run Workers
+	bufferPool := NewBufferPool(2 * (numWorkers + 1))
+	err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error {
+		debug.Log("Starting uploader")
+		defer debug.Log("Closing uploader")
+
+		wg, ctx := errgroup.WithContext(ctx)
+		rc.runWorkers(ctx, wg, numWorkers, downloader, uploader, dispatcher.Next, bufferPool, p)
+		rc.runWorkers(ctx, wg, 1, downloader, uploader, dispatcher.NextPriority, bufferPool, p)
+
+		return wg.Wait()
+	})
+	if err != nil {
+		return err
+	}
+
+	debugPrintRechunkReport(rc)
+
+	return nil
+}
+
+func (rc *Rechunker) setupCache(ctx context.Context, srcRepo PackLoader, numDownloaders int) (repo restic.BlobLoader, cache *BlobCache) {
+	debug.Log("Creating blob cache: cacheSize %v", rc.cfg.CacheSize)
+
+	// wrap srcRepo with cache. Now repo's LoadBlob() method will be transparently mediated by blob cache
+	repo, cache = WrapWithCache(ctx, srcRepo, rc.cfg.CacheSize, numDownloaders, rc.idx, rc.tracker.BlobReady, rc.tracker.BlobUnready)
+
+	// register callback to ignore obsolete blobs
+	rc.tracker.obsoleteBlobCB = cache.Ignore
+
+	return repo, cache
+}
+
+func (rc *Rechunker) setupDispatcher(ctx context.Context) (dispatcher *Dispatcher) {
+	debug.Log("Running file dispatcher")
+
+	// If the blob cache is enabled, priority dispatch will be used.
+	// With priority dispatch, (small) files with all their blobs ready in the cache are prioritized.
+	// if the blob cache is disabled, dispatch order simply follows the filesList.
+	if rc.cfg.CacheSize > 0 {
+		dispatcher = NewDispatcher(ctx, rc.filesList, true)
+
+		// register callback to push priority files
+		rc.tracker.priorityCB = dispatcher.PushPriority
+	} else {
+		dispatcher = NewDispatcher(ctx, rc.filesList, false)
+	}
+	return dispatcher
+}
+
+func (rc *Rechunker) runWorkers(ctx context.Context, wg *errgroup.Group, numWorkers int,
+	downloader restic.BlobLoader, uploader restic.BlobSaver, receiveJob func(context.Context) (*ChunkedFile, bool, error),
+	bufferPool *BufferPool, p *Progress) {
+	for range numWorkers {
+		wg.Go(func() error {
+			debug.Log("Starting worker")
+			worker := NewWorker(
+				rc.cfg.Pol,
+				downloader,
+				uploader,
+				bufferPool,
+				rc.tracker.ReadProgress,
+			)
+
+			for {
+				debug.Log("receiving job")
+				file, ok, err := receiveJob(ctx)
+				if err != nil {
+					return err
+				}
+				if !ok {
+					return nil
+				}
+
+				debug.Log("Starting file %v", file.hashval.Str())
+				result, err := worker.RunFile(ctx, file.IDs, p)
+				if err != nil {
+					return err
+				}
+				debug.Log("Finished file %v", file.hashval.Str())
+				if p != nil {
+					p.AddFile(1)
+				}
+
+				rc.totalAddedToDstRepo.Add(result.addedToRepository)
+				rc.rechunkMapLock.Lock()
+				rc.rechunkMap[file.hashval] = result.dstBlobs
+				rc.rechunkMapLock.Unlock()
+			}
+		})
+	}
+}
+
+func (rc *Rechunker) RewriteTree(ctx context.Context, srcRepo restic.BlobLoader, dstRepo restic.BlobSaver, nodeID restic.ID) (restic.ID, error) {
+	// check if the identical tree has already been processed
+	newID, ok := rc.rewriteTreeMap[nodeID]
+	if ok {
+		return newID, nil
+	}
+
+	curTree, err := data.LoadTree(ctx, srcRepo, nodeID)
+	if err != nil {
+		return restic.ID{}, err
+	}
+
+	tb := data.NewTreeJSONBuilder()
+	for _, node := range curTree.Nodes {
+		if ctx.Err() != nil {
+			return restic.ID{}, ctx.Err()
+		}
+
+		err = rc.rewriteNode(node)
+		if err != nil {
+			return restic.ID{}, err
+		}
+
+		// if the node is non-directory node, add it to the tree
+		if node.Type != data.NodeTypeDir {
+			if err := tb.AddNode(node); err != nil {
+				return restic.ID{}, err
+			}
+			continue
+		}
+
+		// if the node is directory node, rewrite it recursively (a missing subtree ID is an error)
+		if node.Subtree == nil {
+			return restic.ID{}, fmt.Errorf("directory node %q has no subtree", node.Name)
+		}
+		newID, err := rc.RewriteTree(ctx, srcRepo, dstRepo, *node.Subtree)
+		if err != nil {
+			return restic.ID{}, err
+		}
+		node.Subtree = &newID
+		if err := tb.AddNode(node); err != nil {
+			return restic.ID{}, err
+		}
+	}
+
+	tree, err := tb.Finalize()
+	if err != nil {
+		return restic.ID{}, err
+	}
+
+	// save new tree to the destination repo
+	newTreeID, known, size, err := dstRepo.SaveBlob(ctx, restic.TreeBlob, tree, restic.ID{}, false)
+	if err != nil {
+		return restic.ID{}, err
+	}
+	rc.rewriteTreeMap[nodeID] = newTreeID
+
+	if !known {
+		rc.totalAddedToDstRepo.Add(uint64(size))
+	}
+
+	return newTreeID, nil
+}
+
+func (rc *Rechunker) rewriteNode(node *data.Node) error {
+	if node.Type != data.NodeTypeFile {
+		return nil
+	}
+
+	hashval := HashOfIDs(node.Content)
+	dstBlobs, ok := rc.rechunkMap[hashval]
+	if !ok {
+		return fmt.Errorf("can't find from rechunkBlobsMap: %v", node.Content.String())
+	}
+	node.Content = dstBlobs
+	return nil
+}
+
+func (rc *Rechunker) NumFiles() int {
+	return len(rc.filesList)
+}
+
+func (rc *Rechunker) GetRewrittenTree(originalTree restic.ID) (restic.ID, error) {
+	newID, ok := rc.rewriteTreeMap[originalTree]
+	if !ok {
+		return restic.ID{}, fmt.Errorf("rewritten tree does not exist for original tree %v", originalTree)
+	}
+	return newID, nil
+}
+
+func (rc *Rechunker) TotalSize() uint64 {
+	return rc.totalSize
+}
+
+func (rc *Rechunker) PackCount() int {
+	return len(rc.idx.PackToBlobs)
+}
+
+func (rc *Rechunker) TotalAddedToDstRepo() uint64 {
+	return rc.totalAddedToDstRepo.Load()
+}
+
+func (idx *Index) AdvanceCursor(c Cursor, bytesProcessed uint) Cursor {
+	if idx == nil {
+		panic("call from nil index")
+	}
+
+	for c.BlobIdx < len(c.blobs) {
+		r := idx.BlobSize[c.blobs[c.BlobIdx]] - c.Offset
+
+		if bytesProcessed < r {
+			c.Offset += bytesProcessed
+			bytesProcessed = 0
+			break
+		}
+
+		bytesProcessed -= r
+		c.BlobIdx++
+		c.Offset = 0
+	}
+
+	return c
+}
+
+func HashOfIDs(ids restic.IDs) restic.ID {
+	c := make([]byte, 0, len(ids)*32)
+	for _, id := range ids {
+		c = append(c, id[:]...)
+	}
+	return sha256.Sum256(c)
+}
+
+type Cursor struct {
+	blobs   restic.IDs
+	BlobIdx int
+	Offset  uint
+}
+
+type Interval struct {
+	Start Cursor
+	End   Cursor
+}
+
+type ChunkedFile struct {
+	restic.IDs
+	hashval restic.ID
+}
+
+type eventTracker struct {
+	mu sync.Mutex
+
+	idx *Index
+
+	filesContaining map[restic.ID][]*ChunkedFile // blobID -> files containing that blob
+	blobsToPrepare  map[restic.ID]int            // file hashval -> number of blobs until all blobs ready in the cache
+
+	remainingBlobNeeds map[restic.ID]int // blobID -> remaining blob needs
+
+	priorityCB     func(files []*ChunkedFile) bool
+	obsoleteBlobCB func(ids restic.IDs)
+}
+
+func (t *eventTracker) BlobReady(ids restic.IDs) {
+	// when a new blob is ready, (small) files containing that blob have
+	// their blobsToPrepare decreased by one.
+	// The list of files whose blobs are all prepared is passed to priorityCB.
+
+	if t.priorityCB == nil {
+		// if there is no callback, it is of no meaning to track the state
+		return
+	}
+
+	var readyFiles []*ChunkedFile
+
+	t.mu.Lock()
+	for _, id := range ids {
+		for _, file := range t.filesContaining[id] {
+			n := t.blobsToPrepare[file.hashval]
+			if n > 0 {
+				n--
+				if n == 0 {
+					readyFiles = append(readyFiles, file)
+				}
+				t.blobsToPrepare[file.hashval] = n
+			}
+		}
+	}
+	t.mu.Unlock()
+
+	// debugStats: trace blob load count
+	if debugStats != nil {
+		dAdds := map[string]int{}
+		for _, id := range ids {
+			dAdds["load:"+id.String()]++
+		}
+		debugStats.AddMap(dAdds)
+	}
+
+	if len(readyFiles) == 0 {
+		return
+	}
+
+	// priorityCB was checked to be non-nil on entry; the callback's
+	// boolean result is deliberately ignored here.
+	_ = t.priorityCB(readyFiles)
+}
+
+func (t *eventTracker) BlobUnready(ids restic.IDs) {
+	// when a blob is evicted, (small) files containing that blob has
+	// their blobsToPrepare increased by one. However, ignore files
+	// once they have reached blobsToPrepare value zero; they are no longer tracked.
+
+	if t.priorityCB == nil {
+		// if there is no callback, it is of no meaning to track progress
+		return
+	}
+
+	t.mu.Lock()
+	for _, id := range ids {
+		filesToUpdate := t.filesContaining[id]
+		for _, file := range filesToUpdate {
+			// files with blobsToPrepare==0 is not tracked
+			if t.blobsToPrepare[file.hashval] > 0 {
+				t.blobsToPrepare[file.hashval]++
+			}
+		}
+	}
+	t.mu.Unlock()
+}
+
+func (t *eventTracker) ReadProgress(cursor Cursor, bytesProcessed uint) Cursor {
+	start, end := cursor, t.idx.AdvanceCursor(cursor, bytesProcessed)
+
+	if t.obsoleteBlobCB == nil {
+		// if there is no callback, it is of no meaning to track the state
+		return end
+	}
+
+	if start.BlobIdx == end.BlobIdx { // nothing to do
+		return end
+	}
+
+	blobs := cursor.blobs[start.BlobIdx:end.BlobIdx]
+	var obsolete restic.IDs
+	t.mu.Lock()
+	for _, b := range blobs {
+		t.remainingBlobNeeds[b]--
+		if t.remainingBlobNeeds[b] == 0 {
+			obsolete = append(obsolete, b)
+		}
+	}
+	t.mu.Unlock()
+
+	if len(obsolete) == 0 {
+		return end
+	}
+
+	if t.obsoleteBlobCB != nil {
+		t.obsoleteBlobCB(obsolete)
+	}
+	return end
+}
diff --git a/internal/rechunker/rechunker_test.go b/internal/rechunker/rechunker_test.go
new file mode 100644
index 000000000..689b9fa0a
--- /dev/null
+++ b/internal/rechunker/rechunker_test.go
@@ -0,0 +1,416 @@
+package rechunker
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"sort"
+	"sync"
+	"testing"
+
+	"github.com/restic/chunker"
+
+	"github.com/restic/restic/internal/data"
+	"github.com/restic/restic/internal/restic"
+	rtest "github.com/restic/restic/internal/test"
+)
+
+// Reference: walker_test.go, rewriter_test.go (v0.18.0)
+
+// TestRechunkerRepo implements minimal Loader/Saver interface
+type TestRechunkerRepo struct {
+	loadBlob          func(id restic.ID, buf []byte) ([]byte, error)
+	loadBlobsFromPack func(packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error
+	saveBlob          func(buf []byte) (newID restic.ID, known bool, size int, err error)
+}
+
+func (r *TestRechunkerRepo) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) {
+	return r.loadBlob(id, buf)
+}
+func (r *TestRechunkerRepo) LoadBlobsFromPack(ctx context.Context, packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error {
+	return r.loadBlobsFromPack(packID, blobs, handleBlobFn)
+}
+func (r *TestRechunkerRepo) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, size int, err error) {
+	return r.saveBlob(buf)
+}
+func (r *TestRechunkerRepo) WithBlobUploader(ctx context.Context, fn func(ctx context.Context, uploader restic.BlobSaver) error) error {
+	return fn(ctx, r)
+}
+func (r *TestRechunkerRepo) Connections() uint {
+	return 5
+}
+
+// chunk `files` by `pol` and return fileIndex (map from path to blob IDs) and chunkStore (map from blob ID to bytes data)
+func chunkFiles(chnker *chunker.Chunker, pol chunker.Pol, files map[string][]byte) (map[string]restic.IDs, map[restic.ID][]byte) {
+	fileIndex := map[string]restic.IDs{}
+	chunkStore := map[restic.ID][]byte{}
+
+	for name, data := range files {
+		r := bytes.NewReader(data)
+		chnker.Reset(r, pol)
+		chunks := restic.IDs{}
+
+		for {
+			chunk, err := chnker.Next(nil)
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				panic(err)
+			}
+
+			id := restic.Hash(chunk.Data)
+			chunks = append(chunks, id)
+			if _, ok := chunkStore[id]; !ok {
+				chunkStore[id] = chunk.Data
+			}
+		}
+
+		fileIndex[name] = chunks
+	}
+
+	return fileIndex, chunkStore
+}
+
+// arbitrary pack assignment for blobs in chunkStore
+func simulatedPack(chunkStore map[restic.ID][]byte) map[restic.ID]restic.ID {
+	blobToPack := map[restic.ID]restic.ID{}
+	i := 0
+	packID := restic.NewRandomID()
+	for blobID := range chunkStore {
+		blobToPack[blobID] = packID
+		i++
+		if i%10 == 0 {
+			packID = restic.NewRandomID()
+		}
+	}
+
+	return blobToPack
+}
+
+func prepareData() map[string][]byte {
+	files := map[string][]byte{
+		"0": {},
+		"1": rtest.Random(1, 10_000),
+		"2": rtest.Random(4, 10_000_000),
+		"3": rtest.Random(5, 150_000_000),
+	}
+	files["2_duplicate"] = files["2"]
+	headChanged := make([]byte, 0, 120_000_000)
+	headChanged = append(headChanged, rtest.Random(6, 10_000_000)...)
+	headChanged = append(headChanged, files["3"][40_000_000:]...)
+	files["3_head_changed"] = headChanged
+	tailChanged := make([]byte, 0, 100_000_000)
+	tailChanged = append(tailChanged, files["3"][:90_000_000]...)
+	tailChanged = append(tailChanged, rtest.Random(7, 10_000_000)...)
+	files["3_tail_changed"] = tailChanged
+
+	return files
+}
+
+func TestRechunker(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.TODO())
+	defer cancel()
+
+	// generate random polynomials
+	srcChunkerParam, err := chunker.RandomPolynomial()
+	rtest.OK(t, err)
+	dstChunkerParam, err := chunker.RandomPolynomial()
+	rtest.OK(t, err)
+
+	// prepare test data
+	files := prepareData()
+
+	// prepare chunker and minimal repositories
+	chnker := chunker.New(nil, 0)
+	srcFileIndex, srcChunkStore := chunkFiles(chnker, srcChunkerParam, files)
+	dstWantsFileIndex, dstWantsChunkStore := chunkFiles(chnker, dstChunkerParam, files)
+	rechunkStore := restic.IDSet{}
+
+	srcFilesList := []*ChunkedFile{}
+	for _, file := range srcFileIndex {
+		srcFilesList = append(srcFilesList, &ChunkedFile{file, HashOfIDs(file)})
+	}
+	srcBlobToPack := simulatedPack(srcChunkStore)
+
+	srcRepo := &TestRechunkerRepo{
+		loadBlob: func(id restic.ID, buf []byte) ([]byte, error) {
+			blob, ok := srcChunkStore[id]
+			if !ok {
+				return nil, fmt.Errorf("blob not found")
+			}
+
+			if cap(buf) < len(blob) {
+				buf = make([]byte, len(blob))
+			}
+			buf = buf[:len(blob)]
+			copy(buf, blob)
+
+			return buf, nil
+		},
+		loadBlobsFromPack: func(packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error {
+			for _, blob := range blobs {
+				if packID != srcBlobToPack[blob.ID] {
+					return fmt.Errorf("blob %v is not in the pack %v", blob.ID, packID)
+				}
+				err := handleBlobFn(blob.BlobHandle, srcChunkStore[blob.ID], nil)
+				if err != nil {
+					return err
+				}
+			}
+			return nil
+		},
+	}
+
+	// run test
+	cfg := Config{
+		// kept below 1<<31 so the constant still fits in int on 32-bit platforms
+		CacheSize:          1024 * (1 << 20),
+		SmallFileThreshold: 25,
+		Pol:                dstChunkerParam,
+	}
+	rechunker := NewRechunker(cfg)
+
+	// manually configure rechunker instead of running Plan(), because we are using mock repo
+	rechunker.filesList = srcFilesList
+	rechunker.idx, rechunker.tracker, err = createIndex(srcFilesList, func(t restic.BlobType, id restic.ID) []restic.PackedBlob {
+		pb := restic.PackedBlob{}
+		pb.ID = id
+		pb.Type = t
+		pb.UncompressedLength = uint(len(srcChunkStore[id]))
+		pb.PackID = srcBlobToPack[id]
+
+		return []restic.PackedBlob{pb}
+	}, cfg)
+	rtest.OK(t, err)
+
+	rechunker.rechunkReady = true
+
+	saveBlobLock := sync.Mutex{}
+	rechunkTestRepo := &TestRechunkerRepo{
+		saveBlob: func(buf []byte) (newID restic.ID, known bool, size int, err error) {
+			newID = restic.Hash(buf)
+			saveBlobLock.Lock()
+			rechunkStore.Insert(newID)
+			saveBlobLock.Unlock()
+			return
+		},
+	}
+	rtest.OK(t, rechunker.Rechunk(ctx, srcRepo, rechunkTestRepo, nil))
+
+	// compare test result (by rechunker) vs dstWantsChunkedFiles (ordinary backup)
+	testResult := rechunker.rechunkMap
+	for name, srcBlobs := range srcFileIndex {
+		hashval := HashOfIDs(srcBlobs)
+		wants := HashOfIDs(dstWantsFileIndex[name])
+		if HashOfIDs(testResult[hashval]) != wants {
+			t.Errorf("blob mismatch for file '%v'", name)
+		}
+	}
+
+	// check if all blobs are stored
+	for blobID := range dstWantsChunkStore {
+		if !rechunkStore.Has(blobID) {
+			t.Errorf("blob missing: %v", blobID.Str())
+		}
+	}
+}
+
+type BlobIDsPair struct {
+	srcBlobIDs restic.IDs
+	dstBlobIDs restic.IDs
+}
+
+func generateBlobIDsPair(nSrc, nDst uint) BlobIDsPair {
+	srcIDs := make(restic.IDs, 0, nSrc)
+	dstIDs := make(restic.IDs, 0, nDst)
+	for range nSrc {
+		srcIDs = append(srcIDs, restic.NewRandomID())
+	}
+	for range nDst {
+		dstIDs = append(dstIDs, restic.NewRandomID())
+	}
+
+	return BlobIDsPair{srcBlobIDs: srcIDs, dstBlobIDs: dstIDs}
+}
+
+type TreeMap map[restic.ID][]byte
+type TestTree map[string]interface{}
+type TestContentNode struct {
+	Type    data.NodeType
+	Size    uint64
+	Content restic.IDs
+}
+
+func (t TreeMap) LoadBlob(_ context.Context, _ restic.BlobType, id restic.ID, _ []byte) ([]byte, error) {
+	buf, ok := t[id]
+	if !ok {
+		return nil, fmt.Errorf("blob does not exist")
+	}
+	return buf, nil
+}
+
+func (t TreeMap) SaveBlob(_ context.Context, _ restic.BlobType, buf []byte, _ restic.ID, _ bool) (newID restic.ID, known bool, size int, err error) {
+	id := restic.Hash(buf)
+	// a blob that is already stored is reported as known and adds no bytes
+	_, ok := t[id]
+	if ok {
+		return id, true, 0, nil
+	}
+	// store a private copy so later writes to buf cannot alter the stored blob
+	t[id] = append([]byte{}, buf...)
+	return id, false, len(buf), nil
+}
+
+func BuildTreeMap(tree TestTree) (m TreeMap, root restic.ID) {
+	m = TreeMap{}
+	id := buildTreeMap(tree, m)
+	return m, id
+}
+
+func buildTreeMap(tree TestTree, m TreeMap) restic.ID {
+	tb := data.NewTreeJSONBuilder()
+	var names []string
+	for name := range tree {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+
+	for _, name := range names {
+		item := tree[name]
+		switch elem := item.(type) {
+		case TestTree:
+			id := buildTreeMap(elem, m)
+			err := tb.AddNode(&data.Node{
+				Name:    name,
+				Subtree: &id,
+				Type:    data.NodeTypeDir,
+			})
+			if err != nil {
+				panic(err)
+			}
+		case TestContentNode:
+			err := tb.AddNode(&data.Node{
+				Name:    name,
+				Type:    elem.Type,
+				Size:    elem.Size,
+				Content: elem.Content,
+			})
+			if err != nil {
+				panic(err)
+			}
+		default:
+			panic(fmt.Sprintf("invalid type %T", elem))
+		}
+	}
+
+	buf, err := tb.Finalize()
+	if err != nil {
+		panic(err)
+	}
+
+	id := restic.Hash(buf)
+
+	if _, ok := m[id]; !ok {
+		m[id] = buf
+	}
+
+	return id
+}
+
+func TestRechunkerRewriteTree(t *testing.T) {
+	blobIDsMap := map[string]BlobIDsPair{
+		"a":        generateBlobIDsPair(1, 1),
+		"subdir/a": generateBlobIDsPair(30, 31),
+		"x":        generateBlobIDsPair(42, 41),
+		"0":        generateBlobIDsPair(0, 0),
+	}
+	rechunkBlobsMap := map[restic.ID]restic.IDs{}
+	for _, v := range blobIDsMap {
+		rechunkBlobsMap[HashOfIDs(v.srcBlobIDs)] = v.dstBlobIDs
+	}
+
+	tree := TestTree{
+		"zerofile": TestContentNode{
+			Type:    data.NodeTypeFile,
+			Size:    0,
+			Content: restic.IDs{},
+		},
+		"a": TestContentNode{
+			Type:    data.NodeTypeFile,
+			Size:    1,
+			Content: blobIDsMap["a"].srcBlobIDs,
+		},
+		"subdir": TestTree{
+			"a": TestContentNode{
+				Type:    data.NodeTypeFile,
+				Size:    3,
+				Content: blobIDsMap["subdir/a"].srcBlobIDs,
+			},
+			"x": TestContentNode{
+				Type:    data.NodeTypeFile,
+				Size:    2,
+				Content: blobIDsMap["x"].srcBlobIDs,
+			},
+			"subdir": TestTree{
+				"dup_x": TestContentNode{
+					Type:    data.NodeTypeFile,
+					Size:    2,
+					Content: blobIDsMap["x"].srcBlobIDs,
+				},
+				"nonregularfile": TestContentNode{
+					Type: data.NodeTypeSymlink,
+				},
+			},
+		},
+	}
+	wants := TestTree{
+		"zerofile": TestContentNode{
+			Type:    data.NodeTypeFile,
+			Size:    0,
+			Content: restic.IDs{},
+		},
+		"a": TestContentNode{
+			Type:    data.NodeTypeFile,
+			Size:    1,
+			Content: blobIDsMap["a"].dstBlobIDs,
+		},
+		"subdir": TestTree{
+			"a": TestContentNode{
+				Type:    data.NodeTypeFile,
+				Size:    3,
+				Content: blobIDsMap["subdir/a"].dstBlobIDs,
+			},
+			"x": TestContentNode{
+				Type:    data.NodeTypeFile,
+				Size:    2,
+				Content: blobIDsMap["x"].dstBlobIDs,
+			},
+			"subdir": TestTree{
+				"dup_x": TestContentNode{
+					Type:    data.NodeTypeFile,
+					Size:    2,
+					Content: blobIDsMap["x"].dstBlobIDs,
+				},
+				"nonregularfile": TestContentNode{
+					Type: data.NodeTypeSymlink,
+				},
+			},
+		},
+	}
+
+	srcRepo, srcRoot := BuildTreeMap(tree)
+	_, wantsRoot := BuildTreeMap(wants)
+
+	testsRepo := TreeMap{}
+	rechunker := NewRechunker(Config{})
+	rechunker.rechunkMap = rechunkBlobsMap
+	testsRoot, err := rechunker.RewriteTree(context.TODO(), srcRepo, testsRepo, srcRoot)
+	if err != nil {
+		t.Error(err)
+	}
+	if wantsRoot != testsRoot {
+		t.Errorf("tree mismatch. wants: %v, tests: %v", wantsRoot, testsRoot)
+	}
+}
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
new file mode 100644
index 000000000..d1d687ddf
--- /dev/null
+++ b/internal/rechunker/worker.go
@@ -0,0 +1,226 @@
+package rechunker
+
+import (
+	"context"
+	"io"
+
+	"github.com/restic/chunker"
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/restic"
+	"golang.org/x/sync/errgroup"
+)
+
+type FileResult struct {
+	dstBlobs          restic.IDs
+	addedToRepository uint64
+}
+type Worker struct {
+	pool *BufferPool
+
+	chunker    *chunker.Chunker
+	pol        chunker.Pol
+	downloader restic.BlobLoader
+	uploader   restic.BlobSaver
+
+	readProgressCallback func(cursor Cursor, bytesProcessed uint) Cursor
+}
+
+func NewWorker(pol chunker.Pol, downloader restic.BlobLoader, uploader restic.BlobSaver,
+	bufferPool *BufferPool,
+	onReadCallback func(Cursor, uint) Cursor,
+) *Worker {
+	return &Worker{
+		pool: bufferPool,
+
+		chunker:    chunker.New(nil, pol),
+		pol:        pol,
+		downloader: downloader,
+		uploader:   uploader,
+
+		readProgressCallback: onReadCallback,
+	}
+}
+
+func (w *Worker) RunFile(ctx context.Context, srcBlobs restic.IDs, p *Progress) (FileResult, error) {
+	buf := w.pool.Get()
+
+	// setup reader
+	reader := NewBlobSequenceReader(ctx, srcBlobs, w.downloader, buf)
+
+	// Run worker pipeline (reader and writer)
+	wg, ctx := errgroup.WithContext(ctx)
+
+	chChunk := make(chan chunker.Chunk)
+	chResult := make(chan FileResult, 1)
+
+	// Run reader goroutine
+	w.runReader(ctx, wg, srcBlobs, reader, chChunk)
+
+	// Run writer goroutine
+	w.runWriter(ctx, wg, chChunk, chResult, p)
+
+	if err := wg.Wait(); err != nil {
+		return FileResult{}, err
+	}
+
+	result := <-chResult
+
+	w.pool.Put(buf)
+
+	return result, nil
+}
+
+func (w *Worker) runReader(ctx context.Context, wg *errgroup.Group, srcBlobs restic.IDs, reader *BlobSequenceReader, out chan<- chunker.Chunk) {
+	debug.Log("Starting reader goroutine")
+	wg.Go(func() error {
+		defer close(out)
+
+		w.chunker.Reset(reader, w.pol)
+
+		cursor := Cursor{blobs: srcBlobs}
+
+		for {
+			// bring buffer from bufferPool
+			buf := w.pool.Get()
+
+			// rechunk with new parameter
+			c, err := w.chunker.Next(buf)
+			if err == io.EOF { // reached EOF; all done
+				w.pool.Put(buf)
+				return nil
+			}
+			if err != nil {
+				return err
+			}
+
+			// if onProgress callback is given, run it
+			if w.readProgressCallback != nil {
+				cursor = w.readProgressCallback(cursor, c.Length)
+			}
+
+			// send chunk to writer
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case out <- c:
+				debug.Log("Sending a new chunk of size %v to writer", c.Length)
+			}
+		}
+	})
+}
+
+func (w *Worker) runWriter(ctx context.Context, wg *errgroup.Group, in <-chan chunker.Chunk, out chan<- FileResult, p *Progress) {
+	debug.Log("Starting writer goroutine")
+	wg.Go(func() error {
+		defer close(out)
+
+		dstBlobs := restic.IDs{}
+		var addedSize uint64
+
+		for {
+			// receive chunk from reader
+			var c chunker.Chunk
+			var ok bool
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case c, ok = <-in:
+				if !ok { // EOF
+					out <- FileResult{
+						dstBlobs:          dstBlobs,
+						addedToRepository: addedSize,
+					}
+					return nil
+				}
+			}
+
+			// save chunk to destination repo
+			dstBlobID, known, size, err := w.uploader.SaveBlob(ctx, restic.DataBlob, c.Data, restic.ID{}, false)
+			if err != nil {
+				return err
+			}
+			if !known {
+				addedSize += uint64(size)
+				debug.Log("Stored new dst chunk %v into dstRepo", dstBlobID.Str())
+			}
+
+			if p != nil {
+				p.AddBlob(uint64(c.Length))
+			}
+
+			// recycle used buffer into bufferPool
+			w.pool.Put(c.Data)
+
+			dstBlobs = append(dstBlobs, dstBlobID)
+		}
+	})
+}
+
+type BlobSequenceReader struct {
+	ctx        context.Context
+	downloader restic.BlobLoader
+
+	blobs restic.IDs
+
+	data []byte // data of the current blob being read
+	buf  []byte // reused buffer space
+}
+
+func NewBlobSequenceReader(ctx context.Context, blobs restic.IDs, downloader restic.BlobLoader, buf []byte) *BlobSequenceReader {
+	return &BlobSequenceReader{
+		ctx:        ctx,
+		blobs:      blobs,
+		downloader: downloader,
+		buf:        buf,
+	}
+}
+
+func (r *BlobSequenceReader) Read(p []byte) (n int, err error) {
+	if len(r.data) == 0 {
+		// out of data; load the next blob
+		if len(r.blobs) == 0 {
+			return 0, io.EOF
+		}
+
+		// bring the blob data from backend
+		r.data, err = r.downloader.LoadBlob(r.ctx, restic.DataBlob, r.blobs[0], r.buf)
+		if err != nil {
+			return 0, err
+		}
+
+		r.blobs = r.blobs[1:]
+	}
+
+	// copy data from currentBuf to p
+	n = copy(p, r.data)
+	r.data = r.data[n:]
+	return n, nil
+}
+
+type BufferPool struct {
+	c chan []byte
+}
+
+func NewBufferPool(cap int) *BufferPool {
+	return &BufferPool{
+		c: make(chan []byte, cap),
+	}
+}
+
+func (p *BufferPool) Get() []byte {
+	select {
+	case buf := <-p.c:
+		return buf[:0]
+	default:
+		debug.Log("Allocating new buffer")
+		return make([]byte, 0, chunker.MaxSize)
+	}
+}
+
+func (p *BufferPool) Put(buf []byte) {
+	select {
+	case p.c <- buf:
+	default:
+		debug.Log("bufferPool is full; discarding the buffer")
+	}
+}

From a19ab5c01b35529bcb25ce327e597985888a57eb Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Thu, 4 Dec 2025 18:44:45 +0900
Subject: [PATCH 02/34] Update: Align function signature

---
 cmd/restic/cmd_rechunk_copy.go       | 2 +-
 internal/rechunker/rechunker.go      | 2 +-
 internal/rechunker/rechunker_test.go | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/cmd/restic/cmd_rechunk_copy.go b/cmd/restic/cmd_rechunk_copy.go
index e46751285..27de545fa 100644
--- a/cmd/restic/cmd_rechunk_copy.go
+++ b/cmd/restic/cmd_rechunk_copy.go
@@ -148,7 +148,7 @@ func runRechunkCopy(ctx context.Context, opts RechunkCopyOptions, gopts global.O
 
 	// rewrite trees
 	printer.P("Rewriting trees...\n")
-	err = dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error {
+	err = dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaverWithAsync) error {
 		for sn := range FindFilteredSnapshots(ctx, srcSnapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) {
 			debug.Log("Running RewriteTree() for tree ID %v", sn.Tree.Str())
 			_, err := rechnker.RewriteTree(ctx, srcRepo, uploader, *sn.Tree)
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 2f9d8c97e..e6b7a2e7d 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -254,7 +254,7 @@ func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo Loader, dstRepo restic
 
 	// Phase 2: Run Workers
 	bufferPool := NewBufferPool(2 * (numWorkers + 1))
-	err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error {
+	err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaverWithAsync) error {
 		debug.Log("Starting uploader")
 		defer debug.Log("Closing uploader")
 
diff --git a/internal/rechunker/rechunker_test.go b/internal/rechunker/rechunker_test.go
index 689b9fa0a..451c9e0d3 100644
--- a/internal/rechunker/rechunker_test.go
+++ b/internal/rechunker/rechunker_test.go
@@ -34,7 +34,9 @@ func (r *TestRechunkerRepo) LoadBlobsFromPack(ctx context.Context, packID restic
 func (r *TestRechunkerRepo) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, size int, err error) {
 	return r.saveBlob(buf)
 }
-func (r *TestRechunkerRepo) WithBlobUploader(ctx context.Context, fn func(ctx context.Context, uploader restic.BlobSaver) error) error {
+func (r *TestRechunkerRepo) SaveBlobAsync(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool, cb func(newID restic.ID, known bool, sizeInRepo int, err error)) {
+}
+func (r *TestRechunkerRepo) WithBlobUploader(ctx context.Context, fn func(ctx context.Context, uploader restic.BlobSaverWithAsync) error) error {
 	return fn(ctx, r)
 }
 func (r *TestRechunkerRepo) Connections() uint {

From 3a7bfada29cc5f0ea1ab267013f685d71e141e88 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Fri, 19 Dec 2025 11:59:45 +0900
Subject: [PATCH 03/34] Refactor: Minor refactoring and code cleanup

---
 internal/rechunker/dispatcher.go | 46 ++++++++------------------------
 internal/rechunker/rechunker.go  | 41 ++++++++++++----------------
 2 files changed, 28 insertions(+), 59 deletions(-)

diff --git a/internal/rechunker/dispatcher.go b/internal/rechunker/dispatcher.go
index 0b20fa7dc..9374ea3d4 100644
--- a/internal/rechunker/dispatcher.go
+++ b/internal/rechunker/dispatcher.go
@@ -207,8 +207,16 @@ func (d *Dispatcher) createPriorityCh(ctx context.Context, wg *errgroup.Group, v
 
 // PrioritySelect selects from two channels with priority; first channel first.
 func PrioritySelect(ctx context.Context, first <-chan *ChunkedFile, second <-chan *ChunkedFile) (item *ChunkedFile, from int, err error) {
-	if first != nil && second != nil {
-		// First, try to pull from channel 'first' only. If 'first' is not ready now, try both channels.
+	// First, try to pull from channel 'first' only. If 'first' is not ready now, try both channels.
+	select {
+	case <-ctx.Done():
+		return nil, 0, ctx.Err()
+	case i, ok := <-first:
+		if ok {
+			item = i
+			from = 1
+		}
+	default:
 		select {
 		case <-ctx.Done():
 			return nil, 0, ctx.Err()
@@ -217,38 +225,6 @@ func PrioritySelect(ctx context.Context, first <-chan *ChunkedFile, second <-cha
 				item = i
 				from = 1
 			}
-		default:
-			select {
-			case <-ctx.Done():
-				return nil, 0, ctx.Err()
-			case i, ok := <-first:
-				if ok {
-					item = i
-					from = 1
-				}
-			case i, ok := <-second:
-				if ok {
-					item = i
-					from = 2
-				}
-			}
-		}
-	} else if first != nil {
-		// only 'first' is not nil, so behave like a normal select of 'first'
-		select {
-		case <-ctx.Done():
-			return nil, 0, ctx.Err()
-		case i, ok := <-first:
-			if ok {
-				item = i
-				from = 1
-			}
-		}
-	} else if second != nil {
-		// only 'second' is not nil, so behave like a normal select of 'second'
-		select {
-		case <-ctx.Done():
-			return nil, 0, ctx.Err()
 		case i, ok := <-second:
 			if ok {
 				item = i
@@ -256,6 +232,6 @@ func PrioritySelect(ctx context.Context, first <-chan *ChunkedFile, second <-cha
 			}
 		}
 	}
-
+	
 	return item, from, nil
 }
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index e6b7a2e7d..4702a27da 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -19,14 +19,13 @@ import (
 
 type Rechunker struct {
 	cfg     Config
+	idx     *Index
 	tracker *eventTracker
 
 	filesList    []*ChunkedFile
 	totalSize    uint64
 	rechunkReady bool
 
-	idx *Index
-
 	rechunkMap          map[restic.ID]restic.IDs // hashOfIDs of srcBlobIDs -> dstBlobIDs
 	rechunkMapLock      sync.Mutex
 	totalAddedToDstRepo atomic.Uint64
@@ -55,12 +54,11 @@ func NewRechunker(cfg Config) *Rechunker {
 }
 
 func (rc *Rechunker) reset() {
+	rc.idx = nil
 	rc.tracker = nil
 
 	rc.filesList = nil
 	rc.rechunkReady = false
-
-	rc.idx = nil
 }
 
 func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTrees []restic.ID) error {
@@ -225,7 +223,7 @@ type Loader interface {
 
 func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo Loader, dstRepo restic.WithBlobUploader, p *Progress) error {
 	if !rc.rechunkReady {
-		return fmt.Errorf("Plan() must be run first before RechunkData()")
+		return fmt.Errorf("Plan() must be run first before Rechunk()")
 	}
 	rc.rechunkReady = false
 
@@ -447,6 +445,20 @@ func (rc *Rechunker) TotalAddedToDstRepo() uint64 {
 	return rc.totalAddedToDstRepo.Load()
 }
 
+func HashOfIDs(ids restic.IDs) restic.ID {
+	c := make([]byte, 0, len(ids)*32)
+	for _, id := range ids {
+		c = append(c, id[:]...)
+	}
+	return sha256.Sum256(c)
+}
+
+type Cursor struct {
+	blobs   restic.IDs
+	BlobIdx int
+	Offset  uint
+}
+
 func (idx *Index) AdvanceCursor(c Cursor, bytesProcessed uint) Cursor {
 	if idx == nil {
 		panic("call from nil index")
@@ -469,25 +481,6 @@ func (idx *Index) AdvanceCursor(c Cursor, bytesProcessed uint) Cursor {
 	return c
 }
 
-func HashOfIDs(ids restic.IDs) restic.ID {
-	c := make([]byte, 0, len(ids)*32)
-	for _, id := range ids {
-		c = append(c, id[:]...)
-	}
-	return sha256.Sum256(c)
-}
-
-type Cursor struct {
-	blobs   restic.IDs
-	BlobIdx int
-	Offset  uint
-}
-
-type Interval struct {
-	Start Cursor
-	End   Cursor
-}
-
 type ChunkedFile struct {
 	restic.IDs
 	hashval restic.ID

From eceed382ddd5f377368d091fa54a3579b05f8db6 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Fri, 19 Dec 2025 12:35:00 +0900
Subject: [PATCH 04/34] Change rechunker_test code

---
 internal/rechunker/rechunker_test.go | 78 +++++++++++++++++-----------
 1 file changed, 47 insertions(+), 31 deletions(-)

diff --git a/internal/rechunker/rechunker_test.go b/internal/rechunker/rechunker_test.go
index 451c9e0d3..24de99f6e 100644
--- a/internal/rechunker/rechunker_test.go
+++ b/internal/rechunker/rechunker_test.go
@@ -3,6 +3,7 @@ package rechunker
 import (
 	"bytes"
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"sort"
@@ -16,15 +17,15 @@ import (
 	rtest "github.com/restic/restic/internal/test"
 )
 
-// Reference: walker_test.go, rewriter_test.go (v0.18.0)
-
-// TestRechunkerRepo implements minimal Loader/Saver interface
+// TestRechunkerRepo implements minimal repository interface for rechunker test.
 type TestRechunkerRepo struct {
 	loadBlob          func(id restic.ID, buf []byte) ([]byte, error)
 	loadBlobsFromPack func(packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error
 	saveBlob          func(buf []byte) (newID restic.ID, known bool, size int, err error)
 }
 
+// methods to satisfy interfaces used in rechunker
+
 func (r *TestRechunkerRepo) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) {
 	return r.loadBlob(id, buf)
 }
@@ -35,15 +36,17 @@ func (r *TestRechunkerRepo) SaveBlob(ctx context.Context, t restic.BlobType, buf
 	return r.saveBlob(buf)
 }
 func (r *TestRechunkerRepo) SaveBlobAsync(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool, cb func(newID restic.ID, known bool, sizeInRepo int, err error)) {
+	// not used in rechunker; declared just to satisfy restic.BlobSaverWithAsync interface
 }
 func (r *TestRechunkerRepo) WithBlobUploader(ctx context.Context, fn func(ctx context.Context, uploader restic.BlobSaverWithAsync) error) error {
 	return fn(ctx, r)
 }
 func (r *TestRechunkerRepo) Connections() uint {
+	// arbitrarily chosen value
 	return 5
 }
 
-// chunk `files` by `pol` and return fileIndex (map from path to blob IDs) and chunkStore (map from blob ID to bytes data)
+// chunkFiles chunks `files` by `pol` and returns fileIndex (map from path to blob IDs) and chunkStore (map from blob ID to blob data).
 func chunkFiles(chnker *chunker.Chunker, pol chunker.Pol, files map[string][]byte) (map[string]restic.IDs, map[restic.ID][]byte) {
 	fileIndex := map[string]restic.IDs{}
 	chunkStore := map[restic.ID][]byte{}
@@ -75,7 +78,7 @@ func chunkFiles(chnker *chunker.Chunker, pol chunker.Pol, files map[string][]byt
 	return fileIndex, chunkStore
 }
 
-// arbitrary pack assignment for blobs in chunkStore
+// simulatedPack assigns arbitrary pack to each blob in chunkStore.
 func simulatedPack(chunkStore map[restic.ID][]byte) map[restic.ID]restic.ID {
 	blobToPack := map[restic.ID]restic.ID{}
 	i := 0
@@ -91,22 +94,14 @@ func simulatedPack(chunkStore map[restic.ID][]byte) map[restic.ID]restic.ID {
 	return blobToPack
 }
 
+// prepareData prepares random data for rechunker test.
 func prepareData() map[string][]byte {
 	files := map[string][]byte{
 		"0": {},
 		"1": rtest.Random(1, 10_000),
 		"2": rtest.Random(4, 10_000_000),
-		"3": rtest.Random(5, 150_000_000),
+		"3": rtest.Random(5, 100_000_000),
 	}
-	files["2_duplicate"] = files["2"]
-	headChanged := make([]byte, 0, 120_000_000)
-	headChanged = append(headChanged, rtest.Random(6, 10_000_000)...)
-	headChanged = append(headChanged, files["3"][40_000_000:]...)
-	files["3_head_changed"] = headChanged
-	tailChanged := make([]byte, 0, 100_000_000)
-	tailChanged = append(tailChanged, files["3"][:90_000_000]...)
-	tailChanged = append(tailChanged, rtest.Random(7, 10_000_000)...)
-	files["3_tail_changed"] = tailChanged
 
 	return files
 }
@@ -128,12 +123,14 @@ func TestRechunker(t *testing.T) {
 	dstWantsFileIndex, dstWantsChunkStore := chunkFiles(chnker, dstChunkerParam, files)
 	rechunkStore := restic.IDSet{}
 
+	// build files list and virtual blobToPack mapping
 	srcFilesList := []*ChunkedFile{}
 	for _, file := range srcFileIndex {
 		srcFilesList = append(srcFilesList, &ChunkedFile{file, HashOfIDs(file)})
 	}
 	srcBlobToPack := simulatedPack(srcChunkStore)
 
+	// define src repo for rechunker test
 	srcRepo := &TestRechunkerRepo{
 		loadBlob: func(id restic.ID, buf []byte) ([]byte, error) {
 			blob, ok := srcChunkStore[id]
@@ -163,7 +160,7 @@ func TestRechunker(t *testing.T) {
 		},
 	}
 
-	// run test
+	// create rechunker
 	cfg := Config{
 		CacheSize:          4096 * (1 << 20),
 		SmallFileThreshold: 25,
@@ -189,6 +186,7 @@ func TestRechunker(t *testing.T) {
 
 	rechunker.rechunkReady = true
 
+	// define dst repo for rechunker test, and run Rechunk
 	saveBlobLock := sync.Mutex{}
 	rechunkTestRepo := &TestRechunkerRepo{
 		saveBlob: func(buf []byte) (newID restic.ID, known bool, size int, err error) {
@@ -237,6 +235,9 @@ func generateBlobIDsPair(nSrc, nDst uint) BlobIDsPair {
 	return BlobIDsPair{srcBlobIDs: srcIDs, dstBlobIDs: dstIDs}
 }
 
+// Type definitions for rewriteTree test.
+// Reference: walker/rewriter_test.go and walker/walker_test.go (v0.18.0).
+
 type TreeMap map[restic.ID][]byte
 type TestTree map[string]interface{}
 type TestContentNode struct {
@@ -245,17 +246,25 @@ type TestContentNode struct {
 	Content restic.IDs
 }
 
-func (t TreeMap) LoadBlob(_ context.Context, _ restic.BlobType, id restic.ID, _ []byte) ([]byte, error) {
-	buf, ok := t[id]
-	if !ok {
-		return nil, fmt.Errorf("blob does not exist")
+func (t TreeMap) LoadBlob(_ context.Context, tpe restic.BlobType, id restic.ID, _ []byte) ([]byte, error) {
+	if tpe != restic.TreeBlob {
+		return nil, errors.New("can only load trees")
 	}
-	return buf, nil
+	tree, ok := t[id]
+	if !ok {
+		return nil, errors.New("tree not found")
+	}
+	return tree, nil
 }
 
-func (t TreeMap) SaveBlob(_ context.Context, _ restic.BlobType, buf []byte, _ restic.ID, _ bool) (newID restic.ID, known bool, size int, err error) {
-	id := restic.Hash(buf)
+func (t TreeMap) SaveBlob(_ context.Context, tpe restic.BlobType, buf []byte, id restic.ID, _ bool) (newID restic.ID, known bool, size int, err error) {
+	if tpe != restic.TreeBlob {
+		return restic.ID{}, false, 0, errors.New("can only save trees")
+	}
 
+	if id.IsNull() {
+		id = restic.Hash(buf)
+	}
 	_, ok := t[id]
 	if ok {
 		return id, false, 0, nil
@@ -321,19 +330,20 @@ func buildTreeMap(tree TestTree, m TreeMap) restic.ID {
 	return id
 }
 
-func TestRechunkerRewriteTree(t *testing.T) {
+// prepareTree prepares sample tree for rewriteTree test.
+func prepareTree() (srcTree TestTree, wantsTree TestTree, rechunkMap map[restic.ID]restic.IDs) {
 	blobIDsMap := map[string]BlobIDsPair{
 		"a":        generateBlobIDsPair(1, 1),
 		"subdir/a": generateBlobIDsPair(30, 31),
 		"x":        generateBlobIDsPair(42, 41),
 		"0":        generateBlobIDsPair(0, 0),
 	}
-	rechunkBlobsMap := map[restic.ID]restic.IDs{}
+	rechunkMap = map[restic.ID]restic.IDs{}
 	for _, v := range blobIDsMap {
-		rechunkBlobsMap[HashOfIDs(v.srcBlobIDs)] = v.dstBlobIDs
+		rechunkMap[HashOfIDs(v.srcBlobIDs)] = v.dstBlobIDs
 	}
 
-	tree := TestTree{
+	srcTree = TestTree{
 		"zerofile": TestContentNode{
 			Type:    data.NodeTypeFile,
 			Size:    0,
@@ -367,7 +377,7 @@ func TestRechunkerRewriteTree(t *testing.T) {
 			},
 		},
 	}
-	wants := TestTree{
+	wantsTree = TestTree{
 		"zerofile": TestContentNode{
 			Type:    data.NodeTypeFile,
 			Size:    0,
@@ -402,12 +412,18 @@ func TestRechunkerRewriteTree(t *testing.T) {
 		},
 	}
 
-	srcRepo, srcRoot := BuildTreeMap(tree)
-	_, wantsRoot := BuildTreeMap(wants)
+	return srcTree, wantsTree, rechunkMap
+}
+
+func TestRechunkerRewriteTree(t *testing.T) {
+	srcTree, wantsTree, rechunkMap := prepareTree()
+	
+	srcRepo, srcRoot := BuildTreeMap(srcTree)
+	_, wantsRoot := BuildTreeMap(wantsTree)
 
 	testsRepo := TreeMap{}
 	rechunker := NewRechunker(Config{})
-	rechunker.rechunkMap = rechunkBlobsMap
+	rechunker.rechunkMap = rechunkMap
 	testsRoot, err := rechunker.RewriteTree(context.TODO(), srcRepo, testsRepo, srcRoot)
 	if err != nil {
 		t.Error(err)

From d935a9ba06d86b9db3e1d2194a440ded1d286b77 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Fri, 19 Dec 2025 13:08:18 +0900
Subject: [PATCH 05/34] Feat: Track all files' prefix

Track the prefix (leading blobs) of every file to discover priority files,
instead of tracking only small files.
---
 internal/rechunker/rechunker.go | 37 +++++++++++++++++----------------
 internal/rechunker/worker.go    |  4 ++--
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 4702a27da..9a485b36a 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -151,6 +151,8 @@ func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees resti
 	return filesList, totalSize, nil
 }
 
+var FILE_PREFIX_LENGTH = 25
+
 func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id restic.ID) []restic.PackedBlob, cfg Config) (*Index, *eventTracker, error) {
 	// collect blob usage info
 	blobCount := map[restic.ID]int{}
@@ -187,28 +189,27 @@ func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id
 		PackToBlobs: packToBlobs,
 	}
 
-	// build blob trace info for small files
-	// if blob cache is enabled, Rechunker tracks small files' remaining blob count
-	// until all blobs are available in the cache (rc.tracker.sfBlobRequires);
-	// when the file has all its blobs ready, it is prioritized to be processed first.
+	// build blob load tracker info.
+	// if blob cache is enabled, Rechunker tracks the remaining blob count
+	// among prefix of a file until all of them are available in the cache 
+	// (rc.tracker.blobsToPrepare); when all of them are ready, the file is
+	// prioritized to be processed first.
 	// this logic is handled by rc.priorityFilesHandler.
-	sfBlobRequires := map[restic.ID]int{}
-	sfBlobToFiles := map[restic.ID][]*ChunkedFile{}
+	blobsToPrepare := map[restic.ID]int{}
+	filesContaining := map[restic.ID][]*ChunkedFile{}
 	for _, file := range filesList {
-		if file.Len() >= cfg.SmallFileThreshold {
-			continue
-		}
-		blobSet := restic.NewIDSet(file.IDs...)
-		sfBlobRequires[file.hashval] = len(blobSet)
+		n_prefix := min(FILE_PREFIX_LENGTH, len(file.IDs))
+		blobSet := restic.NewIDSet(file.IDs[:n_prefix]...)
+		blobsToPrepare[file.hashval] = len(blobSet)
 		for b := range blobSet {
-			sfBlobToFiles[b] = append(sfBlobToFiles[b], file)
+			filesContaining[b] = append(filesContaining[b], file)
 		}
 	}
 
 	tracker := &eventTracker{
 		idx:                idx,
-		filesContaining:    sfBlobToFiles,
-		blobsToPrepare:     sfBlobRequires,
+		filesContaining:    filesContaining,
+		blobsToPrepare:     blobsToPrepare,
 		remainingBlobNeeds: blobCount,
 	}
 
@@ -501,8 +502,8 @@ type eventTracker struct {
 }
 
 func (t *eventTracker) BlobReady(ids restic.IDs) {
-	// when a new blob is ready, (small) files containing that blob has
-	// their blobsToPrepare decreased by one.
+	// when a new blob is ready, files containing that blob as their prefix 
+	// has their blobsToPrepare decreased by one.
 	// The list of files whose blobs are all prepared is returned.
 
 	if t.priorityCB == nil {
@@ -546,8 +547,8 @@ func (t *eventTracker) BlobReady(ids restic.IDs) {
 }
 
 func (t *eventTracker) BlobUnready(ids restic.IDs) {
-	// when a blob is evicted, (small) files containing that blob has
-	// their blobsToPrepare increased by one. However, ignore files
+	// when a blob is evicted, files containing that blob as their prefix
+	// has their blobsToPrepare increased by one. However, ignore files
 	// once they have reached blobsToPrepare value zero; they are no longer tracked.
 
 	if t.priorityCB == nil {
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
index d1d687ddf..069e56611 100644
--- a/internal/rechunker/worker.go
+++ b/internal/rechunker/worker.go
@@ -50,8 +50,8 @@ func (w *Worker) RunFile(ctx context.Context, srcBlobs restic.IDs, p *Progress)
 	// Run worker pipeline (reader and writer)
 	wg, ctx := errgroup.WithContext(ctx)
 
-	chChunk := make(chan chunker.Chunk)
-	chResult := make(chan FileResult, 1)
+	chChunk := make(chan chunker.Chunk) // chunk passing channel from reader to writer
+	chResult := make(chan FileResult, 1) // file chunk result channel
 
 	// Run reader goroutine
 	w.runReader(ctx, wg, srcBlobs, reader, chChunk)

From dbf9818969ed7beb75d3605e235d0dfa145bb348 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Mon, 22 Dec 2025 21:16:03 +0900
Subject: [PATCH 06/34] Misc: Remove redundant debug log

---
 internal/rechunker/dispatcher.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/internal/rechunker/dispatcher.go b/internal/rechunker/dispatcher.go
index 9374ea3d4..5ce421653 100644
--- a/internal/rechunker/dispatcher.go
+++ b/internal/rechunker/dispatcher.go
@@ -137,7 +137,6 @@ func (d *Dispatcher) createRegularCh(ctx context.Context, wg *errgroup.Group, vi
 			// check if the file was visited by another dispatcher;
 			// if it was, skip the file.
 			if visited != nil && visited(file.hashval) {
-				debug.Log("File %v was visited by another dispatcher; skipping.", file.hashval.Str())
 				continue
 			}
 
@@ -187,7 +186,6 @@ func (d *Dispatcher) createPriorityCh(ctx context.Context, wg *errgroup.Group, v
 			// check if the file was handled by another channel;
 			// if it was, skip the file.
 			if visited != nil && visited(file.hashval) {
-				debug.Log("File %v was visited by another dispatcher; skipping.", file.hashval.Str())
 				continue
 			}
 

From ca76eb0b35df8ab458534b3bb813e6152cce871c Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Mon, 22 Dec 2025 21:35:02 +0900
Subject: [PATCH 07/34] Fix: Blob cache concurrency bug

Fix potential race condition inside blob cache downloader code
---
 internal/rechunker/blob_cache.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/internal/rechunker/blob_cache.go b/internal/rechunker/blob_cache.go
index efdc2e1d9..350d7ecf5 100644
--- a/internal/rechunker/blob_cache.go
+++ b/internal/rechunker/blob_cache.go
@@ -157,6 +157,7 @@ func (c *BlobCache) startDownloaders(ctx context.Context, numDownloaders int,
 					}
 					ready = append(ready, id)
 				}
+				currentCacheUsage := c.size - c.free // for debug logging
 				c.mu.Unlock()
 
 				// execute callbacks
@@ -170,12 +171,12 @@ func (c *BlobCache) startDownloaders(ctx context.Context, numDownloaders int,
 					onReady(ready)
 				}
 
-				debug.Log("Pack %v loaded. Current cache usage: %v", packID.Str(), c.size-c.free)
+				debug.Log("Pack %v loaded. Current cache usage: %v", packID.Str(), currentCacheUsage)
 				debug.Log("Pack %v includes the following blobs: \n%v", packID.Str(), ready.String())
 
 				// debugStats: track maximum memory usage
 				if debugStats != nil {
-					debugStats.UpdateMax("max_cache_usage", c.size-c.free)
+					debugStats.UpdateMax("max_cache_usage", currentCacheUsage)
 				}
 			}
 		})

From fa0ddd471e709ebf6adf14df8536036ff301c2f4 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Mon, 22 Dec 2025 21:35:56 +0900
Subject: [PATCH 08/34] Style: gofmt and golangci-lint

Fix the code to follow golangci-lint suggestion
---
 internal/rechunker/blob_cache.go     |  4 ++--
 internal/rechunker/dispatcher.go     |  3 ++-
 internal/rechunker/rechunker.go      | 13 ++++++-------
 internal/rechunker/rechunker_test.go |  4 ++--
 internal/rechunker/worker.go         |  2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/internal/rechunker/blob_cache.go b/internal/rechunker/blob_cache.go
index 350d7ecf5..ac69f7a7c 100644
--- a/internal/rechunker/blob_cache.go
+++ b/internal/rechunker/blob_cache.go
@@ -54,7 +54,7 @@ func NewBlobCache(ctx context.Context, size int, numDownloaders int,
 		cancel: cancel,
 	}
 
-	lru, err := simplelru.NewLRU(size, func(k restic.ID, v []byte) {
+	lru, err := simplelru.NewLRU(size, func(_ restic.ID, v []byte) {
 		c.free += cap(v) + overhead
 	})
 	if err != nil {
@@ -308,7 +308,7 @@ type BlobLoaderWithCache struct {
 	cache *BlobCache
 }
 
-func (l *BlobLoaderWithCache) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) {
+func (l *BlobLoaderWithCache) LoadBlob(ctx context.Context, _ restic.BlobType, id restic.ID, buf []byte) ([]byte, error) {
 	blob, ch := l.cache.Get(ctx, id, buf)
 	if blob == nil { // wait for blob to be downloaded
 		select {
diff --git a/internal/rechunker/dispatcher.go b/internal/rechunker/dispatcher.go
index 5ce421653..9e56d8332 100644
--- a/internal/rechunker/dispatcher.go
+++ b/internal/rechunker/dispatcher.go
@@ -193,6 +193,7 @@ func (d *Dispatcher) createPriorityCh(ctx context.Context, wg *errgroup.Group, v
 			case <-ctx.Done():
 				return ctx.Err()
 			case <-d.done:
+				debug.Log("Closing dispatcher for priority channel")
 				return nil
 			case ch <- file:
 				debug.Log("Sent file %v through priority channel", file.hashval.Str())
@@ -230,6 +231,6 @@ func PrioritySelect(ctx context.Context, first <-chan *ChunkedFile, second <-cha
 			}
 		}
 	}
-	
+
 	return item, from, nil
 }
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 9a485b36a..87a95e8cc 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -83,7 +83,7 @@ func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTr
 	}
 
 	debug.Log("Building the internal index for use in Rechunk()")
-	rc.idx, rc.tracker, err = createIndex(rc.filesList, srcRepo.LookupBlob, rc.cfg)
+	rc.idx, rc.tracker, err = createIndex(rc.filesList, srcRepo.LookupBlob)
 	if err != nil {
 		return err
 	}
@@ -153,7 +153,7 @@ func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees resti
 
 var FILE_PREFIX_LENGTH = 25
 
-func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id restic.ID) []restic.PackedBlob, cfg Config) (*Index, *eventTracker, error) {
+func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id restic.ID) []restic.PackedBlob) (*Index, *eventTracker, error) {
 	// collect blob usage info
 	blobCount := map[restic.ID]int{}
 	for _, file := range filesList {
@@ -191,15 +191,15 @@ func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id
 
 	// build blob load tracker info.
 	// if blob cache is enabled, Rechunker tracks the remaining blob count
-	// among prefix of a file until all of them are available in the cache 
+	// among prefix of a file until all of them are available in the cache
 	// (rc.tracker.blobsToPrepare); when all of them are ready, the file is
 	// prioritized to be processed first.
 	// this logic is handled by rc.priorityFilesHandler.
 	blobsToPrepare := map[restic.ID]int{}
 	filesContaining := map[restic.ID][]*ChunkedFile{}
 	for _, file := range filesList {
-		n_prefix := min(FILE_PREFIX_LENGTH, len(file.IDs))
-		blobSet := restic.NewIDSet(file.IDs[:n_prefix]...)
+		prefixLen := min(FILE_PREFIX_LENGTH, len(file.IDs))
+		blobSet := restic.NewIDSet(file.IDs[:prefixLen]...)
 		blobsToPrepare[file.hashval] = len(blobSet)
 		for b := range blobSet {
 			filesContaining[b] = append(filesContaining[b], file)
@@ -470,7 +470,6 @@ func (idx *Index) AdvanceCursor(c Cursor, bytesProcessed uint) Cursor {
 
 		if bytesProcessed < r {
 			c.Offset += bytesProcessed
-			bytesProcessed = 0
 			break
 		}
 
@@ -502,7 +501,7 @@ type eventTracker struct {
 }
 
 func (t *eventTracker) BlobReady(ids restic.IDs) {
-	// when a new blob is ready, files containing that blob as their prefix 
+	// when a new blob is ready, files containing that blob as their prefix
 	// has their blobsToPrepare decreased by one.
 	// The list of files whose blobs are all prepared is returned.
 
diff --git a/internal/rechunker/rechunker_test.go b/internal/rechunker/rechunker_test.go
index 24de99f6e..084bfb7b0 100644
--- a/internal/rechunker/rechunker_test.go
+++ b/internal/rechunker/rechunker_test.go
@@ -179,7 +179,7 @@ func TestRechunker(t *testing.T) {
 		pb.PackID = srcBlobToPack[id]
 
 		return []restic.PackedBlob{pb}
-	}, cfg)
+	})
 	if err != nil {
 		panic(err)
 	}
@@ -417,7 +417,7 @@ func prepareTree() (srcTree TestTree, wantsTree TestTree, rechunkMap map[restic.
 
 func TestRechunkerRewriteTree(t *testing.T) {
 	srcTree, wantsTree, rechunkMap := prepareTree()
-	
+
 	srcRepo, srcRoot := BuildTreeMap(srcTree)
 	_, wantsRoot := BuildTreeMap(wantsTree)
 
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
index 069e56611..281b27009 100644
--- a/internal/rechunker/worker.go
+++ b/internal/rechunker/worker.go
@@ -50,7 +50,7 @@ func (w *Worker) RunFile(ctx context.Context, srcBlobs restic.IDs, p *Progress)
 	// Run worker pipeline (reader and writer)
 	wg, ctx := errgroup.WithContext(ctx)
 
-	chChunk := make(chan chunker.Chunk) // chunk passing channel from reader to writer
+	chChunk := make(chan chunker.Chunk)  // chunk passing channel from reader to writer
 	chResult := make(chan FileResult, 1) // file chunk result channel
 
 	// Run reader goroutine

From 9b79fdef264d9d3b485f77a98688a51ad10ea436 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 7 Jan 2026 10:29:08 +0900
Subject: [PATCH 09/34] Refactor: Remove unnecessary variables

---
 cmd/restic/cmd_rechunk_copy.go       | 5 ++---
 internal/rechunker/blob_cache.go     | 3 ++-
 internal/rechunker/rechunker.go      | 5 ++---
 internal/rechunker/rechunker_test.go | 5 ++---
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/cmd/restic/cmd_rechunk_copy.go b/cmd/restic/cmd_rechunk_copy.go
index 27de545fa..de7ca1726 100644
--- a/cmd/restic/cmd_rechunk_copy.go
+++ b/cmd/restic/cmd_rechunk_copy.go
@@ -123,9 +123,8 @@ func runRechunkCopy(ctx context.Context, opts RechunkCopyOptions, gopts global.O
 
 	debug.Log("Running NewRechunker()")
 	rechnker := rechunker.NewRechunker(rechunker.Config{
-		CacheSize:          opts.CacheSize * (1 << 20),
-		SmallFileThreshold: 25,
-		Pol:                dstRepo.Config().ChunkerPolynomial,
+		CacheSize: opts.CacheSize * (1 << 20),
+		Pol:       dstRepo.Config().ChunkerPolynomial,
 	})
 	rootTrees := restic.IDs{}
 
diff --git a/internal/rechunker/blob_cache.go b/internal/rechunker/blob_cache.go
index ac69f7a7c..ede764ef2 100644
--- a/internal/rechunker/blob_cache.go
+++ b/internal/rechunker/blob_cache.go
@@ -137,7 +137,7 @@ func (c *BlobCache) startDownloaders(ctx context.Context, numDownloaders int,
 
 				// pop the pack from the waitlist,
 				// store downloaded blobs to the cache,
-
+				// and notify that blobs are ready
 				var ready, evicted restic.IDs
 				c.mu.Lock()
 				delete(c.waitList, packID)
@@ -288,6 +288,7 @@ func (c *BlobCache) Ignore(ids restic.IDs) {
 
 	for _, id := range ids {
 		c.ignored.Insert(id)
+		_ = c.c.Remove(id)
 	}
 
 	if debugStats != nil {
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 87a95e8cc..b227e554e 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -33,9 +33,8 @@ type Rechunker struct {
 }
 
 type Config struct {
-	CacheSize          int
-	SmallFileThreshold int // files less than the threshold will be prioritized when all blobs are ready in the cache
-	Pol                chunker.Pol
+	CacheSize int
+	Pol       chunker.Pol
 }
 
 // Index is immutable after Plan() returns.
diff --git a/internal/rechunker/rechunker_test.go b/internal/rechunker/rechunker_test.go
index 084bfb7b0..121c10254 100644
--- a/internal/rechunker/rechunker_test.go
+++ b/internal/rechunker/rechunker_test.go
@@ -162,9 +162,8 @@ func TestRechunker(t *testing.T) {
 
 	// create rechunker
 	cfg := Config{
-		CacheSize:          4096 * (1 << 20),
-		SmallFileThreshold: 25,
-		Pol:                dstChunkerParam,
+		CacheSize: 4096 * (1 << 20),
+		Pol:       dstChunkerParam,
 	}
 	rechunker := NewRechunker(cfg)
 

From 945ad362a3dd2f7a3c46dd81f326ca293b67fc0e Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 7 Jan 2026 10:31:32 +0900
Subject: [PATCH 10/34] Fix: Dispatcher last file bug

Fix dispatcher bug in which the last file of the priority list may be omitted.
---
 internal/rechunker/dispatcher.go | 29 ++---------------------------
 internal/rechunker/rechunker.go  |  1 -
 2 files changed, 2 insertions(+), 28 deletions(-)

diff --git a/internal/rechunker/dispatcher.go b/internal/rechunker/dispatcher.go
index 9e56d8332..d697f3295 100644
--- a/internal/rechunker/dispatcher.go
+++ b/internal/rechunker/dispatcher.go
@@ -21,7 +21,7 @@ type Dispatcher struct {
 	priorityList []*ChunkedFile
 
 	push chan struct{} // priority file notification
-	done chan struct{}
+	done chan struct{} // end of regular channel notification
 }
 
 func NewDispatcher(ctx context.Context, files []*ChunkedFile, usePriority bool) *Dispatcher {
@@ -83,13 +83,6 @@ func (d *Dispatcher) PushPriority(files []*ChunkedFile) bool {
 		return false
 	}
 
-	// check if dispatcher is closed; if closed, return without push
-	select {
-	case <-d.done:
-		return false
-	default:
-	}
-
 	d.mu.Lock()
 	defer d.mu.Unlock()
 
@@ -114,23 +107,11 @@ func (d *Dispatcher) popPriority() []*ChunkedFile {
 	return l
 }
 
-func (d *Dispatcher) Close() {
-	if d == nil {
-		return
-	}
-
-	select {
-	case <-d.done:
-	default:
-		close(d.done)
-	}
-}
-
 func (d *Dispatcher) createRegularCh(ctx context.Context, wg *errgroup.Group, visited func(id restic.ID) bool) {
 	debug.Log("Running dispatcher for regular channel")
 	ch := make(chan *ChunkedFile)
 	wg.Go(func() error {
-		defer d.Close()
+		defer close(d.done)
 		defer close(ch)
 
 		for _, file := range d.regularList {
@@ -143,9 +124,6 @@ func (d *Dispatcher) createRegularCh(ctx context.Context, wg *errgroup.Group, vi
 			select {
 			case <-ctx.Done():
 				return ctx.Err()
-			case <-d.done:
-				debug.Log("Closing dispatcher for regular channel")
-				return nil
 			case ch <- file:
 				debug.Log("Sent file %v through regular channel", file.hashval.Str())
 			}
@@ -192,9 +170,6 @@ func (d *Dispatcher) createPriorityCh(ctx context.Context, wg *errgroup.Group, v
 			select {
 			case <-ctx.Done():
 				return ctx.Err()
-			case <-d.done:
-				debug.Log("Closing dispatcher for priority channel")
-				return nil
 			case ch <- file:
 				debug.Log("Sent file %v through priority channel", file.hashval.Str())
 			}
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index b227e554e..50a6d61f4 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -248,7 +248,6 @@ func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo Loader, dstRepo restic
 
 	// start dispatcher
 	dispatcher := rc.setupDispatcher(ctx)
-	defer dispatcher.Close()
 
 	// Phase 2: Run Workers
 	bufferPool := NewBufferPool(2 * (numWorkers + 1))

From 8e08a8e201b16162bd84645aa62a14ab7bbbc273 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 7 Jan 2026 10:59:27 +0900
Subject: [PATCH 11/34] Misc: Add potential error handling logic

Add error handling for the case where an unknown blob ID is given to BlobCache.Get().
---
 internal/rechunker/blob_cache.go | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/internal/rechunker/blob_cache.go b/internal/rechunker/blob_cache.go
index ede764ef2..251dac12f 100644
--- a/internal/rechunker/blob_cache.go
+++ b/internal/rechunker/blob_cache.go
@@ -2,6 +2,7 @@ package rechunker
 
 import (
 	"context"
+	"fmt"
 	"sync"
 
 	"github.com/hashicorp/golang-lru/v2/simplelru"
@@ -256,10 +257,13 @@ func (c *BlobCache) asyncGet(ctx context.Context, id restic.ID, buf []byte) <-ch
 }
 
 func (c *BlobCache) requestDownload(ctx context.Context, id restic.ID) error {
-	packID := c.idx.BlobToPack[id]
+	packID, ok := c.idx.BlobToPack[id]
+	if !ok {
+		return fmt.Errorf("unknown blob: %v", id.Str())
+	}
 
 	c.mu.Lock()
-	ok := c.waitList.Has(packID)
+	ok = c.waitList.Has(packID)
 	if !ok {
 		// queue pack download
 		c.waitList.Insert(packID)

From a7677d3354d79ee5c75fc7f33098b6a55fe4636f Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Sat, 10 Jan 2026 17:11:04 +0900
Subject: [PATCH 12/34] Fix: BufferPool capacity

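Increase BufferPool capacity from 2 * (numWorkers + 1) to 3 * (numWorkers + 1).
Rename FILE_PREFIX_LENGTH to FILE_HEAD_LENGTH and reword the blob tracker comments.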
---
 internal/rechunker/rechunker.go | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 50a6d61f4..522685253 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -150,7 +150,7 @@ func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees resti
 	return filesList, totalSize, nil
 }
 
-var FILE_PREFIX_LENGTH = 25
+var FILE_HEAD_LENGTH = 25
 
 func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id restic.ID) []restic.PackedBlob) (*Index, *eventTracker, error) {
 	// collect blob usage info
@@ -189,15 +189,14 @@ func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id
 	}
 
 	// build blob load tracker info.
-	// if blob cache is enabled, Rechunker tracks the remaining blob count
-	// among prefix of a file until all of them are available in the cache
-	// (rc.tracker.blobsToPrepare); when all of them are ready, the file is
-	// prioritized to be processed first.
-	// this logic is handled by rc.priorityFilesHandler.
-	blobsToPrepare := map[restic.ID]int{}
-	filesContaining := map[restic.ID][]*ChunkedFile{}
+	// if blob cache is enabled, Rechunker tracks the number of unprepared
+	// blobs (which are not yet ready in the cache) among first FILE_HEAD_LENGTH
+	// chunks in a file, until all of them are available in the cache.
+	// when all of them are ready, that file is prioritized by the dispatcher.
+	blobsToPrepare := map[restic.ID]int{}             // number of unprepared blobs for head of file
+	filesContaining := map[restic.ID][]*ChunkedFile{} // list of files that contain a blob
 	for _, file := range filesList {
-		prefixLen := min(FILE_PREFIX_LENGTH, len(file.IDs))
+		prefixLen := min(FILE_HEAD_LENGTH, len(file.IDs))
 		blobSet := restic.NewIDSet(file.IDs[:prefixLen]...)
 		blobsToPrepare[file.hashval] = len(blobSet)
 		for b := range blobSet {
@@ -250,7 +249,7 @@ func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo Loader, dstRepo restic
 	dispatcher := rc.setupDispatcher(ctx)
 
 	// Phase 2: Run Workers
-	bufferPool := NewBufferPool(2 * (numWorkers + 1))
+	bufferPool := NewBufferPool(3 * (numWorkers + 1))
 	err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaverWithAsync) error {
 		debug.Log("Starting uploader")
 		defer debug.Log("Closing uploader")

From 342769db0807f96a0e00d792e0dd0e2747711550 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Sat, 14 Feb 2026 01:22:37 +0900
Subject: [PATCH 13/34] Refactor: rechunker RewriteTree

Make rechunker.RewriteTree() use walker.TreeRewriter
Update rechunker.gatherFileContents() to follow current data.StreamTrees() signature
---
 internal/rechunker/rechunker.go | 175 +++++++++++++-------------------
 1 file changed, 73 insertions(+), 102 deletions(-)

diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 522685253..c0f66c314 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -3,7 +3,6 @@ package rechunker
 import (
 	"context"
 	"crypto/sha256"
-	"encoding/json"
 	"fmt"
 	"runtime"
 	"slices"
@@ -14,6 +13,7 @@ import (
 	"github.com/restic/restic/internal/data"
 	"github.com/restic/restic/internal/debug"
 	"github.com/restic/restic/internal/restic"
+	"github.com/restic/restic/internal/walker"
 	"golang.org/x/sync/errgroup"
 )
 
@@ -39,8 +39,8 @@ type Config struct {
 
 // Index is immutable after Plan() returns.
 type Index struct {
-	BlobSize    map[restic.ID]uint
-	BlobToPack  map[restic.ID]restic.ID     // blob ID -> {blob length, pack ID}
+	BlobSize    map[restic.ID]uint          // blob ID -> blob size
+	BlobToPack  map[restic.ID]restic.ID     // blob ID -> pack ID
 	PackToBlobs map[restic.ID][]restic.Blob // pack ID -> list of blobs to be loaded from the pack
 }
 
@@ -98,55 +98,49 @@ func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTr
 }
 
 func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees restic.IDs, visitedFiles restic.IDSet, visitedTrees restic.IDSet) (filesList []*ChunkedFile, totalSize uint64, err error) {
-	wg, ctx := errgroup.WithContext(ctx)
+	mu := sync.Mutex{}
 
-	// create StreamTrees channel that streams through all subtrees in target snapshots
-	treeStream := data.StreamTrees(ctx, wg, repo, rootTrees, func(id restic.ID) bool {
+	// Stream through all subtrees in target snapshots and gather all distinct file Contents
+	err = data.StreamTrees(ctx, repo, rootTrees, nil, func(id restic.ID) bool {
 		visited := visitedTrees.Has(id)
 		visitedTrees.Insert(id)
 		return visited
-	}, nil)
-
-	// gather all distinct file Contents under trees
-	wg.Go(func() error {
-		for tree := range treeStream {
-			if tree.Error != nil {
-				return tree.Error
-			}
-
-			// check if the tree blob is unstable json
-			buf, err := json.Marshal(tree.Tree)
-			if err != nil {
-				return err
-			}
-			buf = append(buf, '\n')
-			if tree.ID != restic.Hash(buf) {
-				return fmt.Errorf("can't run rechunk-copy, because the following tree can't be rewritten without losing information:\n%v", tree.ID.String())
-			}
-
-			for _, node := range tree.Nodes {
-				// you only have to rechunk regular files; so skip other file types
-				if node.Type == data.NodeTypeFile {
-					hashval := HashOfIDs(node.Content)
-					if visitedFiles.Has(hashval) {
-						continue
-					}
-					visitedFiles.Insert(hashval)
-
-					filesList = append(filesList, &ChunkedFile{
-						node.Content,
-						hashval,
-					})
-					totalSize += node.Size
-				}
-			}
+	}, func(_ restic.ID, err error, nodes data.TreeNodeIterator) error {
+		if err != nil {
+			return err
 		}
+
+		for item := range nodes {
+			if item.Error != nil {
+				return item.Error
+			}
+			if item.Node == nil || item.Node.Type != data.NodeTypeFile {
+				continue
+			}
+
+			hashval := HashOfIDs(item.Node.Content)
+
+			mu.Lock()
+			if visitedFiles.Has(hashval) {
+				mu.Unlock()
+				continue
+			}
+			visitedFiles.Insert(hashval)
+
+			filesList = append(filesList, &ChunkedFile{
+				item.Node.Content,
+				hashval,
+			})
+			totalSize += item.Node.Size
+			mu.Unlock()
+		}
+
 		return nil
 	})
-	err = wg.Wait()
 	if err != nil {
 		return nil, 0, err
 	}
+
 	return filesList, totalSize, nil
 }
 
@@ -341,82 +335,59 @@ func (rc *Rechunker) runWorkers(ctx context.Context, wg *errgroup.Group, numWork
 	}
 }
 
-func (rc *Rechunker) RewriteTree(ctx context.Context, srcRepo restic.BlobLoader, dstRepo restic.BlobSaver, nodeID restic.ID) (restic.ID, error) {
+type saverType func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error)
+
+func (s saverType) SaveBlob(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
+	return s(ctx, tpe, buf, id, storeDuplicate)
+}
+
+func (rc *Rechunker) RewriteTree(ctx context.Context, srcRepo restic.BlobLoader, dstRepo restic.BlobSaver, treeID restic.ID) (restic.ID, error) {
 	// check if the identical tree has already been processed
-	newID, ok := rc.rewriteTreeMap[nodeID]
+	newID, ok := rc.rewriteTreeMap[treeID]
 	if ok {
 		return newID, nil
 	}
 
-	curTree, err := data.LoadTree(ctx, srcRepo, nodeID)
-	if err != nil {
-		return restic.ID{}, err
-	}
-
-	tb := data.NewTreeJSONBuilder()
-	for _, node := range curTree.Nodes {
-		if ctx.Err() != nil {
-			return restic.ID{}, ctx.Err()
-		}
-
-		err = rc.rewriteNode(node)
+	// wrap dstRepo so that total uploaded tree blobs size can be tracked
+	saver := saverType(func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
+		newID, known, sizeInRepo, err = dstRepo.SaveBlob(ctx, tpe, buf, id, storeDuplicate)
 		if err != nil {
-			return restic.ID{}, err
+			return
 		}
+		if !known {
+			rc.totalAddedToDstRepo.Add(uint64(sizeInRepo))
+		}
+		return
+	})
 
-		// if the node is non-directory node, add it to the tree
-		if node.Type != data.NodeTypeDir {
-			err = tb.AddNode(node)
-			if err != nil {
-				return restic.ID{}, err
+	// prepare rewriter that rewrites node.Content of regular files
+	rewriter := walker.NewTreeRewriter(walker.RewriteOpts{
+		RewriteNode: func(node *data.Node, _ string) *data.Node {
+			if node == nil {
+				return nil
+			}
+			if node.Type != data.NodeTypeFile {
+				return node
 			}
-			continue
-		}
 
-		// if the node is directory node, rewrite it recursively
-		subtree := *node.Subtree
-		newID, err := rc.RewriteTree(ctx, srcRepo, dstRepo, subtree)
-		if err != nil {
-			return restic.ID{}, err
-		}
-		node.Subtree = &newID
-		err = tb.AddNode(node)
-		if err != nil {
-			return restic.ID{}, err
-		}
-	}
+			hashval := HashOfIDs(node.Content)
+			dstBlobs, ok := rc.rechunkMap[hashval]
+			if !ok {
+				panic(fmt.Errorf("can't find from rechunkBlobsMap: %v", node.Content.String()))
+			}
+			node.Content = dstBlobs
+			return node
+		},
+	})
 
-	tree, err := tb.Finalize()
+	newID, err := rewriter.RewriteTree(ctx, srcRepo, saver, "/", treeID)
 	if err != nil {
 		return restic.ID{}, err
 	}
 
-	// save new tree to the destination repo
-	newTreeID, known, size, err := dstRepo.SaveBlob(ctx, restic.TreeBlob, tree, restic.ID{}, false)
-	if err != nil {
-		return restic.ID{}, err
-	}
-	rc.rewriteTreeMap[nodeID] = newTreeID
+	rc.rewriteTreeMap[treeID] = newID
 
-	if !known {
-		rc.totalAddedToDstRepo.Add(uint64(size))
-	}
-
-	return newTreeID, err
-}
-
-func (rc *Rechunker) rewriteNode(node *data.Node) error {
-	if node.Type != data.NodeTypeFile {
-		return nil
-	}
-
-	hashval := HashOfIDs(node.Content)
-	dstBlobs, ok := rc.rechunkMap[hashval]
-	if !ok {
-		return fmt.Errorf("can't find from rechunkBlobsMap: %v", node.Content.String())
-	}
-	node.Content = dstBlobs
-	return nil
+	return newID, err
 }
 
 func (rc *Rechunker) NumFiles() int {

From 6c516e2600fcb8d3912227bd6a604b19dfb71829 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Mon, 16 Feb 2026 17:28:45 +0900
Subject: [PATCH 14/34] Misc: Correct style typo

---
 internal/rechunker/blob_cache.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/internal/rechunker/blob_cache.go b/internal/rechunker/blob_cache.go
index 251dac12f..4eb77b974 100644
--- a/internal/rechunker/blob_cache.go
+++ b/internal/rechunker/blob_cache.go
@@ -101,7 +101,7 @@ func (c *BlobCache) startDownloaders(ctx context.Context, numDownloaders int,
 	for range numDownloaders {
 		wg.Go(func() error {
 			debug.Log("Starting blob cache downloader")
-			defer debug.Log(("Stopping blob cache downloader"))
+			defer debug.Log("Stopping blob cache downloader")
 
 			for {
 				// listen to pack download request
@@ -273,7 +273,7 @@ func (c *BlobCache) requestDownload(ctx context.Context, id restic.ID) error {
 	}
 	c.mu.Unlock()
 
-	if ok { // somebody else has already queued pack download; it will handle download
+	if ok { // somebody else has already queued pack download; it will handle download request
 		return nil
 	}
 

From 0e0c0a9c1c772b5d0fbf3010f1121d5f51da21fe Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Sat, 21 Feb 2026 17:21:13 +0900
Subject: [PATCH 15/34] Refactor: Integrate rechunk-copy to copy command

Integrate the `rechunk-copy` command into the `copy` command as `copy --rechunk`.
---
 cmd/restic/cmd_copy.go                        | 123 +++++++++++++++++-
 cmd/restic/cmd_rechunk_copy.go                |  10 +-
 .../cmd_rechunk_copy_integration_test.go      |   2 +-
 cmd/restic/main.go                            |   1 -
 4 files changed, 127 insertions(+), 9 deletions(-)

diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go
index d17ded7c9..600b617e7 100644
--- a/cmd/restic/cmd_copy.go
+++ b/cmd/restic/cmd_copy.go
@@ -10,7 +10,9 @@ import (
 	"github.com/restic/restic/internal/data"
 	"github.com/restic/restic/internal/debug"
 	"github.com/restic/restic/internal/errors"
+	"github.com/restic/restic/internal/feature"
 	"github.com/restic/restic/internal/global"
+	"github.com/restic/restic/internal/rechunker"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"
 	"github.com/restic/restic/internal/ui"
@@ -65,11 +67,28 @@ Exit status is 12 if the password is incorrect.
 type CopyOptions struct {
 	global.SecondaryRepoOptions
 	data.SnapshotFilter
+	RechunkCopyOptions
 }
 
 func (opts *CopyOptions) AddFlags(f *pflag.FlagSet) {
 	opts.SecondaryRepoOptions.AddFlags(f, "destination", "to copy snapshots from")
 	initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
+	opts.RechunkCopyOptions.AddFlags(f)
+}
+
+type RechunkCopyOptions struct {
+	Rechunk           bool
+	ForceRechunk      bool
+	AddTags           data.TagLists
+	CacheSize         int
+	isIntegrationTest bool // skip check for RESTIC_FEATURES=rechunk-copy during integration test
+}
+
+func (opts *RechunkCopyOptions) AddFlags(f *pflag.FlagSet) {
+	f.BoolVar(&opts.Rechunk, "rechunk", false, "rechunk files when copying")
+	f.BoolVar(&opts.ForceRechunk, "force", false, "force rechunk even when src and dst repo have same chunker polynomials; to be used with --rechunk")
+	f.IntVar(&opts.CacheSize, "cache-size", 4096, "for rechunk copy, specify in-memory blob cache size in MiBs (0 to disable cache). Used with --rechunk")
+	f.Var(&opts.AddTags, "add-tag", "add `tags` for the copied snapshots in the format `tag[,tag,...]` (can be specified multiple times). Used with --rechunk")
 }
 
 // collectAllSnapshots: select all snapshot trees to be copied
@@ -106,6 +125,17 @@ func collectAllSnapshots(ctx context.Context, opts CopyOptions,
 }
 
 func runCopy(ctx context.Context, opts CopyOptions, gopts global.Options, args []string, term ui.Terminal) error {
+	// Rechunk-copy guardrails
+	if opts.Rechunk {
+		debug.Log("Rechunk option enabled")
+		if !feature.Flag.Enabled(feature.RechunkCopy) && !opts.isIntegrationTest {
+			return errors.Fatal("rechunk-copy feature flag is not set. Currently, rechunk-copy is alpha feature (disabled by default).")
+		}
+		if opts.CacheSize != 0 && opts.CacheSize < 100 {
+			return errors.Fatal("blob cache size must be at least 100 MiB")
+		}
+	}
+
 	printer := ui.NewProgressPrinter(false, gopts.Verbosity, term)
 	secondaryGopts, isFromRepo, err := opts.SecondaryRepoOptions.FillGlobalOpts(ctx, gopts, "destination")
 	if err != nil {
@@ -128,6 +158,11 @@ func runCopy(ctx context.Context, opts CopyOptions, gopts global.Options, args [
 	}
 	defer unlock()
 
+	// if rechunk is enabled, ensure srcRepo and dstRepo have different ChunkerPolynomials
+	if opts.Rechunk && !opts.ForceRechunk && srcRepo.Config().ChunkerPolynomial == dstRepo.Config().ChunkerPolynomial {
+		return errors.Fatal("source repo and destination repo have same chunker polynomials; run without `--rechunk`, or set `--force` flag to proceed with rechunk anyway")
+	}
+
 	srcSnapshotLister, err := restic.MemorizeList(ctx, srcRepo, restic.SnapshotFile)
 	if err != nil {
 		return err
@@ -161,8 +196,23 @@ func runCopy(ctx context.Context, opts CopyOptions, gopts global.Options, args [
 
 	selectedSnapshots := collectAllSnapshots(ctx, opts, srcSnapshotLister, srcRepo, dstSnapshotByOriginal, args, printer)
 
-	if err := copyTreeBatched(ctx, srcRepo, dstRepo, selectedSnapshots, printer); err != nil {
-		return err
+	if !opts.Rechunk {
+		if err := copyTreeBatched(ctx, srcRepo, dstRepo, selectedSnapshots, printer); err != nil {
+			return err
+		}
+	} else {
+		rechnker := rechunker.NewRechunker(rechunker.Config{
+			CacheSize: opts.CacheSize * (1 << 20),
+			Pol:       dstRepo.Config().ChunkerPolynomial,
+		})
+		progress := rechunker.NewProgress(
+			term,
+			printer,
+			ui.CalculateProgressInterval(!gopts.Quiet, gopts.JSON, term.CanUpdateStatus()),
+		)
+		if err := rechunkCopy(ctx, srcRepo, dstRepo, selectedSnapshots, rechnker, printer, progress, opts.AddTags.Flatten()); err != nil {
+			return err
+		}
 	}
 
 	return ctx.Err()
@@ -342,3 +392,72 @@ func copySaveSnapshot(ctx context.Context, sn *data.Snapshot, dstRepo restic.Rep
 	printer.P("snapshot %s saved, copied from source snapshot %s", newID.Str(), sn.ID().Str())
 	return nil
 }
+
+func rechunkCopy(ctx context.Context, srcRepo, dstRepo restic.Repository, selectedSnapshots iter.Seq[*data.Snapshot],
+	rechnker *rechunker.Rechunker, printer progress.Printer, progress *rechunker.Progress, tags data.TagList) error {
+	printer.V("Gathering snapshots...")
+	var snapshots []*data.Snapshot
+	var rootTrees restic.IDs
+	debug.Log("Gathering root trees from selectedSnapshots()")
+	selectedSnapshots(func(sn *data.Snapshot) bool {
+		snapshots = append(snapshots, sn)
+		rootTrees = append(rootTrees, *sn.Tree)
+		return true
+	})
+
+	printer.V("Scanning files to process... ")
+	debug.Log("Running Plan()")
+	err := rechnker.Plan(ctx, srcRepo, rootTrees)
+	if err != nil {
+		return err
+	}
+
+	printer.V("\n[Pre-run Summary]")
+	// num_snapshots, num_distinct_files, total_size, num_packs,
+	printer.V("Number of snapshots: %v", len(rootTrees))
+	printer.V("Number of distinct files to process: %v", rechnker.NumFiles())
+	printer.V("  - Total size (including duplicate blobs): %v", ui.FormatBytes(rechnker.TotalSize()))
+	printer.V("Number of packs to download: %v\n\n", rechnker.PackCount())
+
+	debug.Log("Running RechunkData()")
+	progress.Start(rechnker.NumFiles(), rechnker.TotalSize())
+	err = rechnker.Rechunk(ctx, srcRepo, dstRepo, progress)
+	if err != nil {
+		return err
+	}
+	progress.Done()
+
+	printer.V("\nRewriting trees...")
+	err = dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaverWithAsync) error {
+		for _, tree := range rootTrees {
+			debug.Log("Running RewriteTree() for tree ID %v", tree.Str())
+			_, err := rechnker.RewriteTree(ctx, srcRepo, uploader, tree)
+			if err != nil {
+				return err
+			}
+		}
+
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+	printer.V("Rewriting done.\n\n")
+
+	printer.V("Writing snapshots")
+	for _, sn := range snapshots {
+		newTreeID, err := rechnker.GetRewrittenTree(*sn.Tree)
+		if err != nil {
+			return err
+		}
+		sn.Tree = &newTreeID
+		sn.AddTags(tags)
+		if err = copySaveSnapshot(ctx, sn, dstRepo, printer); err != nil {
+			return err
+		}
+	}
+
+	printer.P("Additional data stored to the repository: %v", ui.FormatBytes(rechnker.TotalAddedToDstRepo()))
+
+	return nil
+}
diff --git a/cmd/restic/cmd_rechunk_copy.go b/cmd/restic/cmd_rechunk_copy.go
index de7ca1726..d05babb58 100644
--- a/cmd/restic/cmd_rechunk_copy.go
+++ b/cmd/restic/cmd_rechunk_copy.go
@@ -20,7 +20,7 @@ import (
 // Reference: cmd_copy.go (v0.18.0)
 
 func newRechunkCopyCommand(globalOptions *global.Options) *cobra.Command {
-	var opts RechunkCopyOptions
+	var opts RechunkCopyOptionsObsolete
 	cmd := &cobra.Command{
 		Use:   "rechunk-copy [flags] [snapshotID ...]",
 		Short: "Rechunk-copy snapshots from one repository to another",
@@ -57,8 +57,8 @@ Exit status is 12 if the password is incorrect.
 	return cmd
 }
 
-// RechunkCopyOptions bundles all options for the rechunk-copy command.
-type RechunkCopyOptions struct {
+// RechunkCopyOptionsObsolete bundles all options for the rechunk-copy command.
+type RechunkCopyOptionsObsolete struct {
 	global.SecondaryRepoOptions
 	data.SnapshotFilter
 	RechunkTags       data.TagLists
@@ -66,14 +66,14 @@ type RechunkCopyOptions struct {
 	isIntegrationTest bool // skip check for RESTIC_FEATURES=rechunk-copy when integration test
 }
 
-func (opts *RechunkCopyOptions) AddFlags(f *pflag.FlagSet) {
+func (opts *RechunkCopyOptionsObsolete) AddFlags(f *pflag.FlagSet) {
 	opts.SecondaryRepoOptions.AddFlags(f, "destination", "to copy snapshots from")
 	initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
 	f.Var(&opts.RechunkTags, "rechunk-tag", "add `tags` for the copied snapshots in the format `tag[,tag,...]` (can be specified multiple times)")
 	f.IntVar(&opts.CacheSize, "cache-size", 4096, "in-memory blob cache size in MiBs (0 to disable)")
 }
 
-func runRechunkCopy(ctx context.Context, opts RechunkCopyOptions, gopts global.Options, args []string, term ui.Terminal) error {
+func runRechunkCopy(ctx context.Context, opts RechunkCopyOptionsObsolete, gopts global.Options, args []string, term ui.Terminal) error {
 	if !feature.Flag.Enabled(feature.RechunkCopy) && !opts.isIntegrationTest {
 		return errors.Fatal("rechunk-copy feature flag is not set. Currently, rechunk-copy is alpha feature (disabled by default).")
 	}
diff --git a/cmd/restic/cmd_rechunk_copy_integration_test.go b/cmd/restic/cmd_rechunk_copy_integration_test.go
index 9fd7db548..58068fd6a 100644
--- a/cmd/restic/cmd_rechunk_copy_integration_test.go
+++ b/cmd/restic/cmd_rechunk_copy_integration_test.go
@@ -17,7 +17,7 @@ func testRunRechunkCopy(t testing.TB, srcGopts global.Options, dstGopts global.O
 	gopts.Repo = dstGopts.Repo
 	gopts.Password = dstGopts.Password
 	gopts.InsecureNoPassword = dstGopts.InsecureNoPassword
-	rechunkCopyOpts := RechunkCopyOptions{
+	rechunkCopyOpts := RechunkCopyOptionsObsolete{
 		SecondaryRepoOptions: global.SecondaryRepoOptions{
 			Repo:               srcGopts.Repo,
 			Password:           srcGopts.Password,
diff --git a/cmd/restic/main.go b/cmd/restic/main.go
index c9b004402..6ed2811f3 100644
--- a/cmd/restic/main.go
+++ b/cmd/restic/main.go
@@ -94,7 +94,6 @@ The full documentation can be found at https://restic.readthedocs.io/ .
 		newOptionsCommand(globalOptions),
 		newPruneCommand(globalOptions),
 		newRebuildIndexCommand(globalOptions),
-		newRechunkCopyCommand(globalOptions),
 		newRecoverCommand(globalOptions),
 		newRepairCommand(globalOptions),
 		newRestoreCommand(globalOptions),

From 721425414db4afe7876a1a0849fb1a2317f95eac Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Sat, 21 Feb 2026 17:52:27 +0900
Subject: [PATCH 16/34] Misc(Rechunker): Update comments and names

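Minor changes to comments and identifier names for readability.
Also return an error from the blob cache downloader when no more blobs can
be evicted to make room for a new blob.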
---
 internal/rechunker/blob_cache.go |  5 ++++-
 internal/rechunker/rechunker.go  | 11 ++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/internal/rechunker/blob_cache.go b/internal/rechunker/blob_cache.go
index 4eb77b974..e402df636 100644
--- a/internal/rechunker/blob_cache.go
+++ b/internal/rechunker/blob_cache.go
@@ -144,10 +144,13 @@ func (c *BlobCache) startDownloaders(ctx context.Context, numDownloaders int,
 				delete(c.waitList, packID)
 				for id, data := range blobs {
 					size := cap(data) + overhead
-					for size > c.free {
+					for size > c.free { // evict old blobs if there is not enough free space
 						id, _, ok := c.c.RemoveOldest()
 						if ok {
 							evicted = append(evicted, id)
+						} else {
+							defer c.mu.Unlock()
+							return fmt.Errorf("not enough cache size to store a blob; needs at least %d bytes, but has only %d bytes", size, c.free)
 						}
 					}
 					c.c.Add(id, data)
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index c0f66c314..32c0a54ef 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -60,7 +60,7 @@ func (rc *Rechunker) reset() {
 	rc.rechunkReady = false
 }
 
-func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTrees []restic.ID) error {
+func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTrees restic.IDs) error {
 	rc.reset()
 
 	visitedFiles := restic.IDSet{}
@@ -100,7 +100,7 @@ func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTr
 func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees restic.IDs, visitedFiles restic.IDSet, visitedTrees restic.IDSet) (filesList []*ChunkedFile, totalSize uint64, err error) {
 	mu := sync.Mutex{}
 
-	// Stream through all subtrees in target snapshots and gather all distinct file Contents
+	// Stream through all subtrees in target rootTrees and gather all distinct file Contents
 	err = data.StreamTrees(ctx, repo, rootTrees, nil, func(id restic.ID) bool {
 		visited := visitedTrees.Has(id)
 		visitedTrees.Insert(id)
@@ -335,9 +335,10 @@ func (rc *Rechunker) runWorkers(ctx context.Context, wg *errgroup.Group, numWork
 	}
 }
 
-type saverType func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error)
+// wrapper type for BlobSaver where you can define custom SaveBlob()
+type wrappedBlobSaver func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error)
 
-func (s saverType) SaveBlob(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
+func (s wrappedBlobSaver) SaveBlob(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
 	return s(ctx, tpe, buf, id, storeDuplicate)
 }
 
@@ -349,7 +350,7 @@ func (rc *Rechunker) RewriteTree(ctx context.Context, srcRepo restic.BlobLoader,
 	}
 
 	// wrap dstRepo so that total uploaded tree blobs size can be tracked
-	saver := saverType(func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
+	saver := wrappedBlobSaver(func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
 		newID, known, sizeInRepo, err = dstRepo.SaveBlob(ctx, tpe, buf, id, storeDuplicate)
 		if err != nil {
 			return

From 4d7c03e2b597ae6053735e871200444fe39832e0 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Sun, 22 Feb 2026 21:09:52 +0900
Subject: [PATCH 17/34] Refactor: Helper functions

Change helper function signatures for rechunker, dispatcher and eventTracker
Improve error handling logic for AdvanceCursor
---
 internal/rechunker/dispatcher.go |  6 ++---
 internal/rechunker/rechunker.go  | 45 ++++++++++++++++++++------------
 internal/rechunker/worker.go     | 13 +++++----
 3 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/internal/rechunker/dispatcher.go b/internal/rechunker/dispatcher.go
index d697f3295..f732d9e0b 100644
--- a/internal/rechunker/dispatcher.go
+++ b/internal/rechunker/dispatcher.go
@@ -78,9 +78,9 @@ func (d *Dispatcher) NextPriority(ctx context.Context) (*ChunkedFile, bool, erro
 	return file, from != 0, err
 }
 
-func (d *Dispatcher) PushPriority(files []*ChunkedFile) bool {
+func (d *Dispatcher) PushPriority(files []*ChunkedFile) {
 	if d.priority == nil {
-		return false
+		return
 	}
 
 	d.mu.Lock()
@@ -93,8 +93,6 @@ func (d *Dispatcher) PushPriority(files []*ChunkedFile) bool {
 	case d.push <- struct{}{}:
 	default:
 	}
-
-	return true
 }
 
 func (d *Dispatcher) popPriority() []*ChunkedFile {
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 32c0a54ef..ddcb95719 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -429,25 +429,34 @@ type Cursor struct {
 	Offset  uint
 }
 
-func (idx *Index) AdvanceCursor(c Cursor, bytesProcessed uint) Cursor {
+func (idx *Index) AdvanceCursor(c Cursor, numBytes uint) (Cursor, error) {
 	if idx == nil {
-		panic("call from nil index")
+		return Cursor{}, fmt.Errorf("call from nil index")
 	}
 
 	for c.BlobIdx < len(c.blobs) {
-		r := idx.BlobSize[c.blobs[c.BlobIdx]] - c.Offset
+		blobSize, ok := idx.BlobSize[c.blobs[c.BlobIdx]]
+		if !ok {
+			return Cursor{}, fmt.Errorf("blob %v not in the index", c.blobs[c.BlobIdx].Str())
+		}
+		r := blobSize - c.Offset
 
-		if bytesProcessed < r {
-			c.Offset += bytesProcessed
+		if numBytes < r {
+			c.Offset += numBytes
+			numBytes = 0
 			break
 		}
 
-		bytesProcessed -= r
+		numBytes -= r
 		c.BlobIdx++
 		c.Offset = 0
 	}
 
-	return c
+	if numBytes != 0 {
+		return Cursor{}, fmt.Errorf("cursor out of range; %d bytes over end position", numBytes)
+	}
+
+	return c, nil
 }
 
 type ChunkedFile struct {
@@ -465,14 +474,14 @@ type eventTracker struct {
 
 	remainingBlobNeeds map[restic.ID]int // blobID -> remaining blob needs
 
-	priorityCB     func(files []*ChunkedFile) bool
+	priorityCB     func(files []*ChunkedFile)
 	obsoleteBlobCB func(ids restic.IDs)
 }
 
 func (t *eventTracker) BlobReady(ids restic.IDs) {
 	// when a new blob is ready, files containing that blob as their prefix
 	// has their blobsToPrepare decreased by one.
-	// The list of files whose blobs are all prepared is returned.
+	// The list of files whose blobs are all prepared is passed to priorityCB.
 
 	if t.priorityCB == nil {
 		// if there is no callback, it is of no meaning to track the state
@@ -501,7 +510,7 @@ func (t *eventTracker) BlobReady(ids restic.IDs) {
 	}
 
 	if t.priorityCB != nil {
-		_ = t.priorityCB(readyFiles)
+		t.priorityCB(readyFiles)
 	}
 
 	// debugStats: trace blob load count
@@ -537,16 +546,20 @@ func (t *eventTracker) BlobUnready(ids restic.IDs) {
 	t.mu.Unlock()
 }
 
-func (t *eventTracker) ReadProgress(cursor Cursor, bytesProcessed uint) Cursor {
-	start, end := cursor, t.idx.AdvanceCursor(cursor, bytesProcessed)
+func (t *eventTracker) ReadProgress(cursor Cursor, bytesProcessed uint) (Cursor, error) {
+	start := cursor
+	end, err := t.idx.AdvanceCursor(cursor, bytesProcessed)
+	if err != nil {
+		return Cursor{}, err
+	}
 
 	if t.obsoleteBlobCB == nil {
 		// if there is no callback, it is of no meaning to track the state
-		return end
+		return end, nil
 	}
 
 	if start.BlobIdx == end.BlobIdx { // nothing to do
-		return end
+		return end, nil
 	}
 
 	blobs := cursor.blobs[start.BlobIdx:end.BlobIdx]
@@ -561,11 +574,11 @@ func (t *eventTracker) ReadProgress(cursor Cursor, bytesProcessed uint) Cursor {
 	t.mu.Unlock()
 
 	if len(obsolete) == 0 {
-		return end
+		return end, nil
 	}
 
 	if t.obsoleteBlobCB != nil {
 		t.obsoleteBlobCB(obsolete)
 	}
-	return end
+	return end, nil
 }
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
index 281b27009..d251f464a 100644
--- a/internal/rechunker/worker.go
+++ b/internal/rechunker/worker.go
@@ -22,12 +22,12 @@ type Worker struct {
 	downloader restic.BlobLoader
 	uploader   restic.BlobSaver
 
-	readProgressCallback func(cursor Cursor, bytesProcessed uint) Cursor
+	cursorProgressor func(cursor Cursor, bytesProcessed uint) (Cursor, error)
 }
 
 func NewWorker(pol chunker.Pol, downloader restic.BlobLoader, uploader restic.BlobSaver,
 	bufferPool *BufferPool,
-	onReadCallback func(Cursor, uint) Cursor,
+	cursorProgressor func(Cursor, uint) (Cursor, error),
 ) *Worker {
 	return &Worker{
 		pool: bufferPool,
@@ -37,7 +37,7 @@ func NewWorker(pol chunker.Pol, downloader restic.BlobLoader, uploader restic.Bl
 		downloader: downloader,
 		uploader:   uploader,
 
-		readProgressCallback: onReadCallback,
+		cursorProgressor: cursorProgressor,
 	}
 }
 
@@ -94,8 +94,11 @@ func (w *Worker) runReader(ctx context.Context, wg *errgroup.Group, srcBlobs res
 			}
 
 			// if onProgress callback is given, run it
-			if w.readProgressCallback != nil {
-				cursor = w.readProgressCallback(cursor, c.Length)
+			if w.cursorProgressor != nil {
+				cursor, err = w.cursorProgressor(cursor, c.Length)
+				if err != nil {
+					return err
+				}
 			}
 
 			// send chunk to writer

From 02cb7bb1a5894663f041a9034d15049900b12f42 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Thu, 26 Feb 2026 16:19:35 +0900
Subject: [PATCH 18/34] Test: Refactor rechunk-copy integration test

Refactor rechunk-copy integration test from cmd_rechunk_copy_integration_test into cmd_copy_integration_test
---
 cmd/restic/cmd_copy_integration_test.go       |  65 +++++
 cmd/restic/cmd_rechunk_copy.go                | 228 ------------------
 .../cmd_rechunk_copy_integration_test.go      | 124 ----------
 3 files changed, 65 insertions(+), 352 deletions(-)
 delete mode 100644 cmd/restic/cmd_rechunk_copy.go
 delete mode 100644 cmd/restic/cmd_rechunk_copy_integration_test.go

diff --git a/cmd/restic/cmd_copy_integration_test.go b/cmd/restic/cmd_copy_integration_test.go
index 6105acfe4..b806a3e2d 100644
--- a/cmd/restic/cmd_copy_integration_test.go
+++ b/cmd/restic/cmd_copy_integration_test.go
@@ -30,11 +30,35 @@ func testRunCopy(t testing.TB, srcGopts global.Options, dstGopts global.Options)
 	}))
 }
 
+func testRunRechunkCopy(t testing.TB, srcGopts global.Options, dstGopts global.Options) {
+	gopts := srcGopts
+	gopts.Repo = dstGopts.Repo // the primary repository is the copy destination
+	gopts.Password = dstGopts.Password
+	gopts.InsecureNoPassword = dstGopts.InsecureNoPassword
+	copyOpts := CopyOptions{
+		SecondaryRepoOptions: global.SecondaryRepoOptions{
+			Repo:               srcGopts.Repo, // the source is passed as the secondary repository
+			Password:           srcGopts.Password,
+			InsecureNoPassword: srcGopts.InsecureNoPassword,
+		},
+		RechunkCopyOptions: RechunkCopyOptions{
+			Rechunk:           true,
+			isIntegrationTest: true,
+		},
+	}
+
+	rtest.OK(t, withTermStatus(t, gopts, func(ctx context.Context, gopts global.Options) error {
+		return runCopy(context.TODO(), copyOpts, gopts, nil, gopts.Term)
+	}))
+}
+
 func TestCopy(t *testing.T) {
 	env, cleanup := withTestEnvironment(t)
 	defer cleanup()
 	env2, cleanup2 := withTestEnvironment(t)
 	defer cleanup2()
+	env3, cleanup3 := withTestEnvironment(t) // test env for rechunk-copy
+	defer cleanup3()
 
 	testSetupBackupData(t, env)
 	opts := BackupOptions{}
@@ -45,9 +69,13 @@ func TestCopy(t *testing.T) {
 
 	testRunInit(t, env2.gopts)
 	testRunCopy(t, env.gopts, env2.gopts)
+
+	testRunInit(t, env3.gopts)
+	testRunRechunkCopy(t, env.gopts, env3.gopts)
 
 	snapshotIDs := testListSnapshots(t, env.gopts, 3)
 	copiedSnapshotIDs := testListSnapshots(t, env2.gopts, 3)
+	rechunkCopiedSnapshotIDs := testListSnapshots(t, env3.gopts, 3)
 
 	// Check that the copies size seems reasonable
 	stat := dirStats(t, env.repo)
@@ -61,6 +89,7 @@ func TestCopy(t *testing.T) {
 
 	// Check integrity of the copy
 	testRunCheck(t, env2.gopts)
+	testRunCheck(t, env3.gopts)
 
 	// Check that the copied snapshots have the same tree contents as the old ones (= identical tree hash)
 	origRestores := make(map[string]struct{})
@@ -84,7 +113,30 @@ func TestCopy(t *testing.T) {
 		rtest.Assert(t, foundMatch, "found no counterpart for snapshot %v", snapshotID)
 	}
 
+	// Check that the rechunk-copied snapshots have the same tree contents as the old ones (= identical tree hash)
+	origRestores2 := make(map[string]struct{})
+	for i, snapshotID := range snapshotIDs {
+		restoredir := filepath.Join(env.base, fmt.Sprintf("restore%d", i))
+		origRestores2[restoredir] = struct{}{}
+		testRunRestore(t, env.gopts, restoredir, snapshotID.String())
+	}
+	for i, snapshotID := range rechunkCopiedSnapshotIDs {
+		restoredir := filepath.Join(env3.base, fmt.Sprintf("restore%d", i))
+		testRunRestore(t, env3.gopts, restoredir, snapshotID.String())
+		foundMatch := false
+		for cmpdir := range origRestores2 {
+			diff := directoriesContentsDiff(t, restoredir, cmpdir)
+			if diff == "" {
+				delete(origRestores2, cmpdir)
+				foundMatch = true
+			}
+		}
+
+		rtest.Assert(t, foundMatch, "found no counterpart for snapshot %v", snapshotID)
+	}
+
 	rtest.Assert(t, len(origRestores) == 0, "found not copied snapshots")
+	rtest.Assert(t, len(origRestores2) == 0, "found not rechunk-copied snapshots")
 
 	// check that snapshots were properly batched while copying
 	_, _, countBlobs := testPackAndBlobCounts(t, env.gopts)
@@ -166,6 +218,8 @@ func TestCopyUnstableJSON(t *testing.T) {
 	defer cleanup()
 	env2, cleanup2 := withTestEnvironment(t)
 	defer cleanup2()
+	env3, cleanup3 := withTestEnvironment(t) // test env for rechunk-copy
+	defer cleanup3()
 
 	// contains a symlink created using `ln -s '../i/'$'\355\246\361''d/samba' broken-symlink`
 	datafile := filepath.Join("testdata", "copy-unstable-json.tar.gz")
@@ -175,6 +229,11 @@ func TestCopyUnstableJSON(t *testing.T) {
 	testRunCopy(t, env.gopts, env2.gopts)
 	testRunCheck(t, env2.gopts)
 	testListSnapshots(t, env2.gopts, 1)
+
+	testRunInit(t, env3.gopts)
+	testRunRechunkCopy(t, env.gopts, env3.gopts) // env3 must exercise the rechunk path, not plain copy
+	testRunCheck(t, env3.gopts)
+	testListSnapshots(t, env3.gopts, 1)
 }
 
 func TestCopyToEmptyPassword(t *testing.T) {
@@ -184,14 +243,20 @@ func TestCopyToEmptyPassword(t *testing.T) {
 	defer cleanup2()
 	env2.gopts.Password = ""
 	env2.gopts.InsecureNoPassword = true
+	env3, cleanup3 := withTestEnvironment(t) // test env for rechunk-copy
+	defer cleanup3()
 
 	testSetupBackupData(t, env)
 	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9")}, BackupOptions{}, env.gopts)
 
 	testRunInit(t, env2.gopts)
 	testRunCopy(t, env.gopts, env2.gopts)
+	testRunInit(t, env3.gopts)
+	testRunRechunkCopy(t, env.gopts, env3.gopts)
 
 	testListSnapshots(t, env.gopts, 1)
 	testListSnapshots(t, env2.gopts, 1)
+	testListSnapshots(t, env3.gopts, 1)
 	testRunCheck(t, env2.gopts)
+	testRunCheck(t, env3.gopts)
 }
diff --git a/cmd/restic/cmd_rechunk_copy.go b/cmd/restic/cmd_rechunk_copy.go
deleted file mode 100644
index d05babb58..000000000
--- a/cmd/restic/cmd_rechunk_copy.go
+++ /dev/null
@@ -1,228 +0,0 @@
-package main
-
-import (
-	"context"
-
-	"github.com/restic/restic/internal/data"
-	"github.com/restic/restic/internal/debug"
-	"github.com/restic/restic/internal/errors"
-	"github.com/restic/restic/internal/feature"
-	"github.com/restic/restic/internal/global"
-	"github.com/restic/restic/internal/rechunker"
-	"github.com/restic/restic/internal/restic"
-	"github.com/restic/restic/internal/ui"
-	"github.com/restic/restic/internal/ui/progress"
-
-	"github.com/spf13/cobra"
-	"github.com/spf13/pflag"
-)
-
-// Reference: cmd_copy.go (v0.18.0)
-
-func newRechunkCopyCommand(globalOptions *global.Options) *cobra.Command {
-	var opts RechunkCopyOptionsObsolete
-	cmd := &cobra.Command{
-		Use:   "rechunk-copy [flags] [snapshotID ...]",
-		Short: "Rechunk-copy snapshots from one repository to another",
-		Long: `
-The "rechunk-copy" command rechunk-copies one or more snapshots from one repository to another.
-
-Data blobs will be rechunked and stored in the destination repo. 
-Tree blobs in the destination repo are also updated to point to the rechunked data blobs, 
-but it does not modify any other metadata.
-
-NOTE: This command has largely different internal mechanism from "copy" command,
-due to restic's content defined chunking (CDC) algorithm. Note that "rechunk-copy"
-could consume significantly more bandwidth during the process compared to "copy", 
-and may also need significantly more time to finish.
-
-EXIT STATUS
-===========
-
-Exit status is 0 if the command was successful.
-Exit status is 1 if there was any error.
-Exit status is 10 if the repository does not exist.
-Exit status is 11 if the repository is already locked.
-Exit status is 12 if the password is incorrect.
-		`,
-		GroupID:           cmdGroupDefault,
-		DisableAutoGenTag: true,
-		RunE: func(cmd *cobra.Command, args []string) error {
-			finalizeSnapshotFilter(&opts.SnapshotFilter)
-			return runRechunkCopy(cmd.Context(), opts, *globalOptions, args, globalOptions.Term)
-		},
-	}
-
-	opts.AddFlags(cmd.Flags())
-	return cmd
-}
-
-// RechunkCopyOptionsObsolete bundles all options for the rechunk-copy command.
-type RechunkCopyOptionsObsolete struct {
-	global.SecondaryRepoOptions
-	data.SnapshotFilter
-	RechunkTags       data.TagLists
-	CacheSize         int
-	isIntegrationTest bool // skip check for RESTIC_FEATURES=rechunk-copy when integration test
-}
-
-func (opts *RechunkCopyOptionsObsolete) AddFlags(f *pflag.FlagSet) {
-	opts.SecondaryRepoOptions.AddFlags(f, "destination", "to copy snapshots from")
-	initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
-	f.Var(&opts.RechunkTags, "rechunk-tag", "add `tags` for the copied snapshots in the format `tag[,tag,...]` (can be specified multiple times)")
-	f.IntVar(&opts.CacheSize, "cache-size", 4096, "in-memory blob cache size in MiBs (0 to disable)")
-}
-
-func runRechunkCopy(ctx context.Context, opts RechunkCopyOptionsObsolete, gopts global.Options, args []string, term ui.Terminal) error {
-	if !feature.Flag.Enabled(feature.RechunkCopy) && !opts.isIntegrationTest {
-		return errors.Fatal("rechunk-copy feature flag is not set. Currently, rechunk-copy is alpha feature (disabled by default).")
-	}
-	if opts.CacheSize != 0 && opts.CacheSize < 100 {
-		return errors.Fatal("blob cache size must be at least 100 MiB")
-	}
-
-	printer := ui.NewProgressPrinter(false, gopts.Verbosity, term)
-	secondaryGopts, isFromRepo, err := opts.SecondaryRepoOptions.FillGlobalOpts(ctx, gopts, "destination")
-	if err != nil {
-		return err
-	}
-	if isFromRepo {
-		// swap global options, if the secondary repo was set via from-repo
-		gopts, secondaryGopts = secondaryGopts, gopts
-	}
-
-	ctx, srcRepo, unlock, err := openWithReadLock(ctx, gopts, gopts.NoLock, printer)
-	if err != nil {
-		return err
-	}
-	defer unlock()
-
-	ctx, dstRepo, unlock, err := openWithAppendLock(ctx, secondaryGopts, false, printer)
-	if err != nil {
-		return err
-	}
-	defer unlock()
-
-	if srcRepo.Config().ChunkerPolynomial == dstRepo.Config().ChunkerPolynomial {
-		return errors.Fatal("source repo and destination repo have same chunker polynomials; use `restic copy` instead")
-	}
-
-	srcSnapshotLister, err := restic.MemorizeList(ctx, srcRepo, restic.SnapshotFile)
-	if err != nil {
-		return err
-	}
-
-	debug.Log("Loading source index")
-	if err := srcRepo.LoadIndex(ctx, printer); err != nil {
-		return err
-	}
-	debug.Log("Loading destination index")
-	if err := dstRepo.LoadIndex(ctx, printer); err != nil {
-		return err
-	}
-
-	debug.Log("Running NewRechunker()")
-	rechnker := rechunker.NewRechunker(rechunker.Config{
-		CacheSize: opts.CacheSize * (1 << 20),
-		Pol:       dstRepo.Config().ChunkerPolynomial,
-	})
-	rootTrees := restic.IDs{}
-
-	// gather all root trees of snapshots for rechunking
-	debug.Log("Gathering root trees of target snapshots")
-	for sn := range FindFilteredSnapshots(ctx, srcSnapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) {
-		rootTrees = append(rootTrees, *sn.Tree)
-	}
-
-	// run rechunk process
-	debug.Log("Running runRechunk()")
-	progress := rechunker.NewProgress(
-		term,
-		printer,
-		ui.CalculateProgressInterval(!gopts.Quiet, gopts.JSON, term.CanUpdateStatus()),
-	)
-	if err = runRechunk(ctx, srcRepo, rootTrees, dstRepo, rechnker, opts.CacheSize*(1<<20), printer, progress); err != nil {
-		return err
-	}
-
-	// rewrite trees
-	printer.P("Rewriting trees...\n")
-	err = dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaverWithAsync) error {
-		for sn := range FindFilteredSnapshots(ctx, srcSnapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) {
-			debug.Log("Running RewriteTree() for tree ID %v", sn.Tree.Str())
-			_, err := rechnker.RewriteTree(ctx, srcRepo, uploader, *sn.Tree)
-			if err != nil {
-				return err
-			}
-		}
-
-		return nil
-	})
-	if err != nil {
-		return err
-	}
-	printer.V("Rewriting done.\n\n")
-
-	// write snapshots
-	debug.Log("Writing snapshots")
-	for sn := range FindFilteredSnapshots(ctx, srcSnapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) {
-		sn.Parent = nil // Parent does not have relevance in the new repo.
-		// Use Original as a persistent snapshot ID
-		if sn.Original == nil {
-			sn.Original = sn.ID()
-		}
-
-		newTreeID, err := rechnker.GetRewrittenTree(*sn.Tree)
-		if err != nil {
-			return err
-		}
-		debug.Log("Snapshot %v: Original root tree %v is substituted with new %v", sn.ID().Str(), sn.Tree.Str(), newTreeID.Str())
-		// change Tree field to new one
-		sn.Tree = &newTreeID
-		// add tags if provided by user
-		sn.AddTags(opts.RechunkTags.Flatten())
-		newID, err := data.SaveSnapshot(ctx, dstRepo, sn)
-		if err != nil {
-			return err
-		}
-		debug.Log("Snapshot %v (src repo) is rechunk-copied to snapshot %v (dst repo)", sn.ID().Str(), newID.Str())
-		printer.P("snapshot %s saved\n", newID.Str())
-	}
-
-	// summary
-	printer.V("\n[Post-run Summary]")
-	printer.V("Number of distinct files processed: %v", rechnker.NumFiles())
-	printer.V("  - Total size processed (including duplicate blobs): %v", ui.FormatBytes(rechnker.TotalSize()))
-	printer.P("Additional data stored to the repository: %v", ui.FormatBytes(rechnker.TotalAddedToDstRepo()))
-
-	return ctx.Err()
-}
-
-func runRechunk(ctx context.Context, srcRepo restic.Repository, roots restic.IDs, dstRepo restic.Repository, rechnker *rechunker.Rechunker, cacheSize int, printer progress.Printer, progress *rechunker.Progress) error {
-	printer.V("Planning rechunk...\n")
-	debug.Log("Running Plan()")
-	err := rechnker.Plan(ctx, srcRepo, roots)
-	if err != nil {
-		return err
-	}
-	printer.V("Planning done.")
-
-	printer.V("\n[Pre-run Summary]")
-	// num_snapshots, num_distinct_files, total_size, num_packs,
-	printer.V("Number of snapshots: %v", len(roots))
-	printer.V("Number of distinct files to process: %v", rechnker.NumFiles())
-	printer.V("  - Total size (including duplicate blobs): %v", ui.FormatBytes(rechnker.TotalSize()))
-	printer.V("Number of packs to download: %v\n\n", rechnker.PackCount())
-
-	debug.Log("Running RechunkData()")
-	progress.Start(rechnker.NumFiles(), rechnker.TotalSize())
-	err = rechnker.Rechunk(ctx, srcRepo, dstRepo, progress)
-	if err != nil {
-		return err
-	}
-	progress.Done()
-
-	printer.V("Rechunking done.\n\n")
-
-	return nil
-}
diff --git a/cmd/restic/cmd_rechunk_copy_integration_test.go b/cmd/restic/cmd_rechunk_copy_integration_test.go
deleted file mode 100644
index 58068fd6a..000000000
--- a/cmd/restic/cmd_rechunk_copy_integration_test.go
+++ /dev/null
@@ -1,124 +0,0 @@
-package main
-
-import (
-	"context"
-	"fmt"
-	"path/filepath"
-	"testing"
-
-	"github.com/restic/restic/internal/global"
-	rtest "github.com/restic/restic/internal/test"
-)
-
-// Reference: cmd_copy_integration_test.go (v0.18.0)
-
-func testRunRechunkCopy(t testing.TB, srcGopts global.Options, dstGopts global.Options) {
-	gopts := srcGopts
-	gopts.Repo = dstGopts.Repo
-	gopts.Password = dstGopts.Password
-	gopts.InsecureNoPassword = dstGopts.InsecureNoPassword
-	rechunkCopyOpts := RechunkCopyOptionsObsolete{
-		SecondaryRepoOptions: global.SecondaryRepoOptions{
-			Repo:               srcGopts.Repo,
-			Password:           srcGopts.Password,
-			InsecureNoPassword: srcGopts.InsecureNoPassword,
-		},
-		isIntegrationTest: true,
-	}
-
-	rtest.OK(t, withTermStatus(t, gopts, func(ctx context.Context, gopts global.Options) error {
-		return runRechunkCopy(context.TODO(), rechunkCopyOpts, gopts, nil, gopts.Term)
-	}))
-}
-
-func TestRechunkCopy(t *testing.T) {
-	env, cleanup := withTestEnvironment(t)
-	defer cleanup()
-	env2, cleanup2 := withTestEnvironment(t)
-	defer cleanup2()
-
-	testSetupBackupData(t, env)
-	opts := BackupOptions{}
-	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9")}, opts, env.gopts)
-	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "2")}, opts, env.gopts)
-	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "3")}, opts, env.gopts)
-	testRunCheck(t, env.gopts)
-
-	testRunInit(t, env2.gopts)
-	testRunRechunkCopy(t, env.gopts, env2.gopts)
-
-	snapshotIDs := testListSnapshots(t, env.gopts, 3)
-	copiedSnapshotIDs := testListSnapshots(t, env2.gopts, 3)
-
-	// Check that the copies size seems reasonable
-	stat := dirStats(t, env.repo)
-	stat2 := dirStats(t, env2.repo)
-	sizeDiff := int64(stat.size) - int64(stat2.size)
-	if sizeDiff < 0 {
-		sizeDiff = -sizeDiff
-	}
-	rtest.Assert(t, sizeDiff < int64(stat.size)/50, "expected less than 2%% size difference: %v vs. %v",
-		stat.size, stat2.size)
-
-	// Check integrity of the copy
-	testRunCheck(t, env2.gopts)
-
-	// Check that the copied snapshots have the same tree contents as the old ones (= identical tree hash)
-	origRestores := make(map[string]struct{})
-	for i, snapshotID := range snapshotIDs {
-		restoredir := filepath.Join(env.base, fmt.Sprintf("restore%d", i))
-		origRestores[restoredir] = struct{}{}
-		testRunRestore(t, env.gopts, restoredir, snapshotID.String())
-	}
-	for i, snapshotID := range copiedSnapshotIDs {
-		restoredir := filepath.Join(env2.base, fmt.Sprintf("restore%d", i))
-		testRunRestore(t, env2.gopts, restoredir, snapshotID.String())
-		foundMatch := false
-		for cmpdir := range origRestores {
-			diff := directoriesContentsDiff(t, restoredir, cmpdir)
-			if diff == "" {
-				delete(origRestores, cmpdir)
-				foundMatch = true
-			}
-		}
-
-		rtest.Assert(t, foundMatch, "found no counterpart for snapshot %v", snapshotID)
-	}
-
-	rtest.Assert(t, len(origRestores) == 0, "found not copied snapshots")
-}
-
-func TestRechunkCopyUnstableJSON(t *testing.T) {
-	env, cleanup := withTestEnvironment(t)
-	defer cleanup()
-	env2, cleanup2 := withTestEnvironment(t)
-	defer cleanup2()
-
-	// contains a symlink created using `ln -s '../i/'$'\355\246\361''d/samba' broken-symlink`
-	datafile := filepath.Join("testdata", "copy-unstable-json.tar.gz")
-	rtest.SetupTarTestFixture(t, env.base, datafile)
-
-	testRunInit(t, env2.gopts)
-	testRunCopy(t, env.gopts, env2.gopts)
-	testRunCheck(t, env2.gopts)
-	testListSnapshots(t, env2.gopts, 1)
-}
-
-func TestRechunkCopyToEmptyPassword(t *testing.T) {
-	env, cleanup := withTestEnvironment(t)
-	defer cleanup()
-	env2, cleanup2 := withTestEnvironment(t)
-	defer cleanup2()
-	env2.gopts.Password = ""
-	env2.gopts.InsecureNoPassword = true
-
-	testSetupBackupData(t, env)
-	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9")}, BackupOptions{}, env.gopts)
-
-	testRunInit(t, env2.gopts)
-	testRunCopy(t, env.gopts, env2.gopts)
-
-	testListSnapshots(t, env.gopts, 1)
-	testListSnapshots(t, env2.gopts, 1)
-	testRunCheck(t, env2.gopts)
-}

From 7c63ff2db5e571102f20eec3132dbaa7967e09da Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Thu, 26 Feb 2026 16:38:14 +0900
Subject: [PATCH 19/34] Docs: Update rechunk copy documentation

Update changelog and user docs regarding rechunk copy
---
 changelog/unreleased/issue-5473 | 15 +++++++-------
 doc/045_working_with_repos.rst  | 36 +++++++++++++++++----------------
 2 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/changelog/unreleased/issue-5473 b/changelog/unreleased/issue-5473
index 12444d23a..1d79ae079 100644
--- a/changelog/unreleased/issue-5473
+++ b/changelog/unreleased/issue-5473
@@ -1,4 +1,4 @@
-Enhancement: Add rechunk-copy feature
+Enhancement: Add rechunk copy feature
 
 Restic didn't rechunk data blobs when copying snapshots between repositories
 with different chunker parameters. Instead, it copied the blobs as-is,
@@ -7,18 +7,19 @@ To mitigate this issue, users had to manually restore the snapshots somewhere,
 and then backup them again to the new repository. This workaround was 
 inefficient, prone to tamper with the original metadata, and bothersome.
 
-It now supports `rechunk-copy` command, in which the data are rechunked while copying.
+The `copy` command now supports a `--rechunk` option, with which the data are rechunked while copying.
 Currently, it does not automatically skip previously copied snapshots. Also,
 it does not remember which files had been rechunked in previous runs, so it will
 try to rework on every file again in the next run (though it would not add new
 data blobs to the repository in that case; this is what deduplication is for).
-Therefore, `rechunk-copy` is adequate for one-time migration between repositories.
+Therefore, `copy --rechunk` is currently best suited for one-time migration between repositories.
 For incremental copy scenarios, `copy` between repositories with same chunker 
-parameters is recommended.
+parameter is ideal.
 
-`rechunk-copy` shares the same command interface with `copy`, with an exception
-that `rechunk-copy` additionally supports a `--rechunk-tag` option to add a
-tag to all rechunk-copied snapshots in the destination repository.
+`copy --rechunk` has a few additional options. The `--force` option forces a rechunk copy
+even when the source and destination repositories use the same chunker parameters,
+and the `--cache-size` option sets the in-memory blob cache size used during the rechunk copy.
+Finally, the `--add-tag` option adds tags to the copied snapshots in the destination repository.
 
 https://github.com/restic/restic/issues/5473
 https://forum.restic.net/t/is-it-possible-to-re-chunk-after-a-restic-copy/6072
diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst
index f0d05bd1f..79e3e70ee 100644
--- a/doc/045_working_with_repos.rst
+++ b/doc/045_working_with_repos.rst
@@ -285,7 +285,7 @@ Ensuring deduplication for copied snapshots
 Even though the copy command can transfer snapshots between arbitrary repositories,
 deduplication between snapshots from the source and destination repository may not work 
 with plain copy command. There are two methods to ensure proper deduplication between 
-repositories. First one is to use rechunk-copy command described below. Second one is 
+repositories. The first is to use the ``--rechunk`` option described below. The second is
 to make both repositories use the same parameters for splitting large files into smaller
 chunks, which requires additional setup steps. With
 the same parameters restic will for both repositories split identical files into
@@ -302,33 +302,35 @@ using the same chunker parameters as the source repository:
 
 Note that it is not possible to change the chunker parameters of an existing repository.
 
-Rechunk-copy between repositories with different chunker parameters
+Rechunk copy between repositories with different chunker parameters
 -------------------------------------------------------------------
 
-The rechunk-copy command re-chunks files with destination repository's chunker parameters
-when copying snapshots. The command-line options are compatible with plain copy command,
-with two additions. First is ``--rechunk-tag``, which specifies a tag added to rechunk-copied
-snapshots. Second is ``--cache-size``. The rechunk-copy command uses in-memory cache for
-rechunking, whose default size is 4096 MiB. You can customize the in-memory cache size, 
-fitting your RAM size and desired memory usage. Note that a small cache size will lead to
+The ``copy --rechunk`` command re-chunks files with the destination repository's chunker parameters
+when copying snapshots. A few command-line options can be used together with ``--rechunk``.
+The first is ``--force``, which forces files to be rechunked even when the source and
+destination repositories use the same chunker parameters.
+The second is ``--cache-size``. Rechunk copy uses an in-memory cache for
+rechunking, whose default size is 4096 MiB. You can customize the cache size
+to fit your system's RAM size and desired memory usage. Note that a small cache size will lead to
 frequent re-download of packs, which is especially undesirable for remote source repositories.
+The third is ``--add-tag``, which adds tags to the copied snapshots in the destination repository.
 
 The below commands are all valid ones.
 
 .. code-block:: console
 
-    $ restic -r /srv/dst-repo rechunk-copy --from-repo /srv/src-repo
-    $ restic -r /srv/dst-repo rechunk-copy --from-repo /srv/src-repo --host luigi --path /srv/data --tag foo,bar
-    $ restic -r /srv/dst-repo rechunk-copy --rechunk-tag my-rechunk --from-repo /srv/src-repo 34c9e85f 2714b65a
-    $ restic -r /srv/dst-repo rechunk-copy --cache-size 8192 --from-repo /srv/src-repo # set cache size to 8192 MiB
+    $ restic -r /srv/dst-repo copy --rechunk --from-repo /srv/src-repo
+    $ restic -r /srv/dst-repo copy --rechunk --from-repo /srv/src-repo --host luigi --path /srv/data --tag foo,bar
+    $ restic -r /srv/dst-repo copy --rechunk --add-tag my-rechunk --from-repo /srv/src-repo 34c9e85f 2714b65a
+    $ restic -r /srv/dst-repo copy --rechunk --cache-size 8192 --from-repo /srv/src-repo # set cache size to 8192 MiB
 
-.. note:: Although the rechunk-copy command can provide on-demand deduplication between 
+.. note:: Although the ``copy --rechunk`` command can provide on-demand deduplication between 
     repositories with different chunker parameters, there are a few disadvantages compared 
-    to the plain copy command. The rechunk-copy command is slower because it re-assembles 
+    to the plain copy. The rechunk copy is slower because it re-assembles 
     all files and does the same all computations which are done during backup. Also, as of now, 
-    the rechunk-copy command does not support skipping redundant snapshots, so you should 
-    manually designate the exact snapshots to copy. Therefore, it is recommended to use 
-    repositories with the same chunker parameter if you plan to copy regularly between repositories.
+    the rechunk copy does not support skipping redundant snapshots, so you should 
+    manually specify the exact snapshots to copy. Therefore, it is recommended to use 
+    repositories with the same chunker parameter if you plan to copy regularly between your repositories.
 
 
 Removing files from snapshots

From 3e1fa9cda035361adace3b8c65c200843a28594f Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Sun, 15 Mar 2026 16:48:18 +0900
Subject: [PATCH 20/34] Refactor: Rechunker

Integrate Dispatcher and eventTracker in rechunker package into Scheduler
---
 internal/rechunker/dispatcher.go     | 209 ----------------
 internal/rechunker/rechunker.go      | 233 +++---------------
 internal/rechunker/rechunker_test.go |   2 +-
 internal/rechunker/scheduler.go      | 346 +++++++++++++++++++++++++++
 internal/rechunker/worker.go         |   2 +-
 5 files changed, 387 insertions(+), 405 deletions(-)
 delete mode 100644 internal/rechunker/dispatcher.go
 create mode 100644 internal/rechunker/scheduler.go

diff --git a/internal/rechunker/dispatcher.go b/internal/rechunker/dispatcher.go
deleted file mode 100644
index f732d9e0b..000000000
--- a/internal/rechunker/dispatcher.go
+++ /dev/null
@@ -1,209 +0,0 @@
-package rechunker
-
-import (
-	"context"
-	"sync"
-
-	"github.com/restic/restic/internal/debug"
-	"github.com/restic/restic/internal/restic"
-	"golang.org/x/sync/errgroup"
-)
-
-type Dispatcher struct {
-	mu sync.Mutex
-
-	// job dispatch channel to workers
-	regular  <-chan *ChunkedFile
-	priority <-chan *ChunkedFile
-
-	// files list for dispatch
-	regularList  []*ChunkedFile
-	priorityList []*ChunkedFile
-
-	push chan struct{} // priority file notification
-	done chan struct{} // end of regular channel notification
-}
-
-func NewDispatcher(ctx context.Context, files []*ChunkedFile, usePriority bool) *Dispatcher {
-	debug.Log(("Running NewDispatcher()"))
-
-	wg, ctx := errgroup.WithContext(ctx)
-
-	if !usePriority {
-		// this will be a regular dispatcher without priority dispatch
-		d := &Dispatcher{
-			regularList: files,
-			done:        make(chan struct{}),
-		}
-		d.createRegularCh(ctx, wg, nil)
-		return d
-	}
-
-	// below is setup for priority-aware dispatcher
-
-	d := &Dispatcher{
-		regularList: files,
-		push:        make(chan struct{}, 1),
-		done:        make(chan struct{}),
-	}
-
-	set := restic.IDSet{}
-	mu := sync.Mutex{}
-	visited := func(id restic.ID) bool {
-		mu.Lock()
-		visited := set.Has(id)
-		if !visited {
-			set.Insert(id)
-		}
-		mu.Unlock()
-		return visited
-	}
-
-	d.createRegularCh(ctx, wg, visited)
-	d.createPriorityCh(ctx, wg, visited)
-
-	return d
-}
-
-func (d *Dispatcher) Next(ctx context.Context) (*ChunkedFile, bool, error) {
-	file, from, err := PrioritySelect(ctx, d.priority, d.regular)
-	return file, from != 0, err
-}
-
-func (d *Dispatcher) NextPriority(ctx context.Context) (*ChunkedFile, bool, error) {
-	if d.priority == nil {
-		return nil, false, nil
-	}
-	file, from, err := PrioritySelect(ctx, d.priority, nil)
-	return file, from != 0, err
-}
-
-func (d *Dispatcher) PushPriority(files []*ChunkedFile) {
-	if d.priority == nil {
-		return
-	}
-
-	d.mu.Lock()
-	defer d.mu.Unlock()
-
-	d.priorityList = append(d.priorityList, files...)
-
-	// notify push channel
-	select {
-	case d.push <- struct{}{}:
-	default:
-	}
-}
-
-func (d *Dispatcher) popPriority() []*ChunkedFile {
-	d.mu.Lock()
-	defer d.mu.Unlock()
-
-	l := d.priorityList
-	d.priorityList = nil
-
-	return l
-}
-
-func (d *Dispatcher) createRegularCh(ctx context.Context, wg *errgroup.Group, visited func(id restic.ID) bool) {
-	debug.Log("Running dispatcher for regular channel")
-	ch := make(chan *ChunkedFile)
-	wg.Go(func() error {
-		defer close(d.done)
-		defer close(ch)
-
-		for _, file := range d.regularList {
-			// check if the file was visited by another dispatcher;
-			// if it was, skip the file.
-			if visited != nil && visited(file.hashval) {
-				continue
-			}
-
-			select {
-			case <-ctx.Done():
-				return ctx.Err()
-			case ch <- file:
-				debug.Log("Sent file %v through regular channel", file.hashval.Str())
-			}
-		}
-
-		return nil
-	})
-
-	d.regular = ch
-}
-
-func (d *Dispatcher) createPriorityCh(ctx context.Context, wg *errgroup.Group, visited func(id restic.ID) bool) {
-	debug.Log("Running dispatcher for priority channel")
-	ch := make(chan *ChunkedFile)
-	wg.Go(func() error {
-		defer close(ch)
-
-		var list []*ChunkedFile
-		for {
-			if len(list) == 0 {
-				// wait for priority files notification or done signal
-				select {
-				case <-ctx.Done():
-					return ctx.Err()
-				case <-d.push:
-					list = d.popPriority()
-					debug.Log("Detected priority files whose count is %v", len(list))
-					continue
-				case <-d.done:
-					debug.Log("Closing dispatcher for priority channel")
-					return nil
-				}
-			}
-
-			file := list[0]
-			list = list[1:]
-
-			// check if the file was handled by another channel;
-			// if it was, skip the file.
-			if visited != nil && visited(file.hashval) {
-				continue
-			}
-
-			select {
-			case <-ctx.Done():
-				return ctx.Err()
-			case ch <- file:
-				debug.Log("Sent file %v through priority channel", file.hashval.Str())
-			}
-		}
-	})
-
-	d.priority = ch
-}
-
-// PrioritySelect selects from two channels with priority; first channel first.
-func PrioritySelect(ctx context.Context, first <-chan *ChunkedFile, second <-chan *ChunkedFile) (item *ChunkedFile, from int, err error) {
-	// First, try to pull from channel 'first' only. If 'first' is not ready now, try both channels.
-	select {
-	case <-ctx.Done():
-		return nil, 0, ctx.Err()
-	case i, ok := <-first:
-		if ok {
-			item = i
-			from = 1
-		}
-	default:
-		select {
-		case <-ctx.Done():
-			return nil, 0, ctx.Err()
-		case i, ok := <-first:
-			if ok {
-				item = i
-				from = 1
-			}
-		case i, ok := <-second:
-			if ok {
-				item = i
-				from = 2
-			}
-		}
-	}
-
-	return item, from, nil
-}
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index ddcb95719..4bb370134 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -18,9 +18,8 @@ import (
 )
 
 type Rechunker struct {
-	cfg     Config
-	idx     *Index
-	tracker *eventTracker
+	cfg Config
+	idx *Index
 
 	filesList    []*ChunkedFile
 	totalSize    uint64
@@ -37,6 +36,11 @@ type Config struct {
 	Pol       chunker.Pol
 }
 
+type ChunkedFile struct {
+	restic.IDs
+	hashval restic.ID
+}
+
 // Index is immutable after Plan() returns.
 type Index struct {
 	BlobSize    map[restic.ID]uint          // blob ID -> blob size
@@ -54,7 +58,6 @@ func NewRechunker(cfg Config) *Rechunker {
 
 func (rc *Rechunker) reset() {
 	rc.idx = nil
-	rc.tracker = nil
 
 	rc.filesList = nil
 	rc.rechunkReady = false
@@ -82,7 +85,7 @@ func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTr
 	}
 
 	debug.Log("Building the internal index for use in Rechunk()")
-	rc.idx, rc.tracker, err = createIndex(rc.filesList, srcRepo.LookupBlob)
+	rc.idx, err = createIndex(rc.filesList, srcRepo.LookupBlob)
 	if err != nil {
 		return err
 	}
@@ -144,9 +147,7 @@ func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees resti
 	return filesList, totalSize, nil
 }
 
-var FILE_HEAD_LENGTH = 25
-
-func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id restic.ID) []restic.PackedBlob) (*Index, *eventTracker, error) {
+func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id restic.ID) []restic.PackedBlob) (*Index, error) {
 	// collect blob usage info
 	blobCount := map[restic.ID]int{}
 	for _, file := range filesList {
@@ -167,7 +168,7 @@ func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id
 	for blob := range blobCount {
 		packs := lookupBlob(restic.DataBlob, blob)
 		if len(packs) == 0 {
-			return nil, nil, fmt.Errorf("can't find blob from source repo: %v", blob)
+			return nil, fmt.Errorf("can't find blob from source repo: %v", blob)
 		}
 		pb := packs[0]
 
@@ -182,30 +183,7 @@ func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id
 		PackToBlobs: packToBlobs,
 	}
 
-	// build blob load tracker info.
-	// if blob cache is enabled, Rechunker tracks the number of unprepared
-	// blobs (which are not yet ready in the cache) among first FILE_HEAD_LENGTH
-	// chunks in a file, until all of them are available in the cache.
-	// when all of them are ready, that file is prioritized by the dispatcher.
-	blobsToPrepare := map[restic.ID]int{}             // number of unprepared blobs for head of file
-	filesContaining := map[restic.ID][]*ChunkedFile{} // list of files that contain a blob
-	for _, file := range filesList {
-		prefixLen := min(FILE_HEAD_LENGTH, len(file.IDs))
-		blobSet := restic.NewIDSet(file.IDs[:prefixLen]...)
-		blobsToPrepare[file.hashval] = len(blobSet)
-		for b := range blobSet {
-			filesContaining[b] = append(filesContaining[b], file)
-		}
-	}
-
-	tracker := &eventTracker{
-		idx:                idx,
-		filesContaining:    filesContaining,
-		blobsToPrepare:     blobsToPrepare,
-		remainingBlobNeeds: blobCount,
-	}
-
-	return idx, tracker, nil
+	return idx, nil
 }
 
 type Loader interface {
@@ -227,30 +205,28 @@ func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo Loader, dstRepo restic
 	numDownloaders := numWorkers
 	debug.Log("srcRepo.Connections(): %v", srcRepo.Connections())
 
-	// Phase 1: Setup Infrastructure
+	// set up scheduler
+	scheduler := rc.setupScheduler(ctx)
 
-	// start blob cache
+	// set up blob cache
 	var downloader restic.BlobLoader
 	var cache *BlobCache
 	if rc.cfg.CacheSize > 0 {
-		downloader, cache = rc.setupCache(ctx, srcRepo, numDownloaders)
+		downloader, cache = rc.setupCache(ctx, srcRepo, scheduler, numDownloaders)
 		defer cache.Close()
 	} else {
 		downloader = srcRepo
 	}
 
-	// start dispatcher
-	dispatcher := rc.setupDispatcher(ctx)
-
-	// Phase 2: Run Workers
+	// run rechunk workers
 	bufferPool := NewBufferPool(3 * (numWorkers + 1))
 	err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaverWithAsync) error {
 		debug.Log("Starting uploader")
 		defer debug.Log("Closing uploader")
 
 		wg, ctx := errgroup.WithContext(ctx)
-		rc.runWorkers(ctx, wg, numWorkers, downloader, uploader, dispatcher.Next, bufferPool, p)
-		rc.runWorkers(ctx, wg, 1, downloader, uploader, dispatcher.NextPriority, bufferPool, p)
+		rc.runWorkers(ctx, wg, numWorkers, downloader, uploader, scheduler.Next, scheduler.ReadProgress, bufferPool, p)
+		rc.runWorkers(ctx, wg, 1, downloader, uploader, scheduler.NextPriority, scheduler.ReadProgress, bufferPool, p)
 
 		return wg.Wait()
 	})
@@ -263,38 +239,35 @@ func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo Loader, dstRepo restic
 	return nil
 }
 
-func (rc *Rechunker) setupCache(ctx context.Context, srcRepo PackLoader, numDownloaders int) (repo restic.BlobLoader, cache *BlobCache) {
-	debug.Log("Creating blob cache: cacheSize %v", rc.cfg.CacheSize)
-
-	// wrap srcRepo with cache. Now repo's LoadBlob() method will be transparently mediated by blob cache
-	repo, cache = WrapWithCache(ctx, srcRepo, rc.cfg.CacheSize, numDownloaders, rc.idx, rc.tracker.BlobReady, rc.tracker.BlobUnready)
-
-	// register callback to ignore obsolete blobs
-	rc.tracker.obsoleteBlobCB = cache.Ignore
-
-	return repo, cache
-}
-
-func (rc *Rechunker) setupDispatcher(ctx context.Context) (dispatcher *Dispatcher) {
+func (rc *Rechunker) setupScheduler(ctx context.Context) (scheduler *Scheduler) {
 	debug.Log("Running file dispatcher")
 
 	// If the blob cache is enabled, priority dispatch will be used.
 	// With priority dispatch, (small) files with all their blobs ready in the cache are prioritized.
 	// if the blob cache is disabled, dispatch order simply follows the filesList.
 	if rc.cfg.CacheSize > 0 {
-		dispatcher = NewDispatcher(ctx, rc.filesList, true)
-
-		// register callback to push priority files
-		rc.tracker.priorityCB = dispatcher.PushPriority
+		scheduler = NewScheduler(ctx, rc.filesList, rc.idx, true)
 	} else {
-		dispatcher = NewDispatcher(ctx, rc.filesList, false)
+		scheduler = NewScheduler(ctx, rc.filesList, rc.idx, false)
 	}
-	return dispatcher
+	return scheduler
+}
+
+func (rc *Rechunker) setupCache(ctx context.Context, srcRepo PackLoader, scheduler *Scheduler, numDownloaders int) (repo restic.BlobLoader, cache *BlobCache) {
+	debug.Log("Creating blob cache: cacheSize %v", rc.cfg.CacheSize)
+
+	// wrap srcRepo with cache. Now repo's LoadBlob() method will be transparently mediated by blob cache
+	repo, cache = WrapWithCache(ctx, srcRepo, rc.cfg.CacheSize, numDownloaders, rc.idx, scheduler.BlobReady, scheduler.BlobUnready)
+
+	// register cache.Ignore as scheduler's obsolete blob callback for early cache eviction
+	scheduler.SetObsoleteBlobCallback(cache.Ignore)
+
+	return repo, cache
 }
 
 func (rc *Rechunker) runWorkers(ctx context.Context, wg *errgroup.Group, numWorkers int,
 	downloader restic.BlobLoader, uploader restic.BlobSaver, receiveJob func(context.Context) (*ChunkedFile, bool, error),
-	bufferPool *BufferPool, p *Progress) {
+	cursorProgressor func(Cursor, uint) (Cursor, error), bufferPool *BufferPool, p *Progress) {
 	for range numWorkers {
 		wg.Go(func() error {
 			debug.Log("Starting worker")
@@ -303,7 +276,7 @@ func (rc *Rechunker) runWorkers(ctx context.Context, wg *errgroup.Group, numWork
 				downloader,
 				uploader,
 				bufferPool,
-				rc.tracker.ReadProgress,
+				cursorProgressor,
 			)
 
 			for {
@@ -429,15 +402,11 @@ type Cursor struct {
 	Offset  uint
 }
 
-func (idx *Index) AdvanceCursor(c Cursor, numBytes uint) (Cursor, error) {
-	if idx == nil {
-		return Cursor{}, fmt.Errorf("call from nil index")
-	}
-
+func AdvanceCursor(c Cursor, numBytes uint, blobSizes map[restic.ID]uint) (Cursor, error) {
 	for c.BlobIdx < len(c.blobs) {
-		blobSize, ok := idx.BlobSize[c.blobs[c.BlobIdx]]
+		blobSize, ok := blobSizes[c.blobs[c.BlobIdx]]
 		if !ok {
-			return Cursor{}, fmt.Errorf("blob %v not in the index", c.blobs[c.BlobIdx].Str())
+			return Cursor{}, fmt.Errorf("blob %v not in blobSizes", c.blobs[c.BlobIdx].Str())
 		}
 		r := blobSize - c.Offset
 
@@ -458,127 +427,3 @@ func (idx *Index) AdvanceCursor(c Cursor, numBytes uint) (Cursor, error) {
 
 	return c, nil
 }
-
-type ChunkedFile struct {
-	restic.IDs
-	hashval restic.ID
-}
-
-type eventTracker struct {
-	mu sync.Mutex
-
-	idx *Index
-
-	filesContaining map[restic.ID][]*ChunkedFile // blobID -> files containing that blob
-	blobsToPrepare  map[restic.ID]int            // file hashval -> number of blobs until all blobs ready in the cache
-
-	remainingBlobNeeds map[restic.ID]int // blobID -> remaining blob needs
-
-	priorityCB     func(files []*ChunkedFile)
-	obsoleteBlobCB func(ids restic.IDs)
-}
-
-func (t *eventTracker) BlobReady(ids restic.IDs) {
-	// when a new blob is ready, files containing that blob as their prefix
-	// has their blobsToPrepare decreased by one.
-	// The list of files whose blobs are all prepared is passed to priorityCB.
-
-	if t.priorityCB == nil {
-		// if there is no callback, it is of no meaning to track the state
-		return
-	}
-
-	var readyFiles []*ChunkedFile
-
-	t.mu.Lock()
-	for _, id := range ids {
-		for _, file := range t.filesContaining[id] {
-			n := t.blobsToPrepare[file.hashval]
-			if n > 0 {
-				n--
-				if n == 0 {
-					readyFiles = append(readyFiles, file)
-				}
-				t.blobsToPrepare[file.hashval] = n
-			}
-		}
-	}
-	t.mu.Unlock()
-
-	if len(readyFiles) == 0 {
-		return
-	}
-
-	if t.priorityCB != nil {
-		t.priorityCB(readyFiles)
-	}
-
-	// debugStats: trace blob load count
-	if debugStats != nil {
-		dAdds := map[string]int{}
-		for _, id := range ids {
-			dAdds["load:"+id.String()]++
-		}
-		debugStats.AddMap(dAdds)
-	}
-}
-
-func (t *eventTracker) BlobUnready(ids restic.IDs) {
-	// when a blob is evicted, files containing that blob as their prefix
-	// has their blobsToPrepare increased by one. However, ignore files
-	// once they have reached blobsToPrepare value zero; they are no longer tracked.
-
-	if t.priorityCB == nil {
-		// if there is no callback, it is of no meaning to track progress
-		return
-	}
-
-	t.mu.Lock()
-	for _, id := range ids {
-		filesToUpdate := t.filesContaining[id]
-		for _, file := range filesToUpdate {
-			// files with blobsToPrepare==0 is not tracked
-			if t.blobsToPrepare[file.hashval] > 0 {
-				t.blobsToPrepare[file.hashval]++
-			}
-		}
-	}
-	t.mu.Unlock()
-}
-
-func (t *eventTracker) ReadProgress(cursor Cursor, bytesProcessed uint) (Cursor, error) {
-	start := cursor
-	end, err := t.idx.AdvanceCursor(cursor, bytesProcessed)
-	if err != nil {
-		return Cursor{}, err
-	}
-
-	if t.obsoleteBlobCB == nil {
-		// if there is no callback, it is of no meaning to track the state
-		return end, nil
-	}
-
-	if start.BlobIdx == end.BlobIdx { // nothing to do
-		return end, nil
-	}
-
-	blobs := cursor.blobs[start.BlobIdx:end.BlobIdx]
-	var obsolete restic.IDs
-	t.mu.Lock()
-	for _, b := range blobs {
-		t.remainingBlobNeeds[b]--
-		if t.remainingBlobNeeds[b] == 0 {
-			obsolete = append(obsolete, b)
-		}
-	}
-	t.mu.Unlock()
-
-	if len(obsolete) == 0 {
-		return end, nil
-	}
-
-	if t.obsoleteBlobCB != nil {
-		t.obsoleteBlobCB(obsolete)
-	}
-	return end, nil
-}
diff --git a/internal/rechunker/rechunker_test.go b/internal/rechunker/rechunker_test.go
index 121c10254..2be64a3f1 100644
--- a/internal/rechunker/rechunker_test.go
+++ b/internal/rechunker/rechunker_test.go
@@ -170,7 +170,7 @@ func TestRechunker(t *testing.T) {
 	// manually configure rechunker instead of running Plan(), because we are using mock repo
 	var err error
 	rechunker.filesList = srcFilesList
-	rechunker.idx, rechunker.tracker, err = createIndex(srcFilesList, func(t restic.BlobType, id restic.ID) []restic.PackedBlob {
+	rechunker.idx, err = createIndex(srcFilesList, func(t restic.BlobType, id restic.ID) []restic.PackedBlob {
 		pb := restic.PackedBlob{}
 		pb.ID = id
 		pb.Type = t
diff --git a/internal/rechunker/scheduler.go b/internal/rechunker/scheduler.go
new file mode 100644
index 000000000..d458a4adf
--- /dev/null
+++ b/internal/rechunker/scheduler.go
@@ -0,0 +1,346 @@
+package rechunker
+
+import (
+	"context"
+	"sync"
+
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/restic"
+	"golang.org/x/sync/errgroup"
+)
+
+type Scheduler struct {
+	mu sync.Mutex
+
+	idx *Index
+
+	regularCh  <-chan *ChunkedFile
+	priorityCh <-chan *ChunkedFile
+
+	regularList  []*ChunkedFile
+	priorityList []*ChunkedFile
+
+	filesContaining map[restic.ID][]*ChunkedFile
+	blobsToPrepare  map[restic.ID]int
+
+	remainingBlobNeeds map[restic.ID]int
+
+	obsoleteBlobCB func(ids restic.IDs)
+
+	push chan struct{}
+	done chan struct{}
+}
+
+func NewScheduler(ctx context.Context, files []*ChunkedFile, idx *Index, usePriority bool) *Scheduler {
+	debug.Log(("Running NewScheduler()"))
+
+	wg, ctx := errgroup.WithContext(ctx)
+	filesContaining, blobsToPrepare, remainingBlobNeeds := createSchedulerState(files)
+
+	if !usePriority {
+		s := &Scheduler{
+			idx:                idx,
+			regularList:        files,
+			done:               make(chan struct{}),
+			filesContaining:    filesContaining,
+			blobsToPrepare:     blobsToPrepare,
+			remainingBlobNeeds: remainingBlobNeeds,
+		}
+		s.createRegularCh(ctx, wg, nil)
+		return s
+	}
+
+	s := &Scheduler{
+		idx:                idx,
+		regularList:        files,
+		push:               make(chan struct{}, 1),
+		done:               make(chan struct{}),
+		filesContaining:    filesContaining,
+		blobsToPrepare:     blobsToPrepare,
+		remainingBlobNeeds: remainingBlobNeeds,
+	}
+
+	set := restic.IDSet{}
+	mu := sync.Mutex{}
+	visited := func(id restic.ID) bool {
+		mu.Lock()
+		visited := set.Has(id)
+		if !visited {
+			set.Insert(id)
+		}
+		mu.Unlock()
+		return visited
+	}
+
+	s.createRegularCh(ctx, wg, visited)
+	s.createPriorityCh(ctx, wg, visited)
+
+	return s
+}
+
+const FILE_HEAD_LENGTH = 25
+
+func createSchedulerState(files []*ChunkedFile) (map[restic.ID][]*ChunkedFile, map[restic.ID]int, map[restic.ID]int) {
+	blobCount := map[restic.ID]int{}
+	filesContaining := map[restic.ID][]*ChunkedFile{}
+	blobsToPrepare := map[restic.ID]int{}
+
+	for _, file := range files {
+		prefixLen := min(FILE_HEAD_LENGTH, len(file.IDs))
+		blobSet := restic.NewIDSet(file.IDs[:prefixLen]...)
+		blobsToPrepare[file.hashval] = len(blobSet)
+		for _, blob := range file.IDs {
+			blobCount[blob]++
+		}
+		for b := range blobSet {
+			filesContaining[b] = append(filesContaining[b], file)
+		}
+	}
+
+	return filesContaining, blobsToPrepare, blobCount
+}
+
+func (s *Scheduler) Next(ctx context.Context) (*ChunkedFile, bool, error) {
+	file, from, err := PrioritySelect(ctx, s.priorityCh, s.regularCh)
+	return file, from != 0, err
+}
+
+func (s *Scheduler) NextPriority(ctx context.Context) (*ChunkedFile, bool, error) {
+	if s.priorityCh == nil {
+		return nil, false, nil
+	}
+	file, from, err := PrioritySelect(ctx, s.priorityCh, nil)
+	return file, from != 0, err
+}
+
+func (s *Scheduler) PushPriority(files []*ChunkedFile) {
+	if s.priorityCh == nil {
+		return
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.priorityList = append(s.priorityList, files...)
+
+	select {
+	case s.push <- struct{}{}:
+	default:
+	}
+}
+
+func (s *Scheduler) popPriority() []*ChunkedFile {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	l := s.priorityList
+	s.priorityList = nil
+
+	return l
+}
+
+func (s *Scheduler) createRegularCh(ctx context.Context, wg *errgroup.Group, visited func(id restic.ID) bool) {
+	debug.Log("Running scheduler for regular channel")
+	ch := make(chan *ChunkedFile)
+	wg.Go(func() error {
+		defer close(s.done)
+		defer close(ch)
+
+		for _, file := range s.regularList {
+			if visited != nil && visited(file.hashval) {
+				continue
+			}
+
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case ch <- file:
+				debug.Log("Sent file %v through regular channel", file.hashval.Str())
+			}
+		}
+
+		return nil
+	})
+
+	s.regularCh = ch
+}
+
+func (s *Scheduler) createPriorityCh(ctx context.Context, wg *errgroup.Group, visited func(id restic.ID) bool) {
+	debug.Log("Running scheduler for priority channel")
+	ch := make(chan *ChunkedFile)
+	wg.Go(func() error {
+		defer close(ch)
+
+		var list []*ChunkedFile
+		for {
+			if len(list) == 0 {
+				select {
+				case <-ctx.Done():
+					return ctx.Err()
+				case <-s.push:
+					list = s.popPriority()
+					debug.Log("Detected priority files whose count is %v", len(list))
+					continue
+				case <-s.done:
+					debug.Log("Closing scheduler for priority channel")
+					return nil
+				}
+			}
+
+			file := list[0]
+			list = list[1:]
+
+			if visited != nil && visited(file.hashval) {
+				continue
+			}
+
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case ch <- file:
+				debug.Log("Sent file %v through priority channel", file.hashval.Str())
+			}
+		}
+	})
+
+	s.priorityCh = ch
+}
+
+func (s *Scheduler) BlobReady(ids restic.IDs) {
+	// when a new blob is ready, files containing that blob as their prefix
+	// has their blobsToPrepare decreased by one.
+	// The list of files whose blobs are all prepared is pushed to priority chan.
+
+	if s.priorityCh == nil {
+		// if there is no priority chan, it is of no meaning to track the state
+		return
+	}
+
+	var readyFiles []*ChunkedFile
+
+	s.mu.Lock()
+	for _, id := range ids {
+		for _, file := range s.filesContaining[id] {
+			n := s.blobsToPrepare[file.hashval]
+			if n > 0 {
+				n--
+				if n == 0 {
+					readyFiles = append(readyFiles, file)
+				}
+				s.blobsToPrepare[file.hashval] = n
+			}
+		}
+	}
+	s.mu.Unlock()
+
+	if len(readyFiles) == 0 {
+		return
+	}
+
+	s.PushPriority(readyFiles)
+
+	if debugStats != nil {
+		dAdds := map[string]int{}
+		for _, id := range ids {
+			dAdds["load:"+id.String()]++
+		}
+		debugStats.AddMap(dAdds)
+	}
+}
+
+func (s *Scheduler) BlobUnready(ids restic.IDs) {
+	// when a blob is evicted, files containing that blob as their prefix
+	// has their blobsToPrepare increased by one. However, ignore files
+	// once they have reached blobsToPrepare value zero; they are no longer tracked.
+
+	if s.priorityCh == nil {
+		// if there is no priority chan, it is of no meaning to track progress
+		return
+	}
+
+	s.mu.Lock()
+	for _, id := range ids {
+		filesToUpdate := s.filesContaining[id]
+		for _, file := range filesToUpdate {
+			// files with blobsToPrepare==0 is not tracked
+			if s.blobsToPrepare[file.hashval] > 0 {
+				s.blobsToPrepare[file.hashval]++
+			}
+		}
+	}
+	s.mu.Unlock()
+}
+
+func (s *Scheduler) SetObsoleteBlobCallback(cb func(restic.IDs)) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.obsoleteBlobCB = cb
+}
+
+func (s *Scheduler) ReadProgress(cursor Cursor, bytesProcessed uint) (Cursor, error) {
+	start := cursor
+	end, err := AdvanceCursor(cursor, bytesProcessed, s.idx.BlobSize)
+	if err != nil {
+		return Cursor{}, err
+	}
+
+	if s.obsoleteBlobCB == nil {
+		return end, nil
+	}
+
+	if start.BlobIdx == end.BlobIdx {
+		return end, nil
+	}
+
+	blobs := cursor.blobs[start.BlobIdx:end.BlobIdx]
+	var obsolete restic.IDs
+	s.mu.Lock()
+	for _, b := range blobs {
+		s.remainingBlobNeeds[b]--
+		if s.remainingBlobNeeds[b] == 0 {
+			obsolete = append(obsolete, b)
+		}
+	}
+	s.mu.Unlock()
+
+	if len(obsolete) == 0 {
+		return end, nil
+	}
+
+	if s.obsoleteBlobCB != nil {
+		s.obsoleteBlobCB(obsolete)
+	}
+	return end, nil
+}
+
+// PrioritySelect selects from two channels with priority; first channel first.
+func PrioritySelect(ctx context.Context, first <-chan *ChunkedFile, second <-chan *ChunkedFile) (item *ChunkedFile, from int, err error) {
+	// First, try to pull from channel 'first' only. If 'first' is not ready now, try both channels.
+	select {
+	case <-ctx.Done():
+		return nil, 0, ctx.Err()
+	case i, ok := <-first:
+		if ok {
+			item = i
+			from = 1
+		}
+	default:
+		select {
+		case <-ctx.Done():
+			return nil, 0, ctx.Err()
+		case i, ok := <-first:
+			if ok {
+				item = i
+				from = 1
+			}
+		case i, ok := <-second:
+			if ok {
+				item = i
+				from = 2
+			}
+		}
+	}
+
+	return item, from, nil
+}
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
index d251f464a..5aecf30c9 100644
--- a/internal/rechunker/worker.go
+++ b/internal/rechunker/worker.go
@@ -93,7 +93,7 @@ func (w *Worker) runReader(ctx context.Context, wg *errgroup.Group, srcBlobs res
 				return err
 			}
 
-			// if onProgress callback is given, run it
+			// if cursor progressor callback is given, run it
 			if w.cursorProgressor != nil {
 				cursor, err = w.cursorProgressor(cursor, c.Length)
 				if err != nil {

From da8c4cf20b5e1471048aa31ef0b5d24b9c47e81f Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Mon, 6 Apr 2026 16:23:24 +0900
Subject: [PATCH 21/34] Misc: Refine comments for functions

---
 internal/rechunker/rechunker.go | 1 +
 internal/rechunker/scheduler.go | 6 +++---
 internal/rechunker/worker.go    | 6 +++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 4bb370134..099413b43 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -388,6 +388,7 @@ func (rc *Rechunker) TotalAddedToDstRepo() uint64 {
 	return rc.totalAddedToDstRepo.Load()
 }
 
+// HashOfIDs computes a sha256 hash of the concatenation of all values of `restic.IDs`, making a mapping from `restic.IDs` to `restic.ID`.
 func HashOfIDs(ids restic.IDs) restic.ID {
 	c := make([]byte, 0, len(ids)*32)
 	for _, id := range ids {
diff --git a/internal/rechunker/scheduler.go b/internal/rechunker/scheduler.go
index d458a4adf..4343b2872 100644
--- a/internal/rechunker/scheduler.go
+++ b/internal/rechunker/scheduler.go
@@ -278,6 +278,7 @@ func (s *Scheduler) SetObsoleteBlobCallback(cb func(restic.IDs)) {
 	s.obsoleteBlobCB = cb
 }
 
+// ReadProgress computes progress of cursor for a file, while inferring src blob consumption and using that info to track blob usage.
 func (s *Scheduler) ReadProgress(cursor Cursor, bytesProcessed uint) (Cursor, error) {
 	start := cursor
 	end, err := AdvanceCursor(cursor, bytesProcessed, s.idx.BlobSize)
@@ -308,9 +309,8 @@ func (s *Scheduler) ReadProgress(cursor Cursor, bytesProcessed uint) (Cursor, er
 		return end, nil
 	}
 
-	if s.obsoleteBlobCB != nil {
-		s.obsoleteBlobCB(obsolete)
-	}
+	s.obsoleteBlobCB(obsolete)
+
 	return end, nil
 }
 
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
index 5aecf30c9..a3b482232 100644
--- a/internal/rechunker/worker.go
+++ b/internal/rechunker/worker.go
@@ -51,7 +51,7 @@ func (w *Worker) RunFile(ctx context.Context, srcBlobs restic.IDs, p *Progress)
 	wg, ctx := errgroup.WithContext(ctx)
 
 	chChunk := make(chan chunker.Chunk)  // chunk passing channel from reader to writer
-	chResult := make(chan FileResult, 1) // file chunk result channel
+	chResult := make(chan FileResult, 1) // file rechunk result channel
 
 	// Run reader goroutine
 	w.runReader(ctx, wg, srcBlobs, reader, chChunk)
@@ -101,7 +101,7 @@ func (w *Worker) runReader(ctx context.Context, wg *errgroup.Group, srcBlobs res
 				}
 			}
 
-			// send chunk to writer
+			// send a rechunked chunk to the writer
 			select {
 			case <-ctx.Done():
 				return ctx.Err()
@@ -121,7 +121,7 @@ func (w *Worker) runWriter(ctx context.Context, wg *errgroup.Group, in <-chan ch
 		var addedSize uint64
 
 		for {
-			// receive chunk from reader
+			// receive chunk from the reader
 			var c chunker.Chunk
 			var ok bool
 			select {

From 331e9cbfbadc35c2dcf2083e227023516e9ee659 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Tue, 14 Apr 2026 23:30:08 +0900
Subject: [PATCH 22/34] Fix: Integration test for rechunk copy

Fix code redundancy and test omission in rechunk copy integration test
---
 cmd/restic/cmd_copy_integration_test.go | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/cmd/restic/cmd_copy_integration_test.go b/cmd/restic/cmd_copy_integration_test.go
index b806a3e2d..cf1514436 100644
--- a/cmd/restic/cmd_copy_integration_test.go
+++ b/cmd/restic/cmd_copy_integration_test.go
@@ -93,9 +93,11 @@ func TestCopy(t *testing.T) {
 
 	// Check that the copied snapshots have the same tree contents as the old ones (= identical tree hash)
 	origRestores := make(map[string]struct{})
+	origRestores2 := make(map[string]struct{})
 	for i, snapshotID := range snapshotIDs {
 		restoredir := filepath.Join(env.base, fmt.Sprintf("restore%d", i))
 		origRestores[restoredir] = struct{}{}
+		origRestores2[restoredir] = struct{}{}
 		testRunRestore(t, env.gopts, restoredir, snapshotID.String())
 	}
 	for i, snapshotID := range copiedSnapshotIDs {
@@ -113,13 +115,7 @@ func TestCopy(t *testing.T) {
 		rtest.Assert(t, foundMatch, "found no counterpart for snapshot %v", snapshotID)
 	}
 
-	// Check that the rechunk-copied snapshots have the same tree contents as the old ones (= identical tree hash)
-	origRestores2 := make(map[string]struct{})
-	for i, snapshotID := range snapshotIDs {
-		restoredir := filepath.Join(env.base, fmt.Sprintf("restore%d", i))
-		origRestores2[restoredir] = struct{}{}
-		testRunRestore(t, env.gopts, restoredir, snapshotID.String())
-	}
+	// Check that the rechunk-copied snapshots have the same tree contents as the old ones
 	for i, snapshotID := range rechunkCopiedSnapshotIDs {
 		restoredir := filepath.Join(env3.base, fmt.Sprintf("restore%d", i))
 		testRunRestore(t, env3.gopts, restoredir, snapshotID.String())
@@ -231,7 +227,7 @@ func TestCopyUnstableJSON(t *testing.T) {
 	testListSnapshots(t, env2.gopts, 1)
 
 	testRunInit(t, env3.gopts)
-	testRunCopy(t, env.gopts, env3.gopts)
+	testRunRechunkCopy(t, env.gopts, env3.gopts)
 	testRunCheck(t, env3.gopts)
 	testListSnapshots(t, env3.gopts, 1)
 }
@@ -245,6 +241,8 @@ func TestCopyToEmptyPassword(t *testing.T) {
 	env2.gopts.InsecureNoPassword = true
 	env3, cleanup3 := withTestEnvironment(t) // test env for rechunk-copy
 	defer cleanup3()
+	env3.gopts.Password = ""
+	env3.gopts.InsecureNoPassword = true
 
 	testSetupBackupData(t, env)
 	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9")}, BackupOptions{}, env.gopts)

From 816181d88abab2e344128e2ed02de9d72059e92c Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Tue, 14 Apr 2026 23:56:51 +0900
Subject: [PATCH 23/34] Fix: Rechunker TreeRewriter behavior

Change the rechunker's TreeRewriter to allow rewriting trees with unstable JSON serialization.
---
 cmd/restic/cmd_copy.go          | 2 +-
 internal/rechunker/rechunker.go | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go
index 600b617e7..5126a3131 100644
--- a/cmd/restic/cmd_copy.go
+++ b/cmd/restic/cmd_copy.go
@@ -87,8 +87,8 @@ type RechunkCopyOptions struct {
 func (opts *RechunkCopyOptions) AddFlags(f *pflag.FlagSet) {
 	f.BoolVar(&opts.Rechunk, "rechunk", false, "rechunk files when copying")
 	f.BoolVar(&opts.ForceRechunk, "force", false, "force rechunk even when src and dst repo have same chunker polynomials; to be used with --rechunk")
-	f.IntVar(&opts.CacheSize, "cache-size", 4096, "for rechunk copy, specify in-memory blob cache size in MiBs (0 to disable cache). Used with --rechunk")
 	f.Var(&opts.AddTags, "add-tag", "add `tags` for the copied snapshots in the format `tag[,tag,...]` (can be specified multiple times). Used with --rechunk")
+	f.IntVar(&opts.CacheSize, "cache-size", 4096, "for rechunk copy, specify in-memory blob cache size in MiBs (0 to disable cache). Used with --rechunk")
 }
 
 // collectAllSnapshots: select all snapshot trees to be copied
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 099413b43..4aceb42be 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -352,6 +352,7 @@ func (rc *Rechunker) RewriteTree(ctx context.Context, srcRepo restic.BlobLoader,
 			node.Content = dstBlobs
 			return node
 		},
+		AllowUnstableSerialization: true,
 	})
 
 	newID, err := rewriter.RewriteTree(ctx, srcRepo, saver, "/", treeID)

From cc2f0b4a892bef338f2031979eebdb4e48128047 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 15 Apr 2026 00:23:03 +0900
Subject: [PATCH 24/34] Refactor: Move Cursor type definition to worker.go

---
 internal/rechunker/rechunker.go | 32 --------------------------------
 internal/rechunker/worker.go    | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 4aceb42be..7a0cd41e1 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -397,35 +397,3 @@ func HashOfIDs(ids restic.IDs) restic.ID {
 	}
 	return sha256.Sum256(c)
 }
-
-type Cursor struct {
-	blobs   restic.IDs
-	BlobIdx int
-	Offset  uint
-}
-
-func AdvanceCursor(c Cursor, numBytes uint, blobSizes map[restic.ID]uint) (Cursor, error) {
-	for c.BlobIdx < len(c.blobs) {
-		blobSize, ok := blobSizes[c.blobs[c.BlobIdx]]
-		if !ok {
-			return Cursor{}, fmt.Errorf("blob %v not in blobSizes", c.blobs[c.BlobIdx].Str())
-		}
-		r := blobSize - c.Offset
-
-		if numBytes < r {
-			c.Offset += numBytes
-			numBytes = 0
-			break
-		}
-
-		numBytes -= r
-		c.BlobIdx++
-		c.Offset = 0
-	}
-
-	if numBytes != 0 {
-		return Cursor{}, fmt.Errorf("cursor out of range; %d bytes over end position", numBytes)
-	}
-
-	return c, nil
-}
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
index a3b482232..5174618d9 100644
--- a/internal/rechunker/worker.go
+++ b/internal/rechunker/worker.go
@@ -2,6 +2,7 @@ package rechunker
 
 import (
 	"context"
+	"fmt"
 	"io"
 
 	"github.com/restic/chunker"
@@ -227,3 +228,35 @@ func (p *BufferPool) Put(buf []byte) {
 		debug.Log("bufferPool is full; discarding the buffer")
 	}
 }
+
+type Cursor struct {
+	blobs   restic.IDs
+	BlobIdx int
+	Offset  uint
+}
+
+func AdvanceCursor(c Cursor, numBytes uint, blobSizes map[restic.ID]uint) (Cursor, error) {
+	for c.BlobIdx < len(c.blobs) {
+		blobSize, ok := blobSizes[c.blobs[c.BlobIdx]]
+		if !ok {
+			return Cursor{}, fmt.Errorf("blob %v not in blobSizes", c.blobs[c.BlobIdx].Str())
+		}
+		r := blobSize - c.Offset
+
+		if numBytes < r {
+			c.Offset += numBytes
+			numBytes = 0
+			break
+		}
+
+		numBytes -= r
+		c.BlobIdx++
+		c.Offset = 0
+	}
+
+	if numBytes != 0 {
+		return Cursor{}, fmt.Errorf("cursor out of range; %d bytes over end position", numBytes)
+	}
+
+	return c, nil
+}

From cee5f0ac76b6e9b08b6878d23c096acb00397b4c Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 15 Apr 2026 19:21:59 +0900
Subject: [PATCH 25/34] Misc: Change variable name in rechunker

---
 internal/rechunker/rechunker.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 7a0cd41e1..bf802a0bf 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -323,7 +323,7 @@ func (rc *Rechunker) RewriteTree(ctx context.Context, srcRepo restic.BlobLoader,
 	}
 
 	// wrap dstRepo so that total uploaded tree blobs size can be tracked
-	saver := wrappedBlobSaver(func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
+	treeSaver := wrappedBlobSaver(func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
 		newID, known, sizeInRepo, err = dstRepo.SaveBlob(ctx, tpe, buf, id, storeDuplicate)
 		if err != nil {
 			return
@@ -355,14 +355,14 @@ func (rc *Rechunker) RewriteTree(ctx context.Context, srcRepo restic.BlobLoader,
 		AllowUnstableSerialization: true,
 	})
 
-	newID, err := rewriter.RewriteTree(ctx, srcRepo, saver, "/", treeID)
+	newID, err := rewriter.RewriteTree(ctx, srcRepo, treeSaver, "/", treeID)
 	if err != nil {
 		return restic.ID{}, err
 	}
 
 	rc.rewriteTreeMap[treeID] = newID
 
-	return newID, err
+	return newID, nil
 }
 
 func (rc *Rechunker) NumFiles() int {

From db2bdd5447dbbbb3b67d4dd55c9a6104c5f4613f Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 15 Apr 2026 19:22:25 +0900
Subject: [PATCH 26/34] Test: Refactor rechunker test toward code reuse

Refactor the rechunker test code to largely reuse helper code from other packages, for maintainability.
---
 internal/rechunker/rechunker_test.go | 438 ++++++++-------------------
 internal/rechunker/testing.go        |  30 ++
 2 files changed, 149 insertions(+), 319 deletions(-)
 create mode 100644 internal/rechunker/testing.go

diff --git a/internal/rechunker/rechunker_test.go b/internal/rechunker/rechunker_test.go
index 2be64a3f1..d5e99f437 100644
--- a/internal/rechunker/rechunker_test.go
+++ b/internal/rechunker/rechunker_test.go
@@ -1,113 +1,69 @@
 package rechunker
 
 import (
-	"bytes"
 	"context"
-	"errors"
 	"fmt"
-	"io"
-	"sort"
-	"sync"
 	"testing"
 
 	"github.com/restic/chunker"
 
+	"github.com/restic/restic/internal/archiver"
 	"github.com/restic/restic/internal/data"
 	"github.com/restic/restic/internal/restic"
 	rtest "github.com/restic/restic/internal/test"
+	"github.com/restic/restic/internal/walker"
 )
 
-// TestRechunkerRepo implements minimal repository interface for rechunker test.
-type TestRechunkerRepo struct {
-	loadBlob          func(id restic.ID, buf []byte) ([]byte, error)
-	loadBlobsFromPack func(packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error
-	saveBlob          func(buf []byte) (newID restic.ID, known bool, size int, err error)
-}
-
-// methods to satisfy interfaces used in rechunker
-
-func (r *TestRechunkerRepo) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) {
-	return r.loadBlob(id, buf)
-}
-func (r *TestRechunkerRepo) LoadBlobsFromPack(ctx context.Context, packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error {
-	return r.loadBlobsFromPack(packID, blobs, handleBlobFn)
-}
-func (r *TestRechunkerRepo) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, size int, err error) {
-	return r.saveBlob(buf)
-}
-func (r *TestRechunkerRepo) SaveBlobAsync(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool, cb func(newID restic.ID, known bool, sizeInRepo int, err error)) {
-	// not used in rechunker; declared just to satisfy restic.BlobSaverWithAsync interface
-}
-func (r *TestRechunkerRepo) WithBlobUploader(ctx context.Context, fn func(ctx context.Context, uploader restic.BlobSaverWithAsync) error) error {
-	return fn(ctx, r)
-}
-func (r *TestRechunkerRepo) Connections() uint {
-	// arbitrarily chosen value
-	return 5
-}
-
-// chunkFiles chunk `files` by `pol` and return fileIndex (map from path to blob IDs) and chunkStore (map from blob ID to blob data).
-func chunkFiles(chnker *chunker.Chunker, pol chunker.Pol, files map[string][]byte) (map[string]restic.IDs, map[restic.ID][]byte) {
-	fileIndex := map[string]restic.IDs{}
-	chunkStore := map[restic.ID][]byte{}
-
-	for name, data := range files {
-		r := bytes.NewReader(data)
-		chnker.Reset(r, pol)
-		chunks := restic.IDs{}
-
-		for {
-			chunk, err := chnker.Next(nil)
-			if err == io.EOF {
-				break
-			}
-			if err != nil {
-				panic(err)
-			}
-
-			id := restic.Hash(chunk.Data)
-			chunks = append(chunks, id)
-			if _, ok := chunkStore[id]; !ok {
-				chunkStore[id] = chunk.Data
-			}
-		}
-
-		fileIndex[name] = chunks
-	}
-
-	return fileIndex, chunkStore
-}
-
-// simulatedPack assigns arbitrary pack to each blob in chunkStore.
-func simulatedPack(chunkStore map[restic.ID][]byte) map[restic.ID]restic.ID {
-	blobToPack := map[restic.ID]restic.ID{}
-	i := 0
-	packID := restic.NewRandomID()
-	for blobID := range chunkStore {
-		blobToPack[blobID] = packID
-		i++
-		if i%10 == 0 {
-			packID = restic.NewRandomID()
-		}
-	}
-
-	return blobToPack
-}
-
 // prepareData prepares random data for rechunker test.
-func prepareData() map[string][]byte {
-	files := map[string][]byte{
-		"0": {},
-		"1": rtest.Random(1, 10_000),
-		"2": rtest.Random(4, 10_000_000),
-		"3": rtest.Random(5, 100_000_000),
+func prepareData(t *testing.T) string {
+	tempdir := rtest.TempDir(t)
+	repo := archiver.TestDir{
+		"0": archiver.TestFile{Content: ""},
+		"1": archiver.TestFile{Content: string(rtest.Random(1, 10_000))},
+		"2": archiver.TestFile{Content: string(rtest.Random(4, 10_000_000))},
+		"3": archiver.TestFile{Content: string(rtest.Random(5, 100_000_000))},
 	}
+	archiver.TestCreateFiles(t, tempdir, repo)
 
-	return files
+	return tempdir
 }
 
-func TestRechunker(t *testing.T) {
-	ctx, cancel := context.WithCancel(context.TODO())
+func gatherFileContentsByPath(t *testing.T, repo restic.BlobLoader, root restic.ID) map[string]restic.IDs {
+	t.Helper()
+
+	record := map[string]restic.IDs{}
+	err := walker.Walk(t.Context(), repo, root, walker.WalkVisitor{
+		ProcessNode: func(parentTreeID restic.ID, path string, node *data.Node, nodeErr error) (err error) {
+			if node != nil && node.Type == data.NodeTypeFile {
+				record[path] = node.Content
+			}
+			return nodeErr
+		},
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	return record
+}
+
+func buildRechunkMapByMatchingPath(t *testing.T, srcList, dstList map[string]restic.IDs) map[restic.ID]restic.IDs {
+	t.Helper()
+
+	rechunkMap := map[restic.ID]restic.IDs{}
+
+	for k, v := range srcList {
+		if _, ok := dstList[k]; !ok {
+			t.Fatalf("%v expected in dstList, but not found", k)
+		}
+		rechunkMap[HashOfIDs(v)] = dstList[k]
+	}
+
+	return rechunkMap
+}
+
+func TestRechunk(t *testing.T) {
+	ctx, cancel := context.WithCancel(t.Context())
 	defer cancel()
 
 	// generate reandom polynomials
@@ -115,103 +71,58 @@ func TestRechunker(t *testing.T) {
 	dstChunkerParam, _ := chunker.RandomPolynomial()
 
 	// prepare test data
-	files := prepareData()
+	tempdir := prepareData(t)
 
-	// prepare chunker and minimal repositories
-	chnker := chunker.New(nil, 0)
-	srcFileIndex, srcChunkStore := chunkFiles(chnker, srcChunkerParam, files)
-	dstWantsFileIndex, dstWantsChunkStore := chunkFiles(chnker, dstChunkerParam, files)
-	rechunkStore := restic.IDSet{}
+	// prepare repositories
+	srcRepo := TestRepositoryWithPol(t, srcChunkerParam)
+	dstWantsRepo := TestRepositoryWithPol(t, dstChunkerParam)
+	dstTestsRepo := TestRepositoryWithPol(t, dstChunkerParam)
 
-	// build files list and virtual blobToPack mapping
-	srcFilesList := []*ChunkedFile{}
-	for _, file := range srcFileIndex {
-		srcFilesList = append(srcFilesList, &ChunkedFile{file, HashOfIDs(file)})
-	}
-	srcBlobToPack := simulatedPack(srcChunkStore)
+	srcSn := archiver.TestSnapshot(t, srcRepo, tempdir, nil)
+	dstWantsSn := archiver.TestSnapshot(t, dstWantsRepo, tempdir, nil)
 
-	// define src repo for rechunker test
-	srcRepo := &TestRechunkerRepo{
-		loadBlob: func(id restic.ID, buf []byte) ([]byte, error) {
-			blob, ok := srcChunkStore[id]
-			if !ok {
-				return nil, fmt.Errorf("blob not found")
-			}
+	srcList := gatherFileContentsByPath(t, srcRepo, *srcSn.Tree)
+	dstWantsList := gatherFileContentsByPath(t, dstWantsRepo, *dstWantsSn.Tree)
+	wantedRechunkMap := buildRechunkMapByMatchingPath(t, srcList, dstWantsList)
 
-			if cap(buf) < len(blob) {
-				buf = make([]byte, len(blob))
-			}
-			buf = buf[:len(blob)]
-			copy(buf, blob)
-
-			return buf, nil
-		},
-		loadBlobsFromPack: func(packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error {
-			for _, blob := range blobs {
-				if packID != srcBlobToPack[blob.ID] {
-					return fmt.Errorf("blob %v is not in the pack %v", blob.ID, packID)
-				}
-				err := handleBlobFn(blob.BlobHandle, srcChunkStore[blob.ID], nil)
-				if err != nil {
-					return err
-				}
-			}
-			return nil
-		},
-	}
-
-	// create rechunker
-	cfg := Config{
-		CacheSize: 4096 * (1 << 20),
+	// run rechunk copy
+	rechunker := NewRechunker(Config{
+		CacheSize: 4 * (1 << 30),
 		Pol:       dstChunkerParam,
+	})
+
+	err := rechunker.Plan(ctx, srcRepo, restic.IDs{*srcSn.Tree})
+	if err != nil {
+		t.Fatal(err)
 	}
-	rechunker := NewRechunker(cfg)
 
-	// manually configure rechunker instead of running Plan(), because we are using mock repo
-	var err error
-	rechunker.filesList = srcFilesList
-	rechunker.idx, err = createIndex(srcFilesList, func(t restic.BlobType, id restic.ID) []restic.PackedBlob {
-		pb := restic.PackedBlob{}
-		pb.ID = id
-		pb.Type = t
-		pb.UncompressedLength = uint(len(srcChunkStore[id]))
-		pb.PackID = srcBlobToPack[id]
+	err = rechunker.Rechunk(ctx, srcRepo, dstTestsRepo, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
 
-		return []restic.PackedBlob{pb}
+	// compare dstTestsRepo (rechunker result) vs dstWantsRepo (reference result)
+	// 1) check if all expected data blobs are stored
+	inCtx, stop := context.WithCancelCause(ctx)
+	err = dstWantsRepo.ListBlobs(inCtx, func(pb restic.PackedBlob) {
+		if pb.Type == restic.DataBlob {
+			_, found := dstTestsRepo.LookupBlobSize(restic.DataBlob, pb.ID)
+			if !found {
+				stop(fmt.Errorf("blob %v expected but not found", pb.ID.Str()))
+			}
+		}
 	})
 	if err != nil {
-		panic(err)
+		t.Error(err)
 	}
 
-	rechunker.rechunkReady = true
-
-	// define dst repo for rechunker test, and run Rechunk
-	saveBlobLock := sync.Mutex{}
-	rechunkTestRepo := &TestRechunkerRepo{
-		saveBlob: func(buf []byte) (newID restic.ID, known bool, size int, err error) {
-			newID = restic.Hash(buf)
-			saveBlobLock.Lock()
-			rechunkStore.Insert(newID)
-			saveBlobLock.Unlock()
-			return
-		},
-	}
-	rtest.OK(t, rechunker.Rechunk(ctx, srcRepo, rechunkTestRepo, nil))
-
-	// compare test result (by rechunker) vs dstWantsChunkedFiles (ordinary backup)
-	testResult := rechunker.rechunkMap
-	for name, srcBlobs := range srcFileIndex {
-		hashval := HashOfIDs(srcBlobs)
-		wants := HashOfIDs(dstWantsFileIndex[name])
-		if HashOfIDs(testResult[hashval]) != wants {
-			t.Errorf("blob mismatch for file '%v'", name)
-		}
-	}
-
-	// check if all blobs are stored
-	for blobID := range dstWantsChunkStore {
-		if !rechunkStore.Has(blobID) {
-			t.Errorf("blob missing: %v", blobID.Str())
+	// 2) check if rechunk is done correctly by comparing rechunkMap
+	testedRechunkMap := rechunker.rechunkMap
+	for k, v := range wantedRechunkMap {
+		wanted := HashOfIDs(v)
+		tested := HashOfIDs(testedRechunkMap[k])
+		if wanted != tested {
+			t.Errorf("rechunk result for src file %v does not match: %v wanted, but got %v", k.Str(), wanted.Str(), tested.Str())
 		}
 	}
 }
@@ -221,7 +132,7 @@ type BlobIDsPair struct {
 	dstBlobIDs restic.IDs
 }
 
-func generateBlobIDsPair(nSrc, nDst uint) BlobIDsPair {
+func generateRandomBlobIDsPair(nSrc, nDst uint) BlobIDsPair {
 	srcIDs := make(restic.IDs, 0, nSrc)
 	dstIDs := make(restic.IDs, 0, nDst)
 	for range nSrc {
@@ -234,179 +145,68 @@ func generateBlobIDsPair(nSrc, nDst uint) BlobIDsPair {
 	return BlobIDsPair{srcBlobIDs: srcIDs, dstBlobIDs: dstIDs}
 }
 
-// Type definitions for rewriteTree test.
-// Reference: walker/rewriter_test.go and walker/walker_test.go (v0.18.0).
-
-type TreeMap map[restic.ID][]byte
-type TestTree map[string]interface{}
-type TestContentNode struct {
-	Type    data.NodeType
-	Size    uint64
-	Content restic.IDs
-}
-
-func (t TreeMap) LoadBlob(_ context.Context, tpe restic.BlobType, id restic.ID, _ []byte) ([]byte, error) {
-	if tpe != restic.TreeBlob {
-		return nil, errors.New("can only load trees")
-	}
-	tree, ok := t[id]
-	if !ok {
-		return nil, errors.New("tree not found")
-	}
-	return tree, nil
-}
-
-func (t TreeMap) SaveBlob(_ context.Context, tpe restic.BlobType, buf []byte, id restic.ID, _ bool) (newID restic.ID, known bool, size int, err error) {
-	if tpe != restic.TreeBlob {
-		return restic.ID{}, false, 0, errors.New("can only save trees")
-	}
-
-	if id.IsNull() {
-		id = restic.Hash(buf)
-	}
-	_, ok := t[id]
-	if ok {
-		return id, false, 0, nil
-	}
-
-	t[id] = append([]byte{}, buf...)
-	return id, true, len(buf), nil
-}
-
-func BuildTreeMap(tree TestTree) (m TreeMap, root restic.ID) {
-	m = TreeMap{}
-	id := buildTreeMap(tree, m)
-	return m, id
-}
-
-func buildTreeMap(tree TestTree, m TreeMap) restic.ID {
-	tb := data.NewTreeJSONBuilder()
-	var names []string
-	for name := range tree {
-		names = append(names, name)
-	}
-	sort.Strings(names)
-
-	for _, name := range names {
-		item := tree[name]
-		switch elem := item.(type) {
-		case TestTree:
-			id := buildTreeMap(elem, m)
-			err := tb.AddNode(&data.Node{
-				Name:    name,
-				Subtree: &id,
-				Type:    data.NodeTypeDir,
-			})
-			if err != nil {
-				panic(err)
-			}
-		case TestContentNode:
-			err := tb.AddNode(&data.Node{
-				Name:    name,
-				Type:    elem.Type,
-				Size:    elem.Size,
-				Content: elem.Content,
-			})
-			if err != nil {
-				panic(err)
-			}
-		default:
-			panic(fmt.Sprintf("invalid type %T", elem))
-		}
-	}
-
-	buf, err := tb.Finalize()
-	if err != nil {
-		panic(err)
-	}
-
-	id := restic.Hash(buf)
-
-	if _, ok := m[id]; !ok {
-		m[id] = buf
-	}
-
-	return id
-}
-
 // prepareTree prepares sample tree for rewriteTree test.
-func prepareTree() (srcTree TestTree, wantsTree TestTree, rechunkMap map[restic.ID]restic.IDs) {
+func prepareTree() (srcTree walker.TestTree, wantsTree walker.TestTree, rechunkMap map[restic.ID]restic.IDs) {
 	blobIDsMap := map[string]BlobIDsPair{
-		"a":        generateBlobIDsPair(1, 1),
-		"subdir/a": generateBlobIDsPair(30, 31),
-		"x":        generateBlobIDsPair(42, 41),
-		"0":        generateBlobIDsPair(0, 0),
+		"a":        generateRandomBlobIDsPair(1, 1),
+		"subdir/a": generateRandomBlobIDsPair(30, 31),
+		"x":        generateRandomBlobIDsPair(42, 41),
+		"0":        generateRandomBlobIDsPair(0, 0),
 	}
 	rechunkMap = map[restic.ID]restic.IDs{}
 	for _, v := range blobIDsMap {
 		rechunkMap[HashOfIDs(v.srcBlobIDs)] = v.dstBlobIDs
 	}
 
-	srcTree = TestTree{
-		"zerofile": TestContentNode{
-			Type:    data.NodeTypeFile,
+	srcTree = walker.TestTree{
+		"zerofile": walker.TestFile{
 			Size:    0,
 			Content: restic.IDs{},
 		},
-		"a": TestContentNode{
-			Type:    data.NodeTypeFile,
+		"a": walker.TestFile{
 			Size:    1,
 			Content: blobIDsMap["a"].srcBlobIDs,
 		},
-		"subdir": TestTree{
-			"a": TestContentNode{
-				Type:    data.NodeTypeFile,
+		"x": walker.TestFile{
+			Size:    2,
+			Content: blobIDsMap["x"].srcBlobIDs,
+		},
+		"subdir": walker.TestTree{
+			"a": walker.TestFile{
 				Size:    3,
 				Content: blobIDsMap["subdir/a"].srcBlobIDs,
 			},
-			"x": TestContentNode{
-				Type:    data.NodeTypeFile,
-				Size:    2,
-				Content: blobIDsMap["x"].srcBlobIDs,
-			},
-			"subdir": TestTree{
-				"dup_x": TestContentNode{
-					Type:    data.NodeTypeFile,
+			"subdir": walker.TestTree{
+				"dup_x": walker.TestFile{
 					Size:    2,
 					Content: blobIDsMap["x"].srcBlobIDs,
 				},
-				"nonregularfile": TestContentNode{
-					Type: data.NodeTypeSymlink,
-				},
 			},
 		},
 	}
-	wantsTree = TestTree{
-		"zerofile": TestContentNode{
-			Type:    data.NodeTypeFile,
+	wantsTree = walker.TestTree{
+		"zerofile": walker.TestFile{
 			Size:    0,
 			Content: restic.IDs{},
 		},
-		"a": TestContentNode{
-			Type:    data.NodeTypeFile,
+		"a": walker.TestFile{
 			Size:    1,
 			Content: blobIDsMap["a"].dstBlobIDs,
 		},
-		"subdir": TestTree{
-			"a": TestContentNode{
-				Type:    data.NodeTypeFile,
+		"x": walker.TestFile{
+			Size:    2,
+			Content: blobIDsMap["x"].dstBlobIDs,
+		},
+		"subdir": walker.TestTree{
+			"a": walker.TestFile{
 				Size:    3,
 				Content: blobIDsMap["subdir/a"].dstBlobIDs,
 			},
-			"x": TestContentNode{
-				Type:    data.NodeTypeFile,
-				Size:    2,
-				Content: blobIDsMap["x"].dstBlobIDs,
-			},
-			"subdir": TestTree{
-				"dup_x": TestContentNode{
-					Type:    data.NodeTypeFile,
+			"subdir": walker.TestTree{
+				"dup_x": walker.TestFile{
 					Size:    2,
 					Content: blobIDsMap["x"].dstBlobIDs,
 				},
-				"nonregularfile": TestContentNode{
-					Type: data.NodeTypeSymlink,
-				},
 			},
 		},
 	}
@@ -417,13 +217,13 @@ func prepareTree() (srcTree TestTree, wantsTree TestTree, rechunkMap map[restic.
 func TestRechunkerRewriteTree(t *testing.T) {
 	srcTree, wantsTree, rechunkMap := prepareTree()
 
-	srcRepo, srcRoot := BuildTreeMap(srcTree)
-	_, wantsRoot := BuildTreeMap(wantsTree)
+	srcRepo, srcRoot := walker.BuildTreeMap(srcTree)
+	_, wantsRoot := walker.BuildTreeMap(wantsTree)
 
-	testsRepo := TreeMap{}
+	testsRepo := data.TestWritableTreeMap{TestTreeMap: data.TestTreeMap{}}
 	rechunker := NewRechunker(Config{})
 	rechunker.rechunkMap = rechunkMap
-	testsRoot, err := rechunker.RewriteTree(context.TODO(), srcRepo, testsRepo, srcRoot)
+	testsRoot, err := rechunker.RewriteTree(t.Context(), srcRepo, testsRepo, srcRoot)
 	if err != nil {
 		t.Error(err)
 	}
diff --git a/internal/rechunker/testing.go b/internal/rechunker/testing.go
new file mode 100644
index 000000000..75e7e465b
--- /dev/null
+++ b/internal/rechunker/testing.go
@@ -0,0 +1,30 @@
+package rechunker
+
+import (
+	"context"
+	"testing"
+
+	"github.com/restic/chunker"
+	"github.com/restic/restic/internal/repository"
+	"github.com/restic/restic/internal/restic"
+	"github.com/restic/restic/internal/test"
+)
+
+func TestRepositoryWithPol(t *testing.T, pol chunker.Pol) restic.Repository {
+	t.Helper()
+
+	be := repository.TestBackend(t)
+
+	repo, err := repository.New(be, repository.Options{})
+	if err != nil {
+		t.Fatalf("TestRepository(): new repo failed: %v", err)
+	}
+
+	var version uint = restic.StableRepoVersion
+	err = repo.Init(context.TODO(), version, test.TestPassword, &pol)
+	if err != nil {
+		t.Fatalf("TestRepository(): initialize repo failed: %v", err)
+	}
+
+	return repo
+}

From c5c0bde8a4fb95d8a91b0852a0e39863cf9af73a Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 15 Apr 2026 19:26:02 +0900
Subject: [PATCH 27/34] Misc: gofmt

Run gofmt on cmd_copy_integration_test.go.
---
 cmd/restic/cmd_copy_integration_test.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cmd/restic/cmd_copy_integration_test.go b/cmd/restic/cmd_copy_integration_test.go
index cf1514436..15fdfdc75 100644
--- a/cmd/restic/cmd_copy_integration_test.go
+++ b/cmd/restic/cmd_copy_integration_test.go
@@ -37,12 +37,12 @@ func testRunRechunkCopy(t testing.TB, srcGopts global.Options, dstGopts global.O
 	gopts.InsecureNoPassword = dstGopts.InsecureNoPassword
 	copyOpts := CopyOptions{
 		SecondaryRepoOptions: global.SecondaryRepoOptions{
-			Repo: srcGopts.Repo,
-			Password: srcGopts.Password,
+			Repo:               srcGopts.Repo,
+			Password:           srcGopts.Password,
 			InsecureNoPassword: srcGopts.InsecureNoPassword,
 		},
 		RechunkCopyOptions: RechunkCopyOptions{
-			Rechunk: true,
+			Rechunk:           true,
 			isIntegrationTest: true,
 		},
 	}
@@ -69,7 +69,7 @@ func TestCopy(t *testing.T) {
 
 	testRunInit(t, env2.gopts)
 	testRunCopy(t, env.gopts, env2.gopts)
-	
+
 	testRunInit(t, env3.gopts)
 	testRunRechunkCopy(t, env.gopts, env3.gopts)
 

From 7018c8071ec84e0e7d99a1d9b59640dfde8ff1e7 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 15 Apr 2026 19:38:40 +0900
Subject: [PATCH 28/34] Test: Use t.Run for test in rechunker_test

Use testing.T.Run() subtests for the test routines in rechunker_test.go.
---
 internal/rechunker/rechunker_test.go | 87 ++++++++++++++++------------
 1 file changed, 50 insertions(+), 37 deletions(-)

diff --git a/internal/rechunker/rechunker_test.go b/internal/rechunker/rechunker_test.go
index d5e99f437..d17177bb2 100644
--- a/internal/rechunker/rechunker_test.go
+++ b/internal/rechunker/rechunker_test.go
@@ -63,9 +63,6 @@ func buildRechunkMapByMatchingPath(t *testing.T, srcList, dstList map[string]res
 }
 
 func TestRechunk(t *testing.T) {
-	ctx, cancel := context.WithCancel(t.Context())
-	defer cancel()
-
 	// generate reandom polynomials
 	srcChunkerParam, _ := chunker.RandomPolynomial()
 	dstChunkerParam, _ := chunker.RandomPolynomial()
@@ -91,40 +88,48 @@ func TestRechunk(t *testing.T) {
 		Pol:       dstChunkerParam,
 	})
 
-	err := rechunker.Plan(ctx, srcRepo, restic.IDs{*srcSn.Tree})
-	if err != nil {
-		t.Fatal(err)
-	}
+	t.Run("Plan running", func(t *testing.T) {
+		err := rechunker.Plan(t.Context(), srcRepo, restic.IDs{*srcSn.Tree})
+		if err != nil {
+			t.Fatal(err)
+		}
+	})
 
-	err = rechunker.Rechunk(ctx, srcRepo, dstTestsRepo, nil)
-	if err != nil {
-		t.Fatal(err)
-	}
+	t.Run("Rechunk running", func(t *testing.T) {
+		err := rechunker.Rechunk(t.Context(), srcRepo, dstTestsRepo, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+	})
 
 	// compare dstTestsRepo (rechunker result) vs dstWantsRepo (reference result)
 	// 1) check if all expected data blobs are stored
-	inCtx, stop := context.WithCancelCause(ctx)
-	err = dstWantsRepo.ListBlobs(inCtx, func(pb restic.PackedBlob) {
-		if pb.Type == restic.DataBlob {
-			_, found := dstTestsRepo.LookupBlobSize(restic.DataBlob, pb.ID)
-			if !found {
-				stop(fmt.Errorf("blob %v expected but not found", pb.ID.Str()))
+	t.Run("data blob verification", func(t *testing.T) {
+		inCtx, stop := context.WithCancelCause(t.Context())
+		err := dstWantsRepo.ListBlobs(inCtx, func(pb restic.PackedBlob) {
+			if pb.Type == restic.DataBlob {
+				_, found := dstTestsRepo.LookupBlobSize(restic.DataBlob, pb.ID)
+				if !found {
+					stop(fmt.Errorf("blob %v expected but not found", pb.ID.Str()))
+				}
+			}
+		})
+		if err != nil {
+			t.Error(err)
+		}
+	})
+
+	// 2) check if rechunk is done correctly by comparing rechunkMap
+	t.Run("rechunk mapping verification", func(t *testing.T) {
+		testedRechunkMap := rechunker.rechunkMap
+		for k, v := range wantedRechunkMap {
+			wanted := HashOfIDs(v)
+			tested := HashOfIDs(testedRechunkMap[k])
+			if wanted != tested {
+				t.Errorf("rechunk result for src file %v does not match: %v wanted, but got %v", k.Str(), wanted.Str(), tested.Str())
 			}
 		}
 	})
-	if err != nil {
-		t.Error(err)
-	}
-
-	// 2) check if rechunk is done correctly by comparing rechunkMap
-	testedRechunkMap := rechunker.rechunkMap
-	for k, v := range wantedRechunkMap {
-		wanted := HashOfIDs(v)
-		tested := HashOfIDs(testedRechunkMap[k])
-		if wanted != tested {
-			t.Errorf("rechunk result for src file %v does not match: %v wanted, but got %v", k.Str(), wanted.Str(), tested.Str())
-		}
-	}
 }
 
 type BlobIDsPair struct {
@@ -223,11 +228,19 @@ func TestRechunkerRewriteTree(t *testing.T) {
 	testsRepo := data.TestWritableTreeMap{TestTreeMap: data.TestTreeMap{}}
 	rechunker := NewRechunker(Config{})
 	rechunker.rechunkMap = rechunkMap
-	testsRoot, err := rechunker.RewriteTree(t.Context(), srcRepo, testsRepo, srcRoot)
-	if err != nil {
-		t.Error(err)
-	}
-	if wantsRoot != testsRoot {
-		t.Errorf("tree mismatch. wants: %v, tests: %v", wantsRoot, testsRoot)
-	}
+
+	var testsRoot restic.ID
+	t.Run("RewriteTree running", func(t *testing.T) {
+		root, err := rechunker.RewriteTree(t.Context(), srcRepo, testsRepo, srcRoot)
+		if err != nil {
+			t.Error(err)
+		}
+		testsRoot = root
+	})
+
+	t.Run("result verification", func(t *testing.T) {
+		if wantsRoot != testsRoot {
+			t.Errorf("tree mismatch. wants: %v, tests: %v", wantsRoot, testsRoot)
+		}
+	})
 }

From 0a4dc82a3c6c25550284e3a701c2b80229f99ea6 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 22 Apr 2026 00:38:50 +0900
Subject: [PATCH 29/34] Refactor: Rechunker

Clean up unnecessary pieces of code in rechunker.go.
---
 internal/rechunker/rechunker.go | 29 ++++++++---------------------
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index bf802a0bf..60884565f 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -64,22 +64,9 @@ func (rc *Rechunker) reset() {
 }
 
 func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTrees restic.IDs) error {
-	rc.reset()
-
-	visitedFiles := restic.IDSet{}
-	visitedTrees := restic.IDSet{}
-
-	// skip previously processed files and trees
-	for k := range rc.rechunkMap {
-		visitedFiles.Insert(k)
-	}
-	for k := range rc.rewriteTreeMap {
-		visitedTrees.Insert(k)
-	}
-
 	var err error
 	debug.Log("Gathering distinct file Contents from target snapshots")
-	rc.filesList, rc.totalSize, err = gatherFileContents(ctx, srcRepo, rootTrees, visitedFiles, visitedTrees)
+	rc.filesList, rc.totalSize, err = gatherFileContents(ctx, srcRepo, rootTrees)
 	if err != nil {
 		return err
 	}
@@ -100,8 +87,10 @@ func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTr
 	return nil
 }
 
-func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees restic.IDs, visitedFiles restic.IDSet, visitedTrees restic.IDSet) (filesList []*ChunkedFile, totalSize uint64, err error) {
+func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees restic.IDs) (filesList []*ChunkedFile, totalSize uint64, err error) {
 	mu := sync.Mutex{}
+	visitedFiles := restic.NewIDSet()
+	visitedTrees := restic.NewIDSet()
 
 	// Stream through all subtrees in target rootTrees and gather all distinct file Contents
 	err = data.StreamTrees(ctx, repo, rootTrees, nil, func(id restic.ID) bool {
@@ -186,13 +175,11 @@ func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id
 	return idx, nil
 }
 
-type Loader interface {
-	restic.BlobLoader
-	LoadBlobsFromPack(context.Context, restic.ID, []restic.Blob, func(restic.BlobHandle, []byte, error) error) error
-	Connections() uint
-}
+func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo, dstRepo restic.Repository, p *Progress) error {
+	if dstRepo.Config().ChunkerPolynomial != rc.cfg.Pol {
+		return fmt.Errorf("chunker polynomial of dstRepo does not match with Rechunker's one")
+	}
 
-func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo Loader, dstRepo restic.WithBlobUploader, p *Progress) error {
 	if !rc.rechunkReady {
 		return fmt.Errorf("Plan() must be run first before Rechunk()")
 	}

From 0dbf22574b298ebc99f401cf3af5894c18b79646 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 22 Apr 2026 01:05:14 +0900
Subject: [PATCH 30/34] Refactor: Rechunker RewriteTree

Refactor the rechunker's RewriteTree (including renaming it to RewriteTrees).

Update cmd_copy.go to align with the refactored RewriteTrees.
---
 cmd/restic/cmd_copy.go          | 20 +++-------
 internal/rechunker/rechunker.go | 66 +++++++++++++++++----------------
 2 files changed, 40 insertions(+), 46 deletions(-)

diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go
index 5126a3131..54dbc6167 100644
--- a/cmd/restic/cmd_copy.go
+++ b/cmd/restic/cmd_copy.go
@@ -419,7 +419,7 @@ func rechunkCopy(ctx context.Context, srcRepo, dstRepo restic.Repository, select
 	printer.V("  - Total size (including duplicate blobs): %v", ui.FormatBytes(rechnker.TotalSize()))
 	printer.V("Number of packs to download: %v\n\n", rechnker.PackCount())
 
-	debug.Log("Running RechunkData()")
+	debug.Log("Running Rechunk()")
 	progress.Start(rechnker.NumFiles(), rechnker.TotalSize())
 	err = rechnker.Rechunk(ctx, srcRepo, dstRepo, progress)
 	if err != nil {
@@ -427,24 +427,14 @@ func rechunkCopy(ctx context.Context, srcRepo, dstRepo restic.Repository, select
 	}
 	progress.Done()
 
-	printer.V("\nRewriting trees...")
-	err = dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaverWithAsync) error {
-		for _, tree := range rootTrees {
-			debug.Log("Running RewriteTree() for tree ID %v", tree.Str())
-			_, err := rechnker.RewriteTree(ctx, srcRepo, uploader, tree)
-			if err != nil {
-				return err
-			}
-		}
-
-		return nil
-	})
+	printer.V("Rewriting trees...")
+	debug.Log("Running RewriteTrees()")
+	_, err = rechnker.RewriteTrees(ctx, srcRepo, dstRepo, rootTrees)
 	if err != nil {
 		return err
 	}
-	printer.V("Rewriting done.\n\n")
 
-	printer.V("Writing snapshots")
+	printer.V("Writing snapshots...")
 	for _, sn := range snapshots {
 		newTreeID, err := rechnker.GetRewrittenTree(*sn.Tree)
 		if err != nil {
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 60884565f..ba1385630 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -56,13 +56,6 @@ func NewRechunker(cfg Config) *Rechunker {
 	}
 }
 
-func (rc *Rechunker) reset() {
-	rc.idx = nil
-
-	rc.filesList = nil
-	rc.rechunkReady = false
-}
-
 func (rc *Rechunker) Plan(ctx context.Context, srcRepo restic.Repository, rootTrees restic.IDs) error {
 	var err error
 	debug.Log("Gathering distinct file Contents from target snapshots")
@@ -302,26 +295,9 @@ func (s wrappedBlobSaver) SaveBlob(ctx context.Context, tpe restic.BlobType, buf
 	return s(ctx, tpe, buf, id, storeDuplicate)
 }
 
-func (rc *Rechunker) RewriteTree(ctx context.Context, srcRepo restic.BlobLoader, dstRepo restic.BlobSaver, treeID restic.ID) (restic.ID, error) {
-	// check if the identical tree has already been processed
-	newID, ok := rc.rewriteTreeMap[treeID]
-	if ok {
-		return newID, nil
-	}
+func (rc *Rechunker) RewriteTrees(ctx context.Context, srcRepo, dstRepo restic.Repository, treeIDs restic.IDs) (restic.IDs, error) {
+	result := restic.IDs{}
 
-	// wrap dstRepo so that total uploaded tree blobs size can be tracked
-	treeSaver := wrappedBlobSaver(func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
-		newID, known, sizeInRepo, err = dstRepo.SaveBlob(ctx, tpe, buf, id, storeDuplicate)
-		if err != nil {
-			return
-		}
-		if !known {
-			rc.totalAddedToDstRepo.Add(uint64(sizeInRepo))
-		}
-		return
-	})
-
-	// prepare rewriter that rewrites node.Content of regular files
 	rewriter := walker.NewTreeRewriter(walker.RewriteOpts{
 		RewriteNode: func(node *data.Node, _ string) *data.Node {
 			if node == nil {
@@ -342,14 +318,42 @@ func (rc *Rechunker) RewriteTree(ctx context.Context, srcRepo restic.BlobLoader,
 		AllowUnstableSerialization: true,
 	})
 
-	newID, err := rewriter.RewriteTree(ctx, srcRepo, treeSaver, "/", treeID)
+	err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaverWithAsync) error {
+		// wrap dstRepo so that total uploaded tree blobs size can be tracked
+		saver := wrappedBlobSaver(func(ctx context.Context, tpe restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, sizeInRepo int, err error) {
+			newID, known, sizeInRepo, err = uploader.SaveBlob(ctx, tpe, buf, id, storeDuplicate)
+			if err != nil {
+				return
+			}
+			if !known {
+				rc.totalAddedToDstRepo.Add(uint64(sizeInRepo))
+			}
+			return
+		})
+
+		for _, treeID := range treeIDs {
+			// check if the identical tree has already been processed
+			newID, ok := rc.rewriteTreeMap[treeID]
+			if ok {
+				result = append(result, newID)
+				continue
+			}
+
+			newID, err := rewriter.RewriteTree(ctx, srcRepo, saver, "/", treeID)
+			if err != nil {
+				return err
+			}
+			rc.rewriteTreeMap[treeID] = newID
+			result = append(result, newID)
+		}
+
+		return nil
+	})
 	if err != nil {
-		return restic.ID{}, err
+		return nil, err
 	}
 
-	rc.rewriteTreeMap[treeID] = newID
-
-	return newID, nil
+	return result, nil
 }
 
 func (rc *Rechunker) NumFiles() int {

From b431cd08b73689eba39ef751d8f82a9aabbae446 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Wed, 22 Apr 2026 01:16:26 +0900
Subject: [PATCH 31/34] Test: Rechunker

Make the rechunker test code more compact.
---
 internal/rechunker/rechunker_test.go | 217 ++++++++++-----------------
 1 file changed, 82 insertions(+), 135 deletions(-)

diff --git a/internal/rechunker/rechunker_test.go b/internal/rechunker/rechunker_test.go
index d17177bb2..9a8b3b9f9 100644
--- a/internal/rechunker/rechunker_test.go
+++ b/internal/rechunker/rechunker_test.go
@@ -17,25 +17,36 @@ import (
 // prepareData prepares random data for rechunker test.
 func prepareData(t *testing.T) string {
 	tempdir := rtest.TempDir(t)
+	data := map[int][]byte{
+		1: rtest.Random(1, 10_000),
+		2: rtest.Random(2, 10_000_000),
+		3: rtest.Random(3, 100_000_000),
+	}
 	repo := archiver.TestDir{
-		"0": archiver.TestFile{Content: ""},
-		"1": archiver.TestFile{Content: string(rtest.Random(1, 10_000))},
-		"2": archiver.TestFile{Content: string(rtest.Random(4, 10_000_000))},
-		"3": archiver.TestFile{Content: string(rtest.Random(5, 100_000_000))},
+		"zero":  archiver.TestFile{Content: ""},
+		"one":   archiver.TestFile{Content: string(data[1])},
+		"two":   archiver.TestFile{Content: string(data[2])},
+		"three": archiver.TestFile{Content: string(data[3])},
+		"dir1": archiver.TestDir{
+			"dir2": archiver.TestDir{
+				"dup_1": archiver.TestFile{Content: string(data[1])},
+				"dup_3": archiver.TestFile{Content: string(data[3])},
+			},
+		},
 	}
 	archiver.TestCreateFiles(t, tempdir, repo)
 
 	return tempdir
 }
 
-func gatherFileContentsByPath(t *testing.T, repo restic.BlobLoader, root restic.ID) map[string]restic.IDs {
+func gatherNodesByPath(t *testing.T, repo restic.BlobLoader, root restic.ID) map[string]*data.Node {
 	t.Helper()
 
-	record := map[string]restic.IDs{}
+	result := map[string]*data.Node{}
 	err := walker.Walk(t.Context(), repo, root, walker.WalkVisitor{
 		ProcessNode: func(parentTreeID restic.ID, path string, node *data.Node, nodeErr error) (err error) {
-			if node != nil && node.Type == data.NodeTypeFile {
-				record[path] = node.Content
+			if node != nil {
+				result[path] = node
 			}
 			return nodeErr
 		},
@@ -44,25 +55,28 @@ func gatherFileContentsByPath(t *testing.T, repo restic.BlobLoader, root restic.
 		t.Fatal(err)
 	}
 
-	return record
+	return result
 }
 
-func buildRechunkMapByMatchingPath(t *testing.T, srcList, dstList map[string]restic.IDs) map[restic.ID]restic.IDs {
+func buildRechunkMapByMatchingPath(t *testing.T, srcNodes, dstNodes map[string]*data.Node) map[restic.ID]restic.IDs {
 	t.Helper()
 
 	rechunkMap := map[restic.ID]restic.IDs{}
 
-	for k, v := range srcList {
-		if _, ok := dstList[k]; !ok {
-			t.Fatalf("%v expected in dstList, but not found", k)
+	for k, v := range srcNodes {
+		if v.Type != data.NodeTypeFile {
+			continue
 		}
-		rechunkMap[HashOfIDs(v)] = dstList[k]
+		if _, ok := dstNodes[k]; !ok {
+			t.Fatalf("%v expected in dstNodes, but not found", k)
+		}
+		rechunkMap[HashOfIDs(v.Content)] = dstNodes[k].Content
 	}
 
 	return rechunkMap
 }
 
-func TestRechunk(t *testing.T) {
+func TestRechunker(t *testing.T) {
 	// generate reandom polynomials
 	srcChunkerParam, _ := chunker.RandomPolynomial()
 	dstChunkerParam, _ := chunker.RandomPolynomial()
@@ -78,9 +92,9 @@ func TestRechunk(t *testing.T) {
 	srcSn := archiver.TestSnapshot(t, srcRepo, tempdir, nil)
 	dstWantsSn := archiver.TestSnapshot(t, dstWantsRepo, tempdir, nil)
 
-	srcList := gatherFileContentsByPath(t, srcRepo, *srcSn.Tree)
-	dstWantsList := gatherFileContentsByPath(t, dstWantsRepo, *dstWantsSn.Tree)
-	wantedRechunkMap := buildRechunkMapByMatchingPath(t, srcList, dstWantsList)
+	srcNodes := gatherNodesByPath(t, srcRepo, *srcSn.Tree)
+	dstWantsNodes := gatherNodesByPath(t, dstWantsRepo, *dstWantsSn.Tree)
+	wantedRechunkMap := buildRechunkMapByMatchingPath(t, srcNodes, dstWantsNodes)
 
 	// run rechunk copy
 	rechunker := NewRechunker(Config{
@@ -102,6 +116,15 @@ func TestRechunk(t *testing.T) {
 		}
 	})
 
+	var testsTree restic.ID
+	t.Run("RewriteTrees running", func(t *testing.T) {
+		newID, err := rechunker.RewriteTrees(t.Context(), srcRepo, dstTestsRepo, restic.IDs{*srcSn.Tree})
+		if err != nil {
+			t.Fatal(err)
+		}
+		testsTree = newID[0]
+	})
+
 	// compare dstTestsRepo (rechunker result) vs dstWantsRepo (reference result)
 	// 1) check if all expected data blobs are stored
 	t.Run("data blob verification", func(t *testing.T) {
@@ -123,124 +146,48 @@ func TestRechunk(t *testing.T) {
 	t.Run("rechunk mapping verification", func(t *testing.T) {
 		testedRechunkMap := rechunker.rechunkMap
 		for k, v := range wantedRechunkMap {
-			wanted := HashOfIDs(v)
-			tested := HashOfIDs(testedRechunkMap[k])
-			if wanted != tested {
-				t.Errorf("rechunk result for src file %v does not match: %v wanted, but got %v", k.Str(), wanted.Str(), tested.Str())
+			wants := HashOfIDs(v)
+			tests := HashOfIDs(testedRechunkMap[k])
+			if wants != tests {
+				t.Errorf("rechunk result for src file %v does not match: %v expected, but got %v", k.Str(), wants.Str(), tests.Str())
+			}
+		}
+	})
+
+	// 3) check if tree is rewritten correctly by comparing tree nodes
+	t.Run("tree verification", func(t *testing.T) {
+		testsNodes := gatherNodesByPath(t, dstTestsRepo, testsTree)
+
+		// (i) compare Content field with dstWantsNodes
+		for path, node := range dstWantsNodes {
+			if node.Type != data.NodeTypeFile {
+				continue
+			}
+			if _, ok := testsNodes[path]; !ok {
+				t.Errorf("node for path %v does not exist", path)
+				continue
+			}
+			wants := HashOfIDs(node.Content)
+			tests := HashOfIDs(testsNodes[path].Content)
+			if wants != tests {
+				t.Errorf("node content for path %v does not match: %v expected, but got %v", path, wants.Str(), tests.Str())
+			}
+		}
+
+		// (ii) compare remaining fields with srcNodes
+		for path, wantsNode := range srcNodes {
+			testsNode, ok := testsNodes[path]
+			if !ok {
+				t.Errorf("node for path %v does not exist", path)
+				continue
+			}
+			// copy nodes and clear rewritten fields for comparison
+			wants, tests := *wantsNode, *testsNode
+			wants.Content, tests.Content = nil, nil
+			wants.Subtree, tests.Subtree = nil, nil
+			if !wants.Equals(tests) {
+				t.Errorf("node fields for path %v does not match", path)
 			}
 		}
 	})
 }
-
-type BlobIDsPair struct {
-	srcBlobIDs restic.IDs
-	dstBlobIDs restic.IDs
-}
-
-func generateRandomBlobIDsPair(nSrc, nDst uint) BlobIDsPair {
-	srcIDs := make(restic.IDs, 0, nSrc)
-	dstIDs := make(restic.IDs, 0, nDst)
-	for range nSrc {
-		srcIDs = append(srcIDs, restic.NewRandomID())
-	}
-	for range nDst {
-		dstIDs = append(dstIDs, restic.NewRandomID())
-	}
-
-	return BlobIDsPair{srcBlobIDs: srcIDs, dstBlobIDs: dstIDs}
-}
-
-// prepareTree prepares sample tree for rewriteTree test.
-func prepareTree() (srcTree walker.TestTree, wantsTree walker.TestTree, rechunkMap map[restic.ID]restic.IDs) {
-	blobIDsMap := map[string]BlobIDsPair{
-		"a":        generateRandomBlobIDsPair(1, 1),
-		"subdir/a": generateRandomBlobIDsPair(30, 31),
-		"x":        generateRandomBlobIDsPair(42, 41),
-		"0":        generateRandomBlobIDsPair(0, 0),
-	}
-	rechunkMap = map[restic.ID]restic.IDs{}
-	for _, v := range blobIDsMap {
-		rechunkMap[HashOfIDs(v.srcBlobIDs)] = v.dstBlobIDs
-	}
-
-	srcTree = walker.TestTree{
-		"zerofile": walker.TestFile{
-			Size:    0,
-			Content: restic.IDs{},
-		},
-		"a": walker.TestFile{
-			Size:    1,
-			Content: blobIDsMap["a"].srcBlobIDs,
-		},
-		"x": walker.TestFile{
-			Size:    2,
-			Content: blobIDsMap["x"].srcBlobIDs,
-		},
-		"subdir": walker.TestTree{
-			"a": walker.TestFile{
-				Size:    3,
-				Content: blobIDsMap["subdir/a"].srcBlobIDs,
-			},
-			"subdir": walker.TestTree{
-				"dup_x": walker.TestFile{
-					Size:    2,
-					Content: blobIDsMap["x"].srcBlobIDs,
-				},
-			},
-		},
-	}
-	wantsTree = walker.TestTree{
-		"zerofile": walker.TestFile{
-			Size:    0,
-			Content: restic.IDs{},
-		},
-		"a": walker.TestFile{
-			Size:    1,
-			Content: blobIDsMap["a"].dstBlobIDs,
-		},
-		"x": walker.TestFile{
-			Size:    2,
-			Content: blobIDsMap["x"].dstBlobIDs,
-		},
-		"subdir": walker.TestTree{
-			"a": walker.TestFile{
-				Size:    3,
-				Content: blobIDsMap["subdir/a"].dstBlobIDs,
-			},
-			"subdir": walker.TestTree{
-				"dup_x": walker.TestFile{
-					Size:    2,
-					Content: blobIDsMap["x"].dstBlobIDs,
-				},
-			},
-		},
-	}
-
-	return srcTree, wantsTree, rechunkMap
-}
-
-func TestRechunkerRewriteTree(t *testing.T) {
-	srcTree, wantsTree, rechunkMap := prepareTree()
-
-	srcRepo, srcRoot := walker.BuildTreeMap(srcTree)
-	_, wantsRoot := walker.BuildTreeMap(wantsTree)
-
-	testsRepo := data.TestWritableTreeMap{TestTreeMap: data.TestTreeMap{}}
-	rechunker := NewRechunker(Config{})
-	rechunker.rechunkMap = rechunkMap
-
-	var testsRoot restic.ID
-	t.Run("RewriteTree running", func(t *testing.T) {
-		root, err := rechunker.RewriteTree(t.Context(), srcRepo, testsRepo, srcRoot)
-		if err != nil {
-			t.Error(err)
-		}
-		testsRoot = root
-	})
-
-	t.Run("result verification", func(t *testing.T) {
-		if wantsRoot != testsRoot {
-			t.Errorf("tree mismatch. wants: %v, tests: %v", wantsRoot, testsRoot)
-		}
-	})
-}

From eb8e127dcd10c0222065cd06e72fd33b568f4c25 Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Sun, 26 Apr 2026 17:19:11 +0900
Subject: [PATCH 32/34] Refactor: Change rechunker.Index to interface

Change rechunker.Index to an interface type so that custom index implementations can be used
---
 internal/rechunker/blob_cache.go | 14 ++++----
 internal/rechunker/rechunker.go  | 58 +++++++++++++++++++++++---------
 internal/rechunker/scheduler.go  |  6 ++--
 internal/rechunker/worker.go     |  6 ++--
 4 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/internal/rechunker/blob_cache.go b/internal/rechunker/blob_cache.go
index e402df636..218104cc4 100644
--- a/internal/rechunker/blob_cache.go
+++ b/internal/rechunker/blob_cache.go
@@ -15,7 +15,7 @@ type BlobCache struct {
 	mu sync.RWMutex
 	c  *simplelru.LRU[restic.ID, []byte]
 
-	idx *Index
+	idx Index
 
 	free, size int
 
@@ -31,7 +31,7 @@ type BlobCache struct {
 const overhead = len(restic.ID{}) + 64
 
 func NewBlobCache(ctx context.Context, size int, numDownloaders int,
-	repo PackLoader, idx *Index,
+	repo PackLoader, idx Index,
 	onReady func(blobIDs restic.IDs), onEvict func(blobIDs restic.IDs)) *BlobCache {
 	if size < 32*(1<<20) {
 		panic("Blob cache size should be at least 32 MiB!!")
@@ -115,7 +115,7 @@ func (c *BlobCache) startDownloaders(ctx context.Context, numDownloaders int,
 				// filter out ignored blobs
 				c.mu.RLock()
 				var filtered []restic.Blob
-				for _, blob := range c.idx.PackToBlobs[packID] {
+				for _, blob := range c.idx.PackToBlobs(packID) {
 					ignored := c.ignored.Has(blob.ID)
 					ready := c.c.Contains(blob.ID)
 					if !ignored && !ready {
@@ -260,13 +260,13 @@ func (c *BlobCache) asyncGet(ctx context.Context, id restic.ID, buf []byte) <-ch
 }
 
 func (c *BlobCache) requestDownload(ctx context.Context, id restic.ID) error {
-	packID, ok := c.idx.BlobToPack[id]
-	if !ok {
+	packID := c.idx.BlobToPack(id)
+	if packID.IsNull() {
 		return fmt.Errorf("unknown blob: %v", id.Str())
 	}
 
 	c.mu.Lock()
-	ok = c.waitList.Has(packID)
+	ok := c.waitList.Has(packID)
 	if !ok {
 		// queue pack download
 		c.waitList.Insert(packID)
@@ -332,7 +332,7 @@ type PackLoader interface {
 	LoadBlobsFromPack(context.Context, restic.ID, []restic.Blob, func(restic.BlobHandle, []byte, error) error) error
 }
 
-func WrapWithCache(ctx context.Context, repo PackLoader, cacheSize int, numDownloaders int, idx *Index,
+func WrapWithCache(ctx context.Context, repo PackLoader, cacheSize int, numDownloaders int, idx Index,
 	onReady, onEvict func(restic.IDs)) (*BlobLoaderWithCache, *BlobCache) {
 	r := &BlobLoaderWithCache{
 		repo:  repo,
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index ba1385630..e991d2a61 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -19,7 +19,7 @@ import (
 
 type Rechunker struct {
 	cfg Config
-	idx *Index
+	idx Index
 
 	filesList    []*ChunkedFile
 	totalSize    uint64
@@ -41,11 +41,11 @@ type ChunkedFile struct {
 	hashval restic.ID
 }
 
-// Index is immutable after Plan() returns.
-type Index struct {
-	BlobSize    map[restic.ID]uint          // blob ID -> blob size
-	BlobToPack  map[restic.ID]restic.ID     // blob ID -> pack ID
-	PackToBlobs map[restic.ID][]restic.Blob // pack ID -> list of blobs to be loaded from the pack
+type Index interface {
+	BlobSize(blobID restic.ID) (size uint)
+	BlobToPack(blobID restic.ID) (packID restic.ID)
+	PackToBlobs(packID restic.ID) (blobs []restic.Blob)
+	Packs() (packIDs restic.IDSet)
 }
 
 func NewRechunker(cfg Config) *Rechunker {
@@ -129,7 +129,33 @@ func gatherFileContents(ctx context.Context, repo restic.Loader, rootTrees resti
 	return filesList, totalSize, nil
 }
 
-func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id restic.ID) []restic.PackedBlob) (*Index, error) {
+type index struct {
+	blobSize map[restic.ID]uint          // blob ID -> blob size
+	blobIdx  map[restic.ID]restic.ID     // blob ID -> pack ID
+	packIdx  map[restic.ID][]restic.Blob // pack ID -> list of blobs to be loaded from the pack
+}
+
+func (i *index) BlobSize(id restic.ID) uint {
+	return i.blobSize[id]
+}
+
+func (i *index) BlobToPack(id restic.ID) restic.ID {
+	return i.blobIdx[id]
+}
+
+func (i *index) PackToBlobs(id restic.ID) []restic.Blob {
+	return i.packIdx[id]
+}
+
+func (i *index) Packs() restic.IDSet {
+	ids := restic.NewIDSet()
+	for id := range i.packIdx {
+		ids.Insert(id)
+	}
+	return ids
+}
+
+func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id restic.ID) []restic.PackedBlob) (Index, error) {
 	// collect blob usage info
 	blobCount := map[restic.ID]int{}
 	for _, file := range filesList {
@@ -145,8 +171,8 @@ func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id
 
 	// build blob lookup info
 	blobSize := map[restic.ID]uint{}
-	blobToPack := map[restic.ID]restic.ID{}
-	packToBlobs := map[restic.ID][]restic.Blob{}
+	blobIdx := map[restic.ID]restic.ID{}
+	packIdx := map[restic.ID][]restic.Blob{}
 	for blob := range blobCount {
 		packs := lookupBlob(restic.DataBlob, blob)
 		if len(packs) == 0 {
@@ -155,14 +181,14 @@ func createIndex(filesList []*ChunkedFile, lookupBlob func(t restic.BlobType, id
 		pb := packs[0]
 
 		blobSize[pb.Blob.ID] = pb.DataLength()
-		blobToPack[pb.Blob.ID] = pb.PackID
-		packToBlobs[pb.PackID] = append(packToBlobs[pb.PackID], pb.Blob)
+		blobIdx[pb.Blob.ID] = pb.PackID
+		packIdx[pb.PackID] = append(packIdx[pb.PackID], pb.Blob)
 	}
 
-	idx := &Index{
-		BlobSize:    blobSize,
-		BlobToPack:  blobToPack,
-		PackToBlobs: packToBlobs,
+	idx := &index{
+		blobSize: blobSize,
+		blobIdx:  blobIdx,
+		packIdx:  packIdx,
 	}
 
 	return idx, nil
@@ -373,7 +399,7 @@ func (rc *Rechunker) TotalSize() uint64 {
 }
 
 func (rc *Rechunker) PackCount() int {
-	return len(rc.idx.PackToBlobs)
+	return len(rc.idx.Packs())
 }
 
 func (rc *Rechunker) TotalAddedToDstRepo() uint64 {
diff --git a/internal/rechunker/scheduler.go b/internal/rechunker/scheduler.go
index 4343b2872..61063cf29 100644
--- a/internal/rechunker/scheduler.go
+++ b/internal/rechunker/scheduler.go
@@ -12,7 +12,7 @@ import (
 type Scheduler struct {
 	mu sync.Mutex
 
-	idx *Index
+	idx Index
 
 	regularCh  <-chan *ChunkedFile
 	priorityCh <-chan *ChunkedFile
@@ -31,7 +31,7 @@ type Scheduler struct {
 	done chan struct{}
 }
 
-func NewScheduler(ctx context.Context, files []*ChunkedFile, idx *Index, usePriority bool) *Scheduler {
+func NewScheduler(ctx context.Context, files []*ChunkedFile, idx Index, usePriority bool) *Scheduler {
 	debug.Log(("Running NewScheduler()"))
 
 	wg, ctx := errgroup.WithContext(ctx)
@@ -281,7 +281,7 @@ func (s *Scheduler) SetObsoleteBlobCallback(cb func(restic.IDs)) {
 // ReadProgress computes progress of cursor for a file, while inferring src blob consumption and using that info to track blob usage.
 func (s *Scheduler) ReadProgress(cursor Cursor, bytesProcessed uint) (Cursor, error) {
 	start := cursor
-	end, err := AdvanceCursor(cursor, bytesProcessed, s.idx.BlobSize)
+	end, err := AdvanceCursor(start, bytesProcessed, s.idx.BlobSize)
 	if err != nil {
 		return Cursor{}, err
 	}
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
index 5174618d9..124451ffe 100644
--- a/internal/rechunker/worker.go
+++ b/internal/rechunker/worker.go
@@ -235,10 +235,10 @@ type Cursor struct {
 	Offset  uint
 }
 
-func AdvanceCursor(c Cursor, numBytes uint, blobSizes map[restic.ID]uint) (Cursor, error) {
+func AdvanceCursor(c Cursor, numBytes uint, blobSizes func(restic.ID) uint) (Cursor, error) {
 	for c.BlobIdx < len(c.blobs) {
-		blobSize, ok := blobSizes[c.blobs[c.BlobIdx]]
-		if !ok {
+		blobSize := blobSizes(c.blobs[c.BlobIdx])
+		if blobSize == 0 {
 			return Cursor{}, fmt.Errorf("blob %v not in blobSizes", c.blobs[c.BlobIdx].Str())
 		}
 		r := blobSize - c.Offset

From e004f1865dde7d40850847ee8b9bc758af42c98d Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Sun, 26 Apr 2026 17:39:11 +0900
Subject: [PATCH 33/34] Refactor: Change function names

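Change function names in rechunker: PackCount -> NumPacks,
ReadProgress -> progressCursor, AdvanceCursor -> cursor.Advance.
Move the cursor type from worker.go to scheduler.go and unexport it.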
---
 cmd/restic/cmd_copy.go          |  2 +-
 internal/rechunker/rechunker.go | 16 ++++-----
 internal/rechunker/scheduler.go | 61 +++++++++++++++++++++++++++++----
 internal/rechunker/worker.go    | 47 ++++---------------------
 4 files changed, 71 insertions(+), 55 deletions(-)

diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go
index 54dbc6167..4971b1257 100644
--- a/cmd/restic/cmd_copy.go
+++ b/cmd/restic/cmd_copy.go
@@ -417,7 +417,7 @@ func rechunkCopy(ctx context.Context, srcRepo, dstRepo restic.Repository, select
 	printer.V("Number of snapshots: %v", len(rootTrees))
 	printer.V("Number of distinct files to process: %v", rechnker.NumFiles())
 	printer.V("  - Total size (including duplicate blobs): %v", ui.FormatBytes(rechnker.TotalSize()))
-	printer.V("Number of packs to download: %v\n\n", rechnker.PackCount())
+	printer.V("Number of packs to download: %v\n\n", rechnker.NumPacks())
 
 	debug.Log("Running Rechunk()")
 	progress.Start(rechnker.NumFiles(), rechnker.TotalSize())
diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index e991d2a61..7498a50a9 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -231,8 +231,8 @@ func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo, dstRepo restic.Reposi
 		defer debug.Log("Closing uploader")
 
 		wg, ctx := errgroup.WithContext(ctx)
-		rc.runWorkers(ctx, wg, numWorkers, downloader, uploader, scheduler.Next, scheduler.ReadProgress, bufferPool, p)
-		rc.runWorkers(ctx, wg, 1, downloader, uploader, scheduler.NextPriority, scheduler.ReadProgress, bufferPool, p)
+		rc.runWorkers(ctx, wg, numWorkers, downloader, uploader, scheduler.Next, scheduler.progressCursor, bufferPool, p)
+		rc.runWorkers(ctx, wg, 1, downloader, uploader, scheduler.NextPriority, scheduler.progressCursor, bufferPool, p)
 
 		return wg.Wait()
 	})
@@ -273,7 +273,7 @@ func (rc *Rechunker) setupCache(ctx context.Context, srcRepo PackLoader, schedul
 
 func (rc *Rechunker) runWorkers(ctx context.Context, wg *errgroup.Group, numWorkers int,
 	downloader restic.BlobLoader, uploader restic.BlobSaver, receiveJob func(context.Context) (*ChunkedFile, bool, error),
-	cursorProgressor func(Cursor, uint) (Cursor, error), bufferPool *BufferPool, p *Progress) {
+	cursorProgressor func(cursor, uint) (cursor, error), bufferPool *BufferPool, p *Progress) {
 	for range numWorkers {
 		wg.Go(func() error {
 			debug.Log("Starting worker")
@@ -382,10 +382,6 @@ func (rc *Rechunker) RewriteTrees(ctx context.Context, srcRepo, dstRepo restic.R
 	return result, nil
 }
 
-func (rc *Rechunker) NumFiles() int {
-	return len(rc.filesList)
-}
-
 func (rc *Rechunker) GetRewrittenTree(originalTree restic.ID) (restic.ID, error) {
 	newID, ok := rc.rewriteTreeMap[originalTree]
 	if !ok {
@@ -398,7 +394,11 @@ func (rc *Rechunker) TotalSize() uint64 {
 	return rc.totalSize
 }
 
-func (rc *Rechunker) PackCount() int {
+func (rc *Rechunker) NumFiles() int {
+	return len(rc.filesList)
+}
+
+func (rc *Rechunker) NumPacks() int {
 	return len(rc.idx.Packs())
 }
 
diff --git a/internal/rechunker/scheduler.go b/internal/rechunker/scheduler.go
index 61063cf29..1c33cf00c 100644
--- a/internal/rechunker/scheduler.go
+++ b/internal/rechunker/scheduler.go
@@ -2,6 +2,7 @@ package rechunker
 
 import (
 	"context"
+	"fmt"
 	"sync"
 
 	"github.com/restic/restic/internal/debug"
@@ -278,12 +279,23 @@ func (s *Scheduler) SetObsoleteBlobCallback(cb func(restic.IDs)) {
 	s.obsoleteBlobCB = cb
 }
 
-// ReadProgress computes progress of cursor for a file, while inferring src blob consumption and using that info to track blob usage.
-func (s *Scheduler) ReadProgress(cursor Cursor, bytesProcessed uint) (Cursor, error) {
-	start := cursor
-	end, err := AdvanceCursor(start, bytesProcessed, s.idx.BlobSize)
+func (s *Scheduler) newCursor(blobs restic.IDs) cursor {
+	if s == nil {
+		return cursor{}
+	}
+
+	return cursor{
+		blobs:    blobs,
+		blobSize: s.idx.BlobSize,
+	}
+}
+
+// progressCursor computes progress of cursor for a file, while inferring src blob consumption and using that info to track blob usage.
+func (s *Scheduler) progressCursor(c cursor, bytesProcessed uint) (cursor, error) {
+	start := c
+	end, err := c.Advance(bytesProcessed)
 	if err != nil {
-		return Cursor{}, err
+		return cursor{}, err
 	}
 
 	if s.obsoleteBlobCB == nil {
@@ -294,7 +306,7 @@ func (s *Scheduler) ReadProgress(cursor Cursor, bytesProcessed uint) (Cursor, er
 		return end, nil
 	}
 
-	blobs := cursor.blobs[start.BlobIdx:end.BlobIdx]
+	blobs := c.blobs[start.BlobIdx:end.BlobIdx]
 	var obsolete restic.IDs
 	s.mu.Lock()
 	for _, b := range blobs {
@@ -314,6 +326,43 @@ func (s *Scheduler) ReadProgress(cursor Cursor, bytesProcessed uint) (Cursor, er
 	return end, nil
 }
 
+type cursor struct {
+	blobs    restic.IDs
+	BlobIdx  int
+	Offset   uint
+	blobSize func(restic.ID) uint
+}
+
+func (c cursor) Advance(numBytes uint) (cursor, error) {
+	if c.blobs == nil {
+		return cursor{}, nil
+	}
+
+	for c.BlobIdx < len(c.blobs) {
+		blobSize := c.blobSize(c.blobs[c.BlobIdx])
+		if blobSize == 0 {
+			return cursor{}, fmt.Errorf("unknown blob %v", c.blobs[c.BlobIdx].Str())
+		}
+		r := blobSize - c.Offset
+
+		if numBytes < r {
+			c.Offset += numBytes
+			numBytes = 0
+			break
+		}
+
+		numBytes -= r
+		c.BlobIdx++
+		c.Offset = 0
+	}
+
+	if numBytes != 0 {
+		return cursor{}, fmt.Errorf("cursor out of range; %d bytes over end position", numBytes)
+	}
+
+	return c, nil
+}
+
 // PrioritySelect selects from two channels with priority; first channel first.
 func PrioritySelect(ctx context.Context, first <-chan *ChunkedFile, second <-chan *ChunkedFile) (item *ChunkedFile, from int, err error) {
 	// First, try to pull from channel 'first' only. If 'first' is not ready now, try both channels.
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
index 124451ffe..33aace50d 100644
--- a/internal/rechunker/worker.go
+++ b/internal/rechunker/worker.go
@@ -2,7 +2,6 @@ package rechunker
 
 import (
 	"context"
-	"fmt"
 	"io"
 
 	"github.com/restic/chunker"
@@ -23,12 +22,12 @@ type Worker struct {
 	downloader restic.BlobLoader
 	uploader   restic.BlobSaver
 
-	cursorProgressor func(cursor Cursor, bytesProcessed uint) (Cursor, error)
+	cursorProgressor func(cursor cursor, bytesProcessed uint) (cursor, error)
 }
 
 func NewWorker(pol chunker.Pol, downloader restic.BlobLoader, uploader restic.BlobSaver,
 	bufferPool *BufferPool,
-	cursorProgressor func(Cursor, uint) (Cursor, error),
+	cursorProgressor func(cursor, uint) (cursor, error),
 ) *Worker {
 	return &Worker{
 		pool: bufferPool,
@@ -78,14 +77,14 @@ func (w *Worker) runReader(ctx context.Context, wg *errgroup.Group, srcBlobs res
 
 		w.chunker.Reset(reader, w.pol)
 
-		cursor := Cursor{blobs: srcBlobs}
+		c := cursor{blobs: srcBlobs}
 
 		for {
 			// bring buffer from bufferPool
 			buf := w.pool.Get()
 
 			// rechunk with new parameter
-			c, err := w.chunker.Next(buf)
+			chunk, err := w.chunker.Next(buf)
 			if err == io.EOF { // reached EOF; all done
 				w.pool.Put(buf)
 				return nil
@@ -96,7 +95,7 @@ func (w *Worker) runReader(ctx context.Context, wg *errgroup.Group, srcBlobs res
 
 			// if cursor progressor callback is given, run it
 			if w.cursorProgressor != nil {
-				cursor, err = w.cursorProgressor(cursor, c.Length)
+				c, err = w.cursorProgressor(c, chunk.Length)
 				if err != nil {
 					return err
 				}
@@ -106,8 +105,8 @@ func (w *Worker) runReader(ctx context.Context, wg *errgroup.Group, srcBlobs res
 			select {
 			case <-ctx.Done():
 				return ctx.Err()
-			case out <- c:
-				debug.Log("Sending a new chunk of size %v to writer", c.Length)
+			case out <- chunk:
+				debug.Log("Sending a new chunk of size %v to writer", chunk.Length)
 			}
 		}
 	})
@@ -228,35 +227,3 @@ func (p *BufferPool) Put(buf []byte) {
 		debug.Log("bufferPool is full; discarding the buffer")
 	}
 }
-
-type Cursor struct {
-	blobs   restic.IDs
-	BlobIdx int
-	Offset  uint
-}
-
-func AdvanceCursor(c Cursor, numBytes uint, blobSizes func(restic.ID) uint) (Cursor, error) {
-	for c.BlobIdx < len(c.blobs) {
-		blobSize := blobSizes(c.blobs[c.BlobIdx])
-		if blobSize == 0 {
-			return Cursor{}, fmt.Errorf("blob %v not in blobSizes", c.blobs[c.BlobIdx].Str())
-		}
-		r := blobSize - c.Offset
-
-		if numBytes < r {
-			c.Offset += numBytes
-			numBytes = 0
-			break
-		}
-
-		numBytes -= r
-		c.BlobIdx++
-		c.Offset = 0
-	}
-
-	if numBytes != 0 {
-		return Cursor{}, fmt.Errorf("cursor out of range; %d bytes over end position", numBytes)
-	}
-
-	return c, nil
-}

From b93abf8f907f2d40f48d95a5e5ed638cb80ecfad Mon Sep 17 00:00:00 2001
From: Donggyu Kim <kimdonggyu.dev@gmail.com>
Date: Sun, 26 Apr 2026 18:46:11 +0900
Subject: [PATCH 34/34] Refactor: Rename variables and functions

Rename variables and functions for readability
Create WorkerConfig for cleaner argument passing
---
 internal/rechunker/rechunker.go | 37 ++++++++------
 internal/rechunker/scheduler.go | 88 ++++++++++++++++-----------------
 internal/rechunker/worker.go    | 44 +++++++++++------
 3 files changed, 93 insertions(+), 76 deletions(-)

diff --git a/internal/rechunker/rechunker.go b/internal/rechunker/rechunker.go
index 7498a50a9..634262882 100644
--- a/internal/rechunker/rechunker.go
+++ b/internal/rechunker/rechunker.go
@@ -42,10 +42,10 @@ type ChunkedFile struct {
 }
 
 type Index interface {
-	BlobSize(blobID restic.ID) (size uint)
-	BlobToPack(blobID restic.ID) (packID restic.ID)
-	PackToBlobs(packID restic.ID) (blobs []restic.Blob)
-	Packs() (packIDs restic.IDSet)
+	BlobSize(blobID restic.ID) (size uint)              // blob ID -> blob size
+	BlobToPack(blobID restic.ID) (packID restic.ID)     // blob ID -> pack ID
+	PackToBlobs(packID restic.ID) (blobs []restic.Blob) // pack ID -> list of blobs to be loaded from the pack
+	Packs() (packIDs restic.IDSet)                      // set of all pack IDs
 }
 
 func NewRechunker(cfg Config) *Rechunker {
@@ -230,9 +230,20 @@ func (rc *Rechunker) Rechunk(ctx context.Context, srcRepo, dstRepo restic.Reposi
 		debug.Log("Starting uploader")
 		defer debug.Log("Closing uploader")
 
+		workerCfg := WorkerConfig{
+			Pol: rc.cfg.Pol,
+
+			Downloader: downloader,
+			Uploader:   uploader,
+			BufferPool: bufferPool,
+
+			NewCursor:    scheduler.newCursor,
+			UpdateCursor: scheduler.updateCursor,
+		}
+
 		wg, ctx := errgroup.WithContext(ctx)
-		rc.runWorkers(ctx, wg, numWorkers, downloader, uploader, scheduler.Next, scheduler.progressCursor, bufferPool, p)
-		rc.runWorkers(ctx, wg, 1, downloader, uploader, scheduler.NextPriority, scheduler.progressCursor, bufferPool, p)
+		rc.runWorkers(ctx, wg, numWorkers, workerCfg, scheduler.Next, p)
+		rc.runWorkers(ctx, wg, 1, workerCfg, scheduler.NextPriority, p)
 
 		return wg.Wait()
 	})
@@ -266,24 +277,18 @@ func (rc *Rechunker) setupCache(ctx context.Context, srcRepo PackLoader, schedul
 	repo, cache = WrapWithCache(ctx, srcRepo, rc.cfg.CacheSize, numDownloaders, rc.idx, scheduler.BlobReady, scheduler.BlobUnready)
 
 	// register cache.Ignore as scheduler's obsolete blob callback for early cache eviction
-	scheduler.SetObsoleteBlobCallback(cache.Ignore)
+	scheduler.SetIgnoreBlobsCallback(cache.Ignore)
 
 	return repo, cache
 }
 
 func (rc *Rechunker) runWorkers(ctx context.Context, wg *errgroup.Group, numWorkers int,
-	downloader restic.BlobLoader, uploader restic.BlobSaver, receiveJob func(context.Context) (*ChunkedFile, bool, error),
-	cursorProgressor func(cursor, uint) (cursor, error), bufferPool *BufferPool, p *Progress) {
+	workerCfg WorkerConfig, receiveJob func(context.Context) (*ChunkedFile, bool, error),
+	p *Progress) {
 	for range numWorkers {
 		wg.Go(func() error {
 			debug.Log("Starting worker")
-			worker := NewWorker(
-				rc.cfg.Pol,
-				downloader,
-				uploader,
-				bufferPool,
-				cursorProgressor,
-			)
+			worker := NewWorker(workerCfg)
 
 			for {
 				debug.Log("receiving job")
diff --git a/internal/rechunker/scheduler.go b/internal/rechunker/scheduler.go
index 1c33cf00c..6294664b0 100644
--- a/internal/rechunker/scheduler.go
+++ b/internal/rechunker/scheduler.go
@@ -21,12 +21,12 @@ type Scheduler struct {
 	regularList  []*ChunkedFile
 	priorityList []*ChunkedFile
 
-	filesContaining map[restic.ID][]*ChunkedFile
-	blobsToPrepare  map[restic.ID]int
+	prefixLookup         map[restic.ID][]*ChunkedFile // blob ID -> files that contain the blob as prefix
+	remainingPrefixBlobs map[restic.ID]int            // file hashval -> remaining count until all its blobs ready
 
-	remainingBlobNeeds map[restic.ID]int
+	remainingBlobUsage map[restic.ID]int // blob ID -> remaining blob usage until the end
 
-	obsoleteBlobCB func(ids restic.IDs)
+	ignoreBlobsCB func(ids restic.IDs)
 
 	push chan struct{}
 	done chan struct{}
@@ -40,25 +40,25 @@ func NewScheduler(ctx context.Context, files []*ChunkedFile, idx Index, usePrior
 
 	if !usePriority {
 		s := &Scheduler{
-			idx:                idx,
-			regularList:        files,
-			done:               make(chan struct{}),
-			filesContaining:    filesContaining,
-			blobsToPrepare:     blobsToPrepare,
-			remainingBlobNeeds: remainingBlobNeeds,
+			idx:                  idx,
+			regularList:          files,
+			done:                 make(chan struct{}),
+			prefixLookup:         filesContaining,
+			remainingPrefixBlobs: blobsToPrepare,
+			remainingBlobUsage:   remainingBlobNeeds,
 		}
 		s.createRegularCh(ctx, wg, nil)
 		return s
 	}
 
 	s := &Scheduler{
-		idx:                idx,
-		regularList:        files,
-		push:               make(chan struct{}, 1),
-		done:               make(chan struct{}),
-		filesContaining:    filesContaining,
-		blobsToPrepare:     blobsToPrepare,
-		remainingBlobNeeds: remainingBlobNeeds,
+		idx:                  idx,
+		regularList:          files,
+		push:                 make(chan struct{}, 1),
+		done:                 make(chan struct{}),
+		prefixLookup:         filesContaining,
+		remainingPrefixBlobs: blobsToPrepare,
+		remainingBlobUsage:   remainingBlobNeeds,
 	}
 
 	set := restic.IDSet{}
@@ -82,23 +82,23 @@ func NewScheduler(ctx context.Context, files []*ChunkedFile, idx Index, usePrior
 const FILE_HEAD_LENGTH = 25
 
 func createSchedulerState(files []*ChunkedFile) (map[restic.ID][]*ChunkedFile, map[restic.ID]int, map[restic.ID]int) {
-	blobCount := map[restic.ID]int{}
-	filesContaining := map[restic.ID][]*ChunkedFile{}
-	blobsToPrepare := map[restic.ID]int{}
+	blobUsage := map[restic.ID]int{}
+	prefixLookup := map[restic.ID][]*ChunkedFile{}
+	numPrefixBlobs := map[restic.ID]int{}
 
 	for _, file := range files {
 		prefixLen := min(FILE_HEAD_LENGTH, len(file.IDs))
-		blobSet := restic.NewIDSet(file.IDs[:prefixLen]...)
-		blobsToPrepare[file.hashval] = len(blobSet)
+		prefixSet := restic.NewIDSet(file.IDs[:prefixLen]...)
+		numPrefixBlobs[file.hashval] = len(prefixSet)
 		for _, blob := range file.IDs {
-			blobCount[blob]++
+			blobUsage[blob]++
 		}
-		for b := range blobSet {
-			filesContaining[b] = append(filesContaining[b], file)
+		for b := range prefixSet {
+			prefixLookup[b] = append(prefixLookup[b], file)
 		}
 	}
 
-	return filesContaining, blobsToPrepare, blobCount
+	return prefixLookup, numPrefixBlobs, blobUsage
 }
 
 func (s *Scheduler) Next(ctx context.Context) (*ChunkedFile, bool, error) {
@@ -114,7 +114,7 @@ func (s *Scheduler) NextPriority(ctx context.Context) (*ChunkedFile, bool, error
 	return file, from != 0, err
 }
 
-func (s *Scheduler) PushPriority(files []*ChunkedFile) {
+func (s *Scheduler) pushPriority(files []*ChunkedFile) {
 	if s.priorityCh == nil {
 		return
 	}
@@ -221,14 +221,14 @@ func (s *Scheduler) BlobReady(ids restic.IDs) {
 
 	s.mu.Lock()
 	for _, id := range ids {
-		for _, file := range s.filesContaining[id] {
-			n := s.blobsToPrepare[file.hashval]
+		for _, file := range s.prefixLookup[id] {
+			n := s.remainingPrefixBlobs[file.hashval]
 			if n > 0 {
 				n--
 				if n == 0 {
 					readyFiles = append(readyFiles, file)
 				}
-				s.blobsToPrepare[file.hashval] = n
+				s.remainingPrefixBlobs[file.hashval] = n
 			}
 		}
 	}
@@ -238,7 +238,7 @@ func (s *Scheduler) BlobReady(ids restic.IDs) {
 		return
 	}
 
-	s.PushPriority(readyFiles)
+	s.pushPriority(readyFiles)
 
 	if debugStats != nil {
 		dAdds := map[string]int{}
@@ -261,22 +261,22 @@ func (s *Scheduler) BlobUnready(ids restic.IDs) {
 
 	s.mu.Lock()
 	for _, id := range ids {
-		filesToUpdate := s.filesContaining[id]
+		filesToUpdate := s.prefixLookup[id]
 		for _, file := range filesToUpdate {
-			// files with blobsToPrepare==0 is not tracked
+			// files whose remainingPrefixBlobs count is 0 are not tracked
-			if s.blobsToPrepare[file.hashval] > 0 {
-				s.blobsToPrepare[file.hashval]++
+			if s.remainingPrefixBlobs[file.hashval] > 0 {
+				s.remainingPrefixBlobs[file.hashval]++
 			}
 		}
 	}
 	s.mu.Unlock()
 }
 
-func (s *Scheduler) SetObsoleteBlobCallback(cb func(restic.IDs)) {
+func (s *Scheduler) SetIgnoreBlobsCallback(cb func(restic.IDs)) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	s.obsoleteBlobCB = cb
+	s.ignoreBlobsCB = cb
 }
 
 func (s *Scheduler) newCursor(blobs restic.IDs) cursor {
@@ -290,18 +290,14 @@ func (s *Scheduler) newCursor(blobs restic.IDs) cursor {
 	}
 }
 
-// progressCursor computes progress of cursor for a file, while inferring src blob consumption and using that info to track blob usage.
-func (s *Scheduler) progressCursor(c cursor, bytesProcessed uint) (cursor, error) {
+// updateCursor advances a file's cursor by bytesProcessed, infers which source blobs that advance consumed, decrements their remaining usage counts, and reports blobs whose count reaches zero to ignoreBlobsCB (if set).
+func (s *Scheduler) updateCursor(c cursor, bytesProcessed uint) (cursor, error) {
 	start := c
 	end, err := c.Advance(bytesProcessed)
 	if err != nil {
 		return cursor{}, err
 	}
 
-	if s.obsoleteBlobCB == nil {
-		return end, nil
-	}
-
 	if start.BlobIdx == end.BlobIdx {
 		return end, nil
 	}
@@ -310,8 +306,8 @@ func (s *Scheduler) progressCursor(c cursor, bytesProcessed uint) (cursor, error
 	var obsolete restic.IDs
 	s.mu.Lock()
 	for _, b := range blobs {
-		s.remainingBlobNeeds[b]--
-		if s.remainingBlobNeeds[b] == 0 {
+		s.remainingBlobUsage[b]--
+		if s.remainingBlobUsage[b] == 0 {
 			obsolete = append(obsolete, b)
 		}
 	}
@@ -321,7 +317,9 @@ func (s *Scheduler) progressCursor(c cursor, bytesProcessed uint) (cursor, error
 		return end, nil
 	}
 
-	s.obsoleteBlobCB(obsolete)
+	if s.ignoreBlobsCB != nil {
+		s.ignoreBlobsCB(obsolete)
+	}
 
 	return end, nil
 }
diff --git a/internal/rechunker/worker.go b/internal/rechunker/worker.go
index 33aace50d..9f433ca62 100644
--- a/internal/rechunker/worker.go
+++ b/internal/rechunker/worker.go
@@ -22,22 +22,34 @@ type Worker struct {
 	downloader restic.BlobLoader
 	uploader   restic.BlobSaver
 
-	cursorProgressor func(cursor cursor, bytesProcessed uint) (cursor, error)
+	newCursor    func(blobs restic.IDs) cursor
+	updateCursor func(c cursor, numBytes uint) (cursor, error)
+}
+type WorkerConfig struct {
+	Pol chunker.Pol
+
+	Downloader restic.BlobLoader
+	Uploader   restic.BlobSaver
+	BufferPool *BufferPool
+
+	NewCursor    func(blobs restic.IDs) cursor
+	UpdateCursor func(c cursor, numBytes uint) (cursor, error)
 }
 
-func NewWorker(pol chunker.Pol, downloader restic.BlobLoader, uploader restic.BlobSaver,
-	bufferPool *BufferPool,
-	cursorProgressor func(cursor, uint) (cursor, error),
-) *Worker {
+func NewWorker(cfg WorkerConfig) *Worker {
+	if cfg.BufferPool == nil {
+		cfg.BufferPool = NewBufferPool(3)
+	}
 	return &Worker{
-		pool: bufferPool,
+		pool: cfg.BufferPool,
 
-		chunker:    chunker.New(nil, pol),
-		pol:        pol,
-		downloader: downloader,
-		uploader:   uploader,
+		chunker:    chunker.New(nil, cfg.Pol),
+		pol:        cfg.Pol,
+		downloader: cfg.Downloader,
+		uploader:   cfg.Uploader,
 
-		cursorProgressor: cursorProgressor,
+		newCursor:    cfg.NewCursor,
+		updateCursor: cfg.UpdateCursor,
 	}
 }
 
@@ -77,7 +89,10 @@ func (w *Worker) runReader(ctx context.Context, wg *errgroup.Group, srcBlobs res
 
 		w.chunker.Reset(reader, w.pol)
 
-		c := cursor{blobs: srcBlobs}
+		var c cursor
+		if w.newCursor != nil {
+			c = w.newCursor(srcBlobs)
+		}
 
 		for {
 			// bring buffer from bufferPool
@@ -93,9 +108,8 @@ func (w *Worker) runReader(ctx context.Context, wg *errgroup.Group, srcBlobs res
 				return err
 			}
 
-			// if cursor progressor callback is given, run it
-			if w.cursorProgressor != nil {
-				c, err = w.cursorProgressor(c, chunk.Length)
+			if w.updateCursor != nil {
+				c, err = w.updateCursor(c, chunk.Length)
 				if err != nil {
 					return err
 				}