From a8f751c0b51ed4bedfa7abd151492bec4e3832ea Mon Sep 17 00:00:00 2001
From: "weizhou.lan@daocloud.io"
Date: Thu, 14 May 2026 15:30:34 +0800
Subject: [PATCH] Fix kubelet startup failure when restoring legacy V3 CPU manager checkpoints

Signed-off-by: weizhou.lan@daocloud.io
---
 pkg/kubelet/cm/cpumanager/state/checkpoint.go | 19 ++++++++-
 .../cpumanager/state/state_checkpoint_test.go | 41 ++++++++++++++++++-
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/pkg/kubelet/cm/cpumanager/state/checkpoint.go b/pkg/kubelet/cm/cpumanager/state/checkpoint.go
index eec993d9fae..8c7b6f99233 100644
--- a/pkg/kubelet/cm/cpumanager/state/checkpoint.go
+++ b/pkg/kubelet/cm/cpumanager/state/checkpoint.go
@@ -267,8 +267,25 @@ func (cp *CPUManagerCheckpointV3) VerifyChecksum() error {
 	ck := cp.Checksum
 	cp.Checksum = 0
 	err := ck.Verify(cp)
+	if err == nil {
+		cp.Checksum = ck
+		return nil
+	}
+
+	object := dump.ForHash(cp)
+	object = strings.Replace(object, "CPUManagerCheckpointV3", "CPUManagerCheckpoint", 1)
 	cp.Checksum = ck
-	return err
+
+	hash := fnv.New32a()
+	_, _ = fmt.Fprintf(hash, "%v", object)
+	actualCS := checksum.Checksum(hash.Sum32())
+	if cp.Checksum != actualCS {
+		return &errors.CorruptCheckpointError{
+			ActualCS:   uint64(actualCS),
+			ExpectedCS: uint64(cp.Checksum),
+		}
+	}
+	return nil
 }
 
 // VerifyChecksum verifies that current checksum of checkpoint is valid in v4 format
diff --git a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go
index 6a45ded9eeb..ff62d6b6a24 100644
--- a/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go
+++ b/pkg/kubelet/cm/cpumanager/state/state_checkpoint_test.go
@@ -322,7 +322,7 @@ func TestCheckpointStateRestore(t *testing.T) {
 						"cpuSet":"7-9"
 					}
 				},
-				"checksum": 2284712151
+				"checksum": 766259872
 			}`,
 			"static",
 			containermap.ContainerMap{},
@@ -354,7 +354,7 @@ func TestCheckpointStateRestore(t *testing.T) {
 						"cpuSet":"7-9"
 					}
 				},
-				"checksum": 2284712151
+				"checksum": 766259872
 			}`,
 			"static",
 			containermap.ContainerMap{},
@@ -775,6 +775,43 @@ func TestCPUManagerCheckpoint_MarshalCheckpoint_HashCompatibility(t *testing.T)
 	}
 }
 
+func TestCPUManagerCheckpointV3_VerifyChecksum_Compatibility(t *testing.T) {
+	testCases := []struct {
+		name     string
+		checksum checksum.Checksum
+	}{
+		{
+			name:     "accepts legacy v3 checksum",
+			checksum: 766259872,
+		},
+		{
+			name:     "accepts current v3 checksum",
+			checksum: 2284712151,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			cp := &CPUManagerCheckpointV3{
+				PolicyName:    "static",
+				DefaultCPUSet: "1-2",
+				Entries: map[string]map[string]string{
+					"pod1": {
+						"container1": "5-6",
+						"container2": "3-4",
+					},
+				},
+				PodEntries: PodCPUAssignments{
+					"pod2": {CPUSet: cpuset.New(7, 8, 9)},
+				},
+				Checksum: tc.checksum,
+			}
+
+			require.NoError(t, cp.VerifyChecksum())
+		})
+	}
+}
+
 func removeAll(dir string, t *testing.T) {
 	t.Helper()
 	if err := os.RemoveAll(dir); err != nil {