From 7ad875b7d19bb29450bb75529bf30d9e3ad70741 Mon Sep 17 00:00:00 2001 From: Alex Demicev Date: Tue, 26 May 2026 12:07:29 +0200 Subject: [PATCH] Use built-in etcd bootstrap defrag threshold if specified Signed-off-by: Alex Demicev --- pkg/daemons/executor/executor.go | 2 ++ pkg/etcd/etcd.go | 51 ++++++++------------------------ 2 files changed, 14 insertions(+), 39 deletions(-) diff --git a/pkg/daemons/executor/executor.go b/pkg/daemons/executor/executor.go index 83dd63c9d53..2e33829275d 100644 --- a/pkg/daemons/executor/executor.go +++ b/pkg/daemons/executor/executor.go @@ -77,6 +77,8 @@ type ETCDConfig struct { ExperimentalInitialCorruptCheck bool `json:"experimental-initial-corrupt-check"` ExperimentalWatchProgressNotifyInterval time.Duration `json:"experimental-watch-progress-notify-interval"` + + BootstrapDefragThresholdMegabytes uint `json:"bootstrap-defrag-threshold-megabytes,omitempty"` } type ServerTrust struct { diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index 6be650dc25d..2c747aa7618 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -190,11 +190,12 @@ func (e *ETCD) SetControlConfig(config *config.Control) error { return nil } -// Test ensures that the local node is a voting member of the target cluster, -// and that the datastore is defragmented and not in maintenance mode due to alarms. +// Test ensures that the local node is a voting member of the target cluster +// and is not in maintenance mode due to alarms. // If it is still a learner or not a part of the cluster, an error is raised. -// If enableMaintenance is true, an attempt will be made to defagment the datastore and clear alarms. -// If it cannot be defragmented or has any alarms that cannot be disarmed, an error is raised. +// If enableMaintenance is true, an attempt will be made to clear alarms; if they cannot be disarmed, an error is raised. +// Startup defragmentation is delegated to etcd itself via the +// bootstrap-defrag-threshold-megabytes flag. 
func (e *ETCD) Test(ctx context.Context, enableMaintenance bool) error { if e.config == nil { return errors.New("control config not set") @@ -222,28 +223,11 @@ func (e *ETCD) Test(ctx context.Context, enableMaintenance bool) error { return nil } - // defrag this node to reclaim freed space from compacted revisions - if err := e.defragment(ctx); err != nil { - return errors.WithMessage(err, "failed to defragment etcd database") - } - // clear alarms on this node if err := e.clearAlarms(ctx, status.Header.MemberId); err != nil { return errors.WithMessage(err, "failed to disarm etcd alarms") } - // refresh status - note that errors may remain on other nodes, but this - // should not prevent us from continuing with startup. - status, err = e.status(ctx) - if err != nil { - return errors.WithMessage(err, "failed to get etcd status") - } - - logrus.Infof("Datastore using %d of %d bytes after defragment", status.DbSizeInUse, status.DbSize) - if len(status.Errors) > 0 { - logrus.Warnf("Errors present on etcd cluster after defragment: %s", strings.Join(status.Errors, ",")) - } - members, err := e.client.MemberList(ctx) if err != nil { return err @@ -1061,12 +1045,13 @@ func (e *ETCD) cluster(ctx context.Context, wg *sync.WaitGroup, reset bool, opti ClientCertAuth: true, TrustedCAFile: e.config.Runtime.ETCDPeerCA, }, - SnapshotCount: 10000, - ElectionTimeout: 5000, - HeartbeatInterval: 500, - Logger: "zap", - LogOutputs: []string{"stderr"}, - ListenClientHTTPURLs: e.listenClientHTTPURLs(), + SnapshotCount: 10000, + ElectionTimeout: 5000, + HeartbeatInterval: 500, + BootstrapDefragThresholdMegabytes: 100, + Logger: "zap", + LogOutputs: []string{"stderr"}, + ListenClientHTTPURLs: e.listenClientHTTPURLs(), SocketOpts: executor.ETCDSocketOpts{ ReuseAddress: true, ReusePort: true, @@ -1470,18 +1455,6 @@ func (e *ETCD) status(ctx context.Context) (*clientv3.StatusResponse, error) { return e.client.Status(ctx, endpoints[0]) } -// defragment defragments the etcd datastore using the 
first etcd endpoint -func (e *ETCD) defragment(ctx context.Context) error { - if e.client == nil { - return errors.New("etcd client was nil") - } - - logrus.Infof("Defragmenting etcd database") - endpoints := getEndpoints(e.config) - _, err := e.client.Defragment(ctx, endpoints[0]) - return err -} - // clientURLs returns a list of all non-learner etcd cluster member client access URLs. // The list is retrieved from the remote server that is being joined. func ClientURLs(ctx context.Context, clientAccessInfo *clientaccess.Info, selfIP string) ([]string, Members, error) {