This commit is contained in:
Alex Demicev 2026-05-26 14:58:51 +02:00 committed by GitHub
commit a30702bd2c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 14 additions and 39 deletions

View file

@ -77,6 +77,8 @@ type ETCDConfig struct {
ExperimentalInitialCorruptCheck bool `json:"experimental-initial-corrupt-check"`
ExperimentalWatchProgressNotifyInterval time.Duration `json:"experimental-watch-progress-notify-interval"`
BootstrapDefragThresholdMegabytes uint `json:"bootstrap-defrag-threshold-megabytes,omitempty"`
}
type ServerTrust struct {

View file

@ -190,11 +190,12 @@ func (e *ETCD) SetControlConfig(config *config.Control) error {
return nil
}
// Test ensures that the local node is a voting member of the target cluster,
// and that the datastore is defragmented and not in maintenance mode due to alarms.
// Test ensures that the local node is a voting member of the target cluster
// and not in maintenance mode due to alarms.
// If it is still a learner or not a part of the cluster, an error is raised.
// If enableMaintenance is true, an attempt will be made to defragment the datastore and clear alarms.
// If it cannot be defragmented or has any alarms that cannot be disarmed, an error is raised.
// If enableMaintenance is true, an attempt will be made to clear alarms.
// Startup defragmentation is delegated to etcd itself via the
// bootstrap-defrag-threshold-megabytes flag.
func (e *ETCD) Test(ctx context.Context, enableMaintenance bool) error {
if e.config == nil {
return errors.New("control config not set")
@ -222,28 +223,11 @@ func (e *ETCD) Test(ctx context.Context, enableMaintenance bool) error {
return nil
}
// defrag this node to reclaim freed space from compacted revisions
if err := e.defragment(ctx); err != nil {
return errors.WithMessage(err, "failed to defragment etcd database")
}
// clear alarms on this node
if err := e.clearAlarms(ctx, status.Header.MemberId); err != nil {
return errors.WithMessage(err, "failed to disarm etcd alarms")
}
// refresh status - note that errors may remain on other nodes, but this
// should not prevent us from continuing with startup.
status, err = e.status(ctx)
if err != nil {
return errors.WithMessage(err, "failed to get etcd status")
}
logrus.Infof("Datastore using %d of %d bytes after defragment", status.DbSizeInUse, status.DbSize)
if len(status.Errors) > 0 {
logrus.Warnf("Errors present on etcd cluster after defragment: %s", strings.Join(status.Errors, ","))
}
members, err := e.client.MemberList(ctx)
if err != nil {
return err
@ -1061,12 +1045,13 @@ func (e *ETCD) cluster(ctx context.Context, wg *sync.WaitGroup, reset bool, opti
ClientCertAuth: true,
TrustedCAFile: e.config.Runtime.ETCDPeerCA,
},
SnapshotCount: 10000,
ElectionTimeout: 5000,
HeartbeatInterval: 500,
Logger: "zap",
LogOutputs: []string{"stderr"},
ListenClientHTTPURLs: e.listenClientHTTPURLs(),
SnapshotCount: 10000,
ElectionTimeout: 5000,
HeartbeatInterval: 500,
BootstrapDefragThresholdMegabytes: 100,
Logger: "zap",
LogOutputs: []string{"stderr"},
ListenClientHTTPURLs: e.listenClientHTTPURLs(),
SocketOpts: executor.ETCDSocketOpts{
ReuseAddress: true,
ReusePort: true,
@ -1470,18 +1455,6 @@ func (e *ETCD) status(ctx context.Context) (*clientv3.StatusResponse, error) {
return e.client.Status(ctx, endpoints[0])
}
// defragment issues a Defragment request through the etcd client against the
// first endpoint returned by getEndpoints for the current control config.
// It returns an error if the etcd client has not been created, or whatever
// error the Defragment call itself reports.
func (e *ETCD) defragment(ctx context.Context) error {
	// Without a client there is nothing to send the request through.
	if e.client == nil {
		return errors.New("etcd client was nil")
	}

	logrus.Infof("Defragmenting etcd database")

	// Only the first configured endpoint is targeted.
	target := getEndpoints(e.config)[0]
	if _, err := e.client.Defragment(ctx, target); err != nil {
		return err
	}
	return nil
}
// ClientURLs returns a list of all non-learner etcd cluster member client access URLs.
// The list is retrieved from the remote server that is being joined.
func ClientURLs(ctx context.Context, clientAccessInfo *clientaccess.Info, selfIP string) ([]string, Members, error) {