diff --git a/builtin/logical/pki/backend.go b/builtin/logical/pki/backend.go
index 4b88b27c2f..2b9044db4e 100644
--- a/builtin/logical/pki/backend.go
+++ b/builtin/logical/pki/backend.go
@@ -131,6 +131,7 @@ func Backend(conf *logical.BackendConfig) *backend {
issuing.PathCerts,
issuing.PathCertMetadata,
acmePathPrefix,
+ autoTidyLastRunPath,
},
Root: []string{
@@ -294,8 +295,12 @@ func Backend(conf *logical.BackendConfig) *backend {
conf.System.ReplicationState().HasState(consts.ReplicationDRSecondary)
b.crlBuilder = newCRLBuilder(!cannotRebuildCRLs)
- // Delay the first tidy until after we've started up.
- b.lastTidy = time.Now()
+ // Delay the first tidy until after we've started up, this will be reset within the initialize function
+ now := time.Now()
+ b.lastAutoTidy = now
+
+ // Keep track of when this mount was started up.
+ b.mountStartup = now
b.unifiedTransferStatus = newUnifiedTransferStatus()
@@ -320,7 +325,14 @@ type backend struct {
tidyStatusLock sync.RWMutex
tidyStatus *tidyStatus
- lastTidy time.Time
+ // lastAutoTidy should be accessed through the tidyStatusLock,
+ // use getAutoTidyLastRun and writeAutoTidyLastRun instead of direct access
+ lastAutoTidy time.Time
+
+ // autoTidyBackoff a random time in the future in which auto-tidy can't start
+ // for after the system starts up to avoid a thundering herd of tidy operations
+ // at startup.
+ autoTidyBackoff time.Time
unifiedTransferStatus *UnifiedTransferStatus
@@ -335,6 +347,9 @@ type backend struct {
// Context around ACME operations
acmeState *acmeState
acmeAccountLock sync.RWMutex // (Write) Locked on Tidy, (Read) Locked on Account Creation
+
+ // Track when this mount was started.
+ mountStartup time.Time
}
// BackendOps a bridge/legacy interface until we can further
@@ -448,9 +463,38 @@ func (b *backend) initialize(ctx context.Context, ir *logical.InitializationRequ
b.GetCertificateCounter().SetError(err)
}
+ // Initialize lastAutoTidy from disk
+ b.initializeLastTidyFromStorage(sc)
+
return b.initializeEnt(sc, ir)
}
+// initializeLastTidyFromStorage reads the time we last ran auto tidy from storage and initializes
+// b.lastAutoTidy with the value. If no previous value existed, we persist time.Now() and initialize
+// b.lastAutoTidy with that value.
+func (b *backend) initializeLastTidyFromStorage(sc *storageContext) {
+ now := time.Now()
+
+ lastTidyTime, err := sc.getAutoTidyLastRun()
+ if err != nil {
+ lastTidyTime = now
+ b.Logger().Error("failed loading previous tidy last run time, using now", "error", err.Error())
+ }
+ if lastTidyTime.IsZero() {
+ // No previous time was set, persist now so we can track a starting point across Vault restarts
+ lastTidyTime = now
+ if err = b.updateLastAutoTidyTime(sc, now); err != nil {
+ b.Logger().Error("failed persisting tidy last run time", "error", err.Error())
+ }
+ }
+
+ // We bypass using updateLastAutoTidyTime here to avoid the storage write on init
+ // that normally isn't required
+ b.tidyStatusLock.Lock()
+ defer b.tidyStatusLock.Unlock()
+ b.lastAutoTidy = lastTidyTime
+}
+
func (b *backend) cleanup(ctx context.Context) {
sc := b.makeStorageContext(ctx, b.storage)
@@ -708,13 +752,20 @@ func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) er
// Check if we should run another tidy...
now := time.Now()
- b.tidyStatusLock.RLock()
- nextOp := b.lastTidy.Add(config.Interval)
- b.tidyStatusLock.RUnlock()
+ nextOp := b.getLastAutoTidyTime().Add(config.Interval)
if now.Before(nextOp) {
return nil
}
+ if b.autoTidyBackoff.IsZero() {
+ b.autoTidyBackoff = config.CalculateStartupBackoff(b.mountStartup)
+ }
+
+ if b.autoTidyBackoff.After(now) {
+ b.Logger().Info("Auto tidy will not run as we are still within the random backoff ending at", "backoff_until", b.autoTidyBackoff)
+ return nil
+ }
+
// Ensure a tidy isn't already running... If it is, we'll trigger
// again when the running one finishes.
if !atomic.CompareAndSwapUint32(b.tidyCASGuard, 0, 1) {
@@ -724,9 +775,11 @@ func (b *backend) periodicFunc(ctx context.Context, request *logical.Request) er
// Prevent ourselves from starting another tidy operation while
// this one is still running. This operation runs in the background
// and has a separate error reporting mechanism.
- b.tidyStatusLock.Lock()
- b.lastTidy = now
- b.tidyStatusLock.Unlock()
+ err = b.updateLastAutoTidyTime(sc, now)
+ if err != nil {
+ // We don't really mind if this write fails, we'll re-run in the future
+ b.Logger().Warn("failed to persist auto tidy last run time", "error", err.Error())
+ }
// Because the request from the parent storage will be cleared at
// some point (and potentially reused) -- due to tidy executing in
diff --git a/builtin/logical/pki/path_tidy.go b/builtin/logical/pki/path_tidy.go
index e18540cc33..a8971832c2 100644
--- a/builtin/logical/pki/path_tidy.go
+++ b/builtin/logical/pki/path_tidy.go
@@ -8,6 +8,7 @@ import (
"crypto/x509"
"errors"
"fmt"
+ "math/rand/v2"
"net/http"
"sync/atomic"
"time"
@@ -81,8 +82,10 @@ type tidyStatus struct {
type tidyConfig struct {
// AutoTidy config
- Enabled bool `json:"enabled"`
- Interval time.Duration `json:"interval_duration"`
+ Enabled bool `json:"enabled"`
+ Interval time.Duration `json:"interval_duration"`
+ MinStartupBackoff time.Duration `json:"min_startup_backoff_duration"`
+ MaxStartupBackoff time.Duration `json:"max_startup_backoff_duration"`
// Tidy Operations
CertStore bool `json:"tidy_cert_store"`
@@ -116,9 +119,24 @@ func (tc *tidyConfig) AnyTidyConfig() string {
return "tidy_cert_store / tidy_revoked_certs / tidy_revoked_cert_issuer_associations / tidy_expired_issuers / tidy_move_legacy_ca_bundle / tidy_revocation_queue / tidy_cross_cluster_revoked_certs / tidy_acme"
}
+func (tc *tidyConfig) CalculateStartupBackoff(mountStartup time.Time) time.Time {
+ minBackoff := int64(tc.MinStartupBackoff.Seconds())
+ maxBackoff := int64(tc.MaxStartupBackoff.Seconds())
+
+ maxNumber := maxBackoff - minBackoff
+ if maxNumber <= 0 {
+ return mountStartup.Add(tc.MinStartupBackoff)
+ }
+
+ backoffSecs := rand.Int64N(maxNumber) + minBackoff
+ return mountStartup.Add(time.Duration(backoffSecs) * time.Second)
+}
+
var defaultTidyConfig = tidyConfig{
Enabled: false,
Interval: 12 * time.Hour,
+ MinStartupBackoff: 5 * time.Minute,
+ MaxStartupBackoff: 15 * time.Minute,
CertStore: false,
RevokedCerts: false,
IssuerAssocs: false,
@@ -561,6 +579,108 @@ func pathTidyStatus(b *backend) *framework.Path {
}
func pathConfigAutoTidy(b *backend) *framework.Path {
+ autoTidyResponseFields := map[string]*framework.FieldSchema{
+ "enabled": {
+ Type: framework.TypeBool,
+ Description: `Specifies whether automatic tidy is enabled or not`,
+ Required: true,
+ },
+ "min_startup_backoff_duration": {
+ Type: framework.TypeInt,
+ Description: `The minimum amount of time in seconds auto-tidy will be delayed after startup`,
+ Required: true,
+ },
+ "max_startup_backoff_duration": {
+ Type: framework.TypeInt,
+ Description: `The maximum amount of time in seconds auto-tidy will be delayed after startup`,
+ Required: true,
+ },
+ "interval_duration": {
+ Type: framework.TypeInt,
+ Description: `Specifies the duration between automatic tidy operation`,
+ Required: true,
+ },
+ "tidy_cert_store": {
+ Type: framework.TypeBool,
+ Description: `Specifies whether to tidy up the certificate store`,
+ Required: true,
+ },
+ "tidy_revoked_certs": {
+ Type: framework.TypeBool,
+ Description: `Specifies whether to remove all invalid and expired certificates from storage`,
+ Required: true,
+ },
+ "tidy_revoked_cert_issuer_associations": {
+ Type: framework.TypeBool,
+ Description: `Specifies whether to associate revoked certificates with their corresponding issuers`,
+ Required: true,
+ },
+ "tidy_expired_issuers": {
+ Type: framework.TypeBool,
+ Description: `Specifies whether tidy expired issuers`,
+ Required: true,
+ },
+ "tidy_acme": {
+ Type: framework.TypeBool,
+ Description: `Tidy Unused Acme Accounts, and Orders`,
+ Required: true,
+ },
+ "tidy_cert_metadata": {
+ Type: framework.TypeBool,
+ Description: `Tidy cert metadata`,
+ Required: true,
+ },
+ "tidy_cmpv2_nonce_store": {
+ Type: framework.TypeBool,
+ Description: `Tidy CMPv2 nonce store`,
+ Required: true,
+ },
+ "safety_buffer": {
+ Type: framework.TypeInt,
+ Description: `Safety buffer time duration`,
+ Required: true,
+ },
+ "issuer_safety_buffer": {
+ Type: framework.TypeInt,
+ Description: `Issuer safety buffer`,
+ Required: true,
+ },
+ "acme_account_safety_buffer": {
+ Type: framework.TypeInt,
+ Description: `Safety buffer after creation after which accounts lacking orders are revoked`,
+ Required: true,
+ },
+ "pause_duration": {
+ Type: framework.TypeString,
+ Description: `Duration to pause between tidying certificates`,
+ Required: true,
+ },
+ "tidy_cross_cluster_revoked_certs": {
+ Type: framework.TypeBool,
+ Description: `Tidy the cross-cluster revoked certificate store`,
+ Required: true,
+ },
+ "tidy_revocation_queue": {
+ Type: framework.TypeBool,
+ Required: true,
+ },
+ "tidy_move_legacy_ca_bundle": {
+ Type: framework.TypeBool,
+ Required: true,
+ },
+ "revocation_queue_safety_buffer": {
+ Type: framework.TypeInt,
+ Required: true,
+ },
+ "publish_stored_certificate_count_metrics": {
+ Type: framework.TypeBool,
+ Required: true,
+ },
+ "maintain_stored_certificate_counts": {
+ Type: framework.TypeBool,
+ Required: true,
+ },
+ }
return &framework.Path{
Pattern: "config/auto-tidy",
DisplayAttrs: &framework.DisplayAttributes{
@@ -571,6 +691,16 @@ func pathConfigAutoTidy(b *backend) *framework.Path {
Type: framework.TypeBool,
Description: `Set to true to enable automatic tidy operations.`,
},
+ "min_startup_backoff_duration": {
+ Type: framework.TypeDurationSecond,
+ Description: `The minimum amount of time in seconds auto-tidy will be delayed after startup.`,
+ Default: int(defaultTidyConfig.MinStartupBackoff.Seconds()),
+ },
+ "max_startup_backoff_duration": {
+ Type: framework.TypeDurationSecond,
+ Description: `The maximum amount of time in seconds auto-tidy will be delayed after startup.`,
+ Default: int(defaultTidyConfig.MaxStartupBackoff.Seconds()),
+ },
"interval_duration": {
Type: framework.TypeDurationSecond,
Description: `Interval at which to run an auto-tidy operation. This is the time between tidy invocations (after one finishes to the start of the next). Running a manual tidy will reset this duration.`,
@@ -601,97 +731,7 @@ available on the tidy-status endpoint.`,
Responses: map[int][]framework.Response{
http.StatusOK: {{
Description: "OK",
- Fields: map[string]*framework.FieldSchema{
- "enabled": {
- Type: framework.TypeBool,
- Description: `Specifies whether automatic tidy is enabled or not`,
- Required: true,
- },
- "interval_duration": {
- Type: framework.TypeInt,
- Description: `Specifies the duration between automatic tidy operation`,
- Required: true,
- },
- "tidy_cert_store": {
- Type: framework.TypeBool,
- Description: `Specifies whether to tidy up the certificate store`,
- Required: true,
- },
- "tidy_revoked_certs": {
- Type: framework.TypeBool,
- Description: `Specifies whether to remove all invalid and expired certificates from storage`,
- Required: true,
- },
- "tidy_revoked_cert_issuer_associations": {
- Type: framework.TypeBool,
- Description: `Specifies whether to associate revoked certificates with their corresponding issuers`,
- Required: true,
- },
- "tidy_expired_issuers": {
- Type: framework.TypeBool,
- Description: `Specifies whether tidy expired issuers`,
- Required: true,
- },
- "tidy_acme": {
- Type: framework.TypeBool,
- Description: `Tidy Unused Acme Accounts, and Orders`,
- Required: true,
- },
- "tidy_cert_metadata": {
- Type: framework.TypeBool,
- Description: `Tidy cert metadata`,
- Required: true,
- },
- "tidy_cmpv2_nonce_store": {
- Type: framework.TypeBool,
- Description: `Tidy CMPv2 nonce store`,
- Required: true,
- },
- "safety_buffer": {
- Type: framework.TypeInt,
- Description: `Safety buffer time duration`,
- Required: true,
- },
- "issuer_safety_buffer": {
- Type: framework.TypeInt,
- Description: `Issuer safety buffer`,
- Required: true,
- },
- "acme_account_safety_buffer": {
- Type: framework.TypeInt,
- Description: `Safety buffer after creation after which accounts lacking orders are revoked`,
- Required: false,
- },
- "pause_duration": {
- Type: framework.TypeString,
- Description: `Duration to pause between tidying certificates`,
- Required: true,
- },
- "tidy_move_legacy_ca_bundle": {
- Type: framework.TypeBool,
- Required: true,
- },
- "tidy_cross_cluster_revoked_certs": {
- Type: framework.TypeBool,
- Required: true,
- },
- "tidy_revocation_queue": {
- Type: framework.TypeBool,
- Required: true,
- },
- "revocation_queue_safety_buffer": {
- Type: framework.TypeInt,
- Required: true,
- },
- "publish_stored_certificate_count_metrics": {
- Type: framework.TypeBool,
- Required: true,
- },
- "maintain_stored_certificate_counts": {
- Type: framework.TypeBool,
- Required: true,
- },
- },
+ Fields: autoTidyResponseFields,
}},
},
},
@@ -704,98 +744,7 @@ available on the tidy-status endpoint.`,
Responses: map[int][]framework.Response{
http.StatusOK: {{
Description: "OK",
- Fields: map[string]*framework.FieldSchema{
- "enabled": {
- Type: framework.TypeBool,
- Description: `Specifies whether automatic tidy is enabled or not`,
- Required: true,
- },
- "interval_duration": {
- Type: framework.TypeInt,
- Description: `Specifies the duration between automatic tidy operation`,
- Required: true,
- },
- "tidy_cert_store": {
- Type: framework.TypeBool,
- Description: `Specifies whether to tidy up the certificate store`,
- Required: true,
- },
- "tidy_revoked_certs": {
- Type: framework.TypeBool,
- Description: `Specifies whether to remove all invalid and expired certificates from storage`,
- Required: true,
- },
- "tidy_revoked_cert_issuer_associations": {
- Type: framework.TypeBool,
- Description: `Specifies whether to associate revoked certificates with their corresponding issuers`,
- Required: true,
- },
- "tidy_expired_issuers": {
- Type: framework.TypeBool,
- Description: `Specifies whether tidy expired issuers`,
- Required: true,
- },
- "tidy_acme": {
- Type: framework.TypeBool,
- Description: `Tidy Unused Acme Accounts, and Orders`,
- Required: true,
- },
- "tidy_cert_metadata": {
- Type: framework.TypeBool,
- Description: `Tidy cert metadata`,
- Required: true,
- },
- "tidy_cmpv2_nonce_store": {
- Type: framework.TypeBool,
- Description: `Tidy CMPv2 nonce store`,
- Required: true,
- },
- "safety_buffer": {
- Type: framework.TypeInt,
- Description: `Safety buffer time duration`,
- Required: true,
- },
- "issuer_safety_buffer": {
- Type: framework.TypeInt,
- Description: `Issuer safety buffer`,
- Required: true,
- },
- "acme_account_safety_buffer": {
- Type: framework.TypeInt,
- Description: `Safety buffer after creation after which accounts lacking orders are revoked`,
- Required: true,
- },
- "pause_duration": {
- Type: framework.TypeString,
- Description: `Duration to pause between tidying certificates`,
- Required: true,
- },
- "tidy_cross_cluster_revoked_certs": {
- Type: framework.TypeBool,
- Description: `Tidy the cross-cluster revoked certificate store`,
- Required: true,
- },
- "tidy_revocation_queue": {
- Type: framework.TypeBool,
- Required: true,
- },
- "tidy_move_legacy_ca_bundle": {
- Type: framework.TypeBool,
- Required: true,
- },
- "revocation_queue_safety_buffer": {
- Type: framework.TypeInt,
- Required: true,
- },
- "publish_stored_certificate_count_metrics": {
- Type: framework.TypeBool,
- Required: true,
- },
- "maintain_stored_certificate_counts": {
- Type: framework.TypeBool,
- Required: true,
- },
- },
+ Fields: autoTidyResponseFields,
}},
},
// Read more about why these flags are set in backend.go.
@@ -896,16 +845,20 @@ func (b *backend) pathTidyWrite(ctx context.Context, req *logical.Request, d *fr
Storage: req.Storage,
}
+ resp := &logical.Response{}
// Mark the last tidy operation as relatively recent, to ensure we don't
// try to trigger the periodic function.
- b.tidyStatusLock.Lock()
- b.lastTidy = time.Now()
- b.tidyStatusLock.Unlock()
+ // NOTE: not sure this is correct as we are updating the auto tidy time with this manual run. Ideally we
+ // could track when we ran each type of tidy was last run which would allow manual runs and auto
+ // runs to properly impact each other.
+ sc := b.makeStorageContext(ctx, req.Storage)
+ if err := b.updateLastAutoTidyTime(sc, time.Now()); err != nil {
+ resp.AddWarning(fmt.Sprintf("failed persisting tidy last run time: %v", err))
+ }
// Kick off the actual tidy.
b.startTidyOperation(req, config)
- resp := &logical.Response{}
if !config.IsAnyTidyEnabled() {
resp.AddWarning("Manual tidy requested but no tidy operations were set. Enable at least one tidy operation to be run (" + config.AnyTidyConfig() + ").")
} else {
@@ -1042,9 +995,10 @@ func (b *backend) startTidyOperation(req *logical.Request, config *tidyConfig) {
// Since the tidy operation finished without an error, we don't
// really want to start another tidy right away (if the interval
// is too short). So mark the last tidy as now.
- b.tidyStatusLock.Lock()
- b.lastTidy = time.Now()
- b.tidyStatusLock.Unlock()
+ sc := b.makeStorageContext(ctx, req.Storage)
+ if err := b.updateLastAutoTidyTime(sc, time.Now()); err != nil {
+ logger.Error("error persisting last tidy run time", "error", err)
+ }
}
}()
}
@@ -1770,6 +1724,7 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
"acme_account_safety_buffer": nil,
"cert_metadata_deleted_count": nil,
"cmpv2_nonce_deleted_count": nil,
+ "last_auto_tidy_finished": b.getLastAutoTidyTime(),
},
}
@@ -1814,7 +1769,6 @@ func (b *backend) pathTidyStatusRead(_ context.Context, _ *logical.Request, _ *f
resp.Data["revocation_queue_deleted_count"] = b.tidyStatus.revQueueDeletedCount
resp.Data["cross_revoked_cert_deleted_count"] = b.tidyStatus.crossRevokedDeletedCount
resp.Data["revocation_queue_safety_buffer"] = b.tidyStatus.revQueueSafetyBuffer
- resp.Data["last_auto_tidy_finished"] = b.lastTidy
resp.Data["total_acme_account_count"] = b.tidyStatus.acmeAccountsCount
resp.Data["acme_account_deleted_count"] = b.tidyStatus.acmeAccountsDeletedCount
resp.Data["acme_account_revoked_count"] = b.tidyStatus.acmeAccountsRevokedCount
@@ -1865,8 +1819,44 @@ func (b *backend) pathConfigAutoTidyWrite(ctx context.Context, req *logical.Requ
return nil, err
}
+ isAutoTidyBeingEnabled := false
+
if enabledRaw, ok := d.GetOk("enabled"); ok {
- config.Enabled = enabledRaw.(bool)
+ enabled, err := parseutil.ParseBool(enabledRaw)
+ if err != nil {
+ return logical.ErrorResponse(fmt.Sprintf("failed to parse enabled flag as a boolean: %s", err.Error())), nil
+ }
+ if !config.Enabled && enabled {
+ // we are turning on auto-tidy reset our persisted time to now
+ isAutoTidyBeingEnabled = true
+ }
+ config.Enabled = enabled
+ }
+
+ if minStartupBackoffRaw, ok := d.GetOk("min_startup_backoff_duration"); ok {
+ minDuration, err := parseutil.ParseDurationSecond(minStartupBackoffRaw)
+ if err != nil {
+ return logical.ErrorResponse(fmt.Sprintf("failed to parse min_startup_backoff_duration flag as a duration: %s", err.Error())), nil
+ }
+ if minDuration.Seconds() < 1 {
+ return logical.ErrorResponse(fmt.Sprintf("min_startup_backoff_duration must be at least 1 second: parsed: %v", minDuration)), nil
+ }
+ config.MinStartupBackoff = minDuration
+ }
+
+ if maxStartupBackoffRaw, ok := d.GetOk("max_startup_backoff_duration"); ok {
+ maxDuration, err := parseutil.ParseDurationSecond(maxStartupBackoffRaw)
+ if err != nil {
+ return logical.ErrorResponse(fmt.Sprintf("failed to parse max_startup_backoff_duration flag as a duration: %s", err.Error())), nil
+ }
+ if maxDuration.Seconds() < 1 {
+ return logical.ErrorResponse(fmt.Sprintf("max_startup_backoff_duration must be at least 1 second: parsed: %v", maxDuration)), nil
+ }
+ config.MaxStartupBackoff = maxDuration
+ }
+
+ if config.MinStartupBackoff > config.MaxStartupBackoff {
+ return logical.ErrorResponse(fmt.Sprintf("max_startup_backoff_duration %v must be greater or equal to min_startup_backoff_duration %v", config.MaxStartupBackoff, config.MinStartupBackoff)), nil
}
if intervalRaw, ok := d.GetOk("interval_duration"); ok {
@@ -1975,6 +1965,13 @@ func (b *backend) pathConfigAutoTidyWrite(ctx context.Context, req *logical.Requ
return nil, err
}
+ if isAutoTidyBeingEnabled {
+ if err := b.updateLastAutoTidyTime(sc, time.Now()); err != nil {
+ b.Logger().Warn("failed to update last auto tidy run time to now, the first auto-tidy "+
+ "might run soon and not at the next delay provided", "error", err.Error())
+ }
+ }
+
return &logical.Response{
Data: getTidyConfigData(*config),
}, nil
@@ -2114,6 +2111,24 @@ func (b *backend) tidyStatusIncCMPV2NonceDeletedCount() {
b.tidyStatus.cmpv2NonceDeletedCount++
}
+// updateLastAutoTidyTime should be used to update b.lastAutoTidy as the required locks
+// are acquired and the auto tidy time is persisted to storage to work across restarts
+func (b *backend) updateLastAutoTidyTime(sc *storageContext, lastRunTime time.Time) error {
+ b.tidyStatusLock.Lock()
+ defer b.tidyStatusLock.Unlock()
+
+ b.lastAutoTidy = lastRunTime
+ return sc.writeAutoTidyLastRun(lastRunTime)
+}
+
+// getLastAutoTidyTime should be used to read from b.lastAutoTidy as the required locks
+// are acquired prior to reading
+func (b *backend) getLastAutoTidyTime() time.Time {
+ b.tidyStatusLock.RLock()
+ defer b.tidyStatusLock.RUnlock()
+ return b.lastAutoTidy
+}
+
const pathTidyHelpSyn = `
Tidy up the backend by removing expired certificates, revocation information,
or both.
@@ -2210,6 +2225,8 @@ func getTidyConfigData(config tidyConfig) map[string]interface{} {
// This map is in the same order as tidyConfig to ensure that all fields are accounted for
"enabled": config.Enabled,
"interval_duration": int(config.Interval / time.Second),
+ "min_startup_backoff_duration": int(config.MinStartupBackoff.Seconds()),
+ "max_startup_backoff_duration": int(config.MaxStartupBackoff.Seconds()),
"tidy_cert_store": config.CertStore,
"tidy_revoked_certs": config.RevokedCerts,
"tidy_revoked_cert_issuer_associations": config.IssuerAssocs,
diff --git a/builtin/logical/pki/path_tidy_test.go b/builtin/logical/pki/path_tidy_test.go
index 46c9b5a89c..0b12e845b7 100644
--- a/builtin/logical/pki/path_tidy_test.go
+++ b/builtin/logical/pki/path_tidy_test.go
@@ -236,11 +236,13 @@ func TestAutoTidy(t *testing.T) {
// Write the auto-tidy config.
_, err = client.Logical().Write("pki/config/auto-tidy", map[string]interface{}{
- "enabled": true,
- "interval_duration": "1s",
- "tidy_cert_store": true,
- "tidy_revoked_certs": true,
- "safety_buffer": "1s",
+ "enabled": true,
+ "interval_duration": "1s",
+ "tidy_cert_store": true,
+ "tidy_revoked_certs": true,
+ "safety_buffer": "1s",
+ "min_startup_backoff_duration": "1s",
+ "max_startup_backoff_duration": "1s",
})
require.NoError(t, err)
@@ -341,6 +343,67 @@ func TestAutoTidy(t *testing.T) {
require.Nil(t, resp)
}
+// TestAutoTidyPersistsAcrossRestarts validates that on initial
+// startup of a mount we persisted the current auto tidy time so that
+// our counter that auto-tidy is based on isn't reset everytime Vault restarts
+func TestAutoTidyPersistsAcrossRestarts(t *testing.T) {
+ t.Parallel()
+
+ newPeriod := 1 * time.Second
+
+ // This test requires the periodicFunc to trigger, which requires we stand
+ // up a full test cluster.
+ coreConfig := &vault.CoreConfig{
+ LogicalBackends: map[string]logical.Factory{
+ "pki": Factory,
+ },
+ RollbackPeriod: newPeriod,
+ }
+ opts := &vault.TestClusterOptions{
+ HandlerFunc: vaulthttp.Handler,
+ NumCores: 1,
+ }
+ cluster := vault.NewTestCluster(t, coreConfig, opts)
+ cluster.Start()
+ defer cluster.Cleanup()
+
+ client := cluster.Cores[0].Client
+
+ // Mount PKI
+ err := client.Sys().Mount("pki", &api.MountInput{
+ Type: "pki",
+ })
+ require.NoError(t, err, "failed mounting pki")
+
+ // Run a tidy that should set us up
+ _, err = client.Logical().Write("pki/tidy", map[string]interface{}{
+ "tidy_cert_store": "true",
+ })
+ require.NoError(t, err, "failed running tidy")
+
+ waitForTidyToFinish(t, client, "pki")
+
+ resp, err := client.Logical().Read("pki/tidy-status")
+ require.NoError(t, err, "failed reading tidy status")
+ require.NotNil(t, resp, "response from tidy-status was nil")
+ lastAutoTidy, exists := resp.Data["last_auto_tidy_finished"]
+ require.True(t, exists, "did not find last_auto_tidy_finished")
+
+ cluster.StopCore(t, 0)
+ cluster.StartCore(t, 0, opts)
+ cluster.UnsealCore(t, cluster.Cores[0])
+ vault.TestWaitActive(t, cluster.Cores[0].Core)
+
+ client = cluster.Cores[0].Client
+ resp, err = client.Logical().Read("pki/tidy-status")
+ require.NoError(t, err, "failed reading tidy status")
+ require.NotNil(t, resp, "response from tidy-status was nil")
+ postRestartLastAutoTidy, exists := resp.Data["last_auto_tidy_finished"]
+ require.True(t, exists, "did not find last_auto_tidy_finished")
+
+ require.Equal(t, lastAutoTidy, postRestartLastAutoTidy, "values for last_auto_tidy_finished did not match on restart")
+}
+
func TestTidyCancellation(t *testing.T) {
t.Parallel()
@@ -553,6 +616,8 @@ func TestTidyIssuerConfig(t *testing.T) {
defaultConfigMap["pause_duration"] = time.Duration(defaultConfigMap["pause_duration"].(float64)).String()
defaultConfigMap["revocation_queue_safety_buffer"] = int(time.Duration(defaultConfigMap["revocation_queue_safety_buffer"].(float64)) / time.Second)
defaultConfigMap["acme_account_safety_buffer"] = int(time.Duration(defaultConfigMap["acme_account_safety_buffer"].(float64)) / time.Second)
+ defaultConfigMap["min_startup_backoff_duration"] = int(time.Duration(defaultConfigMap["min_startup_backoff_duration"].(float64)) / time.Second)
+ defaultConfigMap["max_startup_backoff_duration"] = int(time.Duration(defaultConfigMap["max_startup_backoff_duration"].(float64)) / time.Second)
require.Equal(t, defaultConfigMap, resp.Data)
@@ -683,6 +748,8 @@ func TestCertStorageMetrics(t *testing.T) {
"safety_buffer": "1s",
"maintain_stored_certificate_counts": true,
"publish_stored_certificate_count_metrics": false,
+ "min_startup_backoff_duration": "1s",
+ "max_startup_backoff_duration": "1s",
})
require.NoError(t, err)
@@ -1302,6 +1369,9 @@ func waitForTidyToFinish(t *testing.T, client *api.Client, mount string) *api.Se
if err != nil {
return fmt.Errorf("failed reading path: %s: %w", tidyStatusPath, err)
}
+ if statusResp == nil {
+ return fmt.Errorf("got nil, nil response from: %s", tidyStatusPath)
+ }
if state, ok := statusResp.Data["state"]; !ok || state == "Running" {
return fmt.Errorf("tidy status state is still running")
}
diff --git a/builtin/logical/pki/storage.go b/builtin/logical/pki/storage.go
index 990d4af686..43c4853ba3 100644
--- a/builtin/logical/pki/storage.go
+++ b/builtin/logical/pki/storage.go
@@ -50,6 +50,8 @@ const (
autoTidyConfigPath = "config/auto-tidy"
clusterConfigPath = "config/cluster"
+ autoTidyLastRunPath = "config/auto-tidy-last-run"
+
maxRolesToScanOnIssuerChange = 100
maxRolesToFindOnIssuerChange = 10
)
@@ -713,6 +715,14 @@ func (sc *storageContext) getAutoTidyConfig() (*tidyConfig, error) {
result.IssuerSafetyBuffer = defaultTidyConfig.IssuerSafetyBuffer
}
+ if result.MinStartupBackoff == 0 {
+ result.MinStartupBackoff = defaultTidyConfig.MinStartupBackoff
+ }
+
+ if result.MaxStartupBackoff == 0 {
+ result.MaxStartupBackoff = defaultTidyConfig.MaxStartupBackoff
+ }
+
return &result, nil
}
@@ -769,6 +779,41 @@ func (sc *storageContext) writeClusterConfig(config *issuing.ClusterConfigEntry)
return sc.Storage.Put(sc.Context, entry)
}
+// tidyLastRun Track the various pieces of information around tidy on a specific cluster
+type tidyLastRun struct {
+ LastRunTime time.Time
+}
+
+func (sc *storageContext) getAutoTidyLastRun() (time.Time, error) {
+ entry, err := sc.Storage.Get(sc.Context, autoTidyLastRunPath)
+ if err != nil {
+ return time.Time{}, fmt.Errorf("failed getting auto tidy last run: %w", err)
+ }
+ if entry == nil {
+ return time.Time{}, nil
+ }
+
+ var result tidyLastRun
+ if err = entry.DecodeJSON(&result); err != nil {
+ return time.Time{}, fmt.Errorf("failed parsing auto tidy last run: %w", err)
+ }
+ return result.LastRunTime, nil
+}
+
+func (sc *storageContext) writeAutoTidyLastRun(lastRunTime time.Time) error {
+ lastRun := tidyLastRun{LastRunTime: lastRunTime}
+ entry, err := logical.StorageEntryJSON(autoTidyLastRunPath, lastRun)
+ if err != nil {
+ return fmt.Errorf("failed generating json for auto tidy last run: %w", err)
+ }
+
+ if err := sc.Storage.Put(sc.Context, entry); err != nil {
+ return fmt.Errorf("failed writing auto tidy last run: %w", err)
+ }
+
+ return nil
+}
+
func fetchRevocationInfo(sc pki_backend.StorageContext, serial string) (*revocation.RevocationInfo, error) {
var revInfo *revocation.RevocationInfo
revEntry, err := fetchCertBySerial(sc, revocation.RevokedPath, serial)
diff --git a/changelog/28488.txt b/changelog/28488.txt
new file mode 100644
index 0000000000..bc297fc2f2
--- /dev/null
+++ b/changelog/28488.txt
@@ -0,0 +1,3 @@
+```release-note:improvement
+secrets/pki: Track the last time auto-tidy ran to address auto-tidy not running if the auto-tidy interval is longer than scheduled Vault restarts.
+```
diff --git a/ui/app/models/pki/tidy.js b/ui/app/models/pki/tidy.js
index ee4f335a1f..7b40ad1c7f 100644
--- a/ui/app/models/pki/tidy.js
+++ b/ui/app/models/pki/tidy.js
@@ -16,7 +16,7 @@ export default class PkiTidyModel extends Model {
label: 'Tidy ACME enabled',
labelDisabled: 'Tidy ACME disabled',
mapToBoolean: 'tidyAcme',
- helperTextDisabled: 'Tidying of ACME accounts, orders and authorizations is disabled',
+ helperTextDisabled: 'Tidying of ACME accounts, orders and authorizations is disabled.',
helperTextEnabled:
'The amount of time that must pass after creation that an account with no orders is marked revoked, and the amount of time after being marked revoked or deactivated.',
detailsLabel: 'ACME account safety buffer',
@@ -31,25 +31,45 @@ export default class PkiTidyModel extends Model {
})
tidyAcme;
+ // * auto tidy only fields
@attr('boolean', {
label: 'Automatic tidy enabled',
- defaultValue: false,
+ labelDisabled: 'Automatic tidy disabled',
+ helperTextDisabled: 'Automatic tidy operations will not run.',
})
- enabled; // auto-tidy only
+ enabled; // renders outside FormField loop as a toggle, auto tidy fields only render if enabled
@attr({
- label: 'Automatic tidy enabled',
- labelDisabled: 'Automatic tidy disabled',
- mapToBoolean: 'enabled',
+ editType: 'ttl',
helperTextEnabled:
'Sets the interval_duration between automatic tidy operations; note that this is from the end of one operation to the start of the next.',
- helperTextDisabled: 'Automatic tidy operations will not run.',
- detailsLabel: 'Automatic tidy duration',
+ hideToggle: true,
formatTtl: true,
})
- intervalDuration; // auto-tidy only
+ intervalDuration;
- @attr('string', {
+ @attr({
+ label: 'Minimum startup backoff duration',
+ editType: 'ttl',
+ helperTextEnabled:
+ 'Sets the min_startup_backoff_duration field which forces the minimum delay after Vault startup auto-tidy can run.',
+ hideToggle: true,
+ formatTtl: true,
+ })
+ minStartupBackoffDuration;
+
+ @attr({
+ label: 'Maximum startup backoff duration',
+ editType: 'ttl',
+ helperTextEnabled:
+ 'Sets the max_startup_backoff_duration field which forces the maximum delay after Vault startup auto-tidy can run.',
+ hideToggle: true,
+ formatTtl: true,
+ })
+ maxStartupBackoffDuration;
+ // * end of auto-tidy only fields
+
+ @attr({
editType: 'ttl',
helperTextEnabled:
'Specifies a duration that issuers should be kept for, past their NotAfter validity period. Defaults to 365 days (8760 hours).',
@@ -76,7 +96,7 @@ export default class PkiTidyModel extends Model {
})
revocationQueueSafetyBuffer; // enterprise only
- @attr('string', {
+ @attr({
editType: 'ttl',
helperTextEnabled:
'For a certificate to be expunged, the time must be after the expiration time of the certificate (according to the local clock) plus the safety buffer. Defaults to 72 hours.',
@@ -131,10 +151,16 @@ export default class PkiTidyModel extends Model {
tidyRevokedCerts;
get allGroups() {
- const groups = [{ autoTidy: ['enabled', 'intervalDuration'] }, ...this.sharedFields];
+ const groups = [{ autoTidy: ['enabled', ...this.autoTidyConfigFields] }, ...this.sharedFields];
return this._expandGroups(groups);
}
+ // fields that are specific to auto-tidy
+ get autoTidyConfigFields() {
+ // 'enabled' is not included here because it is responsible for hiding/showing these params and renders separately in the form
+ return ['intervalDuration', 'minStartupBackoffDuration', 'maxStartupBackoffDuration'];
+ }
+
// shared between auto and manual tidy operations
get sharedFields() {
const groups = [
diff --git a/ui/app/serializers/pki/tidy.js b/ui/app/serializers/pki/tidy.js
index d56c887976..abfad56070 100644
--- a/ui/app/serializers/pki/tidy.js
+++ b/ui/app/serializers/pki/tidy.js
@@ -11,6 +11,8 @@ export default class PkiTidySerializer extends ApplicationSerializer {
if (tidyType === 'manual') {
delete data?.enabled;
delete data?.intervalDuration;
+ delete data?.minStartupBackoffDuration;
+ delete data?.maxStartupBackoffDuration;
}
return data;
}
diff --git a/ui/lib/pki/addon/components/pki-tidy-form.hbs b/ui/lib/pki/addon/components/pki-tidy-form.hbs
index e45483b687..31d6b1f7cf 100644
--- a/ui/lib/pki/addon/components/pki-tidy-form.hbs
+++ b/ui/lib/pki/addon/components/pki-tidy-form.hbs
@@ -13,23 +13,34 @@