diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 7db9c53171..f46f1fa64d 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -675,6 +675,18 @@ func main() { os.Exit(2) } + // Set TSDB retention defaults from CLI flags before any config file is loaded. + // This makes CLI flags act as the default when no retention section is present. + cliRetentionDuration := cfg.tsdb.RetentionDuration + cliMaxBytes := cfg.tsdb.MaxBytes + if cliRetentionDuration == 0 && cliMaxBytes == 0 { + cliRetentionDuration = defaultRetentionDuration + } + config.DefaultTSDBRetentionConfig = config.TSDBRetentionConfig{ + Time: cliRetentionDuration, + Size: cliMaxBytes, + } + // Throw error for invalid config before starting other components. var cfgFile *config.Config if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil { @@ -716,21 +728,11 @@ func main() { logger.Warn("The option --storage.tsdb.block-reload-interval is set to a value less than 1s. 
Setting it to 1s to avoid overload.") cfg.tsdb.BlockReloadInterval = model.Duration(1 * time.Second) } - if cfgFile.StorageConfig.TSDBConfig != nil { - cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow - cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold - if cfgFile.StorageConfig.TSDBConfig.Retention != nil { - if cfgFile.StorageConfig.TSDBConfig.Retention.Time > 0 { - cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time - } - if cfgFile.StorageConfig.TSDBConfig.Retention.Size > 0 { - cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size - } - if cfgFile.StorageConfig.TSDBConfig.Retention.Percentage > 0 { - cfg.tsdb.MaxPercentage = cfgFile.StorageConfig.TSDBConfig.Retention.Percentage - } - } - } + cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow + cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold + cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time + cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size + cfg.tsdb.MaxPercentage = cfgFile.StorageConfig.TSDBConfig.Retention.Percentage // Set Go runtime parameters before we get too far into initialization. updateGoGC(cfgFile, logger) @@ -782,11 +784,6 @@ func main() { cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/") if !agentMode { - if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 && cfg.tsdb.MaxPercentage == 0 { - cfg.tsdb.RetentionDuration = defaultRetentionDuration - logger.Info("No time, size or percentage retention was set so using the default time retention", "duration", defaultRetentionDuration) - } - // Check for overflows. This limits our max retention to 100y. 
if cfg.tsdb.RetentionDuration < 0 { y, err := model.ParseDuration("100y") @@ -1035,8 +1032,29 @@ func main() { reloaders := []reloader{ { - name: "db_storage", - reloader: localStorage.ApplyConfig, + name: "db_storage", + reloader: func() func(*config.Config) error { + lastTSDBRetention := config.TSDBRetentionConfig{} + return func(cfg *config.Config) error { + err := localStorage.ApplyConfig(cfg) + if err != nil || agentMode || cfg.StorageConfig.TSDBConfig == nil || cfg.StorageConfig.TSDBConfig.Retention == nil { + return err + } + + curr := cfg.StorageConfig.TSDBConfig.Retention + if *curr == lastTSDBRetention { + return nil + } + + logger.Info("TSDB retention updated", + "duration", curr.Time, + "size", curr.Size, + "percentage", curr.Percentage, + ) + lastTSDBRetention = *curr + return nil + } + }(), }, { name: "remote_storage", reloader: remoteStorage.ApplyConfig, diff --git a/config/config.go b/config/config.go index b390a4a629..2082743b0d 100644 --- a/config/config.go +++ b/config/config.go @@ -83,6 +83,13 @@ func Load(s string, logger *slog.Logger) (*Config, error) { return nil, err } + // When the config body is empty, UnmarshalYAML is never called, so + // TSDBConfig may still be nil. + if cfg.StorageConfig.TSDBConfig == nil { + retention := DefaultTSDBRetentionConfig + cfg.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention} + } + b := labels.NewScratchBuilder(0) cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) { newV := os.Expand(v.Value, func(s string) string { @@ -276,6 +283,9 @@ var ( // For backwards compatibility. LabelNamePreserveMultipleUnderscores: true, } + + // DefaultTSDBRetentionConfig is the default TSDB retention configuration. + DefaultTSDBRetentionConfig TSDBRetentionConfig ) // Config is the top-level configuration for Prometheus's config files. 
@@ -405,6 +415,13 @@ func (c *Config) UnmarshalYAML(unmarshal func(any) error) error { c.Runtime = DefaultRuntimeConfig } + // If no storage.tsdb section is present, TSDBConfig is nil and its + // UnmarshalYAML never runs. Inject the default retention here. + if c.StorageConfig.TSDBConfig == nil { + retention := DefaultTSDBRetentionConfig + c.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention} + } + for _, rf := range c.RuleFiles { if !patRulePath.MatchString(rf) { return fmt.Errorf("invalid rule file path %q", rf) @@ -1097,6 +1114,22 @@ type TSDBRetentionConfig struct { Percentage uint `yaml:"percentage,omitempty"` } +// UnmarshalYAML implements the yaml.Unmarshaler interface. +func (t *TSDBRetentionConfig) UnmarshalYAML(unmarshal func(any) error) error { + *t = TSDBRetentionConfig{} + type plain TSDBRetentionConfig + if err := unmarshal((*plain)(t)); err != nil { + return err + } + if t.Size < 0 { + return fmt.Errorf("'storage.tsdb.retention.size' must be greater than or equal to 0, got %v", t.Size) + } + if t.Percentage > 100 { + return fmt.Errorf("'storage.tsdb.retention.percentage' must be in the range [0, 100], got %v", t.Percentage) + } + return nil +} + // TSDBConfig configures runtime reloadable configuration options. 
type TSDBConfig struct { // OutOfOrderTimeWindow sets how long back in time an out-of-order sample can be inserted @@ -1127,6 +1160,11 @@ func (t *TSDBConfig) UnmarshalYAML(unmarshal func(any) error) error { t.OutOfOrderTimeWindow = time.Duration(t.OutOfOrderTimeWindowFlag).Milliseconds() + if t.Retention == nil { + retention := DefaultTSDBRetentionConfig + t.Retention = &retention + } + return nil } diff --git a/config/config_default_test.go b/config/config_default_test.go index 91c290ae4e..ec7a112824 100644 --- a/config/config_default_test.go +++ b/config/config_default_test.go @@ -20,9 +20,10 @@ const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml" var ruleFilesExpectedConf = &Config{ loaded: true, - GlobalConfig: DefaultGlobalConfig, - Runtime: DefaultRuntimeConfig, - OTLPConfig: DefaultOTLPConfig, + GlobalConfig: DefaultGlobalConfig, + Runtime: DefaultRuntimeConfig, + OTLPConfig: DefaultOTLPConfig, + StorageConfig: StorageConfig{TSDBConfig: &TSDBConfig{Retention: &TSDBRetentionConfig{}}}, RuleFiles: []string{ "testdata/first.rules", "testdata/rules/second.rules", diff --git a/config/config_test.go b/config/config_test.go index 7001283443..8d4df86be6 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -2626,6 +2626,22 @@ var expectedErrors = []struct { filename: "stackit_endpoint.bad.yml", errMsg: "invalid endpoint", }, + { + filename: "tsdb_retention_time.bad.yml", + errMsg: `not a valid duration string: "-1h"`, + }, + { + filename: "tsdb_retention_size.bad.yml", + errMsg: `'storage.tsdb.retention.size' must be greater than or equal to 0`, + }, + { + filename: "tsdb_retention_percentage.bad.yml", + errMsg: `'storage.tsdb.retention.percentage' must be in the range [0, 100]`, + }, + { + filename: "tsdb_retention_percentage_negative.bad.yml", + errMsg: "cannot unmarshal !!int `-1` into uint", + }, } func TestBadConfigs(t *testing.T) { @@ -2649,6 +2665,8 @@ func TestEmptyConfig(t *testing.T) { require.NoError(t, err) exp := DefaultConfig 
exp.loaded = true + retention := DefaultTSDBRetentionConfig + exp.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention} require.Equal(t, exp, *c) require.Equal(t, 75, c.Runtime.GoGC) } @@ -2700,6 +2718,10 @@ func TestGlobalConfig(t *testing.T) { require.NoError(t, err) exp := DefaultConfig exp.loaded = true + // TSDBConfig is always injected by Config.UnmarshalYAML even when no + // storage.tsdb section is present, so the expected config must include it. + retention := DefaultTSDBRetentionConfig + exp.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention} require.Equal(t, exp, *c) }) diff --git a/config/config_windows_test.go b/config/config_windows_test.go index 72a56ff41a..e7627f562a 100644 --- a/config/config_windows_test.go +++ b/config/config_windows_test.go @@ -18,8 +18,9 @@ const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml" var ruleFilesExpectedConf = &Config{ loaded: true, - GlobalConfig: DefaultGlobalConfig, - Runtime: DefaultRuntimeConfig, + GlobalConfig: DefaultGlobalConfig, + Runtime: DefaultRuntimeConfig, + StorageConfig: StorageConfig{TSDBConfig: &TSDBConfig{Retention: &TSDBRetentionConfig{}}}, RuleFiles: []string{ "testdata\\first.rules", "testdata\\rules\\second.rules", diff --git a/config/testdata/tsdb_retention_percentage.bad.yml b/config/testdata/tsdb_retention_percentage.bad.yml new file mode 100644 index 0000000000..cb57abe0c0 --- /dev/null +++ b/config/testdata/tsdb_retention_percentage.bad.yml @@ -0,0 +1,4 @@ +storage: + tsdb: + retention: + percentage: 101 diff --git a/config/testdata/tsdb_retention_percentage_negative.bad.yml b/config/testdata/tsdb_retention_percentage_negative.bad.yml new file mode 100644 index 0000000000..2eeb60c091 --- /dev/null +++ b/config/testdata/tsdb_retention_percentage_negative.bad.yml @@ -0,0 +1,4 @@ +storage: + tsdb: + retention: + percentage: -1 diff --git a/config/testdata/tsdb_retention_size.bad.yml b/config/testdata/tsdb_retention_size.bad.yml new file mode 100644 index 
0000000000..ecae64aae6 --- /dev/null +++ b/config/testdata/tsdb_retention_size.bad.yml @@ -0,0 +1,4 @@ +storage: + tsdb: + retention: + size: -1GB diff --git a/config/testdata/tsdb_retention_time.bad.yml b/config/testdata/tsdb_retention_time.bad.yml new file mode 100644 index 0000000000..465b3cf5da --- /dev/null +++ b/config/testdata/tsdb_retention_time.bad.yml @@ -0,0 +1,4 @@ +storage: + tsdb: + retention: + time: -1h diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 75b47e0d94..3682348e67 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -3877,9 +3877,9 @@ with this feature. # or when a compaction completes, whichever comes first. [ retention: <retention> ] : # How long to retain samples in storage. If neither this option nor the size option - # is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. + # is set, the retention time defaults to 15d. Setting this to 0 disables time-based retention. # This option takes precedence over the deprecated command-line flag --storage.tsdb.retention.time. - [ time: <duration> | default = 15d ] + [ time: <duration> ] # Maximum number of bytes that can be stored for blocks. A unit is required, # supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. diff --git a/tsdb/db.go b/tsdb/db.go index 136d198750..2ca1bccf0d 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -1286,18 +1286,12 @@ func (db *DB) ApplyConfig(conf *config.Config) error { // Update retention configuration if provided. 
if conf.StorageConfig.TSDBConfig.Retention != nil { db.retentionMtx.Lock() - if conf.StorageConfig.TSDBConfig.Retention.Time > 0 { - db.opts.RetentionDuration = int64(conf.StorageConfig.TSDBConfig.Retention.Time) - db.metrics.retentionDuration.Set((time.Duration(db.opts.RetentionDuration) * time.Millisecond).Seconds()) - } - if conf.StorageConfig.TSDBConfig.Retention.Size > 0 { - db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size) - db.metrics.maxBytes.Set(float64(db.opts.MaxBytes)) - } - if conf.StorageConfig.TSDBConfig.Retention.Percentage > 0 { - db.opts.MaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage - db.metrics.maxPercentage.Set(float64(db.opts.MaxPercentage)) - } + db.opts.RetentionDuration = int64(time.Duration(conf.StorageConfig.TSDBConfig.Retention.Time) / time.Millisecond) + db.metrics.retentionDuration.Set((time.Duration(db.opts.RetentionDuration) * time.Millisecond).Seconds()) + db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size) + db.metrics.maxBytes.Set(float64(db.opts.MaxBytes)) + db.opts.MaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage + db.metrics.maxPercentage.Set(float64(db.opts.MaxPercentage)) db.retentionMtx.Unlock() } } else { diff --git a/tsdb/db_test.go b/tsdb/db_test.go index fe2a3eaa65..21b2c08124 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -1748,7 +1748,7 @@ func TestRuntimeRetentionConfigChange(t *testing.T) { StorageConfig: config.StorageConfig{ TSDBConfig: &config.TSDBConfig{ Retention: &config.TSDBRetentionConfig{ - Time: model.Duration(shorterRetentionDuration), + Time: model.Duration(time.Duration(shorterRetentionDuration) * time.Millisecond), }, }, }, @@ -1777,6 +1777,31 @@ func TestRuntimeRetentionConfigChange(t *testing.T) { require.Positive(t, int(prom_testutil.ToFloat64(db.metrics.timeRetentionCount)), "time retention count should be incremented") } +// TestApplyConfigRetentionDurationMetricUnit verifies that after a config +// reload the 
prometheus_tsdb_retention_limit_seconds metric reports the +// retention in seconds. +func TestApplyConfigRetentionDurationMetricUnit(t *testing.T) { + oneHourMs := int64(time.Hour / time.Millisecond) + db := newTestDB(t, withOpts(&Options{RetentionDuration: oneHourMs})) + + cfg := &config.Config{ + StorageConfig: config.StorageConfig{ + TSDBConfig: &config.TSDBConfig{ + Retention: &config.TSDBRetentionConfig{ + Time: model.Duration(time.Hour), + }, + }, + }, + } + require.NoError(t, db.ApplyConfig(cfg)) + + require.Equal(t, oneHourMs, db.getRetentionDuration()) + + gotSeconds := prom_testutil.ToFloat64(db.metrics.retentionDuration) + wantSeconds := time.Hour.Seconds() + require.Equal(t, wantSeconds, gotSeconds) +} + func TestNotMatcherSelectsLabelsUnsetSeries(t *testing.T) { db := newTestDB(t) diff --git a/web/web.go b/web/web.go index 90eaf13afe..c4fcfdb2c4 100644 --- a/web/web.go +++ b/web/web.go @@ -253,6 +253,11 @@ func (h *Handler) ApplyConfig(conf *config.Config) error { defer h.mtx.Unlock() h.config = conf + if conf.StorageConfig.TSDBConfig != nil && conf.StorageConfig.TSDBConfig.Retention != nil { + h.options.TSDBRetentionDuration = conf.StorageConfig.TSDBConfig.Retention.Time + h.options.TSDBMaxBytes = conf.StorageConfig.TSDBConfig.Retention.Size + h.options.TSDBMaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage + } return nil } @@ -866,20 +871,25 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) { status.Hostname = hostname status.ServerTime = time.Now().UTC() - if h.options.TSDBRetentionDuration != 0 { - status.StorageRetention = h.options.TSDBRetentionDuration.String() + h.mtx.RLock() + tsdbRetentionDuration := h.options.TSDBRetentionDuration + tsdbMaxBytes := h.options.TSDBMaxBytes + tsdbMaxPercentage := h.options.TSDBMaxPercentage + h.mtx.RUnlock() + if tsdbRetentionDuration != 0 { + status.StorageRetention = tsdbRetentionDuration.String() } - if h.options.TSDBMaxBytes != 0 { + if tsdbMaxBytes != 0 { if 
status.StorageRetention != "" { status.StorageRetention += " or " } - status.StorageRetention += h.options.TSDBMaxBytes.String() + status.StorageRetention += tsdbMaxBytes.String() } - if h.options.TSDBMaxPercentage != 0 { + if tsdbMaxPercentage != 0 { if status.StorageRetention != "" { status.StorageRetention += " or " } - status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(h.options.TSDBMaxPercentage), 10) + "%" + status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(tsdbMaxPercentage), 10) + "%" } metrics, err := h.gatherer.Gather()