mirror of
https://github.com/prometheus/prometheus.git
synced 2026-05-28 04:02:21 -04:00
Merge pull request #18200 from roidelapluie/roidelapluie/retention-validation
Some checks are pending
buf.build / lint and publish (push) Waiting to run
CI / Go tests (push) Waiting to run
CI / More Go tests (push) Waiting to run
CI / Go tests with previous Go version (push) Waiting to run
CI / UI tests (push) Waiting to run
CI / Go tests on Windows (push) Waiting to run
CI / Mixins tests (push) Waiting to run
CI / Compliance testing (push) Waiting to run
CI / Build Prometheus for common architectures (push) Waiting to run
CI / Build Prometheus for all architectures (push) Waiting to run
CI / Report status of build Prometheus for all architectures (push) Blocked by required conditions
CI / Check generated parser (push) Waiting to run
CI / golangci-lint (push) Waiting to run
CI / fuzzing (push) Waiting to run
CI / codeql (push) Waiting to run
CI / Publish main branch artifacts (push) Blocked by required conditions
CI / Publish release artefacts (push) Blocked by required conditions
CI / Publish UI on npm Registry (push) Blocked by required conditions
Scorecards supply-chain security / Scorecards analysis (push) Waiting to run
Some checks are pending
buf.build / lint and publish (push) Waiting to run
CI / Go tests (push) Waiting to run
CI / More Go tests (push) Waiting to run
CI / Go tests with previous Go version (push) Waiting to run
CI / UI tests (push) Waiting to run
CI / Go tests on Windows (push) Waiting to run
CI / Mixins tests (push) Waiting to run
CI / Compliance testing (push) Waiting to run
CI / Build Prometheus for common architectures (push) Waiting to run
CI / Build Prometheus for all architectures (push) Waiting to run
CI / Report status of build Prometheus for all architectures (push) Blocked by required conditions
CI / Check generated parser (push) Waiting to run
CI / golangci-lint (push) Waiting to run
CI / fuzzing (push) Waiting to run
CI / codeql (push) Waiting to run
CI / Publish main branch artifacts (push) Blocked by required conditions
CI / Publish release artefacts (push) Blocked by required conditions
CI / Publish UI on npm Registry (push) Blocked by required conditions
Scorecards supply-chain security / Scorecards analysis (push) Waiting to run
Multiple fixes in retention configuration
This commit is contained in:
commit
16876bab95
13 changed files with 173 additions and 48 deletions
|
|
@ -675,6 +675,18 @@ func main() {
|
|||
os.Exit(2)
|
||||
}
|
||||
|
||||
// Set TSDB retention defaults from CLI flags before any config file is loaded.
|
||||
// This makes CLI flags act as the default when no retention section is present.
|
||||
cliRetentionDuration := cfg.tsdb.RetentionDuration
|
||||
cliMaxBytes := cfg.tsdb.MaxBytes
|
||||
if cliRetentionDuration == 0 && cliMaxBytes == 0 {
|
||||
cliRetentionDuration = defaultRetentionDuration
|
||||
}
|
||||
config.DefaultTSDBRetentionConfig = config.TSDBRetentionConfig{
|
||||
Time: cliRetentionDuration,
|
||||
Size: cliMaxBytes,
|
||||
}
|
||||
|
||||
// Throw error for invalid config before starting other components.
|
||||
var cfgFile *config.Config
|
||||
if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil {
|
||||
|
|
@ -716,21 +728,11 @@ func main() {
|
|||
logger.Warn("The option --storage.tsdb.block-reload-interval is set to a value less than 1s. Setting it to 1s to avoid overload.")
|
||||
cfg.tsdb.BlockReloadInterval = model.Duration(1 * time.Second)
|
||||
}
|
||||
if cfgFile.StorageConfig.TSDBConfig != nil {
|
||||
cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
|
||||
cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold
|
||||
if cfgFile.StorageConfig.TSDBConfig.Retention != nil {
|
||||
if cfgFile.StorageConfig.TSDBConfig.Retention.Time > 0 {
|
||||
cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time
|
||||
}
|
||||
if cfgFile.StorageConfig.TSDBConfig.Retention.Size > 0 {
|
||||
cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size
|
||||
}
|
||||
if cfgFile.StorageConfig.TSDBConfig.Retention.Percentage > 0 {
|
||||
cfg.tsdb.MaxPercentage = cfgFile.StorageConfig.TSDBConfig.Retention.Percentage
|
||||
}
|
||||
}
|
||||
}
|
||||
cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
|
||||
cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold
|
||||
cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time
|
||||
cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size
|
||||
cfg.tsdb.MaxPercentage = cfgFile.StorageConfig.TSDBConfig.Retention.Percentage
|
||||
|
||||
// Set Go runtime parameters before we get too far into initialization.
|
||||
updateGoGC(cfgFile, logger)
|
||||
|
|
@ -782,11 +784,6 @@ func main() {
|
|||
cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")
|
||||
|
||||
if !agentMode {
|
||||
if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 && cfg.tsdb.MaxPercentage == 0 {
|
||||
cfg.tsdb.RetentionDuration = defaultRetentionDuration
|
||||
logger.Info("No time, size or percentage retention was set so using the default time retention", "duration", defaultRetentionDuration)
|
||||
}
|
||||
|
||||
// Check for overflows. This limits our max retention to 100y.
|
||||
if cfg.tsdb.RetentionDuration < 0 {
|
||||
y, err := model.ParseDuration("100y")
|
||||
|
|
@ -1035,8 +1032,29 @@ func main() {
|
|||
|
||||
reloaders := []reloader{
|
||||
{
|
||||
name: "db_storage",
|
||||
reloader: localStorage.ApplyConfig,
|
||||
name: "db_storage",
|
||||
reloader: func() func(*config.Config) error {
|
||||
lastTSDBRetention := config.TSDBRetentionConfig{}
|
||||
return func(cfg *config.Config) error {
|
||||
err := localStorage.ApplyConfig(cfg)
|
||||
if err != nil || agentMode || cfg.StorageConfig.TSDBConfig == nil || cfg.StorageConfig.TSDBConfig.Retention == nil {
|
||||
return err
|
||||
}
|
||||
|
||||
curr := cfg.StorageConfig.TSDBConfig.Retention
|
||||
if *curr == lastTSDBRetention {
|
||||
return nil
|
||||
}
|
||||
|
||||
logger.Info("TSDB retention updated",
|
||||
"duration", curr.Time,
|
||||
"size", curr.Size,
|
||||
"percentage", curr.Percentage,
|
||||
)
|
||||
lastTSDBRetention = *curr
|
||||
return nil
|
||||
}
|
||||
}(),
|
||||
}, {
|
||||
name: "remote_storage",
|
||||
reloader: remoteStorage.ApplyConfig,
|
||||
|
|
|
|||
|
|
@ -83,6 +83,13 @@ func Load(s string, logger *slog.Logger) (*Config, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// When the config body is empty, Config.UnmarshalYAML is never called, so
|
||||
// TSDBConfig may still be nil.
|
||||
if cfg.StorageConfig.TSDBConfig == nil {
|
||||
retention := DefaultTSDBRetentionConfig
|
||||
cfg.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
|
||||
}
|
||||
|
||||
b := labels.NewScratchBuilder(0)
|
||||
cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) {
|
||||
newV := os.Expand(v.Value, func(s string) string {
|
||||
|
|
@ -276,6 +283,9 @@ var (
|
|||
// For backwards compatibility.
|
||||
LabelNamePreserveMultipleUnderscores: true,
|
||||
}
|
||||
|
||||
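// DefaultTSDBRetentionConfig is the retention applied when a config file has no storage.tsdb.retention section; main() overwrites it from the CLI flags before the config file is loaded.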
|
||||
DefaultTSDBRetentionConfig TSDBRetentionConfig
|
||||
)
|
||||
|
||||
// Config is the top-level configuration for Prometheus's config files.
|
||||
|
|
@ -405,6 +415,13 @@ func (c *Config) UnmarshalYAML(unmarshal func(any) error) error {
|
|||
c.Runtime = DefaultRuntimeConfig
|
||||
}
|
||||
|
||||
// If no storage.tsdb section is present, TSDBConfig is nil and its
|
||||
// UnmarshalYAML never runs. Inject the default retention here.
|
||||
if c.StorageConfig.TSDBConfig == nil {
|
||||
retention := DefaultTSDBRetentionConfig
|
||||
c.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
|
||||
}
|
||||
|
||||
for _, rf := range c.RuleFiles {
|
||||
if !patRulePath.MatchString(rf) {
|
||||
return fmt.Errorf("invalid rule file path %q", rf)
|
||||
|
|
@ -1097,6 +1114,22 @@ type TSDBRetentionConfig struct {
|
|||
Percentage uint `yaml:"percentage,omitempty"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface and rejects a negative size or a percentage above 100.
|
||||
func (t *TSDBRetentionConfig) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
*t = TSDBRetentionConfig{}
|
||||
type plain TSDBRetentionConfig
|
||||
if err := unmarshal((*plain)(t)); err != nil {
|
||||
return err
|
||||
}
|
||||
if t.Size < 0 {
|
||||
return fmt.Errorf("'storage.tsdb.retention.size' must be greater than or equal to 0, got %v", t.Size)
|
||||
}
|
||||
if t.Percentage > 100 {
|
||||
return fmt.Errorf("'storage.tsdb.retention.percentage' must be in the range [0, 100], got %v", t.Percentage)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TSDBConfig configures runtime reloadable configuration options.
|
||||
type TSDBConfig struct {
|
||||
// OutOfOrderTimeWindow sets how long back in time an out-of-order sample can be inserted
|
||||
|
|
@ -1127,6 +1160,11 @@ func (t *TSDBConfig) UnmarshalYAML(unmarshal func(any) error) error {
|
|||
|
||||
t.OutOfOrderTimeWindow = time.Duration(t.OutOfOrderTimeWindowFlag).Milliseconds()
|
||||
|
||||
if t.Retention == nil {
|
||||
retention := DefaultTSDBRetentionConfig
|
||||
t.Retention = &retention
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -20,9 +20,10 @@ const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml"
|
|||
var ruleFilesExpectedConf = &Config{
|
||||
loaded: true,
|
||||
|
||||
GlobalConfig: DefaultGlobalConfig,
|
||||
Runtime: DefaultRuntimeConfig,
|
||||
OTLPConfig: DefaultOTLPConfig,
|
||||
GlobalConfig: DefaultGlobalConfig,
|
||||
Runtime: DefaultRuntimeConfig,
|
||||
OTLPConfig: DefaultOTLPConfig,
|
||||
StorageConfig: StorageConfig{TSDBConfig: &TSDBConfig{Retention: &TSDBRetentionConfig{}}},
|
||||
RuleFiles: []string{
|
||||
"testdata/first.rules",
|
||||
"testdata/rules/second.rules",
|
||||
|
|
|
|||
|
|
@ -2626,6 +2626,22 @@ var expectedErrors = []struct {
|
|||
filename: "stackit_endpoint.bad.yml",
|
||||
errMsg: "invalid endpoint",
|
||||
},
|
||||
{
|
||||
filename: "tsdb_retention_time.bad.yml",
|
||||
errMsg: `not a valid duration string: "-1h"`,
|
||||
},
|
||||
{
|
||||
filename: "tsdb_retention_size.bad.yml",
|
||||
errMsg: `'storage.tsdb.retention.size' must be greater than or equal to 0`,
|
||||
},
|
||||
{
|
||||
filename: "tsdb_retention_percentage.bad.yml",
|
||||
errMsg: `'storage.tsdb.retention.percentage' must be in the range [0, 100]`,
|
||||
},
|
||||
{
|
||||
filename: "tsdb_retention_percentage_negative.bad.yml",
|
||||
errMsg: "cannot unmarshal !!int `-1` into uint",
|
||||
},
|
||||
}
|
||||
|
||||
func TestBadConfigs(t *testing.T) {
|
||||
|
|
@ -2649,6 +2665,8 @@ func TestEmptyConfig(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
exp := DefaultConfig
|
||||
exp.loaded = true
|
||||
retention := DefaultTSDBRetentionConfig
|
||||
exp.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
|
||||
require.Equal(t, exp, *c)
|
||||
require.Equal(t, 75, c.Runtime.GoGC)
|
||||
}
|
||||
|
|
@ -2700,6 +2718,10 @@ func TestGlobalConfig(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
exp := DefaultConfig
|
||||
exp.loaded = true
|
||||
// TSDBConfig is always injected by Config.UnmarshalYAML even when no
|
||||
// storage.tsdb section is present, so the expected config must include it.
|
||||
retention := DefaultTSDBRetentionConfig
|
||||
exp.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
|
||||
require.Equal(t, exp, *c)
|
||||
})
|
||||
|
||||
|
|
|
|||
|
|
@ -18,8 +18,9 @@ const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml"
|
|||
var ruleFilesExpectedConf = &Config{
|
||||
loaded: true,
|
||||
|
||||
GlobalConfig: DefaultGlobalConfig,
|
||||
Runtime: DefaultRuntimeConfig,
|
||||
GlobalConfig: DefaultGlobalConfig,
|
||||
Runtime: DefaultRuntimeConfig,
|
||||
StorageConfig: StorageConfig{TSDBConfig: &TSDBConfig{Retention: &TSDBRetentionConfig{}}},
|
||||
RuleFiles: []string{
|
||||
"testdata\\first.rules",
|
||||
"testdata\\rules\\second.rules",
|
||||
|
|
|
|||
4
config/testdata/tsdb_retention_percentage.bad.yml
vendored
Normal file
4
config/testdata/tsdb_retention_percentage.bad.yml
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
storage:
|
||||
tsdb:
|
||||
retention:
|
||||
percentage: 101
|
||||
4
config/testdata/tsdb_retention_percentage_negative.bad.yml
vendored
Normal file
4
config/testdata/tsdb_retention_percentage_negative.bad.yml
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
storage:
|
||||
tsdb:
|
||||
retention:
|
||||
percentage: -1
|
||||
4
config/testdata/tsdb_retention_size.bad.yml
vendored
Normal file
4
config/testdata/tsdb_retention_size.bad.yml
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
storage:
|
||||
tsdb:
|
||||
retention:
|
||||
size: -1GB
|
||||
4
config/testdata/tsdb_retention_time.bad.yml
vendored
Normal file
4
config/testdata/tsdb_retention_time.bad.yml
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
storage:
|
||||
tsdb:
|
||||
retention:
|
||||
time: -1h
|
||||
|
|
@ -3877,9 +3877,9 @@ with this feature.
|
|||
# or when a compaction completes, whichever comes first.
|
||||
[ retention: <retention> ] :
|
||||
# How long to retain samples in storage. If neither this option nor the size option
|
||||
# is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms.
|
||||
# is set, the retention time defaults to 15d. Setting this to 0 disables time-based retention.
|
||||
# This option takes precedence over the deprecated command-line flag --storage.tsdb.retention.time.
|
||||
[ time: <duration> | default = 15d ]
|
||||
[ time: <duration> ]
|
||||
|
||||
# Maximum number of bytes that can be stored for blocks. A unit is required,
|
||||
# supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B.
|
||||
|
|
|
|||
18
tsdb/db.go
18
tsdb/db.go
|
|
@ -1286,18 +1286,12 @@ func (db *DB) ApplyConfig(conf *config.Config) error {
|
|||
// Update retention configuration if provided.
|
||||
if conf.StorageConfig.TSDBConfig.Retention != nil {
|
||||
db.retentionMtx.Lock()
|
||||
if conf.StorageConfig.TSDBConfig.Retention.Time > 0 {
|
||||
db.opts.RetentionDuration = int64(conf.StorageConfig.TSDBConfig.Retention.Time)
|
||||
db.metrics.retentionDuration.Set((time.Duration(db.opts.RetentionDuration) * time.Millisecond).Seconds())
|
||||
}
|
||||
if conf.StorageConfig.TSDBConfig.Retention.Size > 0 {
|
||||
db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size)
|
||||
db.metrics.maxBytes.Set(float64(db.opts.MaxBytes))
|
||||
}
|
||||
if conf.StorageConfig.TSDBConfig.Retention.Percentage > 0 {
|
||||
db.opts.MaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage
|
||||
db.metrics.maxPercentage.Set(float64(db.opts.MaxPercentage))
|
||||
}
|
||||
db.opts.RetentionDuration = int64(time.Duration(conf.StorageConfig.TSDBConfig.Retention.Time) / time.Millisecond)
|
||||
db.metrics.retentionDuration.Set((time.Duration(db.opts.RetentionDuration) * time.Millisecond).Seconds())
|
||||
db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size)
|
||||
db.metrics.maxBytes.Set(float64(db.opts.MaxBytes))
|
||||
db.opts.MaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage
|
||||
db.metrics.maxPercentage.Set(float64(db.opts.MaxPercentage))
|
||||
db.retentionMtx.Unlock()
|
||||
}
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1748,7 +1748,7 @@ func TestRuntimeRetentionConfigChange(t *testing.T) {
|
|||
StorageConfig: config.StorageConfig{
|
||||
TSDBConfig: &config.TSDBConfig{
|
||||
Retention: &config.TSDBRetentionConfig{
|
||||
Time: model.Duration(shorterRetentionDuration),
|
||||
Time: model.Duration(time.Duration(shorterRetentionDuration) * time.Millisecond),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -1777,6 +1777,31 @@ func TestRuntimeRetentionConfigChange(t *testing.T) {
|
|||
require.Positive(t, int(prom_testutil.ToFloat64(db.metrics.timeRetentionCount)), "time retention count should be incremented")
|
||||
}
|
||||
|
||||
// TestApplyConfigRetentionDurationMetricUnit verifies that after a config
|
||||
// reload the prometheus_tsdb_retention_limit_seconds metric reports the
|
||||
// retention in seconds.
|
||||
func TestApplyConfigRetentionDurationMetricUnit(t *testing.T) {
|
||||
oneHourMs := int64(time.Hour / time.Millisecond)
|
||||
db := newTestDB(t, withOpts(&Options{RetentionDuration: oneHourMs}))
|
||||
|
||||
cfg := &config.Config{
|
||||
StorageConfig: config.StorageConfig{
|
||||
TSDBConfig: &config.TSDBConfig{
|
||||
Retention: &config.TSDBRetentionConfig{
|
||||
Time: model.Duration(time.Hour),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
require.NoError(t, db.ApplyConfig(cfg))
|
||||
|
||||
require.Equal(t, oneHourMs, db.getRetentionDuration())
|
||||
|
||||
gotSeconds := prom_testutil.ToFloat64(db.metrics.retentionDuration)
|
||||
wantSeconds := time.Hour.Seconds()
|
||||
require.Equal(t, wantSeconds, gotSeconds)
|
||||
}
|
||||
|
||||
func TestNotMatcherSelectsLabelsUnsetSeries(t *testing.T) {
|
||||
db := newTestDB(t)
|
||||
|
||||
|
|
|
|||
22
web/web.go
22
web/web.go
|
|
@ -253,6 +253,11 @@ func (h *Handler) ApplyConfig(conf *config.Config) error {
|
|||
defer h.mtx.Unlock()
|
||||
|
||||
h.config = conf
|
||||
if conf.StorageConfig.TSDBConfig != nil && conf.StorageConfig.TSDBConfig.Retention != nil {
|
||||
h.options.TSDBRetentionDuration = conf.StorageConfig.TSDBConfig.Retention.Time
|
||||
h.options.TSDBMaxBytes = conf.StorageConfig.TSDBConfig.Retention.Size
|
||||
h.options.TSDBMaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
@ -866,20 +871,25 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) {
|
|||
status.Hostname = hostname
|
||||
status.ServerTime = time.Now().UTC()
|
||||
|
||||
if h.options.TSDBRetentionDuration != 0 {
|
||||
status.StorageRetention = h.options.TSDBRetentionDuration.String()
|
||||
h.mtx.RLock()
|
||||
tsdbRetentionDuration := h.options.TSDBRetentionDuration
|
||||
tsdbMaxBytes := h.options.TSDBMaxBytes
|
||||
tsdbMaxPercentage := h.options.TSDBMaxPercentage
|
||||
h.mtx.RUnlock()
|
||||
if tsdbRetentionDuration != 0 {
|
||||
status.StorageRetention = tsdbRetentionDuration.String()
|
||||
}
|
||||
if h.options.TSDBMaxBytes != 0 {
|
||||
if tsdbMaxBytes != 0 {
|
||||
if status.StorageRetention != "" {
|
||||
status.StorageRetention += " or "
|
||||
}
|
||||
status.StorageRetention += h.options.TSDBMaxBytes.String()
|
||||
status.StorageRetention += tsdbMaxBytes.String()
|
||||
}
|
||||
if h.options.TSDBMaxPercentage != 0 {
|
||||
if tsdbMaxPercentage != 0 {
|
||||
if status.StorageRetention != "" {
|
||||
status.StorageRetention += " or "
|
||||
}
|
||||
status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(h.options.TSDBMaxPercentage), 10) + "%"
|
||||
status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(tsdbMaxPercentage), 10) + "%"
|
||||
}
|
||||
|
||||
metrics, err := h.gatherer.Gather()
|
||||
|
|
|
|||
Loading…
Reference in a new issue