From 696679e50c43339c255c5cc79bdd7b39d80a6a29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20LOYET?= <822436+fatpat@users.noreply.github.com> Date: Wed, 19 Apr 2023 15:15:02 +0200 Subject: [PATCH 1/3] Add `storage.tsdb.retention.percentage` config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme LOYET <822436+fatpat@users.noreply.github.com> Signed-off-by: Laurent Dufresne --- cmd/prometheus/main.go | 23 ++++++++++-- tsdb/db.go | 30 +++++++++++++-- util/runtime/statfs.go | 12 ++++-- util/runtime/statfs_default.go | 13 ++++++- util/runtime/statfs_linux_386.go | 15 +++++++- util/runtime/statfs_uint32.go | 13 ++++++- util/runtime/statfs_unix_test.go | 58 +++++++++++++++++++++++++++++ util/runtime/statfs_windows.go | 56 ++++++++++++++++++++++++++++ util/runtime/statfs_windows_test.go | 49 ++++++++++++++++++++++++ web/web.go | 7 ++++ 10 files changed, 259 insertions(+), 17 deletions(-) create mode 100644 util/runtime/statfs_unix_test.go create mode 100644 util/runtime/statfs_windows.go create mode 100644 util/runtime/statfs_windows_test.go diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 763911363b..d6db8c3d36 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -770,9 +770,9 @@ func main() { cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/") if !agentMode { - if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 { + if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 && cfg.tsdb.MaxPercentage == 0 { cfg.tsdb.RetentionDuration = defaultRetentionDuration - logger.Info("No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) + logger.Info("No time, size or percentage retention was set so using the default time retention", "duration", defaultRetentionDuration) } // Check for overflows. This limits our max retention to 100y. 
@@ -785,6 +785,17 @@ func main() { logger.Warn("Time retention value is too high. Limiting to: " + y.String()) } + if cfg.tsdb.MaxPercentage > 100 { + cfg.tsdb.MaxPercentage = 100 + logger.Warn("Percentage retention value is too high. Limiting to: 100%") + } + if cfg.tsdb.MaxPercentage > 0 { + if prom_runtime.FsSize(localStoragePath) == 0 { + fmt.Fprintln(os.Stderr, fmt.Errorf("unable to detect total capacity of metric storage at %s, please disable retention percentage (%d%%)", localStoragePath, cfg.tsdb.MaxPercentage)) + os.Exit(2) + } + } + // Max block size settings. if cfg.tsdb.MaxBlockDuration == 0 { maxBlockDuration, err := model.ParseDuration("31d") @@ -958,6 +969,7 @@ func main() { cfg.web.Context = ctxWeb cfg.web.TSDBRetentionDuration = cfg.tsdb.RetentionDuration cfg.web.TSDBMaxBytes = cfg.tsdb.MaxBytes + cfg.web.TSDBMaxPercentage = cfg.tsdb.MaxPercentage cfg.web.TSDBDir = localStoragePath cfg.web.LocalStorage = localStorage cfg.web.Storage = fanoutStorage @@ -1371,7 +1383,7 @@ func main() { return fmt.Errorf("opening storage failed: %w", err) } - switch fsType := prom_runtime.Statfs(localStoragePath); fsType { + switch fsType := prom_runtime.FsType(localStoragePath); fsType { case "NFS_SUPER_MAGIC": logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. 
Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) default: @@ -1383,6 +1395,7 @@ func main() { "MinBlockDuration", cfg.tsdb.MinBlockDuration, "MaxBlockDuration", cfg.tsdb.MaxBlockDuration, "MaxBytes", cfg.tsdb.MaxBytes, + "MaxPercentage", cfg.tsdb.MaxPercentage, "NoLockfile", cfg.tsdb.NoLockfile, "RetentionDuration", cfg.tsdb.RetentionDuration, "WALSegmentSize", cfg.tsdb.WALSegmentSize, @@ -1430,7 +1443,7 @@ func main() { return fmt.Errorf("opening storage failed: %w", err) } - switch fsType := prom_runtime.Statfs(localStoragePath); fsType { + switch fsType := prom_runtime.FsType(localStoragePath); fsType { case "NFS_SUPER_MAGIC": logger.Warn(fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.") default: @@ -1950,6 +1963,7 @@ type tsdbOptions struct { MaxBlockChunkSegmentSize units.Base2Bytes RetentionDuration model.Duration MaxBytes units.Base2Bytes + MaxPercentage uint NoLockfile bool WALCompressionType compression.Type HeadChunksWriteQueueSize int @@ -1978,6 +1992,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { MaxBlockChunkSegmentSize: int64(opts.MaxBlockChunkSegmentSize), RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond), MaxBytes: int64(opts.MaxBytes), + MaxPercentage: opts.MaxPercentage, NoLockfile: opts.NoLockfile, WALCompression: opts.WALCompressionType, HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize, diff --git a/tsdb/db.go b/tsdb/db.go index 1d73628bfd..646ed83cd5 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -47,6 +47,7 @@ import ( "github.com/prometheus/prometheus/tsdb/wlog" "github.com/prometheus/prometheus/util/compression" "github.com/prometheus/prometheus/util/features" + prom_runtime "github.com/prometheus/prometheus/util/runtime" ) const ( @@ 
-126,6 +127,11 @@ type Options struct { // the current size of the database. MaxBytes int64 + // Maximum % of disk space to use for blocks to be retained. + // 0 or less means disabled. + // If both MaxBytes and MaxPercentage are set, percentage prevails. + MaxPercentage uint + // NoLockfile disables creation and consideration of a lock file. NoLockfile bool @@ -1983,12 +1989,30 @@ func BeyondTimeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc // BeyondSizeRetention returns those blocks which are beyond the size retention // set in the db options. func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struct{}) { - // Size retention is disabled or no blocks to work with. - maxBytes := db.getMaxBytes() - if len(blocks) == 0 || maxBytes <= 0 { + // No blocks to work with + if len(blocks) == 0 { return deletable } + maxBytes := db.getMaxBytes() + + // Max percentage prevails over max size. + if db.opts.MaxPercentage > 0 { + diskSize := prom_runtime.FsSize(db.dir) + if diskSize <= 0 { + db.logger.Warn("Unable to retrieve filesystem size of database directory, skip percentage limitation and default to fixed size limitation", "dir", db.dir) + } else { + maxBytes = int64(uint64(db.opts.MaxPercentage) * diskSize / 100) + } + } + + // Size retention is disabled. + if maxBytes <= 0 { + return deletable + } + // update MaxBytes gauge + db.metrics.maxBytes.Set(float64(maxBytes)) + deletable = make(map[ulid.ULID]struct{}) // Initializing size counter with WAL size and Head chunks diff --git a/util/runtime/statfs.go b/util/runtime/statfs.go index 98dd822e4a..b6edbd872b 100644 --- a/util/runtime/statfs.go +++ b/util/runtime/statfs.go @@ -11,12 +11,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-//go:build openbsd || windows || netbsd || solaris +//go:build openbsd || netbsd || solaris package runtime -// Statfs returns the file system type (Unix only) -// syscall.Statfs_t isn't available on openbsd -func Statfs(path string) string { +// FsType returns the file system type or "unknown" if unsupported. +func FsType(path string) string { return "unknown" } + +// FsSize returns the file system size or 0 if unsupported. +func FsSize(path string) uint64 { + return 0 +} diff --git a/util/runtime/statfs_default.go b/util/runtime/statfs_default.go index 0cf5c2e616..de65b780f0 100644 --- a/util/runtime/statfs_default.go +++ b/util/runtime/statfs_default.go @@ -20,8 +20,7 @@ import ( "syscall" ) -// Statfs returns the file system type (Unix only). -func Statfs(path string) string { +func FsType(path string) string { // Types of file systems that may be returned by `statfs` fsTypes := map[int64]string{ 0xadf5: "ADFS_SUPER_MAGIC", @@ -67,6 +66,7 @@ func Statfs(path string) string { 0x012FF7B4: "XENIX_SUPER_MAGIC", 0x58465342: "XFS_SUPER_MAGIC", 0x012FD16D: "_XIAFS_SUPER_MAGIC", + 0x794c7630: "OVERLAYFS_SUPER_MAGIC", } var fs syscall.Statfs_t @@ -82,3 +82,12 @@ func Statfs(path string) string { } return strconv.FormatInt(localType, 16) } + +func FsSize(path string) uint64 { + var fs syscall.Statfs_t + err := syscall.Statfs(path, &fs) + if err != nil { + return 0 + } + return uint64(fs.Bsize) * fs.Blocks +} diff --git a/util/runtime/statfs_linux_386.go b/util/runtime/statfs_linux_386.go index 33dbc4c3e9..82e586dc94 100644 --- a/util/runtime/statfs_linux_386.go +++ b/util/runtime/statfs_linux_386.go @@ -20,8 +20,8 @@ import ( "syscall" ) -// Statfs returns the file system type (Unix only) -func Statfs(path string) string { +// FsType returns the file system type (Unix only). 
+func FsType(path string) string { // Types of file systems that may be returned by `statfs` fsTypes := map[int32]string{ 0xadf5: "ADFS_SUPER_MAGIC", @@ -63,6 +63,7 @@ func Statfs(path string) string { 0x012FF7B4: "XENIX_SUPER_MAGIC", 0x58465342: "XFS_SUPER_MAGIC", 0x012FD16D: "_XIAFS_SUPER_MAGIC", + 0x794c7630: "OVERLAYFS_SUPER_MAGIC", } var fs syscall.Statfs_t @@ -75,3 +76,13 @@ func Statfs(path string) string { } return strconv.Itoa(int(fs.Type)) } + +// FsSize returns the file system size (Unix only). +func FsSize(path string) uint64 { + var fs syscall.Statfs_t + err := syscall.Statfs(path, &fs) + if err != nil { + return 0 + } + return uint64(fs.Bsize) * fs.Blocks +} diff --git a/util/runtime/statfs_uint32.go b/util/runtime/statfs_uint32.go index 2fb4d70849..acffb41295 100644 --- a/util/runtime/statfs_uint32.go +++ b/util/runtime/statfs_uint32.go @@ -20,8 +20,7 @@ import ( "syscall" ) -// Statfs returns the file system type (Unix only) -func Statfs(path string) string { +func FsType(path string) string { // Types of file systems that may be returned by `statfs` fsTypes := map[uint32]string{ 0xadf5: "ADFS_SUPER_MAGIC", @@ -63,6 +62,7 @@ func Statfs(path string) string { 0x012FF7B4: "XENIX_SUPER_MAGIC", 0x58465342: "XFS_SUPER_MAGIC", 0x012FD16D: "_XIAFS_SUPER_MAGIC", + 0x794c7630: "OVERLAYFS_SUPER_MAGIC", } var fs syscall.Statfs_t @@ -75,3 +75,12 @@ func Statfs(path string) string { } return strconv.Itoa(int(fs.Type)) } + +func FsSize(path string) uint64 { + var fs syscall.Statfs_t + err := syscall.Statfs(path, &fs) + if err != nil { + return 0 + } + return uint64(fs.Bsize) * fs.Blocks +} diff --git a/util/runtime/statfs_unix_test.go b/util/runtime/statfs_unix_test.go new file mode 100644 index 0000000000..563bd1dfa6 --- /dev/null +++ b/util/runtime/statfs_unix_test.go @@ -0,0 +1,58 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !windows && !openbsd && !netbsd && !solaris + +package runtime + +import ( + "os" + "testing" + + "github.com/grafana/regexp" + "github.com/stretchr/testify/require" +) + +var regexpFsType = regexp.MustCompile("^[A-Z][A-Z0-9_]*_MAGIC$") + +func TestFsType(t *testing.T) { + var fsType string + + path, err := os.Getwd() + require.NoError(t, err) + + fsType = FsType(path) + require.Regexp(t, regexpFsType, fsType) + + fsType = FsType("/no/where/to/be/found") + require.Equal(t, "0", fsType) + + fsType = FsType(" %% not event a real path\n\n") + require.Equal(t, "0", fsType) +} + +func TestFsSize(t *testing.T) { + var size uint64 + + path, err := os.Getwd() + require.NoError(t, err) + + size = FsSize(path) + require.Positive(t, size) + + size = FsSize("/no/where/to/be/found") + require.Equal(t, uint64(0), size) + + size = FsSize(" %% not event a real path\n\n") + require.Equal(t, uint64(0), size) +} diff --git a/util/runtime/statfs_windows.go b/util/runtime/statfs_windows.go new file mode 100644 index 0000000000..717d4c16f1 --- /dev/null +++ b/util/runtime/statfs_windows.go @@ -0,0 +1,56 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build windows + +package runtime + +import ( + "os" + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +var ( + dll = windows.MustLoadDLL("kernel32.dll") + getDiskFreeSpaceExW = dll.MustFindProc("GetDiskFreeSpaceExW") +) + +func FsType(path string) string { + return "unknown" +} + +func FsSize(path string) uint64 { + // Ensure the path exists. + if _, err := os.Stat(path); err != nil { + return 0 + } + + var avail int64 + var total int64 + var free int64 + // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getdiskfreespaceexa + ret, _, _ := getDiskFreeSpaceExW.Call( + uintptr(unsafe.Pointer(syscall.StringToUTF16Ptr(path))), + uintptr(unsafe.Pointer(&avail)), + uintptr(unsafe.Pointer(&total)), + uintptr(unsafe.Pointer(&free))) + + if ret == 0 || uint64(free) > uint64(total) { + return 0 + } + + return uint64(total) +} diff --git a/util/runtime/statfs_windows_test.go b/util/runtime/statfs_windows_test.go new file mode 100644 index 0000000000..5b65d7029e --- /dev/null +++ b/util/runtime/statfs_windows_test.go @@ -0,0 +1,49 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build windows + +package runtime + +import ( + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestFsType(t *testing.T) { + var fsType string + + path, err := os.Getwd() + require.NoError(t, err) + + fsType = FsType(path) + require.Equal(t, "unknown", fsType) + + fsType = FsType("A:\\no\\where\\to\\be\\found") + require.Equal(t, "unknown", fsType) +} + +func TestFsSize(t *testing.T) { + var size uint64 + + size = FsSize("C:\\") + require.Positive(t, size) + + size = FsSize("c:\\no\\where\\to\\be\\found") + require.Equal(t, uint64(0), size) + + size = FsSize(" %% not event a real path\n\n") + require.Equal(t, uint64(0), size) +} diff --git a/web/web.go b/web/web.go index 583492abc9..90eaf13afe 100644 --- a/web/web.go +++ b/web/web.go @@ -263,6 +263,7 @@ type Options struct { TSDBRetentionDuration model.Duration TSDBDir string TSDBMaxBytes units.Base2Bytes + TSDBMaxPercentage uint LocalStorage LocalStorage Storage storage.Storage ExemplarStorage storage.ExemplarQueryable @@ -874,6 +875,12 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) { } status.StorageRetention += h.options.TSDBMaxBytes.String() } + if h.options.TSDBMaxPercentage != 0 { + if status.StorageRetention != "" { + status.StorageRetention += " or " + } + status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(h.options.TSDBMaxPercentage), 10) + "%" + } metrics, err := h.gatherer.Gather() if err != nil { From 971143edac5f6375fbf881e71d31f48a55233245 Mon Sep 17 00:00:00 2001 From: Laurent Dufresne Date: Fri, 13 Feb 2026 10:48:36 +0100 Subject: [PATCH 2/3] Added `Retention.Percentage` to config file with runtime config reloading Signed-off-by: Laurent Dufresne --- cmd/prometheus/main.go | 6 ++++++ config/config.go | 3 +++ config/config_test.go | 5 +++-- config/testdata/conf.good.yml | 1 + docs/configuration/configuration.md | 8 ++++++++ 
tsdb/db.go | 25 +++++++++++++++++-------- 6 files changed, 38 insertions(+), 10 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index d6db8c3d36..4cca65466f 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -717,6 +717,9 @@ func main() { if cfgFile.StorageConfig.TSDBConfig.Retention.Size > 0 { cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size } + if cfgFile.StorageConfig.TSDBConfig.Retention.Percentage > 0 { + cfg.tsdb.MaxPercentage = cfgFile.StorageConfig.TSDBConfig.Retention.Percentage + } } } @@ -790,6 +793,9 @@ func main() { logger.Warn("Percentage retention value is too high. Limiting to: 100%") } if cfg.tsdb.MaxPercentage > 0 { + if cfg.tsdb.MaxBytes > 0 { + logger.Warn("storage.tsdb.retention.size is ignored, because storage.tsdb.retention.percentage is specified") + } if prom_runtime.FsSize(localStoragePath) == 0 { fmt.Fprintln(os.Stderr, fmt.Errorf("unable to detect total capacity of metric storage at %s, please disable retention percentage (%d%%)", localStoragePath, cfg.tsdb.MaxPercentage)) os.Exit(2) diff --git a/config/config.go b/config/config.go index d721d7fb86..0ebebc26d5 100644 --- a/config/config.go +++ b/config/config.go @@ -1092,6 +1092,9 @@ type TSDBRetentionConfig struct { // Maximum number of bytes that can be stored for blocks. Size units.Base2Bytes `yaml:"size,omitempty"` + + // Maximum percentage of disk used for TSDB storage. + Percentage uint `yaml:"percentage,omitempty"` } // TSDBConfig configures runtime reloadable configuration options. 
diff --git a/config/config_test.go b/config/config_test.go index 968b563e1e..43c56a501f 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -1737,8 +1737,9 @@ var expectedConf = &Config{ OutOfOrderTimeWindowFlag: model.Duration(30 * time.Minute), StaleSeriesCompactionThreshold: 0.5, Retention: &TSDBRetentionConfig{ - Time: model.Duration(24 * time.Hour), - Size: 1 * units.GiB, + Time: model.Duration(24 * time.Hour), + Size: 1 * units.GiB, + Percentage: 28, }, }, }, diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 96bf9e2b33..d6b1690243 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -457,6 +457,7 @@ storage: retention: time: 1d size: 1GB + percentage: 28 tracing: endpoint: "localhost:4317" diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 49b7774b5f..853f15dc4c 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -3581,6 +3581,14 @@ with this feature. # This option takes precedence over the deprecated command-line flag --storage.tsdb.retention.size. [ size: | default = 0 ] + # Maximum percentage of total disk space allowed for storage of blocks. An alternative to `size` that + # behaves as if `size` had been calculated by hand as a percentage of the total storage capacity. + # Prometheus will fail to start if this option is enabled but the total storage capacity cannot be determined. + # The allowed disk space automatically adapts when the volume is resized. + # If set to 0 or not set, percentage-based retention is disabled. If both `size` and `percentage` are set, `percentage` takes precedence. + # + # This is an experimental feature; its behaviour could change, or it could be removed, in the future.
+ [ percentage: | default = 0 ] ``` ### `` diff --git a/tsdb/db.go b/tsdb/db.go index 646ed83cd5..ee234db352 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -350,6 +350,7 @@ type dbMetrics struct { tombCleanTimer prometheus.Histogram blocksBytes prometheus.Gauge maxBytes prometheus.Gauge + maxPercentage prometheus.Gauge retentionDuration prometheus.Gauge staleSeriesCompactionsTriggered prometheus.Counter staleSeriesCompactionsFailed prometheus.Counter @@ -430,6 +431,10 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics { Name: "prometheus_tsdb_retention_limit_bytes", Help: "Max number of bytes to be retained in the tsdb blocks, configured 0 means disabled", }) + m.maxPercentage = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "prometheus_tsdb_retention_limit_percentage", + Help: "Max percentage of total storage space to be retained in the tsdb blocks, configured 0 means disabled", + }) m.retentionDuration = prometheus.NewGauge(prometheus.GaugeOpts{ Name: "prometheus_tsdb_retention_limit_seconds", Help: "How long to retain samples in storage.", @@ -470,6 +475,7 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics { m.tombCleanTimer, m.blocksBytes, m.maxBytes, + m.maxPercentage, m.retentionDuration, m.staleSeriesCompactionsTriggered, m.staleSeriesCompactionsFailed, @@ -1068,6 +1074,7 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn db.metrics = newDBMetrics(db, r) maxBytes := max(opts.MaxBytes, 0) db.metrics.maxBytes.Set(float64(maxBytes)) + db.metrics.maxPercentage.Set(float64(max(opts.MaxPercentage, 0))) db.metrics.retentionDuration.Set((time.Duration(opts.RetentionDuration) * time.Millisecond).Seconds()) // Calling db.reload() calls db.reloadBlocks() which requires cmtx to be locked. 
@@ -1280,6 +1287,10 @@ func (db *DB) ApplyConfig(conf *config.Config) error { db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size) db.metrics.maxBytes.Set(float64(db.opts.MaxBytes)) } + if conf.StorageConfig.TSDBConfig.Retention.Percentage > 0 { + db.opts.MaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage + db.metrics.maxPercentage.Set(float64(db.opts.MaxPercentage)) + } db.retentionMtx.Unlock() } } else { @@ -1325,11 +1336,11 @@ func (db *DB) getRetentionDuration() int64 { return db.opts.RetentionDuration } -// getMaxBytes returns the current max bytes setting in a thread-safe manner. -func (db *DB) getMaxBytes() int64 { +// getRetentionSettings returns max bytes and max percentage settings in a thread-safe manner. +func (db *DB) getRetentionSettings() (int64, uint) { db.retentionMtx.RLock() defer db.retentionMtx.RUnlock() - return db.opts.MaxBytes + return db.opts.MaxBytes, db.opts.MaxPercentage } // dbAppender wraps the DB's head appender and triggers compactions on commit @@ -1994,15 +2005,15 @@ func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc return deletable } - maxBytes := db.getMaxBytes() + maxBytes, maxPercentage := db.getRetentionSettings() // Max percentage prevails over max size. 
- if db.opts.MaxPercentage > 0 { + if maxPercentage > 0 { diskSize := prom_runtime.FsSize(db.dir) if diskSize <= 0 { db.logger.Warn("Unable to retrieve filesystem size of database directory, skip percentage limitation and default to fixed size limitation", "dir", db.dir) } else { - maxBytes = int64(uint64(db.opts.MaxPercentage) * diskSize / 100) + maxBytes = int64(uint64(maxPercentage) * diskSize / 100) } } @@ -2010,8 +2021,6 @@ func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc if maxBytes <= 0 { return deletable } - // update MaxBytes gauge - db.metrics.maxBytes.Set(float64(maxBytes)) deletable = make(map[ulid.ULID]struct{}) From c76e78d0a4191e0cdf21d100e2b285a60c8a4f40 Mon Sep 17 00:00:00 2001 From: Laurent Dufresne Date: Thu, 19 Feb 2026 14:04:31 +0100 Subject: [PATCH 3/3] Added test for percentage-based retention Signed-off-by: Laurent Dufresne --- tsdb/db.go | 16 +++++++++++++++- tsdb/db_test.go | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/tsdb/db.go b/tsdb/db.go index ee234db352..b0076bed23 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -263,6 +263,9 @@ type Options struct { // StaleSeriesCompactionThreshold is a number between 0.0-1.0 indicating the % of stale series in // the in-memory Head block. If the % of stale series crosses this threshold, stale series compaction is run immediately. StaleSeriesCompactionThreshold float64 + + // FsSizeFunc is a function returning the total disk size for a given path. 
+ FsSizeFunc FsSizeFunc } type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) @@ -273,6 +276,8 @@ type BlockQuerierFunc func(b BlockReader, mint, maxt int64) (storage.Querier, er type BlockChunkQuerierFunc func(b BlockReader, mint, maxt int64) (storage.ChunkQuerier, error) +type FsSizeFunc func(path string) uint64 + // DB handles reads and writes of time series falling into // a hashed partition of a seriedb. type DB struct { @@ -334,6 +339,8 @@ type DB struct { blockQuerierFunc BlockQuerierFunc blockChunkQuerierFunc BlockChunkQuerierFunc + + fsSizeFunc FsSizeFunc } type dbMetrics struct { @@ -681,6 +688,7 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue head: head, blockQuerierFunc: NewBlockQuerier, blockChunkQuerierFunc: NewBlockChunkQuerier, + fsSizeFunc: prom_runtime.FsSize, }, nil } @@ -1015,6 +1023,12 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn db.blockChunkQuerierFunc = opts.BlockChunkQuerierFunc } + if opts.FsSizeFunc == nil { + db.fsSizeFunc = prom_runtime.FsSize + } else { + db.fsSizeFunc = opts.FsSizeFunc + } + var wal, wbl *wlog.WL segmentSize := wlog.DefaultSegmentSize // Wal is enabled. @@ -2009,7 +2023,7 @@ func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc // Max percentage prevails over max size. 
if maxPercentage > 0 { - diskSize := prom_runtime.FsSize(db.dir) + diskSize := db.fsSizeFunc(db.dir) if diskSize <= 0 { db.logger.Warn("Unable to retrieve filesystem size of database directory, skip percentage limitation and default to fixed size limitation", "dir", db.dir) } else { diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 18e969f952..ad66945541 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -9611,3 +9611,39 @@ func TestStaleSeriesCompactionWithZeroSeries(t *testing.T) { // Should still have no blocks since there was nothing to compact. require.Empty(t, db.Blocks()) } + +func TestBeyondSizeRetentionWithPercentage(t *testing.T) { + const maxBlock = 100 + const numBytesChunks = 1024 + const diskSize = maxBlock * numBytesChunks + + opts := DefaultOptions() + opts.MaxPercentage = 10 + opts.FsSizeFunc = func(_ string) uint64 { + return uint64(diskSize) + } + + db := newTestDB(t, withOpts(opts)) + require.Zero(t, db.Head().Size()) + + blocks := make([]*Block, 0, opts.MaxPercentage+1) + for range opts.MaxPercentage { + blocks = append(blocks, &Block{ + numBytesChunks: numBytesChunks, + meta: BlockMeta{ULID: ulid.Make()}, + }) + } + + deletable := BeyondSizeRetention(db, blocks) + require.Empty(t, deletable) + + ulid := ulid.Make() + blocks = append(blocks, &Block{ + numBytesChunks: numBytesChunks, + meta: BlockMeta{ULID: ulid}, + }) + + deletable = BeyondSizeRetention(db, blocks) + require.Len(t, deletable, 1) + require.Contains(t, deletable, ulid) +}