From a2f2e1dfc79cb7e3f2563be466ca81f1990dd8ca Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Sun, 25 Jan 2026 16:21:48 -0800 Subject: [PATCH] Update metrics Signed-off-by: Ganesh Vernekar --- tsdb/fileutil/buffered_file_test.go | 19 ++-- tsdb/fileutil/file_cache.go | 136 ++++++++++++++++++++++++-- tsdb/fileutil/file_cache_metrics.go | 143 ---------------------------- tsdb/fileutil/file_cache_test.go | 6 +- tsdb/fileutil/file_reader.go | 11 ++- tsdb/fileutil/integration_test.go | 6 +- tsdb/head.go | 9 ++ 7 files changed, 164 insertions(+), 166 deletions(-) delete mode 100644 tsdb/fileutil/file_cache_metrics.go diff --git a/tsdb/fileutil/buffered_file_test.go b/tsdb/fileutil/buffered_file_test.go index f6c9eb85b9..89e27d3a42 100644 --- a/tsdb/fileutil/buffered_file_test.go +++ b/tsdb/fileutil/buffered_file_test.go @@ -88,10 +88,13 @@ func TestBufferedFile_WithCache(t *testing.T) { } // Check cache stats - hits, misses, _, _ := cache.Stats() + requests, misses, _, _, _, _ := cache.Stats() if misses != 1 { t.Errorf("expected 1 miss, got %d", misses) } + if requests != 1 { + t.Errorf("expected 1 request, got %d", requests) + } // Second read from same block - should hit cache got = bf.Range(50, 150) @@ -99,9 +102,13 @@ func TestBufferedFile_WithCache(t *testing.T) { t.Error("second read mismatch") } - hits, _, _, _ = cache.Stats() - if hits != 1 { - t.Errorf("expected 1 hit, got %d", hits) + requests, misses, _, _, _, _ = cache.Stats() + // requests should be 2, misses still 1 (so 1 hit) + if requests != 2 { + t.Errorf("expected 2 requests, got %d", requests) + } + if misses != 1 { + t.Errorf("expected 1 miss still, got %d", misses) } // Read spanning multiple blocks @@ -244,8 +251,8 @@ func TestBufferedFileReader(t *testing.T) { } // Check that we're using the cache - hits, misses, size, _ := GlobalCacheStats() - if hits+misses == 0 { + requests, _, _, size, _, _ := GlobalCacheStats() + if requests == 0 { t.Error("expected cache to be used") } if size == 0 { diff --git a/tsdb/fileutil/file_cache.go b/tsdb/fileutil/file_cache.go index 8229e6578f..82b296d473 100644 --- a/tsdb/fileutil/file_cache.go +++ b/tsdb/fileutil/file_cache.go @@ -17,6 +17,8 @@ import ( "container/list" "sync" "sync/atomic" + + "github.com/prometheus/client_golang/prometheus" ) const ( @@ -43,7 +45,7 @@ type cacheEntry struct { } // FileCache is a shared LRU cache for file blocks. -// It provides configurable memory limits and efficient eviction. +// It provides configurable memory limits, efficient eviction, and Prometheus metrics. type FileCache struct { mu sync.RWMutex maxSize int64 @@ -55,18 +57,34 @@ type FileCache struct { // Buffer pool for allocating blocks pool sync.Pool - // Metrics - hits atomic.Uint64 - misses atomic.Uint64 + // Metrics - all atomic for lock-free reads + requests atomic.Uint64 // Total cache access attempts + misses atomic.Uint64 // Cache misses + evictions atomic.Uint64 // Number of evictions + + // Prometheus metrics + metrics *fileCacheMetrics // File ID counter for unique identification nextFileID atomic.Uint64 } +// fileCacheMetrics holds Prometheus metrics for the file cache. +type fileCacheMetrics struct { + cacheRequests prometheus.CounterFunc + cacheMisses prometheus.CounterFunc + cacheEvictions prometheus.CounterFunc + cacheSize prometheus.GaugeFunc + cacheMaxSize prometheus.GaugeFunc + cacheEntries prometheus.GaugeFunc + cacheUsageRatio prometheus.GaugeFunc +} + // FileCacheOptions configures the file cache. type FileCacheOptions struct { - MaxSize int64 // Maximum cache size in bytes - BlockSize int // Size of each cached block + MaxSize int64 // Maximum cache size in bytes + BlockSize int // Size of each cached block + Reg prometheus.Registerer // Prometheus registerer for metrics (optional) } // DefaultFileCacheOptions returns the default cache configuration. @@ -99,9 +117,107 @@ func NewFileCache(opts FileCacheOptions) *FileCache { }, } + fc.metrics = fc.newMetrics() + if opts.Reg != nil { + opts.Reg.MustRegister(fc) + } + return fc } +// newMetrics creates the Prometheus metrics for this cache. +func (fc *FileCache) newMetrics() *fileCacheMetrics { + return &fileCacheMetrics{ + cacheRequests: prometheus.NewCounterFunc(prometheus.CounterOpts{ + Name: "prometheus_tsdb_file_cache_requests_total", + Help: "Total number of file cache access requests.", + }, func() float64 { + return float64(fc.requests.Load()) + }), + + cacheMisses: prometheus.NewCounterFunc(prometheus.CounterOpts{ + Name: "prometheus_tsdb_file_cache_misses_total", + Help: "Total number of file cache misses.", + }, func() float64 { + return float64(fc.misses.Load()) + }), + + cacheEvictions: prometheus.NewCounterFunc(prometheus.CounterOpts{ + Name: "prometheus_tsdb_file_cache_evictions_total", + Help: "Total number of file cache evictions.", + }, func() float64 { + return float64(fc.evictions.Load()) + }), + + cacheSize: prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "prometheus_tsdb_file_cache_size_bytes", + Help: "Current size of the file cache in bytes.", + }, func() float64 { + fc.mu.RLock() + defer fc.mu.RUnlock() + return float64(fc.currentSize) + }), + + cacheMaxSize: prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "prometheus_tsdb_file_cache_max_size_bytes", + Help: "Maximum configured size of the file cache in bytes.", + }, func() float64 { + fc.mu.RLock() + defer fc.mu.RUnlock() + return float64(fc.maxSize) + }), + + cacheEntries: prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "prometheus_tsdb_file_cache_entries", + Help: "Current number of entries (blocks) in the file cache.", + }, func() float64 { + fc.mu.RLock() + defer fc.mu.RUnlock() + return float64(len(fc.entries)) + }), + + cacheUsageRatio: prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "prometheus_tsdb_file_cache_usage_ratio", + Help: "Ratio of current cache size to maximum size (0 to 1).", + }, func() float64 { + fc.mu.RLock() + defer fc.mu.RUnlock() + if fc.maxSize == 0 { + return 0 + } + return float64(fc.currentSize) / float64(fc.maxSize) + }), + } +} + +// Describe implements prometheus.Collector. +func (fc *FileCache) Describe(ch chan<- *prometheus.Desc) { + if fc.metrics == nil { + return + } + fc.metrics.cacheRequests.Describe(ch) + fc.metrics.cacheMisses.Describe(ch) + fc.metrics.cacheEvictions.Describe(ch) + fc.metrics.cacheSize.Describe(ch) + fc.metrics.cacheMaxSize.Describe(ch) + fc.metrics.cacheEntries.Describe(ch) + fc.metrics.cacheUsageRatio.Describe(ch) +} + +// Collect implements prometheus.Collector. +func (fc *FileCache) Collect(ch chan<- prometheus.Metric) { + if fc.metrics == nil { + return + } + fc.metrics.cacheRequests.Collect(ch) + fc.metrics.cacheMisses.Collect(ch) + fc.metrics.cacheEvictions.Collect(ch) + fc.metrics.cacheSize.Collect(ch) + fc.metrics.cacheMaxSize.Collect(ch) + fc.metrics.cacheEntries.Collect(ch) + fc.metrics.cacheUsageRatio.Collect(ch) +} + // NextFileID returns a unique file ID for cache key generation. func (fc *FileCache) NextFileID() uint64 { return fc.nextFileID.Add(1) @@ -115,6 +231,7 @@ func (fc *FileCache) BlockSize() int { // Get retrieves a block from the cache. // Returns nil if the block is not cached. func (fc *FileCache) Get(fileID uint64, block int64) []byte { + fc.requests.Add(1) key := cacheKey{fileID: fileID, block: block} fc.mu.RLock() @@ -136,7 +253,6 @@ func (fc *FileCache) Get(fileID uint64, block int64) []byte { fc.mu.Unlock() if ok { - fc.hits.Add(1) return entry.data[:entry.size] } @@ -197,6 +313,7 @@ func (fc *FileCache) evictLocked() { fc.lru.Remove(elem) delete(fc.entries, entry.key) fc.currentSize -= int64(fc.blockSize) + fc.evictions.Add(1) // Return buffer to pool fc.pool.Put(entry.data) @@ -240,11 +357,12 @@ func (fc *FileCache) Clear() { } // Stats returns cache statistics. -func (fc *FileCache) Stats() (hits, misses uint64, size, maxSize int64) { +func (fc *FileCache) Stats() (requests, misses, evictions uint64, size, maxSize int64, numEntries int) { fc.mu.RLock() size = fc.currentSize + numEntries = len(fc.entries) fc.mu.RUnlock() - return fc.hits.Load(), fc.misses.Load(), size, fc.maxSize + return fc.requests.Load(), fc.misses.Load(), fc.evictions.Load(), size, fc.maxSize, numEntries } // Size returns the current cache size in bytes. diff --git a/tsdb/fileutil/file_cache_metrics.go b/tsdb/fileutil/file_cache_metrics.go deleted file mode 100644 index f6072fa274..0000000000 --- a/tsdb/fileutil/file_cache_metrics.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fileutil - -import ( - "github.com/prometheus/client_golang/prometheus" -) - -// FileCacheMetrics holds Prometheus metrics for the file cache. -type FileCacheMetrics struct { - cacheHits prometheus.Counter - cacheMisses prometheus.Counter - cacheSize prometheus.Gauge - cacheMaxSize prometheus.Gauge - cacheEvictions prometheus.Counter - cacheHitRatio prometheus.GaugeFunc -} - -// NewFileCacheMetrics creates metrics for a FileCache. -// The returned metrics are not registered; call Register() on the collector -// or register individual metrics manually. -func NewFileCacheMetrics(cache *FileCache) *FileCacheMetrics { - m := &FileCacheMetrics{ - cacheHits: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "prometheus_tsdb_file_cache_hits_total", - Help: "Total number of file cache hits.", - }), - cacheMisses: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "prometheus_tsdb_file_cache_misses_total", - Help: "Total number of file cache misses.", - }), - cacheSize: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "prometheus_tsdb_file_cache_size_bytes", - Help: "Current size of the file cache in bytes.", - }), - cacheMaxSize: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "prometheus_tsdb_file_cache_max_size_bytes", - Help: "Maximum size of the file cache in bytes.", - }), - cacheEvictions: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "prometheus_tsdb_file_cache_evictions_total", - Help: "Total number of cache evictions.", - }), - } - - if cache != nil { - m.cacheHitRatio = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ - Name: "prometheus_tsdb_file_cache_hit_ratio", - Help: "Cache hit ratio (hits / (hits + misses)).", - }, func() float64 { - hits, misses, _, _ := cache.Stats() - total := hits + misses - if total == 0 { - return 0 - } - return float64(hits) / float64(total) - }) - } - - return m -} - -// Describe implements prometheus.Collector. -func (m *FileCacheMetrics) Describe(ch chan<- *prometheus.Desc) { - m.cacheHits.Describe(ch) - m.cacheMisses.Describe(ch) - m.cacheSize.Describe(ch) - m.cacheMaxSize.Describe(ch) - m.cacheEvictions.Describe(ch) - if m.cacheHitRatio != nil { - m.cacheHitRatio.Describe(ch) - } -} - -// Collect implements prometheus.Collector. -func (m *FileCacheMetrics) Collect(ch chan<- prometheus.Metric) { - m.cacheHits.Collect(ch) - m.cacheMisses.Collect(ch) - m.cacheSize.Collect(ch) - m.cacheMaxSize.Collect(ch) - m.cacheEvictions.Collect(ch) - if m.cacheHitRatio != nil { - m.cacheHitRatio.Collect(ch) - } -} - -// Update updates the metrics from the cache. -// Call this periodically to keep metrics current. -func (m *FileCacheMetrics) Update(cache *FileCache) { - if cache == nil { - return - } - - hits, misses, size, maxSize := cache.Stats() - m.cacheHits.Add(0) // Counter maintains its own value; this just ensures it exists - m.cacheMisses.Add(0) // Counter maintains its own value; this just ensures it exists - m.cacheSize.Set(float64(size)) - m.cacheMaxSize.Set(float64(maxSize)) - - // Note: For accurate hit/miss counters, we'd need to track deltas - // or have the cache directly increment the prometheus counters. - _ = hits - _ = misses -} - -// FileCacheWithMetrics wraps a FileCache and updates Prometheus metrics. -type FileCacheWithMetrics struct { - *FileCache - metrics *FileCacheMetrics -} - -// NewFileCacheWithMetrics creates a new FileCache with Prometheus metrics. -func NewFileCacheWithMetrics(opts FileCacheOptions, reg prometheus.Registerer) (*FileCacheWithMetrics, error) { - cache := NewFileCache(opts) - metrics := NewFileCacheMetrics(cache) - - if reg != nil { - if err := reg.Register(metrics); err != nil { - return nil, err - } - } - - return &FileCacheWithMetrics{ - FileCache: cache, - metrics: metrics, - }, nil -} - -// Metrics returns the metrics for this cache. -func (c *FileCacheWithMetrics) Metrics() *FileCacheMetrics { - return c.metrics -} diff --git a/tsdb/fileutil/file_cache_test.go b/tsdb/fileutil/file_cache_test.go index 68529baf58..10b165eb34 100644 --- a/tsdb/fileutil/file_cache_test.go +++ b/tsdb/fileutil/file_cache_test.go @@ -49,9 +49,9 @@ func TestFileCache_BasicOperations(t *testing.T) { } // Test stats - hits, misses, size, maxSize := cache.Stats() - if hits != 1 { - t.Errorf("expected 1 hit, got %d", hits) + requests, misses, _, size, maxSize, _ := cache.Stats() + if requests != 2 { + t.Errorf("expected 2 requests, got %d", requests) } if misses != 1 { t.Errorf("expected 1 miss, got %d", misses) diff --git a/tsdb/fileutil/file_reader.go b/tsdb/fileutil/file_reader.go index 13c7e2f674..d7e946f75d 100644 --- a/tsdb/fileutil/file_reader.go +++ b/tsdb/fileutil/file_reader.go @@ -15,6 +15,8 @@ package fileutil import ( "sync" + + "github.com/prometheus/client_golang/prometheus" ) // BufferedFileReaderConfig holds configuration for buffered file reading. @@ -26,6 +28,10 @@ type BufferedFileReaderConfig struct { // BlockSize is the size of each cached block. // Default is 64KiB. BlockSize int + + // Reg is the Prometheus registerer for metrics. + // If nil, metrics will not be registered. + Reg prometheus.Registerer } // DefaultBufferedFileReaderConfig returns the default configuration. @@ -60,6 +66,7 @@ func SetBufferedFileReaderConfig(cfg BufferedFileReaderConfig) { globalManager.cache = NewFileCache(FileCacheOptions{ MaxSize: cfg.CacheSize, BlockSize: cfg.BlockSize, + Reg: cfg.Reg, }) } else { // Update cache settings @@ -121,7 +128,7 @@ func ClearGlobalCache() { // GlobalCacheStats returns statistics for the global file cache. // Returns zeros if cache is not initialized. -func GlobalCacheStats() (hits, misses uint64, size, maxSize int64) { +func GlobalCacheStats() (requests, misses, evictions uint64, size, maxSize int64, numEntries int) { globalManager.mu.RLock() cache := globalManager.cache globalManager.mu.RUnlock() @@ -129,5 +136,5 @@ func GlobalCacheStats() (hits, misses uint64, size, maxSize int64) { if cache != nil { return cache.Stats() } - return 0, 0, 0, 0 + return 0, 0, 0, 0, 0, 0 } diff --git a/tsdb/fileutil/integration_test.go b/tsdb/fileutil/integration_test.go index cc3711bece..56e714504a 100644 --- a/tsdb/fileutil/integration_test.go +++ b/tsdb/fileutil/integration_test.go @@ -67,9 +67,9 @@ func TestBufferedFileReaderCorrectness(t *testing.T) { } // Verify cache was used - hits, misses, _, _ := GlobalCacheStats() - t.Logf("Cache stats: hits=%d, misses=%d", hits, misses) - if hits+misses == 0 { + requests, misses, _, _, _, _ := GlobalCacheStats() + t.Logf("Cache stats: requests=%d, misses=%d, hits=%d", requests, misses, requests-misses) + if requests == 0 { t.Error("expected cache to be used") } } diff --git a/tsdb/head.go b/tsdb/head.go index 3d700944d9..ff6f304796 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -41,6 +41,7 @@ import ( "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" + "github.com/prometheus/prometheus/tsdb/fileutil" "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/tsdb/record" "github.com/prometheus/prometheus/tsdb/tombstones" @@ -300,6 +301,14 @@ func NewHead(r prometheus.Registerer, l *slog.Logger, wal, wbl *wlog.WL, opts *H opts.WALReplayConcurrency = defaultWALReplayConcurrency } + // Set up the buffered file reader config for head chunk files. + // This must be done before NewChunkDiskMapper is called. + fileutil.SetBufferedFileReaderConfig(fileutil.BufferedFileReaderConfig{ + CacheSize: fileutil.DefaultCacheSize, + BlockSize: fileutil.DefaultBlockSize, + Reg: r, + }) + h.chunkDiskMapper, err = chunks.NewChunkDiskMapper( r, mmappedChunksDir(opts.ChunkDirRoot),