diff --git a/tsdb/semconv/README.md b/tsdb/semconv/README.md index 3ad406d208..3a83d7c62d 100644 --- a/tsdb/semconv/README.md +++ b/tsdb/semconv/README.md @@ -58,6 +58,7 @@ This document describes the metrics defined in this semantic convention registry | `prometheus_tsdb_out_of_order_wbl_completed_pages_total` | counter | {page} | Total number of completed WBL pages for out-of-order samples. | | `prometheus_tsdb_out_of_order_wbl_fsync_duration_seconds` | histogram | s | Duration of WBL fsync for out-of-order samples. | | `prometheus_tsdb_out_of_order_wbl_page_flushes_total` | counter | {flush} | Total number of WBL page flushes for out-of-order samples. | +| `prometheus_tsdb_out_of_order_wbl_record_bytes_saved_total` | counter | By | Total bytes saved by WBL record compression for out-of-order samples. | | `prometheus_tsdb_out_of_order_wbl_record_part_writes_total` | counter | {write} | Total number of WBL record part writes for out-of-order samples. | | `prometheus_tsdb_out_of_order_wbl_record_parts_bytes_written_total` | counter | By | Total bytes written to WBL record parts for out-of-order samples. | | `prometheus_tsdb_out_of_order_wbl_segment_current` | gauge | {segment} | Current out-of-order WBL segment. | @@ -594,6 +595,22 @@ Total number of WBL page flushes for out-of-order samples. - **Stability:** development +### `prometheus_tsdb_out_of_order_wbl_record_bytes_saved_total` + +Total bytes saved by WBL record compression for out-of-order samples. + +- **Type:** counter +- **Unit:** By +- **Stability:** development + +#### Attributes + +| Attribute | Type | Description | Examples | +|-----------|------|-------------|----------| +| `compression` | string | The compression algorithm. | snappy | + + + ### `prometheus_tsdb_out_of_order_wbl_record_part_writes_total` Total number of WBL record part writes for out-of-order samples. diff --git a/tsdb/semconv/metrics.go b/tsdb/semconv/metrics.go index 10afad97ba..f5cb41a6f9 100644 --- a/tsdb/semconv/metrics.go +++ b/tsdb/semconv/metrics.go @@ -853,6 +853,43 @@ func NewPrometheusTSDBOutOfOrderWBLPageFlushesTotal() PrometheusTSDBOutOfOrderWB } } +// PrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal records the total bytes saved by WBL record compression for out-of-order samples. +type PrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal struct { + *prometheus.CounterVec +} + +// NewPrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal returns a new PrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal instrument. +func NewPrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal() PrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal { + labels := []string{ + "compression", + } + return PrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal{ + CounterVec: prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "prometheus_tsdb_out_of_order_wbl_record_bytes_saved_total", + Help: "Total bytes saved by WBL record compression for out-of-order samples.", + }, labels), + } +} + +type PrometheusTSDBOutOfOrderWBLRecordBytesSavedTotalAttr interface { + Attribute + implPrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal() +} + +func (a CompressionAttr) implPrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal() {} + +func (m PrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal) With( + extra ...PrometheusTSDBOutOfOrderWBLRecordBytesSavedTotalAttr, +) prometheus.Counter { + labels := prometheus.Labels{ + "compression": "", + } + for _, v := range extra { + labels[v.ID()] = v.Value() + } + return m.CounterVec.With(labels) +} + // PrometheusTSDBOutOfOrderWBLRecordPartWritesTotal records the total number of WBL record part writes for out-of-order samples. type PrometheusTSDBOutOfOrderWBLRecordPartWritesTotal struct { prometheus.Counter @@ -899,17 +936,10 @@ func NewPrometheusTSDBOutOfOrderWBLSegmentCurrent() PrometheusTSDBOutOfOrderWBLS } // PrometheusTSDBOutOfOrderWBLStorageSizeBytes records the size of the out-of-order WBL storage. -type PrometheusTSDBOutOfOrderWBLStorageSizeBytes struct { - prometheus.Gauge -} - -// NewPrometheusTSDBOutOfOrderWBLStorageSizeBytes returns a new PrometheusTSDBOutOfOrderWBLStorageSizeBytes instrument. -func NewPrometheusTSDBOutOfOrderWBLStorageSizeBytes() PrometheusTSDBOutOfOrderWBLStorageSizeBytes { - return PrometheusTSDBOutOfOrderWBLStorageSizeBytes{ - Gauge: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "prometheus_tsdb_out_of_order_wbl_storage_size_bytes", - Help: "Size of the out-of-order WBL storage.", - }), +func PrometheusTSDBOutOfOrderWBLStorageSizeBytesOpts() prometheus.GaugeOpts { + return prometheus.GaugeOpts{ + Name: "prometheus_tsdb_out_of_order_wbl_storage_size_bytes", + Help: "Size of the out-of-order WBL storage.", } } @@ -1360,17 +1390,10 @@ func NewPrometheusTSDBWALSegmentCurrent() PrometheusTSDBWALSegmentCurrent { } // PrometheusTSDBWALStorageSizeBytes records the size of the WAL storage. -type PrometheusTSDBWALStorageSizeBytes struct { - prometheus.Gauge -} - -// NewPrometheusTSDBWALStorageSizeBytes returns a new PrometheusTSDBWALStorageSizeBytes instrument. -func NewPrometheusTSDBWALStorageSizeBytes() PrometheusTSDBWALStorageSizeBytes { - return PrometheusTSDBWALStorageSizeBytes{ - Gauge: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "prometheus_tsdb_wal_storage_size_bytes", - Help: "Size of the WAL storage.", - }), +func PrometheusTSDBWALStorageSizeBytesOpts() prometheus.GaugeOpts { + return prometheus.GaugeOpts{ + Name: "prometheus_tsdb_wal_storage_size_bytes", + Help: "Size of the WAL storage.", } } diff --git a/tsdb/semconv/registry.yaml b/tsdb/semconv/registry.yaml index 027944c9e1..0bff5b0d69 100644 --- a/tsdb/semconv/registry.yaml +++ b/tsdb/semconv/registry.yaml @@ -576,6 +576,21 @@ groups: instrument: counter unit: "{flush}" + - id: metric.prometheus_tsdb_out_of_order_wbl_record_bytes_saved_total + type: metric + stability: development + brief: Total bytes saved by WBL record compression for out-of-order samples. + metric_name: prometheus_tsdb_out_of_order_wbl_record_bytes_saved_total + instrument: counter + unit: By + attributes: + - id: compression + type: string + stability: development + brief: The compression algorithm. + examples: + - snappy + - id: metric.prometheus_tsdb_out_of_order_wbl_record_part_writes_total type: metric stability: development @@ -607,6 +622,9 @@ groups: metric_name: prometheus_tsdb_out_of_order_wbl_storage_size_bytes instrument: gauge unit: By + annotations: + prometheus: + only_opts: true # Implemented as GaugeFunc - id: metric.prometheus_tsdb_out_of_order_wbl_truncations_failed_total type: metric @@ -804,6 +822,9 @@ groups: metric_name: prometheus_tsdb_wal_storage_size_bytes instrument: gauge unit: By + annotations: + prometheus: + only_opts: true # Implemented as GaugeFunc - id: metric.prometheus_tsdb_wal_truncate_duration_seconds type: metric diff --git a/tsdb/wlog/wlog.go b/tsdb/wlog/wlog.go index 5a80d58abf..0db89eb8a8 100644 --- a/tsdb/wlog/wlog.go +++ b/tsdb/wlog/wlog.go @@ -30,10 +30,10 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/tsdb/fileutil" + semconv "github.com/prometheus/prometheus/tsdb/semconv" "github.com/prometheus/prometheus/util/compression" ) @@ -205,15 +205,21 @@ type wlMetrics struct { truncateFail prometheus.Counter truncateTotal prometheus.Counter currentSegment prometheus.Gauge - writesFailed prometheus.Counter walFileSize prometheus.GaugeFunc + writesFailed prometheus.Counter recordPartWrites prometheus.Counter recordPartBytes prometheus.Counter - recordBytesSaved *prometheus.CounterVec + recordBytesSaved recordBytesSavedMetric r prometheus.Registerer } +// recordBytesSavedMetric abstracts the different CounterVec types for WAL/WBL. +type recordBytesSavedMetric interface { + prometheus.Collector + WithLabelValues(lvs ...string) prometheus.Counter +} + func (w *wlMetrics) Unregister() { if w.r == nil { return @@ -224,70 +230,71 @@ func (w *wlMetrics) Unregister() { w.r.Unregister(w.truncateFail) w.r.Unregister(w.truncateTotal) w.r.Unregister(w.currentSegment) - w.r.Unregister(w.writesFailed) w.r.Unregister(w.walFileSize) + w.r.Unregister(w.writesFailed) w.r.Unregister(w.recordPartWrites) w.r.Unregister(w.recordPartBytes) w.r.Unregister(w.recordBytesSaved) } -func newWLMetrics(w *WL, r prometheus.Registerer) *wlMetrics { - return &wlMetrics{ - r: r, - fsyncDuration: promauto.With(r).NewSummary(prometheus.SummaryOpts{ - Name: "fsync_duration_seconds", - Help: "Duration of write log fsync.", - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, - }), - pageFlushes: promauto.With(r).NewCounter(prometheus.CounterOpts{ - Name: "page_flushes_total", - Help: "Total number of page flushes.", - }), - pageCompletions: promauto.With(r).NewCounter(prometheus.CounterOpts{ - Name: "completed_pages_total", - Help: "Total number of completed pages.", - }), - truncateFail: promauto.With(r).NewCounter(prometheus.CounterOpts{ - Name: "truncations_failed_total", - Help: "Total number of write log truncations that failed.", - }), - truncateTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{ - Name: "truncations_total", - Help: "Total number of write log truncations attempted.", - }), - currentSegment: promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Name: "segment_current", - Help: "Write log segment index that TSDB is currently writing to.", - }), - writesFailed: promauto.With(r).NewCounter(prometheus.CounterOpts{ - Name: "writes_failed_total", - Help: "Total number of write log writes that failed.", - }), - walFileSize: promauto.With(r).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "storage_size_bytes", - Help: "Size of the write log directory.", - }, func() float64 { +func newWLMetrics(w *WL, r prometheus.Registerer, isWBL bool) *wlMetrics { + m := &wlMetrics{r: r} + + if isWBL { + m.fsyncDuration = semconv.NewPrometheusTSDBOutOfOrderWBLFsyncDurationSeconds().Summary + m.pageFlushes = semconv.NewPrometheusTSDBOutOfOrderWBLPageFlushesTotal().Counter + m.pageCompletions = semconv.NewPrometheusTSDBOutOfOrderWBLCompletedPagesTotal().Counter + m.truncateFail = semconv.NewPrometheusTSDBOutOfOrderWBLTruncationsFailedTotal().Counter + m.truncateTotal = semconv.NewPrometheusTSDBOutOfOrderWBLTruncationsTotal().Counter + m.currentSegment = semconv.NewPrometheusTSDBOutOfOrderWBLSegmentCurrent().Gauge + m.writesFailed = semconv.NewPrometheusTSDBOutOfOrderWBLWritesFailedTotal().Counter + m.walFileSize = prometheus.NewGaugeFunc(semconv.PrometheusTSDBOutOfOrderWBLStorageSizeBytesOpts(), func() float64 { + val, err := w.Size() + if err != nil { + w.logger.Error("Failed to calculate size of \"wbl\" dir", "err", err.Error()) + } + return float64(val) + }) + m.recordPartWrites = semconv.NewPrometheusTSDBOutOfOrderWBLRecordPartWritesTotal().Counter + m.recordPartBytes = semconv.NewPrometheusTSDBOutOfOrderWBLRecordPartsBytesWrittenTotal().Counter + m.recordBytesSaved = semconv.NewPrometheusTSDBOutOfOrderWBLRecordBytesSavedTotal().CounterVec + } else { + m.fsyncDuration = semconv.NewPrometheusTSDBWALFsyncDurationSeconds().Summary + m.pageFlushes = semconv.NewPrometheusTSDBWALPageFlushesTotal().Counter + m.pageCompletions = semconv.NewPrometheusTSDBWALCompletedPagesTotal().Counter + m.truncateFail = semconv.NewPrometheusTSDBWALTruncationsFailedTotal().Counter + m.truncateTotal = semconv.NewPrometheusTSDBWALTruncationsTotal().Counter + m.currentSegment = semconv.NewPrometheusTSDBWALSegmentCurrent().Gauge + m.writesFailed = semconv.NewPrometheusTSDBWALWritesFailedTotal().Counter + m.walFileSize = prometheus.NewGaugeFunc(semconv.PrometheusTSDBWALStorageSizeBytesOpts(), func() float64 { val, err := w.Size() if err != nil { w.logger.Error("Failed to calculate size of \"wal\" dir", "err", err.Error()) } return float64(val) - }), - recordPartWrites: promauto.With(r).NewCounter(prometheus.CounterOpts{ - Name: "record_part_writes_total", - Help: "Total number of record parts written before flushing.", - }), - recordPartBytes: promauto.With(r).NewCounter(prometheus.CounterOpts{ - Name: "record_parts_bytes_written_total", - Help: "Total number of record part bytes written before flushing, including" + - " CRC and compression headers.", - }), - recordBytesSaved: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Name: "record_bytes_saved_total", - Help: "Total number of bytes saved by the optional record compression." + - " Use this metric to learn about the effectiveness compression.", - }, []string{"compression"}), + }) + m.recordPartWrites = semconv.NewPrometheusTSDBWALRecordPartWritesTotal().Counter + m.recordPartBytes = semconv.NewPrometheusTSDBWALRecordPartsBytesWrittenTotal().Counter + m.recordBytesSaved = semconv.NewPrometheusTSDBWALRecordBytesSavedTotal().CounterVec } + + if r != nil { + r.MustRegister( + m.fsyncDuration, + m.pageFlushes, + m.pageCompletions, + m.truncateFail, + m.truncateTotal, + m.currentSegment, + m.walFileSize, + m.writesFailed, + m.recordPartWrites, + m.recordPartBytes, + m.recordBytesSaved, + ) + } + + return m } // New returns a new WAL over the given directory. @@ -318,11 +325,8 @@ func NewSize(logger *slog.Logger, reg prometheus.Registerer, dir string, segment compress: compress, cEnc: compression.NewSyncEncodeBuffer(), } - prefix := "prometheus_tsdb_wal_" - if filepath.Base(dir) == WblDirName { - prefix = "prometheus_tsdb_out_of_order_wbl_" - } - w.metrics = newWLMetrics(w, prometheus.WrapRegistererWithPrefix(prefix, reg)) + isWBL := filepath.Base(dir) == WblDirName + w.metrics = newWLMetrics(w, reg, isWBL) _, last, err := Segments(w.Dir()) if err != nil {