Migrate SD metrics that required const_label support

Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com>
This commit is contained in:
Arthur Silva Sens 2026-01-20 11:24:58 -03:00
parent 47ed33d135
commit 42eb1f9be7
No known key found for this signature in database
4 changed files with 101 additions and 173 deletions

View file

@ -26,6 +26,7 @@ import (
"github.com/prometheus/common/config"
"github.com/prometheus/common/promslog"
semconv "github.com/prometheus/prometheus/discovery/semconv"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/features"
)
@ -232,7 +233,7 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
for name, scfg := range cfg {
failedCount += m.registerProviders(scfg, name)
}
m.metrics.FailedConfigs.Set(float64(failedCount))
m.metrics.FailedConfigs.Gauge.Set(float64(failedCount))
var (
wg sync.WaitGroup
@ -266,13 +267,13 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
// Remove obsolete subs' targets.
if _, ok := prov.newSubs[s]; !ok {
delete(m.targets, poolKey{s, prov.name})
m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, s)
m.metrics.DiscoveredTargets.GaugeVec.DeleteLabelValues(s)
}
}
// Set metrics and targets for new subs.
for s := range prov.newSubs {
if _, ok := prov.subs[s]; !ok {
m.metrics.DiscoveredTargets.WithLabelValues(s).Set(0)
m.metrics.DiscoveredTargets.With(semconv.ConfigAttr(s)).Set(0)
}
if l := len(refTargets); l > 0 {
m.targets[poolKey{s, prov.name}] = make(map[string]*targetgroup.Group, l)
@ -360,7 +361,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ
case <-ctx.Done():
return
case tgs, ok := <-updates:
m.metrics.ReceivedUpdates.Inc()
m.metrics.ReceivedUpdates.Counter.Inc()
if !ok {
m.logger.Debug("Discoverer channel closed", "provider", p.name)
// Wait for provider cancellation to ensure targets are cleaned up when expected.
@ -395,11 +396,11 @@ func (m *Manager) sender() {
case <-ticker.C: // Some discoverers send updates too often, so we throttle these with the ticker.
select {
case <-m.triggerSend:
m.metrics.SentUpdates.Inc()
m.metrics.SentUpdates.Counter.Inc()
select {
case m.syncCh <- m.allGroups():
default:
m.metrics.DelayedUpdates.Inc()
m.metrics.DelayedUpdates.Counter.Inc()
m.logger.Debug("Discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:
@ -474,7 +475,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
m.mtx.RUnlock()
for setName, v := range n {
m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
m.metrics.DiscoveredTargets.With(semconv.ConfigAttr(setName)).Set(float64(v))
}
return tSets

View file

@ -17,67 +17,35 @@ import (
"fmt"
"github.com/prometheus/client_golang/prometheus"
semconv "github.com/prometheus/prometheus/discovery/semconv"
)
// Metrics to be used with a discovery manager.
type Metrics struct {
FailedConfigs prometheus.Gauge
DiscoveredTargets *prometheus.GaugeVec
ReceivedUpdates prometheus.Counter
DelayedUpdates prometheus.Counter
SentUpdates prometheus.Counter
FailedConfigs semconv.PrometheusSDFailedConfigs
DiscoveredTargets semconv.PrometheusSDDiscoveredTargets
ReceivedUpdates semconv.PrometheusSDReceivedUpdatesTotal
DelayedUpdates semconv.PrometheusSDUpdatesDelayedTotal
SentUpdates semconv.PrometheusSDUpdatesTotal
}
func NewManagerMetrics(registerer prometheus.Registerer, sdManagerName string) (*Metrics, error) {
m := &Metrics{}
m.FailedConfigs = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "prometheus_sd_failed_configs",
Help: "Current number of service discovery configurations that failed to load.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
)
m.DiscoveredTargets = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_sd_discovered_targets",
Help: "Current number of discovered targets.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
[]string{"config"},
)
m.ReceivedUpdates = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_received_updates_total",
Help: "Total number of update events received from the SD providers.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
)
m.DelayedUpdates = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_delayed_total",
Help: "Total number of update events that couldn't be sent immediately.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
)
m.SentUpdates = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_total",
Help: "Total number of update events sent to the SD consumers.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
)
name := semconv.NameAttr(sdManagerName)
m := &Metrics{
FailedConfigs: semconv.NewPrometheusSDFailedConfigs(name),
DiscoveredTargets: semconv.NewPrometheusSDDiscoveredTargets(name),
ReceivedUpdates: semconv.NewPrometheusSDReceivedUpdatesTotal(name),
DelayedUpdates: semconv.NewPrometheusSDUpdatesDelayedTotal(name),
SentUpdates: semconv.NewPrometheusSDUpdatesTotal(name),
}
metrics := []prometheus.Collector{
m.FailedConfigs,
m.DiscoveredTargets,
m.ReceivedUpdates,
m.DelayedUpdates,
m.SentUpdates,
m.FailedConfigs.Gauge,
m.DiscoveredTargets.GaugeVec,
m.ReceivedUpdates.Counter,
m.DelayedUpdates.Counter,
m.SentUpdates.Counter,
}
for _, collector := range metrics {
@ -92,9 +60,9 @@ func NewManagerMetrics(registerer prometheus.Registerer, sdManagerName string) (
// Unregister unregisters all metrics.
func (m *Metrics) Unregister(registerer prometheus.Registerer) {
registerer.Unregister(m.FailedConfigs)
registerer.Unregister(m.DiscoveredTargets)
registerer.Unregister(m.ReceivedUpdates)
registerer.Unregister(m.DelayedUpdates)
registerer.Unregister(m.SentUpdates)
registerer.Unregister(m.FailedConfigs.Gauge)
registerer.Unregister(m.DiscoveredTargets.GaugeVec)
registerer.Unregister(m.ReceivedUpdates.Counter)
registerer.Unregister(m.DelayedUpdates.Counter)
registerer.Unregister(m.SentUpdates.Counter)
}

View file

@ -190,16 +190,21 @@ type PrometheusSDDiscoveredTargets struct {
*prometheus.GaugeVec
}
// NewPrometheusSDDiscoveredTargets returns a new PrometheusSDDiscoveredTargets instrument.
func NewPrometheusSDDiscoveredTargets() PrometheusSDDiscoveredTargets {
// NewPrometheusSDDiscoveredTargets returns a new PrometheusSDDiscoveredTargets instrument with the given const labels.
func NewPrometheusSDDiscoveredTargets(
name NameAttr,
) PrometheusSDDiscoveredTargets {
labels := []string{
"name",
"config",
}
return PrometheusSDDiscoveredTargets{
GaugeVec: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "prometheus_sd_discovered_targets",
Help: "Current number of discovered targets.",
ConstLabels: prometheus.Labels{
"name": name.Value(),
},
}, labels),
}
}
@ -209,14 +214,12 @@ type PrometheusSDDiscoveredTargetsAttr interface {
implPrometheusSDDiscoveredTargets()
}
func (a NameAttr) implPrometheusSDDiscoveredTargets() {}
func (a ConfigAttr) implPrometheusSDDiscoveredTargets() {}
func (m PrometheusSDDiscoveredTargets) With(
extra ...PrometheusSDDiscoveredTargetsAttr,
) prometheus.Gauge {
labels := prometheus.Labels{
"name": "",
"config": "",
}
for _, v := range extra {
@ -257,39 +260,23 @@ func NewPrometheusSDDNSLookupsTotal() PrometheusSDDNSLookupsTotal {
// PrometheusSDFailedConfigs records the current number of service discovery configurations that failed to load.
type PrometheusSDFailedConfigs struct {
*prometheus.GaugeVec
prometheus.Gauge
}
// NewPrometheusSDFailedConfigs returns a new PrometheusSDFailedConfigs instrument.
func NewPrometheusSDFailedConfigs() PrometheusSDFailedConfigs {
labels := []string{
"name",
}
// NewPrometheusSDFailedConfigs returns a new PrometheusSDFailedConfigs instrument with the given const labels.
func NewPrometheusSDFailedConfigs(
name NameAttr,
) PrometheusSDFailedConfigs {
return PrometheusSDFailedConfigs{
GaugeVec: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Gauge: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "prometheus_sd_failed_configs",
Help: "Current number of service discovery configurations that failed to load.",
}, labels),
}
}
type PrometheusSDFailedConfigsAttr interface {
Attribute
implPrometheusSDFailedConfigs()
}
func (a NameAttr) implPrometheusSDFailedConfigs() {}
func (m PrometheusSDFailedConfigs) With(
extra ...PrometheusSDFailedConfigsAttr,
) prometheus.Gauge {
labels := prometheus.Labels{
"name": "",
ConstLabels: prometheus.Labels{
"name": name.Value(),
},
}),
}
for _, v := range extra {
labels[v.ID()] = v.Value()
}
return m.GaugeVec.With(labels)
}
// PrometheusSDFileMtimeSeconds records the modification time of the SD file.
@ -531,39 +518,23 @@ func NewPrometheusSDNomadFailuresTotal() PrometheusSDNomadFailuresTotal {
// PrometheusSDReceivedUpdatesTotal records the total number of update events received from the SD providers.
type PrometheusSDReceivedUpdatesTotal struct {
*prometheus.CounterVec
prometheus.Counter
}
// NewPrometheusSDReceivedUpdatesTotal returns a new PrometheusSDReceivedUpdatesTotal instrument.
func NewPrometheusSDReceivedUpdatesTotal() PrometheusSDReceivedUpdatesTotal {
labels := []string{
"name",
}
// NewPrometheusSDReceivedUpdatesTotal returns a new PrometheusSDReceivedUpdatesTotal instrument with the given const labels.
func NewPrometheusSDReceivedUpdatesTotal(
name NameAttr,
) PrometheusSDReceivedUpdatesTotal {
return PrometheusSDReceivedUpdatesTotal{
CounterVec: prometheus.NewCounterVec(prometheus.CounterOpts{
Counter: prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_sd_received_updates_total",
Help: "Total number of update events received from the SD providers.",
}, labels),
}
}
type PrometheusSDReceivedUpdatesTotalAttr interface {
Attribute
implPrometheusSDReceivedUpdatesTotal()
}
func (a NameAttr) implPrometheusSDReceivedUpdatesTotal() {}
func (m PrometheusSDReceivedUpdatesTotal) With(
extra ...PrometheusSDReceivedUpdatesTotalAttr,
) prometheus.Counter {
labels := prometheus.Labels{
"name": "",
ConstLabels: prometheus.Labels{
"name": name.Value(),
},
}),
}
for _, v := range extra {
labels[v.ID()] = v.Value()
}
return m.CounterVec.With(labels)
}
// PrometheusSDRefreshDurationHistogramSeconds records the duration of a SD refresh cycle as a histogram.
@ -694,76 +665,44 @@ func (m PrometheusSDRefreshFailuresTotal) With(
// PrometheusSDUpdatesDelayedTotal records the total number of update events that couldn't be sent immediately.
type PrometheusSDUpdatesDelayedTotal struct {
*prometheus.CounterVec
prometheus.Counter
}
// NewPrometheusSDUpdatesDelayedTotal returns a new PrometheusSDUpdatesDelayedTotal instrument.
func NewPrometheusSDUpdatesDelayedTotal() PrometheusSDUpdatesDelayedTotal {
labels := []string{
"name",
}
// NewPrometheusSDUpdatesDelayedTotal returns a new PrometheusSDUpdatesDelayedTotal instrument with the given const labels.
func NewPrometheusSDUpdatesDelayedTotal(
name NameAttr,
) PrometheusSDUpdatesDelayedTotal {
return PrometheusSDUpdatesDelayedTotal{
CounterVec: prometheus.NewCounterVec(prometheus.CounterOpts{
Counter: prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_sd_updates_delayed_total",
Help: "Total number of update events that couldn't be sent immediately.",
}, labels),
}
}
type PrometheusSDUpdatesDelayedTotalAttr interface {
Attribute
implPrometheusSDUpdatesDelayedTotal()
}
func (a NameAttr) implPrometheusSDUpdatesDelayedTotal() {}
func (m PrometheusSDUpdatesDelayedTotal) With(
extra ...PrometheusSDUpdatesDelayedTotalAttr,
) prometheus.Counter {
labels := prometheus.Labels{
"name": "",
ConstLabels: prometheus.Labels{
"name": name.Value(),
},
}),
}
for _, v := range extra {
labels[v.ID()] = v.Value()
}
return m.CounterVec.With(labels)
}
// PrometheusSDUpdatesTotal records the total number of update events sent to the SD consumers.
type PrometheusSDUpdatesTotal struct {
*prometheus.CounterVec
prometheus.Counter
}
// NewPrometheusSDUpdatesTotal returns a new PrometheusSDUpdatesTotal instrument.
func NewPrometheusSDUpdatesTotal() PrometheusSDUpdatesTotal {
labels := []string{
"name",
}
// NewPrometheusSDUpdatesTotal returns a new PrometheusSDUpdatesTotal instrument with the given const labels.
func NewPrometheusSDUpdatesTotal(
name NameAttr,
) PrometheusSDUpdatesTotal {
return PrometheusSDUpdatesTotal{
CounterVec: prometheus.NewCounterVec(prometheus.CounterOpts{
Counter: prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_sd_updates_total",
Help: "Total number of update events sent to the SD consumers.",
}, labels),
}
}
type PrometheusSDUpdatesTotalAttr interface {
Attribute
implPrometheusSDUpdatesTotal()
}
func (a NameAttr) implPrometheusSDUpdatesTotal() {}
func (m PrometheusSDUpdatesTotal) With(
extra ...PrometheusSDUpdatesTotalAttr,
) prometheus.Counter {
labels := prometheus.Labels{
"name": "",
ConstLabels: prometheus.Labels{
"name": name.Value(),
},
}),
}
for _, v := range extra {
labels[v.ID()] = v.Value()
}
return m.CounterVec.With(labels)
}
// PrometheusTreecacheWatcherGoroutines records the current number of treecache watcher goroutines.

View file

@ -23,10 +23,14 @@ groups:
- id: name
type: string
stability: development
requirement_level: required
brief: The discovery manager name.
examples:
- scrape
- notify
annotations:
prometheus:
const_labels: ["name"]
- id: metric.prometheus_sd_failed_configs
type: metric
@ -39,10 +43,14 @@ groups:
- id: name
type: string
stability: development
requirement_level: required
brief: The discovery manager name.
examples:
- scrape
- notify
annotations:
prometheus:
const_labels: ["name"]
- id: metric.prometheus_sd_received_updates_total
type: metric
@ -55,10 +63,14 @@ groups:
- id: name
type: string
stability: development
requirement_level: required
brief: The discovery manager name.
examples:
- scrape
- notify
annotations:
prometheus:
const_labels: ["name"]
- id: metric.prometheus_sd_updates_delayed_total
type: metric
@ -71,10 +83,14 @@ groups:
- id: name
type: string
stability: development
requirement_level: required
brief: The discovery manager name.
examples:
- scrape
- notify
annotations:
prometheus:
const_labels: ["name"]
- id: metric.prometheus_sd_updates_total
type: metric
@ -87,10 +103,14 @@ groups:
- id: name
type: string
stability: development
requirement_level: required
brief: The discovery manager name.
examples:
- scrape
- notify
annotations:
prometheus:
const_labels: ["name"]
# Refresh metrics
- id: metric.prometheus_sd_refresh_duration_seconds