From 8caf1f1c411190907324fe216e3c304487acf6b3 Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Sun, 5 Oct 2025 22:38:07 +0300 Subject: [PATCH 1/9] [NHCB] Separate CustomBucketBoundsMatch from floatBucketsMatch Signed-off-by: Linas Medziunas --- model/histogram/float_histogram.go | 16 ++++--- model/histogram/generic.go | 14 ++++++ model/histogram/generic_test.go | 68 ++++++++++++++++++++++++++++++ model/histogram/histogram.go | 2 +- promql/promqltest/test.go | 2 +- tsdb/chunkenc/float_histogram.go | 4 +- tsdb/chunkenc/histogram.go | 4 +- 7 files changed, 97 insertions(+), 13 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 009dcc82cd..6d7365b8b6 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -346,7 +346,7 @@ func (h *FloatHistogram) Add(other *FloatHistogram) (res *FloatHistogram, counte if h.UsesCustomBuckets() != other.UsesCustomBuckets() { return nil, false, ErrHistogramsIncompatibleSchema } - if h.UsesCustomBuckets() && !FloatBucketsMatch(h.CustomValues, other.CustomValues) { + if h.UsesCustomBuckets() && !CustomBucketBoundsMatch(h.CustomValues, other.CustomValues) { return nil, false, ErrHistogramsIncompatibleBounds } @@ -422,7 +422,7 @@ func (h *FloatHistogram) Sub(other *FloatHistogram) (res *FloatHistogram, counte if h.UsesCustomBuckets() != other.UsesCustomBuckets() { return nil, false, ErrHistogramsIncompatibleSchema } - if h.UsesCustomBuckets() && !FloatBucketsMatch(h.CustomValues, other.CustomValues) { + if h.UsesCustomBuckets() && !CustomBucketBoundsMatch(h.CustomValues, other.CustomValues) { return nil, false, ErrHistogramsIncompatibleBounds } @@ -500,7 +500,7 @@ func (h *FloatHistogram) Equals(h2 *FloatHistogram) bool { } if h.UsesCustomBuckets() { - if !FloatBucketsMatch(h.CustomValues, h2.CustomValues) { + if !CustomBucketBoundsMatch(h.CustomValues, h2.CustomValues) { return false } } @@ -513,14 +513,14 @@ func (h *FloatHistogram) Equals(h2 *FloatHistogram) bool { if !spansMatch(h.NegativeSpans, h2.NegativeSpans) { return false } - if !FloatBucketsMatch(h.NegativeBuckets, h2.NegativeBuckets) { + if !floatBucketsMatch(h.NegativeBuckets, h2.NegativeBuckets) { return false } if !spansMatch(h.PositiveSpans, h2.PositiveSpans) { return false } - if !FloatBucketsMatch(h.PositiveBuckets, h2.PositiveBuckets) { + if !floatBucketsMatch(h.PositiveBuckets, h2.PositiveBuckets) { return false } @@ -639,7 +639,7 @@ func (h *FloatHistogram) DetectReset(previous *FloatHistogram) bool { if h.Count < previous.Count { return true } - if h.UsesCustomBuckets() != previous.UsesCustomBuckets() || (h.UsesCustomBuckets() && !FloatBucketsMatch(h.CustomValues, previous.CustomValues)) { + if h.UsesCustomBuckets() != previous.UsesCustomBuckets() || (h.UsesCustomBuckets() && !CustomBucketBoundsMatch(h.CustomValues, previous.CustomValues)) { // Mark that something has changed or that the application has been restarted. However, this does // not matter so much since the change in schema will be handled directly in the chunks and PromQL // functions. @@ -1352,7 +1352,9 @@ func addBuckets( return spansA, bucketsA } -func FloatBucketsMatch(b1, b2 []float64) bool { +// floatBucketsMatch compares bucket values of two float histograms using binary float comparison +// and returns true if all values match. +func floatBucketsMatch(b1, b2 []float64) bool { if len(b1) != len(b2) { return false } diff --git a/model/histogram/generic.go b/model/histogram/generic.go index 327dada3ca..75db0d958e 100644 --- a/model/histogram/generic.go +++ b/model/histogram/generic.go @@ -79,6 +79,20 @@ func IsKnownSchema(s int32) bool { return IsCustomBucketsSchema(s) || IsExponentialSchemaReserved(s) } +// CustomBucketBoundsMatch compares histogram custom bucket bounds (CustomValues) +// and returns true if all values match. +func CustomBucketBoundsMatch(c1, c2 []float64) bool { + if len(c1) != len(c2) { + return false + } + for i, c := range c1 { + if c != c2[i] { + return false + } + } + return true +} + // BucketCount is a type constraint for the count in a bucket, which can be // float64 (for type FloatHistogram) or uint64 (for type Histogram). type BucketCount interface { diff --git a/model/histogram/generic_test.go b/model/histogram/generic_test.go index b49adbcadf..1651830e9d 100644 --- a/model/histogram/generic_test.go +++ b/model/histogram/generic_test.go @@ -207,3 +207,71 @@ func TestReduceResolutionFloatHistogram(t *testing.T) { require.Equal(t, buckets, tc.buckets[:len(buckets)]) } } + +func TestCustomBucketBoundsMatch(t *testing.T) { + tests := []struct { + name string + c1, c2 []float64 + want bool + }{ + { + name: "both nil", + c1: nil, + c2: nil, + want: true, + }, + { + name: "both empty", + c1: []float64{}, + c2: []float64{}, + want: true, + }, + { + name: "one empty one non-empty", + c1: []float64{}, + c2: []float64{1.0}, + want: false, + }, + { + name: "different lengths", + c1: []float64{1.0, 2.0}, + c2: []float64{1.0, 2.0, 3.0}, + want: false, + }, + { + name: "same single value", + c1: []float64{1.5}, + c2: []float64{1.5}, + want: true, + }, + { + name: "different single value", + c1: []float64{1.5}, + c2: []float64{2.5}, + want: false, + }, + { + name: "same multiple values", + c1: []float64{1.0, 2.0, 3.0, 4.0, 5.0}, + c2: []float64{1.0, 2.0, 3.0, 4.0, 5.0}, + want: true, + }, + { + name: "different values", + c1: []float64{1.0, 2.1, 3.0}, + c2: []float64{1.0, 2.0, 3.0}, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := CustomBucketBoundsMatch(tt.c1, tt.c2) + require.Equal(t, tt.want, got) + + // Test commutativity (should be symmetric) + gotReverse := CustomBucketBoundsMatch(tt.c2, tt.c1) + require.Equal(t, got, gotReverse) + }) + } +} diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index 35c9797155..a7d9ce80f0 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -255,7 +255,7 @@ func (h *Histogram) Equals(h2 *Histogram) bool { } if h.UsesCustomBuckets() { - if !FloatBucketsMatch(h.CustomValues, h2.CustomValues) { + if !CustomBucketBoundsMatch(h.CustomValues, h2.CustomValues) { return false } } diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 9c420b1ddc..41d8cdde20 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -1139,7 +1139,7 @@ func compareNativeHistogram(exp, cur *histogram.FloatHistogram) bool { } if exp.UsesCustomBuckets() { - if !histogram.FloatBucketsMatch(exp.CustomValues, cur.CustomValues) { + if !histogram.CustomBucketBoundsMatch(exp.CustomValues, cur.CustomValues) { return false } } diff --git a/tsdb/chunkenc/float_histogram.go b/tsdb/chunkenc/float_histogram.go index d80b1d9bcc..f9baa1df9c 100644 --- a/tsdb/chunkenc/float_histogram.go +++ b/tsdb/chunkenc/float_histogram.go @@ -258,7 +258,7 @@ func (a *FloatHistogramAppender) appendable(h *histogram.FloatHistogram) ( return } - if histogram.IsCustomBucketsSchema(h.Schema) && !histogram.FloatBucketsMatch(h.CustomValues, a.customValues) { + if histogram.IsCustomBucketsSchema(h.Schema) && !histogram.CustomBucketBoundsMatch(h.CustomValues, a.customValues) { counterReset = true return } @@ -495,7 +495,7 @@ func (a *FloatHistogramAppender) appendableGauge(h *histogram.FloatHistogram) ( return } - if histogram.IsCustomBucketsSchema(h.Schema) && !histogram.FloatBucketsMatch(h.CustomValues, a.customValues) { + if histogram.IsCustomBucketsSchema(h.Schema) && !histogram.CustomBucketBoundsMatch(h.CustomValues, a.customValues) { return } diff --git a/tsdb/chunkenc/histogram.go b/tsdb/chunkenc/histogram.go index 9c433fc5e5..0488c6554d 100644 --- a/tsdb/chunkenc/histogram.go +++ b/tsdb/chunkenc/histogram.go @@ -294,7 +294,7 @@ func (a *HistogramAppender) appendable(h *histogram.Histogram) ( return } - if histogram.IsCustomBucketsSchema(h.Schema) && !histogram.FloatBucketsMatch(h.CustomValues, a.customValues) { + if histogram.IsCustomBucketsSchema(h.Schema) && !histogram.CustomBucketBoundsMatch(h.CustomValues, a.customValues) { counterResetHint = CounterReset return } @@ -532,7 +532,7 @@ func (a *HistogramAppender) appendableGauge(h *histogram.Histogram) ( return } - if histogram.IsCustomBucketsSchema(h.Schema) && !histogram.FloatBucketsMatch(h.CustomValues, a.customValues) { + if histogram.IsCustomBucketsSchema(h.Schema) && !histogram.CustomBucketBoundsMatch(h.CustomValues, a.customValues) { return } From d11ee103acd9eb46a28b621828604b2446a0887a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 6 Oct 2025 21:44:34 +0200 Subject: [PATCH 2/9] perf(tsdb): reuse map of sample types to speed up head appender MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While investigating +10% CPU in v3.7 release, found that ~5% is from expanding the types map. Try reuse. Also fix a linter error. Signed-off-by: György Krajcsovits --- tsdb/head.go | 1 + tsdb/head_append.go | 21 ++++++++++++++++++++- util/testutil/synctest/disabled.go | 2 +- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index 45f425cea9..f3242b8ba7 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -93,6 +93,7 @@ type Head struct { floatHistogramsPool zeropool.Pool[[]record.RefFloatHistogramSample] metadataPool zeropool.Pool[[]record.RefMetadata] seriesPool zeropool.Pool[[]*memSeries] + typeMapPool zeropool.Pool[map[chunks.HeadSeriesRef]sampleType] bytesPool zeropool.Pool[[]byte] memChunkPool sync.Pool diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 659c09f7e7..9f930a763f 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -173,7 +173,7 @@ func (h *Head) appender() *headAppender { oooTimeWindow: h.opts.OutOfOrderTimeWindow.Load(), seriesRefs: h.getRefSeriesBuffer(), series: h.getSeriesBuffer(), - typesInBatch: map[chunks.HeadSeriesRef]sampleType{}, + typesInBatch: h.getTypeMap(), appendID: appendID, cleanupAppendIDsBelow: cleanupAppendIDsBelow, } @@ -297,6 +297,19 @@ func (h *Head) putSeriesBuffer(b []*memSeries) { h.seriesPool.Put(b[:0]) } +func (h *Head) getTypeMap() map[chunks.HeadSeriesRef]sampleType { + b := h.typeMapPool.Get() + if b == nil { + return make(map[chunks.HeadSeriesRef]sampleType) + } + return b +} + +func (h *Head) putTypeMap(b map[chunks.HeadSeriesRef]sampleType) { + clear(b) + h.typeMapPool.Put(b) +} + func (h *Head) getBytesBuffer() []byte { b := h.bytesPool.Get() if b == nil { @@ -1687,8 +1700,13 @@ func (a *headAppender) Commit() (err error) { h := a.head defer func() { + if a.closed { + // Don't double-close in case Rollback() was called. + return + } h.putRefSeriesBuffer(a.seriesRefs) h.putSeriesBuffer(a.series) + h.putTypeMap(a.typesInBatch) a.closed = true }() @@ -2216,6 +2234,7 @@ func (a *headAppender) Rollback() (err error) { a.closed = true h.putRefSeriesBuffer(a.seriesRefs) h.putSeriesBuffer(a.series) + h.putTypeMap(a.typesInBatch) }() var series *memSeries diff --git a/util/testutil/synctest/disabled.go b/util/testutil/synctest/disabled.go index 2cdcc72e07..e87454afcf 100644 --- a/util/testutil/synctest/disabled.go +++ b/util/testutil/synctest/disabled.go @@ -19,7 +19,7 @@ import ( "testing" ) -func Test(t *testing.T, f func(t *testing.T)) { +func Test(t *testing.T, _ func(t *testing.T)) { t.Skip("goexperiment.synctest is not enabled") } From 5f582a7e1feb95957cd67f2f90f1d2d3f7e42aa8 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 7 Oct 2025 14:58:25 +0200 Subject: [PATCH 3/9] tsdb: Remove leftover debug fmt.Println Signed-off-by: beorn7 --- tsdb/head_append.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 9f930a763f..f49bf1b261 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -2238,7 +2238,6 @@ func (a *headAppender) Rollback() (err error) { }() var series *memSeries - fmt.Println("ROLLBACK") for _, b := range a.batches { for i := range b.floats { series = b.floatSeries[i] From 51c8e558357f112c6802dc1a98eb704f0d1ac6eb Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 7 Oct 2025 15:01:22 +0200 Subject: [PATCH 4/9] tsdb: Do not track stFloat in typesInBatch explicitly Signed-off-by: beorn7 --- tsdb/head_append.go | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/tsdb/head_append.go b/tsdb/head_append.go index f49bf1b261..1bf8a5b9eb 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -570,7 +570,10 @@ func (a *headAppender) getCurrentBatch(st sampleType, s chunks.HeadSeriesRef) *a b.exemplars = h.getExemplarBuffer() } clear(a.typesInBatch) - if st != stNone { + switch st { + case stHistogram, stFloatHistogram, stCustomBucketHistogram, stCustomBucketFloatHistogram: + // We only record histogram sample types in the map. + // Floats are implicit. a.typesInBatch[s] = st } a.batches = append(a.batches, &b) @@ -597,14 +600,32 @@ func (a *headAppender) getCurrentBatch(st sampleType, s chunks.HeadSeriesRef) *a } prevST, ok := a.typesInBatch[s] switch { - case !ok: // New series. Add it to map and return current batch. + case prevST == st: + // An old series of some histogram type with the same type being appended. + // Continue the batch. + return lastBatch + case !ok && st == stFloat: + // A new float series, or an old float series that gets floats appended. + // Note that we do not track stFloat in typesInBatch. + // Continue the batch. + return lastBatch + case st == stFloat: + // A float being appended to a histogram series. + // Start a new batch. + return newBatch() + case !ok: + // A new series of some histogram type, or some histogram type + // being appended to on old float series. Even in the latter + // case, we don't need to start a new batch because histograms + // after floats are fine. + // Add new sample type to the map and continue batch. a.typesInBatch[s] = st return lastBatch - case prevST == st: // Old series, same type. Just return batch. - return lastBatch + default: + // One histogram type changed to another. + // Start a new batch. + return newBatch() } - // An old series got a new type. Start new batch. - return newBatch() } // appendable checks whether the given sample is valid for appending to the series. @@ -1068,6 +1089,8 @@ func (a *headAppender) log() error { return fmt.Errorf("log metadata: %w", err) } } + // It's important to do (float) Samples before histogram samples + // to end up with the correct order. if len(b.floats) > 0 { rec = enc.Samples(b.floats, buf) buf = rec[:0] @@ -1748,8 +1771,9 @@ func (a *headAppender) Commit() (err error) { }() for _, b := range a.batches { - // Do not change the order of these calls. The staleness marker - // handling depends on it. + // Do not change the order of these calls. We depend on it for + // correct commit order of samples and for the staleness marker + // handling. a.commitFloats(b, acc) a.commitHistograms(b, acc) a.commitFloatHistograms(b, acc) From df068cff5d8a3dc1b8402abf084293736102ec1a Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 7 Oct 2025 15:52:38 +0200 Subject: [PATCH 5/9] Update prometheus/common to 0.67.1 This should enable NHCB federation with text format. Signed-off-by: beorn7 --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index b3793ba033..450ff69281 100644 --- a/go.mod +++ b/go.mod @@ -56,7 +56,7 @@ require ( github.com/prometheus/alertmanager v0.28.1 github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_model v0.6.2 - github.com/prometheus/common v0.66.1 + github.com/prometheus/common v0.67.1 github.com/prometheus/common/assets v0.2.0 github.com/prometheus/exporter-toolkit v0.14.1 github.com/prometheus/sigv4 v0.2.1 @@ -91,7 +91,7 @@ require ( google.golang.org/api v0.250.0 google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 google.golang.org/grpc v1.75.1 - google.golang.org/protobuf v1.36.9 + google.golang.org/protobuf v1.36.10 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.34.1 k8s.io/apimachinery v0.34.1 diff --git a/go.sum b/go.sum index 8a7d514b5f..279deec119 100644 --- a/go.sum +++ b/go.sum @@ -443,8 +443,8 @@ github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNw github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= -github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= -github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/common v0.67.1 h1:OTSON1P4DNxzTg4hmKCc37o4ZAZDv0cfXLkOt0oEowI= +github.com/prometheus/common v0.67.1/go.mod h1:RpmT9v35q2Y+lsieQsdOh5sXZ6ajUGC8NjZAmr8vb0Q= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= github.com/prometheus/exporter-toolkit v0.14.1 h1:uKPE4ewweVRWFainwvAcHs3uw15pjw2dk3I7b+aNo9o= @@ -690,8 +690,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9/go.mod h1:HSkG/KdJWusxU1F6CNrwNDjBMgisKxGnc5dAZfT0mjQ= google.golang.org/grpc v1.75.1 h1:/ODCNEuf9VghjgO3rqLcfg8fiOP0nSluljWFlDxELLI= google.golang.org/grpc v1.75.1/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= -google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= -google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From e2aed2cd27dac221779608a1c299918ce9677929 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 7 Oct 2025 16:34:59 +0200 Subject: [PATCH 6/9] tsdb: Disable more tests on MS Windows Signed-off-by: beorn7 --- tsdb/compact_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index 8a4d19c179..18c76a4597 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -1988,7 +1988,7 @@ func TestDelayedCompaction(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { - if c.compactionDelay > 0 && runtime.GOOS == "windows" { + if runtime.GOOS == "windows" { t.Skip("Time imprecision on windows makes the test flaky, see https://github.com/prometheus/prometheus/issues/16450") } t.Parallel() From f64ee61312fb2b9b98fc505da2b4a654a0a0077e Mon Sep 17 00:00:00 2001 From: Ali Nazari Date: Wed, 8 Oct 2025 09:06:09 +0330 Subject: [PATCH 7/9] Use rlock in read methods of scrape target Signed-off-by: Ali Nazari --- scrape/target.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scrape/target.go b/scrape/target.go index 0af2b8ba14..2e43750af7 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -190,9 +190,9 @@ func (t *Target) LabelsRange(f func(l labels.Label)) { // DiscoveredLabels returns a copy of the target's labels before any processing. func (t *Target) DiscoveredLabels(lb *labels.Builder) labels.Labels { - t.mtx.Lock() + t.mtx.RLock() cfg, tLabels, tgLabels := t.scrapeConfig, t.tLabels, t.tgLabels - t.mtx.Unlock() + t.mtx.RUnlock() PopulateDiscoveredLabels(lb, cfg, tLabels, tgLabels) return lb.Labels() } @@ -208,9 +208,9 @@ func (t *Target) SetScrapeConfig(scrapeConfig *config.ScrapeConfig, tLabels, tgL // URL returns a copy of the target's URL. func (t *Target) URL() *url.URL { - t.mtx.Lock() + t.mtx.RLock() configParams := t.scrapeConfig.Params - t.mtx.Unlock() + t.mtx.RUnlock() params := url.Values{} for k, v := range configParams { From 51e0982c915e931ae9a069986aaf90ebea330759 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 9 Oct 2025 02:10:07 +0200 Subject: [PATCH 8/9] promql(histograms): Fix counter reset hint handling when aggregating Fixes #17308. As explained adding the warn-annotation about conflicting counter reset hints doesn't happen consistently. Furthermore, because of incremental mean calculation being used so far (which includes subtraction), avg calculation always created gauge histograms. The fix is to make Sub behave like Add WRT counter reset handling, and then set the result of a subtraction to gauge explicitly in actual PromQL subtraction (rather than using Sub for something else, like incremental mean calculation). Also, track the presence of a CounterReset hint and a NotCounterReset hint separately for the entirety of aggregated histograms and create the warn-annotation based on that. As a minor fix, this commit also consistently creates the warn annotation in aggregation to be about "aggregation" rather than "subtraction" or "addition", because the latter are just internal operations within the aggregation, which is not of interest for the user. Signed-off-by: beorn7 --- model/histogram/float_histogram.go | 111 ++++++----- model/histogram/float_histogram_test.go | 51 ++--- promql/engine.go | 45 +++-- promql/functions.go | 48 +++-- promql/parser/parse_test.go | 2 - .../testdata/native_histograms.test | 175 +++++++++++++++--- 6 files changed, 308 insertions(+), 124 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 6d7365b8b6..08469ad124 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -283,7 +283,8 @@ func (h *FloatHistogram) ZeroBucket() Bucket[float64] { // bucket counts including the zero bucket and the count and the sum of // observations. The bucket layout stays the same. This method changes the // receiving histogram directly (rather than acting on a copy). It returns a -// pointer to the receiving histogram for convenience. +// pointer to the receiving histogram for convenience. If factor is negative, +// the counter reset hint is set to GaugeType. func (h *FloatHistogram) Mul(factor float64) *FloatHistogram { h.ZeroCount *= factor h.Count *= factor @@ -301,7 +302,8 @@ func (h *FloatHistogram) Mul(factor float64) *FloatHistogram { } // Div works like Mul but divides instead of multiplies. -// When dividing by 0, everything will be set to Inf. +// When dividing by 0, everything will be set to Inf. If scalar is negative, +// the counter reset hint is set to GaugeType. func (h *FloatHistogram) Div(scalar float64) *FloatHistogram { h.ZeroCount /= scalar h.Count /= scalar @@ -343,37 +345,10 @@ func (h *FloatHistogram) Div(scalar float64) *FloatHistogram { // // This method returns a pointer to the receiving histogram for convenience. func (h *FloatHistogram) Add(other *FloatHistogram) (res *FloatHistogram, counterResetCollision bool, err error) { - if h.UsesCustomBuckets() != other.UsesCustomBuckets() { - return nil, false, ErrHistogramsIncompatibleSchema + if err := h.checkSchemaAndBounds(other); err != nil { + return nil, false, err } - if h.UsesCustomBuckets() && !CustomBucketBoundsMatch(h.CustomValues, other.CustomValues) { - return nil, false, ErrHistogramsIncompatibleBounds - } - - switch { - case other.CounterResetHint == h.CounterResetHint: - // Adding apples to apples, all good. No need to change anything. - case h.CounterResetHint == GaugeType: - // Adding something else to a gauge. That's probably OK. Outcome is a gauge. - // Nothing to do since the receiver is already marked as gauge. - case other.CounterResetHint == GaugeType: - // Similar to before, but this time the receiver is "something else" and we have to change it to gauge. - h.CounterResetHint = GaugeType - case h.CounterResetHint == UnknownCounterReset: - // With the receiver's CounterResetHint being "unknown", this could still be legitimate - // if the caller knows what they are doing. Outcome is then again "unknown". - // No need to do anything since the receiver's CounterResetHint is already "unknown". - case other.CounterResetHint == UnknownCounterReset: - // Similar to before, but now we have to set the receiver's CounterResetHint to "unknown". - h.CounterResetHint = UnknownCounterReset - default: - // All other cases shouldn't actually happen. - // They are a direct collision of CounterReset and NotCounterReset. - // Conservatively set the CounterResetHint to "unknown" and issue a warning. - h.CounterResetHint = UnknownCounterReset - counterResetCollision = true - } - + counterResetCollision = h.adjustCounterReset(other) if !h.UsesCustomBuckets() { otherZeroCount := h.reconcileZeroBuckets(other) h.ZeroCount += otherZeroCount @@ -417,19 +392,16 @@ func (h *FloatHistogram) Add(other *FloatHistogram) (res *FloatHistogram, counte return h, counterResetCollision, nil } -// Sub works like Add but subtracts the other histogram. +// Sub works like Add but subtracts the other histogram. It uses the same logic +// to adjust the counter reset hint. This is useful where this method is used +// for incremental mean calculation. However, if it is used for the actual "-" +// operator in PromQL, the counter reset needs to be set to GaugeType after +// calling this method. func (h *FloatHistogram) Sub(other *FloatHistogram) (res *FloatHistogram, counterResetCollision bool, err error) { - if h.UsesCustomBuckets() != other.UsesCustomBuckets() { - return nil, false, ErrHistogramsIncompatibleSchema + if err := h.checkSchemaAndBounds(other); err != nil { + return nil, false, err } - if h.UsesCustomBuckets() && !CustomBucketBoundsMatch(h.CustomValues, other.CustomValues) { - return nil, false, ErrHistogramsIncompatibleBounds - } - - counterResetCollision = hasCounterResetCollision(h, other) - - h.CounterResetHint = GaugeType - + counterResetCollision = h.adjustCounterReset(other) if !h.UsesCustomBuckets() { otherZeroCount := h.reconcileZeroBuckets(other) h.ZeroCount -= otherZeroCount @@ -472,13 +444,6 @@ func (h *FloatHistogram) Sub(other *FloatHistogram) (res *FloatHistogram, counte return h, counterResetCollision, nil } -// hasCounterResetCollision returns true iff one of two histogram indicates -// a counter reset (CounterReset) while the other indicates no reset (NotCounterReset). -func hasCounterResetCollision(a, b *FloatHistogram) bool { - return a.CounterResetHint == CounterReset && b.CounterResetHint == NotCounterReset || - a.CounterResetHint == NotCounterReset && b.CounterResetHint == CounterReset -} - // Equals returns true if the given float histogram matches exactly. // Exact match is when there are no new buckets (even empty) and no missing buckets, // and all the bucket values match. Spans can have different empty length spans in between, @@ -1387,3 +1352,49 @@ func (h *FloatHistogram) ReduceResolution(targetSchema int32) *FloatHistogram { h.Schema = targetSchema return h } + +// checkSchemaAndBounds checks if two histograms are compatible because they +// both use a standard exponential schema or because they both are NHCBs. In the +// latter case, they also have to use the same custom bounds. +func (h *FloatHistogram) checkSchemaAndBounds(other *FloatHistogram) error { + if h.UsesCustomBuckets() != other.UsesCustomBuckets() { + return ErrHistogramsIncompatibleSchema + } + if h.UsesCustomBuckets() && !CustomBucketBoundsMatch(h.CustomValues, other.CustomValues) { + return ErrHistogramsIncompatibleBounds + } + return nil +} + +// adjustCounterReset is used for addition and subtraction. Those operation are +// usually only performed between gauge histograms, but if one or both are +// counters, we try to at least set the counter reset hint to something +// meaningful (see code comments below). The return counterResetCollision is +// true if one histogram has a counter reset hint of CounterReset and the other +// NotCounterReset. All other combinations are not considered a collision. +func (h *FloatHistogram) adjustCounterReset(other *FloatHistogram) (counterResetCollision bool) { + switch { + case other.CounterResetHint == h.CounterResetHint: + // Adding apples to apples, all good. No need to change anything. + case h.CounterResetHint == GaugeType: + // Adding something else to a gauge. That's probably OK. Outcome is a gauge. + // Nothing to do since the receiver is already marked as gauge. + case other.CounterResetHint == GaugeType: + // Similar to before, but this time the receiver is "something else" and we have to change it to gauge. + h.CounterResetHint = GaugeType + case h.CounterResetHint == UnknownCounterReset: + // With the receiver's CounterResetHint being "unknown", this could still be legitimate + // if the caller knows what they are doing. Outcome is then again "unknown". + // No need to do anything since the receiver's CounterResetHint is already "unknown". + case other.CounterResetHint == UnknownCounterReset: + // Similar to before, but now we have to set the receiver's CounterResetHint to "unknown". + h.CounterResetHint = UnknownCounterReset + default: + // All other cases shouldn't actually happen. + // They are a direct collision of CounterReset and NotCounterReset. + // Conservatively set the CounterResetHint to "unknown" and issue a warning. + h.CounterResetHint = UnknownCounterReset + return true + } + return false +} diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index b756f641bb..be92c87f62 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -2380,15 +2380,14 @@ func TestFloatHistogramSub(t *testing.T) { NegativeBuckets: []float64{1, 1, 4, 4}, }, expected: &FloatHistogram{ - ZeroThreshold: 0.01, - ZeroCount: 3, - Count: 9, - Sum: 11, - PositiveSpans: []Span{{-2, 2}, {1, 3}}, - PositiveBuckets: []float64{1, 0, 1, 1, 1}, - NegativeSpans: []Span{{3, 2}, {3, 2}}, - NegativeBuckets: []float64{2, 0, 1, 2}, - CounterResetHint: GaugeType, + ZeroThreshold: 0.01, + ZeroCount: 3, + Count: 9, + Sum: 11, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 1, 1, 1}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{2, 0, 1, 2}, }, }, { @@ -2416,15 +2415,14 @@ func TestFloatHistogramSub(t *testing.T) { NegativeBuckets: []float64{3, 0.5, 0.5, 2, 3, 2, 4}, }, expected: &FloatHistogram{ - ZeroThreshold: 0.01, - ZeroCount: 6, - Count: 40, - Sum: 0.889, - PositiveSpans: []Span{{-2, 5}, {0, 3}}, - PositiveBuckets: []float64{1, 5, 4, 2, 2, 2, 0, 5}, - NegativeSpans: []Span{{3, 3}, {1, 3}}, - NegativeBuckets: []float64{1, 9, 1, 4, 9, 1}, - CounterResetHint: GaugeType, + ZeroThreshold: 0.01, + ZeroCount: 6, + Count: 40, + Sum: 0.889, + PositiveSpans: []Span{{-2, 5}, {0, 3}}, + PositiveBuckets: []float64{1, 5, 4, 2, 2, 2, 0, 5}, + NegativeSpans: []Span{{3, 3}, {1, 3}}, + NegativeBuckets: []float64{1, 9, 1, 4, 9, 1}, }, }, { @@ -2446,13 +2444,12 @@ func TestFloatHistogramSub(t *testing.T) { CustomValues: []float64{1, 2, 3, 4}, }, expected: &FloatHistogram{ - Schema: CustomBucketsSchema, - Count: 4, - Sum: 11, - PositiveSpans: []Span{{0, 2}, {1, 3}}, - PositiveBuckets: []float64{1, 0, 1, 1, 1}, - CustomValues: []float64{1, 2, 3, 4}, - CounterResetHint: GaugeType, + Schema: CustomBucketsSchema, + Count: 4, + Sum: 11, + PositiveSpans: []Span{{0, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 1, 1, 1}, + CustomValues: []float64{1, 2, 3, 4}, }, }, { @@ -2520,6 +2517,10 @@ func TestFloatHistogramSub(t *testing.T) { var expectedNegative *FloatHistogram if c.expected != nil { expectedNegative = c.expected.Copy().Mul(-1) + // Mul(-1) sets the counter reset hint to + // GaugeType, but we want to retain the original + // counter reset hint for this test. + expectedNegative.CounterResetHint = c.expected.CounterResetHint } testFloatHistogramSub(t, c.in2, c.in1, expectedNegative, c.expErrMsg, c.expCounterResetCollision) }) diff --git a/promql/engine.go b/promql/engine.go index 31d910da9a..6603972c2a 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -3125,6 +3125,8 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram if err != nil { return 0, nil, false, err } + // The result must be marked as gauge. + res.CounterResetHint = histogram.GaugeType if counterResetCollision { err = annotations.NewHistogramCounterResetCollisionWarning(pos, annotations.HistogramSub) } @@ -3158,6 +3160,8 @@ type groupedAggregation struct { incompatibleHistograms bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets. groupAggrComplete bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group. incrementalMean bool // True after reverting to incremental calculation of the mean value. + counterResetSeen bool // Counter reset hint CounterReset seen. Currently only used for histogram samples. + notCounterResetSeen bool // Counter reset hint NotCounterReset seen. Currently only used for histogram samples. } // aggregation evaluates sum, avg, count, stdvar, stddev or quantile at one timestep on inputMatrix. @@ -3194,6 +3198,12 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix } else { group.histogramValue = h.Copy() group.hasHistogram = true + switch h.CounterResetHint { + case histogram.CounterReset: + group.counterResetSeen = true + case histogram.NotCounterReset: + group.notCounterResetSeen = true + } } case parser.STDVAR, parser.STDDEV: switch { @@ -3241,14 +3251,17 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix if h != nil { group.hasHistogram = true if group.histogramValue != nil { - _, counterResetCollision, err := group.histogramValue.Add(h) + switch h.CounterResetHint { + case histogram.CounterReset: + group.counterResetSeen = true + case histogram.NotCounterReset: + group.notCounterResetSeen = true + } + _, _, err := group.histogramValue.Add(h) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) group.incompatibleHistograms = true } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) - } } // Otherwise the aggregation contained floats // previously and will be invalid anyway. No @@ -3295,26 +3308,26 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix if h != nil { group.hasHistogram = true if group.histogramValue != nil { + switch h.CounterResetHint { + case histogram.CounterReset: + group.counterResetSeen = true + case histogram.NotCounterReset: + group.notCounterResetSeen = true + } left := h.Copy().Div(group.groupCount) right := group.histogramValue.Copy().Div(group.groupCount) - toAdd, counterResetCollision, err := left.Sub(right) + toAdd, _, err := left.Sub(right) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) group.incompatibleHistograms = true continue } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) - } - _, counterResetCollision, err = group.histogramValue.Add(toAdd) + _, _, err = group.histogramValue.Add(toAdd) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) group.incompatibleHistograms = true continue } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) - } } // Otherwise the aggregation contained floats // previously and will be invalid anyway. No @@ -3446,10 +3459,18 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix default: aggr.floatValue += aggr.floatKahanC } + default: // For other aggregations, we already have the right value. } + // This only is relevant for AVG and SUM with native histograms + // involved, but since those booleans aren't touched in other + // cases, we can just do it here in general. + if aggr.counterResetSeen && aggr.notCounterResetSeen { + annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) + } + ss := &outputMatrix[ri] addToSeries(ss, enh.Ts, aggr.floatValue, aggr.histogramValue, numSteps) ss.DropName = inputMatrix[ri].DropName diff --git a/promql/functions.go b/promql/functions.go index 6dfe9611f1..2d9d26dfb1 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -820,25 +820,38 @@ func funcAvgOverTime(_ []Vector, matrixVal Matrix, args parser.Expressions, enh // The passed values only contain histograms. var annos annotations.Annotations vec, err := aggrHistOverTime(matrixVal, enh, func(s Series) (*histogram.FloatHistogram, error) { + var counterResetSeen, notCounterResetSeen bool + + trackCounterReset := func(h *histogram.FloatHistogram) { + switch h.CounterResetHint { + case histogram.CounterReset: + counterResetSeen = true + case histogram.NotCounterReset: + notCounterResetSeen = true + } + } + + defer func() { + if counterResetSeen && notCounterResetSeen { + annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramAgg)) + } + }() + mean := s.Histograms[0].H.Copy() + trackCounterReset(mean) for i, h := range s.Histograms[1:] { + trackCounterReset(h.H) count := float64(i + 2) left := h.H.Copy().Div(count) right := mean.Copy().Div(count) - toAdd, counterResetCollision, err := left.Sub(right) + toAdd, _, err := left.Sub(right) if err != nil { return mean, err } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramSub)) - } - _, counterResetCollision, err = mean.Add(toAdd) + _, _, err = mean.Add(toAdd) if err != nil { return mean, err } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramAdd)) - } } return mean, nil }) @@ -1077,15 +1090,26 @@ func funcSumOverTime(_ []Vector, matrixVal Matrix, args parser.Expressions, enh // The passed values only contain histograms. var annos annotations.Annotations vec, err := aggrHistOverTime(matrixVal, enh, func(s Series) (*histogram.FloatHistogram, error) { + var counterResetSeen, notCounterResetSeen bool + + defer func() { + if counterResetSeen && notCounterResetSeen { + annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramAgg)) + } + }() + sum := s.Histograms[0].H.Copy() for _, h := range s.Histograms[1:] { - _, counterResetCollision, err := sum.Add(h.H) + switch h.H.CounterResetHint { + case histogram.CounterReset: + counterResetSeen = true + case histogram.NotCounterReset: + notCounterResetSeen = true + } + _, _, err := sum.Add(h.H) if err != nil { return sum, err } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramAdd)) - } } return sum, nil }) diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index d310984fa8..e0d6ae77f2 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -5603,7 +5603,6 @@ func TestParseHistogramSeries(t *testing.T) { Offset: 0, Length: 3, }}, - CounterResetHint: histogram.GaugeType, }, { Schema: 1, @@ -5612,7 +5611,6 @@ func TestParseHistogramSeries(t *testing.T) { Offset: 0, Length: 3, }}, - CounterResetHint: histogram.GaugeType, }, }, }, diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 0aac43e0ad..9ec222c17c 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -1198,7 +1198,7 @@ eval range from 0 to 12m step 6m avg(metric) clear -# Test incompatible custom bucket schemas. +# Test incompatible custom bucket boundaries. load 6m metric{series="1"} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} metric{series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} _ {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} @@ -1215,7 +1215,7 @@ eval range from 0 to 12m step 6m avg(metric) expect warn {} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} _ -# Test incompatible schemas with additional aggregation operators +# Test incompatible boundaries with additional aggregation operators eval range from 0 to 12m step 6m count(metric) {} 2 2 3 @@ -1235,7 +1235,7 @@ eval range from 0 to 12m step 6m limit_ratio(1, metric) metric{series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} _ {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} metric{series="3"} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} -# Test incompatible schemas with and/or +# Test incompatible boundaries with and/or eval range from 0 to 12m step 6m metric{series="1"} and ignoring(series) metric{series="2"} metric{series="1"} _ _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} @@ -1243,7 +1243,7 @@ eval range from 0 to 12m step 6m metric{series="1"} or ignoring(series) metric{s metric{series="1"} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} metric{series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} _ _ -# Test incompatible schemas with arithmetic binary operators +# Test incompatible boundaries with arithmetic binary operators eval range from 0 to 12m step 6m metric{series="2"} + ignoring (series) metric{series="3"} expect warn @@ -1252,7 +1252,7 @@ eval range from 0 to 12m step 6m metric{series="2"} - ignoring (series) metric{s clear -# Test incompatible schemas with comparison binary operators +# Test incompatible boundaries with comparison binary operators load 6m metric1 {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} metric2 {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} @@ -1552,26 +1552,11 @@ eval instant at 54m30s histogram_count(increase(metric[54m45s])) clear -# Test that `rate` adds warning when applied to an average of native histograms. +# Test counter reset hint adjustment in subtraction and aggregation, including _over_time. load 5m metric{id="1"} {{schema:0 sum:4 count:4 buckets:[1 2 1]}}x10 metric{id="2"} {{schema:0 sum:4 count:4 buckets:[1 2 1]}}x10 - -# Don't warn if there is only a single histogram. -eval instant at 5m rate(avg(metric{id="1"})[10m:]) - expect no_warn - {} {{counter_reset_hint:gauge}} - -# Rate warns if histogram is not a counter and avg sets gauge hint. -eval instant at 5m rate(avg(metric)[10m:]) - expect warn regex: this native histogram metric is not a counter: "" - {} {{counter_reset_hint:gauge}} - -# Rate warns if histogram is not a counter and avg_over_time sets gauge hint. -eval instant at 5m rate(avg_over_time(metric{id="1"}[10m])[5m:]) - expect warn regex: this native histogram metric is not a counter: "metric" - {id="1"} {{counter_reset_hint:gauge}} - + # Unary minus turns counters into gauges. eval instant at 5m -metric expect no_warn @@ -1579,8 +1564,152 @@ eval instant at 5m -metric {id="1"} {{count:-4 sum:-4 counter_reset_hint:gauge buckets:[-1 -2 -1]}} {id="2"} {{count:-4 sum:-4 counter_reset_hint:gauge buckets:[-1 -2 -1]}} +# Subtraction results in gauges, even if the result is not negative. +eval instant at 5m metric - 0.5 * metric + expect no_warn + expect no_info + {id="1"} {{count:2 sum:2 counter_reset_hint:gauge buckets:[0.5 1 0.5]}} + {id="2"} {{count:2 sum:2 counter_reset_hint:gauge buckets:[0.5 1 0.5]}} + +# Subtraction results in gauges, now with actually negtive result. +eval instant at 5m metric - 2 * metric + expect no_warn + expect no_info + {id="1"} {{count:-4 sum:-4 counter_reset_hint:gauge buckets:[-1 -2 -1]}} + {id="2"} {{count:-4 sum:-4 counter_reset_hint:gauge buckets:[-1 -2 -1]}} + +# sum and avg of counters yield a counter. +eval instant at 5m sum(metric) + expect no_warn + expect no_info + {} {{count:8 sum:8 counter_reset_hint:not_reset buckets:[2 4 2]}} + +eval instant at 5m avg(metric) + expect no_warn + expect no_info + {} {{count:4 sum:4 counter_reset_hint:not_reset buckets:[1 2 1]}} + clear +# Note that with all the series below, we never get counter_reset_hint:reset +# as a result because of of https://github.com/prometheus/prometheus/issues/15346 . +# Therefore, all the tests only look at the hints gauge, not_reset, and unknown. +load 1m + metric{type="gauge"} {{sum:4 count:4 counter_reset_hint:gauge buckets:[1 2 1]}}+{{sum:2 count:3 counter_reset_hint:gauge buckets:[1 1 1]}}x10 + metric{type="counter"} {{sum:6 count:5 buckets:[2 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x10 + metric{type="counter_with_reset"} {{sum:6 count:5 buckets:[2 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x5 {{sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x5 + mixed {{sum:6 count:5 buckets:[2 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x4 {{sum:4 count:4 counter_reset_hint:gauge buckets:[1 2 1]}} {{sum:6 count:5 buckets:[2 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x4 {{sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x5 + +# Mix of gauge and not_reset results in gauge. +eval instant at 3m sum(metric) + expect no_warn + expect no_info + {} {{count:41 sum:34 counter_reset_hint:gauge buckets:[14 15 12]}} + +eval instant at 3m avg(metric) + expect no_warn + expect no_info + {} {{count:13.666666666666668 sum:11.333333333333334 counter_reset_hint:gauge buckets:[4.666666666666667 5 4]}} + +eval instant at 5m sum_over_time(mixed[3m]) + expect no_warn + expect no_info + {} {{count:35 sum:30 counter_reset_hint:gauge buckets:[12 13 10]}} + +eval instant at 5m avg_over_time(mixed[3m]) + expect no_warn + expect no_info + {} {{count:11.666666666666666 sum:10 counter_reset_hint:gauge buckets:[4 4.333333333333334 3.333333333333333]}} + +# Mix of gauge, not_reset, and unknown results in gauge. +eval instant at 6m sum(metric) + expect no_warn + expect no_info + {} {{count:49 sum:38 counter_reset_hint:gauge buckets:[16 18 15]}} + +eval instant at 6m avg(metric) + expect no_warn + expect no_info + {} {{count:16.333333333333332 sum:12.666666666666666 counter_reset_hint:gauge buckets:[5.333333333333334 6 5]}} + +eval instant at 14m sum_over_time(mixed[10m]) + expect no_warn + expect no_info + {} {{count:93 sum:82 counter_reset_hint:gauge buckets:[31 36 26]}} + +eval instant at 14m avg_over_time(mixed[10m]) + expect no_warn + expect no_info + {} {{count:9.3 sum:8.2 counter_reset_hint:gauge buckets:[3.1 3.6 2.6]}} + +# Only not_reset results in not_reset. +eval instant at 3m sum(metric{type=~"counter.*"}) + expect no_warn + expect no_info + {} {{count:28 sum:24 counter_reset_hint:not_reset buckets:[10 10 8]}} + +eval instant at 3m avg(metric{type=~"counter.*"}) + expect no_warn + expect no_info + {} {{count:14 sum:12 counter_reset_hint:not_reset buckets:[5 5 4]}} + +eval instant at 3m sum_over_time(mixed[3m]) + expect no_warn + expect no_info + {} {{count:33 sum:30 counter_reset_hint:not_reset buckets:[12 12 9]}} + +eval instant at 3m avg_over_time(mixed[3m]) + expect no_warn + expect no_info + {} {{count:11 sum:10 counter_reset_hint:not_reset buckets:[4 4 3]}} + +# Mix of not_reset and unknown results in unknown. +eval instant at 6m sum(metric{type=~"counter.*"}) + expect no_warn + expect no_info + {} {{count:27 sum:22 counter_reset_hint:unknown buckets:[9 10 8]}} + +eval instant at 6m avg(metric{type=~"counter.*"}) + expect no_warn + expect no_info + {} {{count:13.5 sum:11 counter_reset_hint:unknown buckets:[4.5 5 4]}} + +eval instant at 15m sum_over_time(mixed[10m]) + expect no_warn + expect no_info + {} {{count:105 sum:90 counter_reset_hint:unknown buckets:[35 40 30]}} + +eval instant at 15m avg_over_time(mixed[10m]) + expect no_warn + expect no_info + {} {{count:10.5 sum:9 counter_reset_hint:unknown buckets:[3.5 4 3]}} + +# To finally test the warning about a direct counter reset collisions, we can +# utilize the HistogramStatsIterator (by calling histogram_count()). This +# special iterator does counter reset detection on the fly and therefore +# is able to create the counter reset hint "reset", which we can then mix +# with the "not_reset" hint in the test and provoke the warning. +eval instant at 6m histogram_count(sum(metric)) + expect warn msg:PromQL warning: conflicting counter resets during histogram aggregation + expect no_info + {} 49 + +eval instant at 6m histogram_count(avg(metric)) + expect warn msg:PromQL warning: conflicting counter resets during histogram aggregation + expect no_info + {} 16.333333333333332 + +eval instant at 14m histogram_count(sum_over_time(mixed[10m])) + expect warn msg:PromQL warning: conflicting counter resets during histogram aggregation + expect no_info + {} 93 + +eval instant at 14m histogram_count(avg_over_time(mixed[10m])) + expect warn msg:PromQL warning: conflicting counter resets during histogram aggregation + expect no_info + {} 9.3 + + # Test histogram_quantile annotations. load 1m nonmonotonic_bucket{le="0.1"} 0+2x10 @@ -1636,7 +1765,7 @@ load 1m # Trigger an annotation about conflicting counter resets by going through the # HistogramStatsIterator, which creates counter reset hints on the fly. eval instant at 5m histogram_count(sum_over_time(reset{timing="late"}[5m])) - expect warn msg: PromQL warning: conflicting counter resets during histogram addition + expect warn msg: PromQL warning: conflicting counter resets during histogram aggregation {timing="late"} 7 eval instant at 5m histogram_count(sum(reset)) From 1c58399160f7745cfb61a7c84c7f5d537890e94c Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 10 Oct 2025 09:16:33 +0100 Subject: [PATCH 9/9] PromQL: Speed up parsing of variadic functions (#17316) * PromQL: Add benchmark case with variadic function Signed-off-by: Bryan Boreham * PromQL: Speed up parsing of variadic functions Defer formatting of an error message until we hit an error. Signed-off-by: Bryan Boreham --------- Signed-off-by: Bryan Boreham --- promql/bench_test.go | 1 + promql/parser/parse.go | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/promql/bench_test.go b/promql/bench_test.go index 661faa6556..98b51f5f79 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -664,6 +664,7 @@ func BenchmarkParser(b *testing.B) { `foo{a="b", foo!="bar", test=~"test", bar!~"baz"}`, `min_over_time(rate(foo{bar="baz"}[2s])[5m:])[4m:3s]`, "sum without(and, by, avg, count, alert, annotations)(some_metric) [30m:10s]", + `sort_by_label(metric, "foo", "bar")`, } errCases := []string{ "(", diff --git a/promql/parser/parse.go b/promql/parser/parse.go index 3a76ddb44d..d7f3b93994 100644 --- a/promql/parser/parse.go +++ b/promql/parser/parse.go @@ -819,7 +819,9 @@ func (p *parser) checkAST(node Node) (typ ValueType) { } i = len(n.Func.ArgTypes) - 1 } - p.expectType(arg, n.Func.ArgTypes[i], fmt.Sprintf("call to function %q", n.Func.Name)) + if t := p.checkAST(arg); t != n.Func.ArgTypes[i] { + p.addParseErrf(arg.PositionRange(), "expected type %s in call to function %q, got %s", DocumentedType(n.Func.ArgTypes[i]), n.Func.Name, DocumentedType(t)) + } } case *ParenExpr: