From e6b838391a0b6d2ed36552a28daec1e10d5cf755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 2 Jun 2025 15:12:40 +0200 Subject: [PATCH 1/3] test(promql): histogram_count inconsistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ref: https://github.com/prometheus/prometheus/issues/16681 Signed-off-by: György Krajcsovits --- promql/engine_test.go | 96 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/promql/engine_test.go b/promql/engine_test.go index 7d09938bfc..c6136b148a 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -37,6 +37,7 @@ import ( "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/logging" @@ -3744,3 +3745,98 @@ eval instant at 10m max_over_time(metric_total{env="1"}[10m]) {env="1"} 120 `, engine) } + +// TestInconsistentHistogramCount is testing for +// https://github.com/prometheus/prometheus/issues/16681 which needs mixed +// integer and float histograms. The promql test framework only uses float +// histograms, so we cannot move this to promql tests. +func TestInconsistentHistogramCount(t *testing.T) { + dir := t.TempDir() + + opts := tsdb.DefaultHeadOptions() + opts.EnableNativeHistograms.Store(true) + opts.ChunkDirRoot = dir + // We use TSDB head only. By using full TSDB DB, and appending samples to it, closing it would cause unnecessary HEAD compaction, which slows down the test. + head, err := tsdb.NewHead(nil, nil, nil, nil, opts, nil) + require.NoError(t, err) + t.Cleanup(func() { + _ = head.Close() + }) + + app := head.Appender(context.Background()) + + l := labels.FromStrings("__name__", "series_1") + + mint := time.Now().Add(-time.Hour).UnixMilli() + maxt := mint + dT := int64(15 * 1000) // 15 seconds in milliseconds. + inHs := []*histogram.Histogram{ + { + Count: 2, + Sum: 100, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 1}, + }, + PositiveBuckets: []int64{2}, + }, + { + Count: 4, + Sum: 200, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 1}, + }, + PositiveBuckets: []int64{4}, + }, + {}, // Empty histogram to trigger counter reset. + } + + for i, h := range inHs { + maxt = mint + dT*int64(i) + _, err = app.AppendHistogram(0, l, maxt, h, nil) + require.NoError(t, err) + } + + require.NoError(t, app.Commit()) + queryable := storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) { + return tsdb.NewBlockQuerier(head, mint, maxt) + }) + + engine := promql.NewEngine(promql.EngineOpts{ + MaxSamples: 1000000, + Timeout: 10 * time.Second, + LookbackDelta: 5 * time.Minute, + }) + + var ( + query promql.Query + queryResult *promql.Result + v promql.Vector + ) + + query, err = engine.NewInstantQuery(context.Background(), queryable, nil, "(rate(series_1[1m]))", time.UnixMilli(maxt)) + require.NoError(t, err) + queryResult = query.Exec(context.Background()) + require.NoError(t, queryResult.Err) + require.NotNil(t, queryResult) + v, err = queryResult.Vector() + require.NoError(t, err) + require.Len(t, v, 1) + require.NotNil(t, v[0].H) + require.Greater(t, v[0].H.Count, float64(0)) + query.Close() + countFromHistogram := v[0].H.Count + + query, err = engine.NewInstantQuery(context.Background(), queryable, nil, "histogram_count((rate(series_1[1m])))", time.UnixMilli(maxt)) + require.NoError(t, err) + queryResult = query.Exec(context.Background()) + require.NoError(t, queryResult.Err) + require.NotNil(t, queryResult) + v, err = queryResult.Vector() + require.NoError(t, err) + require.Len(t, v, 1) + require.Nil(t, v[0].H) + query.Close() + countFromFunction := float64(v[0].F) + + require.Equal(t, countFromHistogram, countFromFunction, "histogram_count function should return the same count as the histogram itself") +} From df94ecebab21a58107938d35d67cc624486c1f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Tue, 3 Jun 2025 12:17:39 +0200 Subject: [PATCH 2/3] fix(promql): histogram_count inconsistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The problem is in the counter reset detection. The code that loads the samples is matrixIterSlice which uses the typed Buffer iterator, which will preload the integer histogram samples, however the last sample is always(!) loaded as a float histogram sample in matrixIterSlice and the optimized iterator fails to detect counter resets in that case. Also the iterator does not reset lastH, lastFH properly. Ref: https://github.com/prometheus/prometheus/issues/16681 Signed-off-by: György Krajcsovits --- promql/histogram_stats_iterator.go | 15 +++++++-- promql/histogram_stats_iterator_test.go | 41 +++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/promql/histogram_stats_iterator.go b/promql/histogram_stats_iterator.go index 459d5924ae..7b749edd71 100644 --- a/promql/histogram_stats_iterator.go +++ b/promql/histogram_stats_iterator.go @@ -105,6 +105,7 @@ func (f *histogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram) } func (f *histogramStatsIterator) setLastH(h *histogram.Histogram) { + f.lastFH = nil if f.lastH == nil { f.lastH = h.Copy() } else { @@ -113,6 +114,7 @@ func (f *histogramStatsIterator) setLastH(h *histogram.Histogram) { } func (f *histogramStatsIterator) setLastFH(fh *histogram.FloatHistogram) { + f.lastH = nil if f.lastFH == nil { f.lastFH = fh.Copy() } else { @@ -125,7 +127,13 @@ func (f *histogramStatsIterator) getFloatResetHint(hint histogram.CounterResetHi return hint } if f.lastFH == nil { - return histogram.NotCounterReset + // If there was no previous histogram, this will not be used, + // and if there was a previous integer histogram, this will + // force a reset detection. The later can happen if we used the + // iterator to read integer histograms before, but switched to + // float histograms for whatever reason. + // https://github.com/prometheus/prometheus/issues/16681 + return histogram.UnknownCounterReset } if f.currentFH.DetectReset(f.lastFH) { @@ -139,7 +147,10 @@ func (f *histogramStatsIterator) getResetHint(h *histogram.Histogram) histogram. return h.CounterResetHint } if f.lastH == nil { - return histogram.NotCounterReset + // If there was no previous histogram, this will not be used, + // and if there was a previous float histogram, this will + // force a reset detection. + return histogram.UnknownCounterReset } fh, prevFH := h.ToFloat(nil), f.lastH.ToFloat(nil) diff --git a/promql/histogram_stats_iterator_test.go b/promql/histogram_stats_iterator_test.go index 3b99f6ea6f..1460da60f9 100644 --- a/promql/histogram_stats_iterator_test.go +++ b/promql/histogram_stats_iterator_test.go @@ -68,7 +68,7 @@ func TestHistogramStatsDecoding(t *testing.T) { tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), }, expectedHints: []histogram.CounterResetHint{ - histogram.NotCounterReset, + histogram.UnknownCounterReset, }, }, { @@ -78,7 +78,7 @@ func TestHistogramStatsDecoding(t *testing.T) { tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), }, expectedHints: []histogram.CounterResetHint{ - histogram.NotCounterReset, + histogram.UnknownCounterReset, histogram.CounterReset, }, }, @@ -90,7 +90,7 @@ func TestHistogramStatsDecoding(t *testing.T) { tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), }, expectedHints: []histogram.CounterResetHint{ - histogram.NotCounterReset, + histogram.UnknownCounterReset, histogram.UnknownCounterReset, histogram.CounterReset, }, @@ -141,6 +141,41 @@ func TestHistogramStatsDecoding(t *testing.T) { } } +func TestHistogramStatsMixedUse(t *testing.T) { + histograms := []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset), + tsdbutil.GenerateTestHistogramWithHint(4, histogram.UnknownCounterReset), + tsdbutil.GenerateTestHistogramWithHint(0, histogram.UnknownCounterReset), + } + + series := newHistogramSeries(histograms) + it := series.Iterator(nil) + + statsIterator := NewHistogramStatsIterator(it) + + expectedHints := []histogram.CounterResetHint{ + histogram.UnknownCounterReset, + histogram.NotCounterReset, + histogram.UnknownCounterReset, + } + actualHints := make([]histogram.CounterResetHint, 3) + typ := statsIterator.Next() + require.Equal(t, chunkenc.ValHistogram, typ) + _, h := statsIterator.AtHistogram(nil) + actualHints[0] = h.CounterResetHint + typ = statsIterator.Next() + require.Equal(t, chunkenc.ValHistogram, typ) + _, h = statsIterator.AtHistogram(nil) + actualHints[1] = h.CounterResetHint + typ = statsIterator.Next() + require.Equal(t, chunkenc.ValHistogram, typ) + _, fh := statsIterator.AtFloatHistogram(nil) + actualHints[2] = fh.CounterResetHint + + require.Equal(t, chunkenc.ValNone, statsIterator.Next()) + require.Equal(t, expectedHints, actualHints) +} + type histogramSeries struct { histograms []*histogram.Histogram } From bccb1d645954880c70eae1eeb703186c1f92c697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 4 Jun 2025 10:24:50 +0200 Subject: [PATCH 3/3] fix(promql): do not loose information about buckets when doing the detect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- promql/histogram_stats_iterator.go | 33 +++++++++++++------------ promql/histogram_stats_iterator_test.go | 2 +- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/promql/histogram_stats_iterator.go b/promql/histogram_stats_iterator.go index 7b749edd71..59814d1b35 100644 --- a/promql/histogram_stats_iterator.go +++ b/promql/histogram_stats_iterator.go @@ -126,17 +126,15 @@ func (f *histogramStatsIterator) getFloatResetHint(hint histogram.CounterResetHi if hint != histogram.UnknownCounterReset { return hint } - if f.lastFH == nil { - // If there was no previous histogram, this will not be used, - // and if there was a previous integer histogram, this will - // force a reset detection. The later can happen if we used the - // iterator to read integer histograms before, but switched to - // float histograms for whatever reason. - // https://github.com/prometheus/prometheus/issues/16681 - return histogram.UnknownCounterReset + prevFH := f.lastFH + if prevFH == nil { + if f.lastH == nil { + // We don't know if there's a counter reset. + return histogram.UnknownCounterReset + } + prevFH = f.lastH.ToFloat(nil) } - - if f.currentFH.DetectReset(f.lastFH) { + if f.currentFH.DetectReset(prevFH) { return histogram.CounterReset } return histogram.NotCounterReset @@ -146,14 +144,17 @@ func (f *histogramStatsIterator) getResetHint(h *histogram.Histogram) histogram. if h.CounterResetHint != histogram.UnknownCounterReset { return h.CounterResetHint } + var prevFH *histogram.FloatHistogram if f.lastH == nil { - // If there was no previous histogram, this will not be used, - // and if there was a previous float histogram, this will - // force a reset detection. - return histogram.UnknownCounterReset + if f.lastFH == nil { + // We don't know if there's a counter reset. + return histogram.UnknownCounterReset + } + prevFH = f.lastFH + } else { + prevFH = f.lastH.ToFloat(nil) } - - fh, prevFH := h.ToFloat(nil), f.lastH.ToFloat(nil) + fh := h.ToFloat(nil) if fh.DetectReset(prevFH) { return histogram.CounterReset } diff --git a/promql/histogram_stats_iterator_test.go b/promql/histogram_stats_iterator_test.go index 1460da60f9..abe3436a5a 100644 --- a/promql/histogram_stats_iterator_test.go +++ b/promql/histogram_stats_iterator_test.go @@ -156,7 +156,7 @@ func TestHistogramStatsMixedUse(t *testing.T) { expectedHints := []histogram.CounterResetHint{ histogram.UnknownCounterReset, histogram.NotCounterReset, - histogram.UnknownCounterReset, + histogram.CounterReset, } actualHints := make([]histogram.CounterResetHint, 3) typ := statsIterator.Next()