From 51e0982c915e931ae9a069986aaf90ebea330759 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 9 Oct 2025 02:10:07 +0200 Subject: [PATCH] promql(histograms): Fix counter reset hint handling when aggregating Fixes #17308. As explained adding the warn-annotation about conflicting counter reset hints doesn't happen consistently. Furthermore, because of incremental mean calculation being used so far (which includes subtraction), avg calculation always created gauge histograms. The fix is to make Sub behave like Add WRT counter reset handling, and then set the result of a subtraction to gauge explicitly in actual PromQL subtraction (rather than using Sub for something else, like incremental mean calculation). Also, track the presence of a CounterReset hint and a NotCounterReset hint separately for the entirety of aggregated histograms and create the warn-annotation based on that. As a minor fix, this commit also consistently creates the warn annotation in aggregation to be about "aggregation" rather than "subtraction" or "addition", because the latter are just internal operations within the aggregation, which is not of interest for the user. Signed-off-by: beorn7 --- model/histogram/float_histogram.go | 111 ++++++----- model/histogram/float_histogram_test.go | 51 ++--- promql/engine.go | 45 +++-- promql/functions.go | 48 +++-- promql/parser/parse_test.go | 2 - .../testdata/native_histograms.test | 175 +++++++++++++++--- 6 files changed, 308 insertions(+), 124 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 6d7365b8b6..08469ad124 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -283,7 +283,8 @@ func (h *FloatHistogram) ZeroBucket() Bucket[float64] { // bucket counts including the zero bucket and the count and the sum of // observations. The bucket layout stays the same. This method changes the // receiving histogram directly (rather than acting on a copy). It returns a -// pointer to the receiving histogram for convenience. +// pointer to the receiving histogram for convenience. If factor is negative, +// the counter reset hint is set to GaugeType. func (h *FloatHistogram) Mul(factor float64) *FloatHistogram { h.ZeroCount *= factor h.Count *= factor @@ -301,7 +302,8 @@ func (h *FloatHistogram) Mul(factor float64) *FloatHistogram { } // Div works like Mul but divides instead of multiplies. -// When dividing by 0, everything will be set to Inf. +// When dividing by 0, everything will be set to Inf. If scalar is negative, +// the counter reset hint is set to GaugeType. func (h *FloatHistogram) Div(scalar float64) *FloatHistogram { h.ZeroCount /= scalar h.Count /= scalar @@ -343,37 +345,10 @@ func (h *FloatHistogram) Div(scalar float64) *FloatHistogram { // // This method returns a pointer to the receiving histogram for convenience. func (h *FloatHistogram) Add(other *FloatHistogram) (res *FloatHistogram, counterResetCollision bool, err error) { - if h.UsesCustomBuckets() != other.UsesCustomBuckets() { - return nil, false, ErrHistogramsIncompatibleSchema + if err := h.checkSchemaAndBounds(other); err != nil { + return nil, false, err } - if h.UsesCustomBuckets() && !CustomBucketBoundsMatch(h.CustomValues, other.CustomValues) { - return nil, false, ErrHistogramsIncompatibleBounds - } - - switch { - case other.CounterResetHint == h.CounterResetHint: - // Adding apples to apples, all good. No need to change anything. - case h.CounterResetHint == GaugeType: - // Adding something else to a gauge. That's probably OK. Outcome is a gauge. - // Nothing to do since the receiver is already marked as gauge. - case other.CounterResetHint == GaugeType: - // Similar to before, but this time the receiver is "something else" and we have to change it to gauge. - h.CounterResetHint = GaugeType - case h.CounterResetHint == UnknownCounterReset: - // With the receiver's CounterResetHint being "unknown", this could still be legitimate - // if the caller knows what they are doing. Outcome is then again "unknown". - // No need to do anything since the receiver's CounterResetHint is already "unknown". - case other.CounterResetHint == UnknownCounterReset: - // Similar to before, but now we have to set the receiver's CounterResetHint to "unknown". - h.CounterResetHint = UnknownCounterReset - default: - // All other cases shouldn't actually happen. - // They are a direct collision of CounterReset and NotCounterReset. - // Conservatively set the CounterResetHint to "unknown" and issue a warning. - h.CounterResetHint = UnknownCounterReset - counterResetCollision = true - } - + counterResetCollision = h.adjustCounterReset(other) if !h.UsesCustomBuckets() { otherZeroCount := h.reconcileZeroBuckets(other) h.ZeroCount += otherZeroCount @@ -417,19 +392,16 @@ func (h *FloatHistogram) Add(other *FloatHistogram) (res *FloatHistogram, counte return h, counterResetCollision, nil } -// Sub works like Add but subtracts the other histogram. +// Sub works like Add but subtracts the other histogram. It uses the same logic +// to adjust the counter reset hint. This is useful where this method is used +// for incremental mean calculation. However, if it is used for the actual "-" +// operator in PromQL, the counter reset needs to be set to GaugeType after +// calling this method. func (h *FloatHistogram) Sub(other *FloatHistogram) (res *FloatHistogram, counterResetCollision bool, err error) { - if h.UsesCustomBuckets() != other.UsesCustomBuckets() { - return nil, false, ErrHistogramsIncompatibleSchema + if err := h.checkSchemaAndBounds(other); err != nil { + return nil, false, err } - if h.UsesCustomBuckets() && !CustomBucketBoundsMatch(h.CustomValues, other.CustomValues) { - return nil, false, ErrHistogramsIncompatibleBounds - } - - counterResetCollision = hasCounterResetCollision(h, other) - - h.CounterResetHint = GaugeType - + counterResetCollision = h.adjustCounterReset(other) if !h.UsesCustomBuckets() { otherZeroCount := h.reconcileZeroBuckets(other) h.ZeroCount -= otherZeroCount @@ -472,13 +444,6 @@ func (h *FloatHistogram) Sub(other *FloatHistogram) (res *FloatHistogram, counte return h, counterResetCollision, nil } -// hasCounterResetCollision returns true iff one of two histogram indicates -// a counter reset (CounterReset) while the other indicates no reset (NotCounterReset). -func hasCounterResetCollision(a, b *FloatHistogram) bool { - return a.CounterResetHint == CounterReset && b.CounterResetHint == NotCounterReset || - a.CounterResetHint == NotCounterReset && b.CounterResetHint == CounterReset -} - // Equals returns true if the given float histogram matches exactly. // Exact match is when there are no new buckets (even empty) and no missing buckets, // and all the bucket values match. Spans can have different empty length spans in between, @@ -1387,3 +1352,49 @@ func (h *FloatHistogram) ReduceResolution(targetSchema int32) *FloatHistogram { h.Schema = targetSchema return h } + +// checkSchemaAndBounds checks if two histograms are compatible because they +// both use a standard exponential schema or because they both are NHCBs. In the +// latter case, they also have to use the same custom bounds. +func (h *FloatHistogram) checkSchemaAndBounds(other *FloatHistogram) error { + if h.UsesCustomBuckets() != other.UsesCustomBuckets() { + return ErrHistogramsIncompatibleSchema + } + if h.UsesCustomBuckets() && !CustomBucketBoundsMatch(h.CustomValues, other.CustomValues) { + return ErrHistogramsIncompatibleBounds + } + return nil +} + +// adjustCounterReset is used for addition and subtraction. Those operation are +// usually only performed between gauge histograms, but if one or both are +// counters, we try to at least set the counter reset hint to something +// meaningful (see code comments below). The return counterResetCollision is +// true if one histogram has a counter reset hint of CounterReset and the other +// NotCounterReset. All other combinations are not considered a collision. +func (h *FloatHistogram) adjustCounterReset(other *FloatHistogram) (counterResetCollision bool) { + switch { + case other.CounterResetHint == h.CounterResetHint: + // Adding apples to apples, all good. No need to change anything. + case h.CounterResetHint == GaugeType: + // Adding something else to a gauge. That's probably OK. Outcome is a gauge. + // Nothing to do since the receiver is already marked as gauge. + case other.CounterResetHint == GaugeType: + // Similar to before, but this time the receiver is "something else" and we have to change it to gauge. + h.CounterResetHint = GaugeType + case h.CounterResetHint == UnknownCounterReset: + // With the receiver's CounterResetHint being "unknown", this could still be legitimate + // if the caller knows what they are doing. Outcome is then again "unknown". + // No need to do anything since the receiver's CounterResetHint is already "unknown". + case other.CounterResetHint == UnknownCounterReset: + // Similar to before, but now we have to set the receiver's CounterResetHint to "unknown". + h.CounterResetHint = UnknownCounterReset + default: + // All other cases shouldn't actually happen. + // They are a direct collision of CounterReset and NotCounterReset. + // Conservatively set the CounterResetHint to "unknown" and issue a warning. + h.CounterResetHint = UnknownCounterReset + return true + } + return false +} diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index b756f641bb..be92c87f62 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -2380,15 +2380,14 @@ func TestFloatHistogramSub(t *testing.T) { NegativeBuckets: []float64{1, 1, 4, 4}, }, expected: &FloatHistogram{ - ZeroThreshold: 0.01, - ZeroCount: 3, - Count: 9, - Sum: 11, - PositiveSpans: []Span{{-2, 2}, {1, 3}}, - PositiveBuckets: []float64{1, 0, 1, 1, 1}, - NegativeSpans: []Span{{3, 2}, {3, 2}}, - NegativeBuckets: []float64{2, 0, 1, 2}, - CounterResetHint: GaugeType, + ZeroThreshold: 0.01, + ZeroCount: 3, + Count: 9, + Sum: 11, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 1, 1, 1}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{2, 0, 1, 2}, }, }, { @@ -2416,15 +2415,14 @@ func TestFloatHistogramSub(t *testing.T) { NegativeBuckets: []float64{3, 0.5, 0.5, 2, 3, 2, 4}, }, expected: &FloatHistogram{ - ZeroThreshold: 0.01, - ZeroCount: 6, - Count: 40, - Sum: 0.889, - PositiveSpans: []Span{{-2, 5}, {0, 3}}, - PositiveBuckets: []float64{1, 5, 4, 2, 2, 2, 0, 5}, - NegativeSpans: []Span{{3, 3}, {1, 3}}, - NegativeBuckets: []float64{1, 9, 1, 4, 9, 1}, - CounterResetHint: GaugeType, + ZeroThreshold: 0.01, + ZeroCount: 6, + Count: 40, + Sum: 0.889, + PositiveSpans: []Span{{-2, 5}, {0, 3}}, + PositiveBuckets: []float64{1, 5, 4, 2, 2, 2, 0, 5}, + NegativeSpans: []Span{{3, 3}, {1, 3}}, + NegativeBuckets: []float64{1, 9, 1, 4, 9, 1}, }, }, { @@ -2446,13 +2444,12 @@ func TestFloatHistogramSub(t *testing.T) { CustomValues: []float64{1, 2, 3, 4}, }, expected: &FloatHistogram{ - Schema: CustomBucketsSchema, - Count: 4, - Sum: 11, - PositiveSpans: []Span{{0, 2}, {1, 3}}, - PositiveBuckets: []float64{1, 0, 1, 1, 1}, - CustomValues: []float64{1, 2, 3, 4}, - CounterResetHint: GaugeType, + Schema: CustomBucketsSchema, + Count: 4, + Sum: 11, + PositiveSpans: []Span{{0, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 1, 1, 1}, + CustomValues: []float64{1, 2, 3, 4}, }, }, { @@ -2520,6 +2517,10 @@ func TestFloatHistogramSub(t *testing.T) { var expectedNegative *FloatHistogram if c.expected != nil { expectedNegative = c.expected.Copy().Mul(-1) + // Mul(-1) sets the counter reset hint to + // GaugeType, but we want to retain the original + // counter reset hint for this test. + expectedNegative.CounterResetHint = c.expected.CounterResetHint } testFloatHistogramSub(t, c.in2, c.in1, expectedNegative, c.expErrMsg, c.expCounterResetCollision) }) diff --git a/promql/engine.go b/promql/engine.go index 31d910da9a..6603972c2a 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -3125,6 +3125,8 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram if err != nil { return 0, nil, false, err } + // The result must be marked as gauge. + res.CounterResetHint = histogram.GaugeType if counterResetCollision { err = annotations.NewHistogramCounterResetCollisionWarning(pos, annotations.HistogramSub) } @@ -3158,6 +3160,8 @@ type groupedAggregation struct { incompatibleHistograms bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets. groupAggrComplete bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group. incrementalMean bool // True after reverting to incremental calculation of the mean value. + counterResetSeen bool // Counter reset hint CounterReset seen. Currently only used for histogram samples. + notCounterResetSeen bool // Counter reset hint NotCounterReset seen. Currently only used for histogram samples. } // aggregation evaluates sum, avg, count, stdvar, stddev or quantile at one timestep on inputMatrix. @@ -3194,6 +3198,12 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix } else { group.histogramValue = h.Copy() group.hasHistogram = true + switch h.CounterResetHint { + case histogram.CounterReset: + group.counterResetSeen = true + case histogram.NotCounterReset: + group.notCounterResetSeen = true + } } case parser.STDVAR, parser.STDDEV: switch { @@ -3241,14 +3251,17 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix if h != nil { group.hasHistogram = true if group.histogramValue != nil { - _, counterResetCollision, err := group.histogramValue.Add(h) + switch h.CounterResetHint { + case histogram.CounterReset: + group.counterResetSeen = true + case histogram.NotCounterReset: + group.notCounterResetSeen = true + } + _, _, err := group.histogramValue.Add(h) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) group.incompatibleHistograms = true } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) - } } // Otherwise the aggregation contained floats // previously and will be invalid anyway. No @@ -3295,26 +3308,26 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix if h != nil { group.hasHistogram = true if group.histogramValue != nil { + switch h.CounterResetHint { + case histogram.CounterReset: + group.counterResetSeen = true + case histogram.NotCounterReset: + group.notCounterResetSeen = true + } left := h.Copy().Div(group.groupCount) right := group.histogramValue.Copy().Div(group.groupCount) - toAdd, counterResetCollision, err := left.Sub(right) + toAdd, _, err := left.Sub(right) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) group.incompatibleHistograms = true continue } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) - } - _, counterResetCollision, err = group.histogramValue.Add(toAdd) + _, _, err = group.histogramValue.Add(toAdd) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) group.incompatibleHistograms = true continue } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) - } } // Otherwise the aggregation contained floats // previously and will be invalid anyway. No @@ -3446,10 +3459,18 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix default: aggr.floatValue += aggr.floatKahanC } + default: // For other aggregations, we already have the right value. } + // This only is relevant for AVG and SUM with native histograms + // involved, but since those booleans aren't touched in other + // cases, we can just do it here in general. + if aggr.counterResetSeen && aggr.notCounterResetSeen { + annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) + } + ss := &outputMatrix[ri] addToSeries(ss, enh.Ts, aggr.floatValue, aggr.histogramValue, numSteps) ss.DropName = inputMatrix[ri].DropName diff --git a/promql/functions.go b/promql/functions.go index 6dfe9611f1..2d9d26dfb1 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -820,25 +820,38 @@ func funcAvgOverTime(_ []Vector, matrixVal Matrix, args parser.Expressions, enh // The passed values only contain histograms. var annos annotations.Annotations vec, err := aggrHistOverTime(matrixVal, enh, func(s Series) (*histogram.FloatHistogram, error) { + var counterResetSeen, notCounterResetSeen bool + + trackCounterReset := func(h *histogram.FloatHistogram) { + switch h.CounterResetHint { + case histogram.CounterReset: + counterResetSeen = true + case histogram.NotCounterReset: + notCounterResetSeen = true + } + } + + defer func() { + if counterResetSeen && notCounterResetSeen { + annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramAgg)) + } + }() + mean := s.Histograms[0].H.Copy() + trackCounterReset(mean) for i, h := range s.Histograms[1:] { + trackCounterReset(h.H) count := float64(i + 2) left := h.H.Copy().Div(count) right := mean.Copy().Div(count) - toAdd, counterResetCollision, err := left.Sub(right) + toAdd, _, err := left.Sub(right) if err != nil { return mean, err } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramSub)) - } - _, counterResetCollision, err = mean.Add(toAdd) + _, _, err = mean.Add(toAdd) if err != nil { return mean, err } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramAdd)) - } } return mean, nil }) @@ -1077,15 +1090,26 @@ func funcSumOverTime(_ []Vector, matrixVal Matrix, args parser.Expressions, enh // The passed values only contain histograms. var annos annotations.Annotations vec, err := aggrHistOverTime(matrixVal, enh, func(s Series) (*histogram.FloatHistogram, error) { + var counterResetSeen, notCounterResetSeen bool + + defer func() { + if counterResetSeen && notCounterResetSeen { + annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramAgg)) + } + }() + sum := s.Histograms[0].H.Copy() for _, h := range s.Histograms[1:] { - _, counterResetCollision, err := sum.Add(h.H) + switch h.H.CounterResetHint { + case histogram.CounterReset: + counterResetSeen = true + case histogram.NotCounterReset: + notCounterResetSeen = true + } + _, _, err := sum.Add(h.H) if err != nil { return sum, err } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args[0].PositionRange(), annotations.HistogramAdd)) - } } return sum, nil }) diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index d310984fa8..e0d6ae77f2 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -5603,7 +5603,6 @@ func TestParseHistogramSeries(t *testing.T) { Offset: 0, Length: 3, }}, - CounterResetHint: histogram.GaugeType, }, { Schema: 1, @@ -5612,7 +5611,6 @@ func TestParseHistogramSeries(t *testing.T) { Offset: 0, Length: 3, }}, - CounterResetHint: histogram.GaugeType, }, }, }, diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 0aac43e0ad..9ec222c17c 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -1198,7 +1198,7 @@ eval range from 0 to 12m step 6m avg(metric) clear -# Test incompatible custom bucket schemas. +# Test incompatible custom bucket boundaries. load 6m metric{series="1"} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} metric{series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} _ {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} @@ -1215,7 +1215,7 @@ eval range from 0 to 12m step 6m avg(metric) expect warn {} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} _ -# Test incompatible schemas with additional aggregation operators +# Test incompatible boundaries with additional aggregation operators eval range from 0 to 12m step 6m count(metric) {} 2 2 3 @@ -1235,7 +1235,7 @@ eval range from 0 to 12m step 6m limit_ratio(1, metric) metric{series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} _ {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} metric{series="3"} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} -# Test incompatible schemas with and/or +# Test incompatible boundaries with and/or eval range from 0 to 12m step 6m metric{series="1"} and ignoring(series) metric{series="2"} metric{series="1"} _ _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} @@ -1243,7 +1243,7 @@ eval range from 0 to 12m step 6m metric{series="1"} or ignoring(series) metric{s metric{series="1"} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} metric{series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} _ _ -# Test incompatible schemas with arithmetic binary operators +# Test incompatible boundaries with arithmetic binary operators eval range from 0 to 12m step 6m metric{series="2"} + ignoring (series) metric{series="3"} expect warn @@ -1252,7 +1252,7 @@ eval range from 0 to 12m step 6m metric{series="2"} - ignoring (series) metric{s clear -# Test incompatible schemas with comparison binary operators +# Test incompatible boundaries with comparison binary operators load 6m metric1 {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} metric2 {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} @@ -1552,26 +1552,11 @@ eval instant at 54m30s histogram_count(increase(metric[54m45s])) clear -# Test that `rate` adds warning when applied to an average of native histograms. +# Test counter reset hint adjustment in subtraction and aggregation, including _over_time. load 5m metric{id="1"} {{schema:0 sum:4 count:4 buckets:[1 2 1]}}x10 metric{id="2"} {{schema:0 sum:4 count:4 buckets:[1 2 1]}}x10 - -# Don't warn if there is only a single histogram. -eval instant at 5m rate(avg(metric{id="1"})[10m:]) - expect no_warn - {} {{counter_reset_hint:gauge}} - -# Rate warns if histogram is not a counter and avg sets gauge hint. -eval instant at 5m rate(avg(metric)[10m:]) - expect warn regex: this native histogram metric is not a counter: "" - {} {{counter_reset_hint:gauge}} - -# Rate warns if histogram is not a counter and avg_over_time sets gauge hint. -eval instant at 5m rate(avg_over_time(metric{id="1"}[10m])[5m:]) - expect warn regex: this native histogram metric is not a counter: "metric" - {id="1"} {{counter_reset_hint:gauge}} - + # Unary minus turns counters into gauges. eval instant at 5m -metric expect no_warn @@ -1579,8 +1564,152 @@ eval instant at 5m -metric {id="1"} {{count:-4 sum:-4 counter_reset_hint:gauge buckets:[-1 -2 -1]}} {id="2"} {{count:-4 sum:-4 counter_reset_hint:gauge buckets:[-1 -2 -1]}} +# Subtraction results in gauges, even if the result is not negative. +eval instant at 5m metric - 0.5 * metric + expect no_warn + expect no_info + {id="1"} {{count:2 sum:2 counter_reset_hint:gauge buckets:[0.5 1 0.5]}} + {id="2"} {{count:2 sum:2 counter_reset_hint:gauge buckets:[0.5 1 0.5]}} + +# Subtraction results in gauges, now with actually negtive result. +eval instant at 5m metric - 2 * metric + expect no_warn + expect no_info + {id="1"} {{count:-4 sum:-4 counter_reset_hint:gauge buckets:[-1 -2 -1]}} + {id="2"} {{count:-4 sum:-4 counter_reset_hint:gauge buckets:[-1 -2 -1]}} + +# sum and avg of counters yield a counter. +eval instant at 5m sum(metric) + expect no_warn + expect no_info + {} {{count:8 sum:8 counter_reset_hint:not_reset buckets:[2 4 2]}} + +eval instant at 5m avg(metric) + expect no_warn + expect no_info + {} {{count:4 sum:4 counter_reset_hint:not_reset buckets:[1 2 1]}} + clear +# Note that with all the series below, we never get counter_reset_hint:reset +# as a result because of of https://github.com/prometheus/prometheus/issues/15346 . +# Therefore, all the tests only look at the hints gauge, not_reset, and unknown. +load 1m + metric{type="gauge"} {{sum:4 count:4 counter_reset_hint:gauge buckets:[1 2 1]}}+{{sum:2 count:3 counter_reset_hint:gauge buckets:[1 1 1]}}x10 + metric{type="counter"} {{sum:6 count:5 buckets:[2 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x10 + metric{type="counter_with_reset"} {{sum:6 count:5 buckets:[2 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x5 {{sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x5 + mixed {{sum:6 count:5 buckets:[2 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x4 {{sum:4 count:4 counter_reset_hint:gauge buckets:[1 2 1]}} {{sum:6 count:5 buckets:[2 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x4 {{sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:3 buckets:[1 1 1]}}x5 + +# Mix of gauge and not_reset results in gauge. +eval instant at 3m sum(metric) + expect no_warn + expect no_info + {} {{count:41 sum:34 counter_reset_hint:gauge buckets:[14 15 12]}} + +eval instant at 3m avg(metric) + expect no_warn + expect no_info + {} {{count:13.666666666666668 sum:11.333333333333334 counter_reset_hint:gauge buckets:[4.666666666666667 5 4]}} + +eval instant at 5m sum_over_time(mixed[3m]) + expect no_warn + expect no_info + {} {{count:35 sum:30 counter_reset_hint:gauge buckets:[12 13 10]}} + +eval instant at 5m avg_over_time(mixed[3m]) + expect no_warn + expect no_info + {} {{count:11.666666666666666 sum:10 counter_reset_hint:gauge buckets:[4 4.333333333333334 3.333333333333333]}} + +# Mix of gauge, not_reset, and unknown results in gauge. +eval instant at 6m sum(metric) + expect no_warn + expect no_info + {} {{count:49 sum:38 counter_reset_hint:gauge buckets:[16 18 15]}} + +eval instant at 6m avg(metric) + expect no_warn + expect no_info + {} {{count:16.333333333333332 sum:12.666666666666666 counter_reset_hint:gauge buckets:[5.333333333333334 6 5]}} + +eval instant at 14m sum_over_time(mixed[10m]) + expect no_warn + expect no_info + {} {{count:93 sum:82 counter_reset_hint:gauge buckets:[31 36 26]}} + +eval instant at 14m avg_over_time(mixed[10m]) + expect no_warn + expect no_info + {} {{count:9.3 sum:8.2 counter_reset_hint:gauge buckets:[3.1 3.6 2.6]}} + +# Only not_reset results in not_reset. +eval instant at 3m sum(metric{type=~"counter.*"}) + expect no_warn + expect no_info + {} {{count:28 sum:24 counter_reset_hint:not_reset buckets:[10 10 8]}} + +eval instant at 3m avg(metric{type=~"counter.*"}) + expect no_warn + expect no_info + {} {{count:14 sum:12 counter_reset_hint:not_reset buckets:[5 5 4]}} + +eval instant at 3m sum_over_time(mixed[3m]) + expect no_warn + expect no_info + {} {{count:33 sum:30 counter_reset_hint:not_reset buckets:[12 12 9]}} + +eval instant at 3m avg_over_time(mixed[3m]) + expect no_warn + expect no_info + {} {{count:11 sum:10 counter_reset_hint:not_reset buckets:[4 4 3]}} + +# Mix of not_reset and unknown results in unknown. +eval instant at 6m sum(metric{type=~"counter.*"}) + expect no_warn + expect no_info + {} {{count:27 sum:22 counter_reset_hint:unknown buckets:[9 10 8]}} + +eval instant at 6m avg(metric{type=~"counter.*"}) + expect no_warn + expect no_info + {} {{count:13.5 sum:11 counter_reset_hint:unknown buckets:[4.5 5 4]}} + +eval instant at 15m sum_over_time(mixed[10m]) + expect no_warn + expect no_info + {} {{count:105 sum:90 counter_reset_hint:unknown buckets:[35 40 30]}} + +eval instant at 15m avg_over_time(mixed[10m]) + expect no_warn + expect no_info + {} {{count:10.5 sum:9 counter_reset_hint:unknown buckets:[3.5 4 3]}} + +# To finally test the warning about a direct counter reset collisions, we can +# utilize the HistogramStatsIterator (by calling histogram_count()). This +# special iterator does counter reset detection on the fly and therefore +# is able to create the counter reset hint "reset", which we can then mix +# with the "not_reset" hint in the test and provoke the warning. +eval instant at 6m histogram_count(sum(metric)) + expect warn msg:PromQL warning: conflicting counter resets during histogram aggregation + expect no_info + {} 49 + +eval instant at 6m histogram_count(avg(metric)) + expect warn msg:PromQL warning: conflicting counter resets during histogram aggregation + expect no_info + {} 16.333333333333332 + +eval instant at 14m histogram_count(sum_over_time(mixed[10m])) + expect warn msg:PromQL warning: conflicting counter resets during histogram aggregation + expect no_info + {} 93 + +eval instant at 14m histogram_count(avg_over_time(mixed[10m])) + expect warn msg:PromQL warning: conflicting counter resets during histogram aggregation + expect no_info + {} 9.3 + + # Test histogram_quantile annotations. load 1m nonmonotonic_bucket{le="0.1"} 0+2x10 @@ -1636,7 +1765,7 @@ load 1m # Trigger an annotation about conflicting counter resets by going through the # HistogramStatsIterator, which creates counter reset hints on the fly. eval instant at 5m histogram_count(sum_over_time(reset{timing="late"}[5m])) - expect warn msg: PromQL warning: conflicting counter resets during histogram addition + expect warn msg: PromQL warning: conflicting counter resets during histogram aggregation {timing="late"} 7 eval instant at 5m histogram_count(sum(reset))