From 03588328d229098e5428467f88afbe1db49d8028 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Wed, 27 Aug 2025 18:19:30 +0200 Subject: [PATCH 1/3] promqltest: Test for counter reset conflict warnings This is an attempt to make sure that we are not accidentally warning about conflicting counter resets in rate calculation, see https://github.com/prometheus/prometheus/pull/17051#issuecomment-3226503416 . This is done by being more explicit about the warn expectation. However, as long as https://github.com/prometheus/prometheus/issues/15346 is not addressed, we won't be able to trigger the annotation this way anyway. However, we can play a trick, by wrapping a suitable expression in `histogram_count` or `histogram_sum`, which will invoke the `HistogramStatsIterator`, which in turn creates counter reset hints on the fly. So this commit also adds tests with that, both for absence of an annotation with `rate` and presence of an annotation with `sum_over_time`. Signed-off-by: beorn7 test tbs Signed-off-by: beorn7 --- promql/promqltest/testdata/functions.test | 20 ++-- .../testdata/native_histograms.test | 112 ++++++++++++++---- 2 files changed, 98 insertions(+), 34 deletions(-) diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test index b1eda909f8..559f5c9197 100644 --- a/promql/promqltest/testdata/functions.test +++ b/promql/promqltest/testdata/functions.test @@ -254,15 +254,15 @@ eval instant at 20m irate(http_requests_histogram{path="/b"}[6m]) expect no_warn eval instant at 20m irate(http_requests_histogram{path="/c"}[20m]) - expect warn + expect warn msg: PromQL warning: this native histogram metric is not a counter: "http_requests_histogram" {path="/c"} {{sum:0.01 count:0.01 counter_reset_hint:gauge}} eval instant at 20m irate(http_requests_histogram{path="/d"}[20m]) - expect warn + expect warn msg: PromQL warning: this native histogram metric is not a counter: "http_requests_histogram" {path="/d"} {{sum:0.01 count:0.01 counter_reset_hint:gauge}} eval instant at 20m irate(http_requests_histogram{path="/e"}[20m]) - expect warn + expect warn msg: PromQL warning: encountered a mix of histograms and floats for metric name "http_requests_histogram" eval instant at 20m irate(http_requests_histogram{path="/f"}[20m]) expect no_warn @@ -293,12 +293,12 @@ eval instant at 20m delta(http_requests_gauge[20m]) # delta emits warn annotation for non-gauge histogram types. eval instant at 20m delta(http_requests_counter[20m]) - expect warn + expect warn msg: PromQL warning: this native histogram metric is not a gauge: "http_requests_counter" {path="/foo"} {{schema:0 sum:4 count:8 buckets:[4 4 4]}} # delta emits warn annotation for mix of histogram and floats. eval instant at 20m delta(http_requests_mix[20m]) - expect warn + expect warn msg: PromQL warning: encountered a mix of histograms and floats for metric name "http_requests_mix" #empty clear @@ -337,21 +337,21 @@ eval instant at 20m idelta(http_requests_histogram{path="/b"}[6m]) expect no_warn eval instant at 20m idelta(http_requests_histogram{path="/c"}[20m]) - expect warn + expect warn msg: PromQL warning: this native histogram metric is not a gauge: "http_requests_histogram" {path="/c"} {{sum:1 count:1 counter_reset_hint:gauge}} eval instant at 20m idelta(http_requests_histogram{path="/d"}[20m]) - expect warn + expect warn msg: PromQL warning: this native histogram metric is not a gauge: "http_requests_histogram" {path="/d"} {{sum:1 count:1 counter_reset_hint:gauge}} eval instant at 20m idelta(http_requests_histogram{path="/e"}[20m]) - expect warn + expect warn msg: PromQL warning: encountered a mix of histograms and floats for metric name "http_requests_histogram" eval instant at 20m idelta(http_requests_histogram{path="/f"}[20m]) - expect warn + expect warn msg: PromQL warning: vector contains a mix of histograms with exponential and custom buckets schemas for metric name "http_requests_histogram" eval instant at 20m idelta(http_requests_histogram{path="/g"}[20m]) - expect warn + expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "http_requests_histogram" clear diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 52ee853802..0958b8951e 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -101,54 +101,56 @@ clear # with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket # positions for upper limits <1 (tending toward zero), where offset:-1 is the bucket to the left of offset:0. load 5m - incr_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10 + incr_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10 eval instant at 5m histogram_count(incr_histogram) - {} 5 + {} 5 eval instant at 5m histogram_sum(incr_histogram) - {} 6 + {} 6 eval instant at 5m histogram_avg(incr_histogram) - {} 1.2 + {} 1.2 # We expect 3/5ths of the values to fall in the range 1 < x <= 2. eval instant at 5m histogram_fraction(1, 2, incr_histogram) - {} 0.6 + {} 0.6 # See explanation for exponential interpolation above. eval instant at 5m histogram_quantile(0.5, incr_histogram) - {} 1.414213562373095 + {} 1.414213562373095 eval instant at 50m incr_histogram - {__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}} + {__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}} eval instant at 50m histogram_count(incr_histogram) - {} 14 + {} 14 eval instant at 50m histogram_sum(incr_histogram) - {} 24 + {} 24 eval instant at 50m histogram_avg(incr_histogram) {} 1.7142857142857142 # We expect 12/14ths of the values to fall in the range 1 < x <= 2. eval instant at 50m histogram_fraction(1, 2, incr_histogram) - {} 0.8571428571428571 + {} 0.8571428571428571 # See explanation for exponential interpolation above. eval instant at 50m histogram_quantile(0.5, incr_histogram) - {} 1.414213562373095 + {} 1.414213562373095 # Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum. eval instant at 50m rate(incr_histogram[10m]) + expect no_warn {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} # Calculate the 50th percentile of observations over the last 10m. # See explanation for exponential interpolation above. eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m])) - {} 1.414213562373095 + expect no_warn + {} 1.414213562373095 clear @@ -291,9 +293,11 @@ load 15s histogram_rate {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 eval instant at 5m rate(histogram_rate[45s]) + expect no_warn {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}} eval range from 5m to 5m30s step 30s rate(histogram_rate[45s]) + expect no_warn {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}x1 clear @@ -1044,13 +1048,16 @@ load 5m reset_in_bucket {{schema:0 count:4 sum:5 buckets:[1 2 1]}} {{schema:0 count:5 sum:6 buckets:[1 1 3]}} {{schema:0 count:6 sum:7 buckets:[1 2 3]}} eval instant at 10m increase(reset_in_bucket[15m]) + expect no_warn {} {{count:9 sum:10.5 buckets:[1.5 3 4.5]}} # The following two test the "fast path" where only sum and count is decoded. eval instant at 10m histogram_count(increase(reset_in_bucket[15m])) + expect no_warn {} 9 eval instant at 10m histogram_sum(increase(reset_in_bucket[15m])) + expect no_warn {} 10.5 clear @@ -1076,12 +1083,12 @@ load 30s # Test the case where we only have two points for rate eval instant at 30s rate(some_metric[1m]) - expect warn + expect warn msg: PromQL warning: this native histogram metric is not a counter: "some_metric" {} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}} # Test the case where we have more than two points for rate eval instant at 1m rate(some_metric[1m30s]) - expect warn + expect warn msg: PromQL warning: this native histogram metric is not a counter: "some_metric" {} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}} clear @@ -1092,24 +1099,26 @@ load 30s # Start and end with exponential, with custom in the middle. eval instant at 1m rate(some_metric[1m30s]) - expect warn + expect warn msg: PromQL warning: vector contains a mix of histograms with exponential and custom buckets schemas for metric name "some_metric" # Should produce no results. # Start and end with custom, with exponential in the middle. eval instant at 1m30s rate(some_metric[1m30s]) - expect warn + expect warn msg: PromQL warning: vector contains a mix of histograms with exponential and custom buckets schemas for metric name "some_metric" # Should produce no results. # Start with custom, end with exponential. Return the exponential histogram divided by 48. # (The 1st sample is the NHCB with count:1. It is mostly ignored with the exception of the # count, which means the rate calculation extrapolates until the count hits 0.) eval instant at 1m rate(some_metric[1m]) + expect no_warn {} {{count:0.08333333333333333 sum:0.10416666666666666 counter_reset_hint:gauge buckets:[0.020833333333333332 0.041666666666666664 0.020833333333333332]}} # Start with exponential, end with custom. Return the custom buckets histogram divided by 30. # (With the 2nd sample having a count of 1, the extrapolation to zero lands exactly at the # left boundary of the range, so no extrapolation limitation needed.) eval instant at 30s rate(some_metric[1m]) + expect no_warn {} {{schema:-53 sum:0.03333333333333333 count:0.03333333333333333 custom_values:[5 10] buckets:[0.03333333333333333]}} clear @@ -1121,41 +1130,50 @@ load 1m # There is no change to the bucket count over time, thus rate is 0 in each bucket. # However native histograms do not represent empty buckets, so here the zeros are implicit. eval instant at 5m rate(const_histogram[5m]) + expect no_warn {} {{schema:0 sum:0 count:0}} # Zero buckets mean no observations, thus the denominator in the average is 0 # leading to 0/0, which is NaN. eval instant at 5m histogram_avg(rate(const_histogram[5m])) + expect no_warn {} NaN # Zero buckets mean no observations, so count is 0. eval instant at 5m histogram_count(rate(const_histogram[5m])) + expect no_warn {} 0.0 # Zero buckets mean no observations and empty histogram has a sum of 0 by definition. eval instant at 5m histogram_sum(rate(const_histogram[5m])) + expect no_warn {} 0.0 # Zero buckets mean no observations, thus the denominator in the fraction is 0, # leading to 0/0, which is NaN. eval instant at 5m histogram_fraction(0.0, 1.0, rate(const_histogram[5m])) + expect no_warn {} NaN # Workaround to calculate the observation count corresponding to NaN fraction. eval instant at 5m histogram_count(rate(const_histogram[5m])) == 0.0 or histogram_fraction(0.0, 1.0, rate(const_histogram[5m])) * histogram_count(rate(const_histogram[5m])) + expect no_warn {} 0.0 # Zero buckets mean no observations, so there is no value that observations fall below, # which means that any quantile is a NaN. eval instant at 5m histogram_quantile(1.0, rate(const_histogram[5m])) + expect no_warn {} NaN # Zero buckets mean no observations, so there is no standard deviation. eval instant at 5m histogram_stddev(rate(const_histogram[5m])) + expect no_warn {} NaN # Zero buckets mean no observations, so there is no standard variance. eval instant at 5m histogram_stdvar(rate(const_histogram[5m])) + expect no_warn {} NaN clear @@ -1259,10 +1277,10 @@ load 6m # while the 3rd one has different ones. eval instant at 12m sum_over_time(nhcb_metric[13m]) - expect warn + expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric" eval instant at 12m avg_over_time(nhcb_metric[13m]) - expect warn + expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric" eval instant at 12m last_over_time(nhcb_metric[13m]) expect no_warn @@ -1281,13 +1299,13 @@ eval instant at 12m changes(nhcb_metric[13m]) {} 1 eval instant at 12m delta(nhcb_metric[13m]) - expect warn + expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric" eval instant at 12m increase(nhcb_metric[13m]) - expect warn + expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric" eval instant at 12m rate(nhcb_metric[13m]) - expect warn + expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric" eval instant at 12m resets(nhcb_metric[13m]) expect no_warn @@ -1299,10 +1317,10 @@ eval instant at 12m resets(nhcb_metric[13m]) # otherwise. eval instant at 18m sum_over_time(nhcb_metric[13m]) - expect warn + expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric" eval instant at 18m avg_over_time(nhcb_metric[13m]) - expect warn + expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric" eval instant at 18m last_over_time(nhcb_metric[13m]) expect no_warn @@ -1321,7 +1339,7 @@ eval instant at 18m changes(nhcb_metric[13m]) {} 1 eval instant at 18m delta(nhcb_metric[13m]) - expect warn + expect warn msg: PromQL warning: vector contains histograms with incompatible custom buckets for metric name "nhcb_metric" eval instant at 18m increase(nhcb_metric[13m]) expect no_warn @@ -1485,6 +1503,7 @@ load 1m # Note that the 2nd bucket has an exaggerated increase of 2479.939393939394 (although # it has a value of only 2475 at the end of the range). eval instant at 55m increase(metric[90m]) + expect no_warn {type="histogram"} {{count:2490 sum:50.303030303030305 counter_reset_hint:gauge buckets:[10.06060606060606 2479.939393939394]}} {type="counter"} 2490 @@ -1492,6 +1511,7 @@ eval instant at 55m increase(metric[90m]) # The 2nd bucket again has an exaggerated increase, but it is less obvious because of the # right-side extrapolation. eval instant at 54m30s increase(metric[90m]) + expect no_warn {type="histogram"} {{count:2512.9166666666665 sum:50.76599326599326 counter_reset_hint:gauge buckets:[10.153198653198652 2502.7634680134674]}} {type="counter"} 2512.9166666666665 @@ -1501,6 +1521,7 @@ eval instant at 54m30s increase(metric[90m]) # easily here because the last sample in the range coincides with the boundary, where the 2nd bucket has # a value of 2475 but has increased by 2476.2045454545455 according to the returned result. eval instant at 55m increase(metric[55m15s]) + expect no_warn {type="histogram"} {{count:2486.25 sum:50.227272727272734 counter_reset_hint:gauge buckets:[10.045454545454547 2476.2045454545455]}} {type="counter"} 2486.25 @@ -1508,20 +1529,25 @@ eval instant at 55m increase(metric[55m15s]) # This means no change of extrapolation is required for the histogram count (and neither for the float counter), # however, the 2nd bucket's extrapolation will reach zero within the range. eval instant at 54m30s increase(metric[54m45s]) + expect no_warn {type="histogram"} {{count:2509.375 sum:50.69444444444444 counter_reset_hint:gauge buckets:[10.13888888888889 2499.236111111111]}} {type="counter"} 2509.375 # Try the same, but now extract just the histogram count via `histogram_count`. eval instant at 55m histogram_count(increase(metric[90m])) + expect no_warn {type="histogram"} 2490 eval instant at 54m30s histogram_count(increase(metric[90m])) + expect no_warn {type="histogram"} 2512.9166666666665 eval instant at 55m histogram_count(increase(metric[55m15s])) + expect no_warn {type="histogram"} 2486.25 eval instant at 54m30s histogram_count(increase(metric[54m45s])) + expect no_warn {type="histogram"} 2509.375 clear @@ -1581,3 +1607,41 @@ eval instant at 1m histogram_quantile(0.5, myHistogram2) eval instant at 1m histogram_quantile(0.5, mixedHistogram) expect warn msg: PromQL warning: vector contains a mix of classic and native histograms for metric name "mixedHistogram" + + +clear + +load 1m + reset{timing="late"} {{schema:0 sum:1 count:0 buckets:[1 1 1]}} {{schema:0 sum:1 count:2 buckets:[1 1 1]}} {{schema:0 sum:1 count:3 buckets:[1 1 1]}} {{schema:0 sum:1 count:2 buckets:[1 1 1]}} + reset{timing="early"} {{schema:0 sum:1 count:3 buckets:[1 1 1]}} {{schema:0 sum:1 count:2 buckets:[1 1 1]}} {{schema:0 sum:1 count:2 buckets:[1 1 1]}} {{schema:0 sum:1 count:3 buckets:[1 1 1]}} + +# Trigger an annotation about conflicting counter resets by going through the +# HistogramStatsIterator, which creates counter reset hints on the fly. +eval instant at 5m 1*histogram_count(sum_over_time(reset{timing="late"}[5m])) + expect warn msg: PromQL warning: conflicting counter resets during histogram addition + {timing="late"} 7 + +eval instant at 5m 1*histogram_count(sum(reset)) + expect warn msg: PromQL warning: conflicting counter resets during histogram aggregation + {} 5 + +eval instant at 5m 1*histogram_count(avg(reset)) + expect warn msg: PromQL warning: conflicting counter resets during histogram aggregation + {} 2.5 + +# No annotation with the right timing. +eval instant at 30s 1*histogram_count(sum(reset)) + expect no_warn + {} 3 + +eval instant at 30s 1*histogram_count(avg(reset)) + expect no_warn + {} 1.5 + +# Ensure that the annotation does not happen with rate. +eval instant at 5m 1*histogram_count(rate(reset{timing="late"}[5m])) + expect no_warn + {timing="late"} 0.0175 + +# NOTE: The `1*` part in the expressions above should not be needed. +# It can be removed once https://github.com/prometheus/prometheus/pull/17127 is merged. From 262832029210788c3714d1ff4eb9cdeeb449fdce Mon Sep 17 00:00:00 2001 From: beorn7 Date: Wed, 27 Aug 2025 18:39:33 +0200 Subject: [PATCH 2/3] promql: Fix when to emit a `HistogramCounterResetCollisionWarning` So far, we emitted a `HistogramCounterResetCollisionWarning` when encountering conflicting counter resets in the calculation of (i)rate and friends. We even tested for that. However, in the rate calculation, we are not interested in those collisions. They are actually expected. On the other hand, we did not warn about those collisions when doing a `sum` aggregation, where such a warning would be appropriate. This commit removes the warning in the former case and adds it in the latter. Sadly, we cannot really test this as we still remove the counter reset hint for the first sample in a chunk. (And that's the only sample where we could get a `NotCounterReset` hint.) Signed-off-by: beorn7 --- promql/engine.go | 15 +++++++-- promql/functions.go | 24 +++++++------- promql/functions_internal_test.go | 52 ------------------------------- util/annotations/annotations.go | 3 +- 4 files changed, 25 insertions(+), 69 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 92bedc9ac3..414fecc5b8 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -3081,11 +3081,14 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix if h != nil { group.hasHistogram = true if group.histogramValue != nil { - _, _, err := group.histogramValue.Add(h) + _, counterResetCollision, err := group.histogramValue.Add(h) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) group.incompatibleHistograms = true } + if counterResetCollision { + annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) + } } // Otherwise the aggregation contained floats // previously and will be invalid anyway. No @@ -3134,18 +3137,24 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix if group.histogramValue != nil { left := h.Copy().Div(group.groupCount) right := group.histogramValue.Copy().Div(group.groupCount) - toAdd, _, err := left.Sub(right) + toAdd, counterResetCollision, err := left.Sub(right) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) group.incompatibleHistograms = true continue } - _, _, err = group.histogramValue.Add(toAdd) + if counterResetCollision { + annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) + } + _, counterResetCollision, err = group.histogramValue.Add(toAdd) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) group.incompatibleHistograms = true continue } + if counterResetCollision { + annos.Add(annotations.NewHistogramCounterResetCollisionWarning(e.Expr.PositionRange(), annotations.HistogramAgg)) + } } // Otherwise the aggregation contained floats // previously and will be invalid anyway. No diff --git a/promql/functions.go b/promql/functions.go index 01b37603e4..675403f43a 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -251,7 +251,10 @@ func histogramRate(points []HPoint, isCounter bool, metricName string, pos posra } h := last.CopyToSchema(minSchema) - _, counterResetCollision, err := h.Sub(prev) + // This subtraction may deliberately include conflicting counter resets. + // Counter resets are treated explicitly in this function, so the + // information about conflicting counter resets is ignored here. + _, _, err := h.Sub(prev) if err != nil { if errors.Is(err, histogram.ErrHistogramsIncompatibleSchema) { return nil, annotations.New().Add(annotations.NewMixedExponentialCustomHistogramsWarning(metricName, pos)) @@ -260,16 +263,13 @@ func histogramRate(points []HPoint, isCounter bool, metricName string, pos posra } } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(pos, annotations.HistogramSub)) - } - if isCounter { // Second iteration to deal with counter resets. for _, currPoint := range points[1:] { curr := currPoint.H if curr.DetectReset(prev) { - _, counterResetCollision, err := h.Add(prev) + // Counter reset conflict ignored here for the same reason as above. + _, _, err := h.Add(prev) if err != nil { if errors.Is(err, histogram.ErrHistogramsIncompatibleSchema) { return nil, annotations.New().Add(annotations.NewMixedExponentialCustomHistogramsWarning(metricName, pos)) @@ -277,9 +277,6 @@ func histogramRate(points []HPoint, isCounter bool, metricName string, pos posra return nil, annotations.New().Add(annotations.NewIncompatibleCustomBucketsHistogramsWarning(metricName, pos)) } } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(pos, annotations.HistogramAdd)) - } } prev = curr } @@ -393,15 +390,16 @@ func instantValue(vals Matrix, args parser.Expressions, out Vector, isRate bool) annos.Add(annotations.NewNativeHistogramNotGaugeWarning(metricName, args.PositionRange())) } if !isRate || !ss[1].H.DetectReset(ss[0].H) { - _, counterResetCollision, err := resultSample.H.Sub(ss[0].H) + // This subtraction may deliberately include conflicting + // counter resets. Counter resets are treated explicitly + // in this function, so the information about + // conflicting counter resets is ignored here. + _, _, err := resultSample.H.Sub(ss[0].H) if errors.Is(err, histogram.ErrHistogramsIncompatibleSchema) { return out, annos.Add(annotations.NewMixedExponentialCustomHistogramsWarning(metricName, args.PositionRange())) } else if errors.Is(err, histogram.ErrHistogramsIncompatibleBounds) { return out, annos.Add(annotations.NewIncompatibleCustomBucketsHistogramsWarning(metricName, args.PositionRange())) } - if counterResetCollision { - annos.Add(annotations.NewHistogramCounterResetCollisionWarning(args.PositionRange(), annotations.HistogramSub)) - } } resultSample.H.CounterResetHint = histogram.GaugeType resultSample.H.Compact(0) diff --git a/promql/functions_internal_test.go b/promql/functions_internal_test.go index fc7e604dc0..85c6cd7973 100644 --- a/promql/functions_internal_test.go +++ b/promql/functions_internal_test.go @@ -19,10 +19,6 @@ import ( "testing" "github.com/stretchr/testify/require" - - "github.com/prometheus/prometheus/model/histogram" - "github.com/prometheus/prometheus/promql/parser/posrange" - "github.com/prometheus/prometheus/util/annotations" ) func TestKahanSumInc(t *testing.T) { @@ -83,51 +79,3 @@ func TestKahanSumInc(t *testing.T) { }) } } - -func newAnnotations(errs ...error) annotations.Annotations { - var annos annotations.Annotations - for _, err := range errs { - annos.Add(err) - } - return annos -} - -// TODO(juliusmh): this test ensures histogramRate sets correct annotations. -// This test can be removed in favor of a user facing promqltest when -// https://github.com/prometheus/prometheus/issues/15346 is resolved. -func TestHistogramRate_Annotations(t *testing.T) { - const metricName = "test" - pos := posrange.PositionRange{} - for _, tc := range []struct { - name string - points []HPoint - wantAnnotations annotations.Annotations - }{ - { - name: "empty histograms", - points: []HPoint{ - {H: &histogram.FloatHistogram{}}, - {H: &histogram.FloatHistogram{}}, - }, - wantAnnotations: newAnnotations( - annotations.NewNativeHistogramNotGaugeWarning(metricName, pos), - ), - }, - { - name: "counter reset hint collision", - points: []HPoint{ - {H: &histogram.FloatHistogram{CounterResetHint: histogram.NotCounterReset}}, - {H: &histogram.FloatHistogram{CounterResetHint: histogram.CounterReset}}, - }, - wantAnnotations: newAnnotations( - annotations.NewNativeHistogramNotGaugeWarning(metricName, pos), - annotations.NewHistogramCounterResetCollisionWarning(pos, annotations.HistogramSub), - ), - }, - } { - t.Run(tc.name, func(t *testing.T) { - _, annos := histogramRate(tc.points, false, metricName, pos) - require.Equal(t, tc.wantAnnotations, annos) - }) - } -} diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go index 37f983dde7..bbd3b0927f 100644 --- a/util/annotations/annotations.go +++ b/util/annotations/annotations.go @@ -362,13 +362,14 @@ type HistogramOperation string const ( HistogramAdd HistogramOperation = "addition" HistogramSub HistogramOperation = "subtraction" + HistogramAgg HistogramOperation = "aggregation" ) // NewHistogramCounterResetCollisionWarning is used when two counter histograms are added or subtracted where one has // a CounterReset hint and the other has NotCounterReset. func NewHistogramCounterResetCollisionWarning(pos posrange.PositionRange, operation HistogramOperation) error { switch operation { - case HistogramAdd, HistogramSub: + case HistogramAdd, HistogramSub, HistogramAgg: default: operation = "unknown operation" } From 53a720eed5a13cf2f9b7f841a29975aa5737b734 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 28 Aug 2025 15:00:25 +0200 Subject: [PATCH 3/3] promql: Minor improvements for HistogramStatsIterator - Add a code comment about a counter reset edge case (which is hopefully not relevant in practice). - Rename the receiver from `f` to `hsi`. (`f` seemed like completely off as a name. `i` or `it` might have worked, too, but I ended up with `hsi` as the easiest for the reader.) Signed-off-by: beorn7 --- promql/histogram_stats_iterator.go | 110 ++++++++++++++++------------- 1 file changed, 59 insertions(+), 51 deletions(-) diff --git a/promql/histogram_stats_iterator.go b/promql/histogram_stats_iterator.go index cbc717cac0..f5224825d3 100644 --- a/promql/histogram_stats_iterator.go +++ b/promql/histogram_stats_iterator.go @@ -46,127 +46,135 @@ func NewHistogramStatsIterator(it chunkenc.Iterator) *HistogramStatsIterator { // Reset resets this iterator for use with a new underlying iterator, reusing // objects already allocated where possible. -func (f *HistogramStatsIterator) Reset(it chunkenc.Iterator) { - f.Iterator = it - f.currentSeriesRead = false +func (hsi *HistogramStatsIterator) Reset(it chunkenc.Iterator) { + hsi.Iterator = it + hsi.currentSeriesRead = false } // AtHistogram returns the next timestamp/histogram pair. The counter reset // detection is guaranteed to be correct only when the caller does not switch // between AtHistogram and AtFloatHistogram calls. -func (f *HistogramStatsIterator) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) { +func (hsi *HistogramStatsIterator) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) { var t int64 - t, f.currentH = f.Iterator.AtHistogram(f.currentH) - if value.IsStaleNaN(f.currentH.Sum) { - h = &histogram.Histogram{Sum: f.currentH.Sum} + t, hsi.currentH = hsi.Iterator.AtHistogram(hsi.currentH) + if value.IsStaleNaN(hsi.currentH.Sum) { + h = &histogram.Histogram{Sum: hsi.currentH.Sum} return t, h } if h == nil { h = &histogram.Histogram{ - CounterResetHint: f.getResetHint(f.currentH), - Count: f.currentH.Count, - Sum: f.currentH.Sum, + CounterResetHint: hsi.getResetHint(hsi.currentH), + Count: hsi.currentH.Count, + Sum: hsi.currentH.Sum, } - f.setLastH(f.currentH) + hsi.setLastH(hsi.currentH) return t, h } returnValue := histogram.Histogram{ - CounterResetHint: f.getResetHint(f.currentH), - Count: f.currentH.Count, - Sum: f.currentH.Sum, + CounterResetHint: hsi.getResetHint(hsi.currentH), + Count: hsi.currentH.Count, + Sum: hsi.currentH.Sum, } returnValue.CopyTo(h) - f.setLastH(f.currentH) + hsi.setLastH(hsi.currentH) return t, h } // AtFloatHistogram returns the next timestamp/float histogram pair. The counter // reset detection is guaranteed to be correct only when the caller does not // switch between AtHistogram and AtFloatHistogram calls. -func (f *HistogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { +func (hsi *HistogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { var t int64 - t, f.currentFH = f.Iterator.AtFloatHistogram(f.currentFH) - if value.IsStaleNaN(f.currentFH.Sum) { - return t, &histogram.FloatHistogram{Sum: f.currentFH.Sum} + t, hsi.currentFH = hsi.Iterator.AtFloatHistogram(hsi.currentFH) + if value.IsStaleNaN(hsi.currentFH.Sum) { + return t, &histogram.FloatHistogram{Sum: hsi.currentFH.Sum} } if fh == nil { fh = &histogram.FloatHistogram{ - CounterResetHint: f.getFloatResetHint(f.currentFH.CounterResetHint), - Count: f.currentFH.Count, - Sum: f.currentFH.Sum, + CounterResetHint: hsi.getFloatResetHint(hsi.currentFH.CounterResetHint), + Count: hsi.currentFH.Count, + Sum: hsi.currentFH.Sum, } - f.setLastFH(f.currentFH) + hsi.setLastFH(hsi.currentFH) return t, fh } returnValue := histogram.FloatHistogram{ - CounterResetHint: f.getFloatResetHint(f.currentFH.CounterResetHint), - Count: f.currentFH.Count, - Sum: f.currentFH.Sum, + CounterResetHint: hsi.getFloatResetHint(hsi.currentFH.CounterResetHint), + Count: hsi.currentFH.Count, + Sum: hsi.currentFH.Sum, } returnValue.CopyTo(fh) - f.setLastFH(f.currentFH) + hsi.setLastFH(hsi.currentFH) return t, fh } -func (f *HistogramStatsIterator) setLastH(h *histogram.Histogram) { - f.lastFH = nil - if f.lastH == nil { - f.lastH = h.Copy() +func (hsi *HistogramStatsIterator) setLastH(h *histogram.Histogram) { + hsi.lastFH = nil + if hsi.lastH == nil { + hsi.lastH = h.Copy() } else { - h.CopyTo(f.lastH) + h.CopyTo(hsi.lastH) } - f.currentSeriesRead = true + hsi.currentSeriesRead = true } -func (f *HistogramStatsIterator) setLastFH(fh *histogram.FloatHistogram) { - f.lastH = nil - if f.lastFH == nil { - f.lastFH = fh.Copy() +func (hsi *HistogramStatsIterator) setLastFH(fh *histogram.FloatHistogram) { + hsi.lastH = nil + if hsi.lastFH == nil { + hsi.lastFH = fh.Copy() } else { - fh.CopyTo(f.lastFH) + fh.CopyTo(hsi.lastFH) } - f.currentSeriesRead = true + hsi.currentSeriesRead = true } -func (f *HistogramStatsIterator) getFloatResetHint(hint histogram.CounterResetHint) histogram.CounterResetHint { +func (hsi *HistogramStatsIterator) getFloatResetHint(hint histogram.CounterResetHint) histogram.CounterResetHint { if hint != histogram.UnknownCounterReset { return hint } - prevFH := f.lastFH - if prevFH == nil || !f.currentSeriesRead { - if f.lastH == nil || !f.currentSeriesRead { + prevFH := hsi.lastFH + if prevFH == nil || !hsi.currentSeriesRead { + if hsi.lastH == nil || !hsi.currentSeriesRead { // We don't know if there's a counter reset. return histogram.UnknownCounterReset } - prevFH = f.lastH.ToFloat(nil) + prevFH = hsi.lastH.ToFloat(nil) } - if f.currentFH.DetectReset(prevFH) { + if hsi.currentFH.DetectReset(prevFH) { return histogram.CounterReset } return histogram.NotCounterReset } -func (f *HistogramStatsIterator) getResetHint(h *histogram.Histogram) histogram.CounterResetHint { +func (hsi *HistogramStatsIterator) getResetHint(h *histogram.Histogram) histogram.CounterResetHint { if h.CounterResetHint != histogram.UnknownCounterReset { return h.CounterResetHint } var prevFH *histogram.FloatHistogram - if f.lastH == nil || !f.currentSeriesRead { - if f.lastFH == nil || !f.currentSeriesRead { - // We don't know if there's a counter reset. + if hsi.lastH == nil || !hsi.currentSeriesRead { + if hsi.lastFH == nil || !hsi.currentSeriesRead { + // We don't know if there's a counter reset. Note that + // this generally will trigger an explicit counter reset + // detection by the PromQL engine, which in turn isn't + // as reliable in this case because the PromQL engine + // will not see the buckets. However, we can assume that + // in cases where the counter reset detection is + // relevant, an iteration through the series has + // happened, and therefore we do not end up here in the + // first place. return histogram.UnknownCounterReset } - prevFH = f.lastFH + prevFH = hsi.lastFH } else { - prevFH = f.lastH.ToFloat(nil) + prevFH = hsi.lastH.ToFloat(nil) } fh := h.ToFloat(nil) if fh.DetectReset(prevFH) {