From e894d7a271f85ee0be0d7442f6dcd0b0ca208acb Mon Sep 17 00:00:00 2001 From: aviralgarg05 Date: Sat, 29 Nov 2025 17:15:59 +0530 Subject: [PATCH 01/46] promqltest: Add optional counter reset hint comparison for native histograms This commit implements counter reset hint comparison in the promqltest framework to address issue #17615. Previously, while test definitions could specify a counter_reset_hint in expected native histogram results, the framework did not actually compare this hint between expected and actual results. The implementation adds optional comparison logic to the compareNativeHistogram function: - If the expected histogram has UnknownCounterReset (the default), the hint is not compared (meaning "don't care") - If the expected histogram explicitly specifies CounterReset, NotCounterReset, or GaugeType, it is verified against the actual histogram's hint This allows tests to verify that PromQL functions correctly set or preserve counter reset hints while maintaining backward compatibility with existing tests that don't specify explicit hints. Fixes #17615 Signed-off-by: aviralgarg05 --- promql/promqltest/test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index b16433c14e..d1702ba61b 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -1163,6 +1163,14 @@ func compareNativeHistogram(exp, cur *histogram.FloatHistogram) bool { return false } + // Compare CounterResetHint only if explicitly specified in expected histogram. + // UnknownCounterReset (the default) means "don't care about the hint". 
+ if exp.CounterResetHint != histogram.UnknownCounterReset { + if exp.CounterResetHint != cur.CounterResetHint { + return false + } + } + return true } From 488466246fccfa9b8c0c1454489726cb1f87c86a Mon Sep 17 00:00:00 2001 From: aviralgarg05 Date: Sun, 30 Nov 2025 18:01:51 +0530 Subject: [PATCH 02/46] promqltest: Fix test expectation for counter reset hint comparison The test at line 1283 for avg_over_time(nhcb_metric[13m]) incorrectly expected counter_reset_hint:gauge in the result. However, the actual avg_over_time implementation does not explicitly set the CounterResetHint to GaugeType on its output histogram. With the new counter reset hint comparison logic added to the promqltest framework (which compares hints when explicitly specified in expected results), this incorrect expectation was now being caught. This fix removes the incorrect counter_reset_hint:gauge from the expected result, allowing the test to correctly verify the avg_over_time behavior without asserting a specific hint value that the function does not set. The counter reset hint comparison logic works as designed: if the expected histogram has UnknownCounterReset (the default when not specified), no comparison is performed. Only when a hint is explicitly specified in the test expectation will it be compared against the actual result. Fixes the test failure introduced by the counter reset hint comparison feature in promqltest. 
Signed-off-by: Aviral Garg Signed-off-by: aviralgarg05 --- promql/promqltest/testdata/native_histograms.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index fd4b1f4178..d66400f787 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -1283,7 +1283,7 @@ eval instant at 12m sum_over_time(nhcb_metric[13m]) eval instant at 12m avg_over_time(nhcb_metric[13m]) expect no_warn expect info msg: PromQL info: mismatched custom buckets were reconciled during aggregation - {} {{schema:-53 count:1 sum:1 custom_values:[5] counter_reset_hint:gauge buckets:[1]}} + {} {{schema:-53 count:1 sum:1 custom_values:[5] buckets:[1]}} eval instant at 12m last_over_time(nhcb_metric[13m]) expect no_warn From 119e75d78b8e2c98984b8bbec2eedf78a41533b9 Mon Sep 17 00:00:00 2001 From: aviralgarg05 Date: Fri, 19 Dec 2025 23:32:08 +0530 Subject: [PATCH 03/46] promqltest: Properly distinguish explicit counter_reset_hint specification This commit addresses the PR feedback for issue #17615. The previous implementation could not distinguish between: - No counter reset hint specified (meaning "don't care") - counter_reset_hint:unknown explicitly specified (meaning "verify it's unknown") Changes: - Added CounterResetHintSet field to parser.SequenceValue to track whether counter_reset_hint was explicitly specified in the test file - Modified buildHistogramFromMap to set this flag when the hint is present in the descriptor map - Updated newHistogramSequenceValue helper and histogramsSeries functions to propagate the flag through histogram series creation - Updated yacc grammar to use the new helper function - Modified compareNativeHistogram to accept the flag and only compare hints when explicitly specified This allows tests to: 1. Not specify a hint (no comparison, backward compatible) 2. 
Explicitly specify counter_reset_hint:unknown (verify it's unknown) 3. Explicitly specify counter_reset_hint:gauge/reset/not_reset (verify match) Fixes #17615 Signed-off-by: aviralgarg05 --- promql/parser/generated_parser.y | 5 ++-- promql/parser/generated_parser.y.go | 5 ++-- promql/parser/parse.go | 39 +++++++++++++++++++++++++---- promql/promqltest/test.go | 25 ++++++++++++------ 4 files changed, 58 insertions(+), 16 deletions(-) diff --git a/promql/parser/generated_parser.y b/promql/parser/generated_parser.y index d9bbb10b28..0f196ef5af 100644 --- a/promql/parser/generated_parser.y +++ b/promql/parser/generated_parser.y @@ -790,14 +790,15 @@ series_item : BLANK // Histogram descriptions (part of unit testing). | histogram_series_value { - $$ = []SequenceValue{{Histogram:$1}} + $$ = []SequenceValue{yylex.(*parser).newHistogramSequenceValue($1)} } | histogram_series_value TIMES uint { $$ = []SequenceValue{} // Add an additional value for time 0, which we ignore in tests. + sv := yylex.(*parser).newHistogramSequenceValue($1) for i:=uint64(0); i <= $3; i++{ - $$ = append($$, SequenceValue{Histogram:$1}) + $$ = append($$, sv) //$1 += $2 } } diff --git a/promql/parser/generated_parser.y.go b/promql/parser/generated_parser.y.go index eb4b32129a..b649e86440 100644 --- a/promql/parser/generated_parser.y.go +++ b/promql/parser/generated_parser.y.go @@ -1835,15 +1835,16 @@ yydefault: case 158: yyDollar = yyS[yypt-1 : yypt+1] { - yyVAL.series = []SequenceValue{{Histogram: yyDollar[1].histogram}} + yyVAL.series = []SequenceValue{yylex.(*parser).newHistogramSequenceValue(yyDollar[1].histogram)} } case 159: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.series = []SequenceValue{} // Add an additional value for time 0, which we ignore in tests. 
+ sv := yylex.(*parser).newHistogramSequenceValue(yyDollar[1].histogram) for i := uint64(0); i <= yyDollar[3].uint; i++ { - yyVAL.series = append(yyVAL.series, SequenceValue{Histogram: yyDollar[1].histogram}) + yyVAL.series = append(yyVAL.series, sv) //$1 += $2 } } diff --git a/promql/parser/parse.go b/promql/parser/parse.go index bcd511f467..212a5758e7 100644 --- a/promql/parser/parse.go +++ b/promql/parser/parse.go @@ -67,6 +67,11 @@ type parser struct { generatedParserResult any parseErrors ParseErrors + + // lastHistogramCounterResetHintSet is set to true when the most recently + // built histogram had a counter_reset_hint explicitly specified. + // This is used to populate CounterResetHintSet in SequenceValue. + lastHistogramCounterResetHintSet bool } type Opt func(p *parser) @@ -234,6 +239,11 @@ type SequenceValue struct { Value float64 Omitted bool Histogram *histogram.FloatHistogram + // CounterResetHintSet is true if the counter reset hint was explicitly + // specified in the test file using counter_reset_hint:... syntax. + // This allows distinguishing between "no hint specified" (don't care) + // vs "counter_reset_hint:unknown" (verify it's unknown). + CounterResetHintSet bool } func (v SequenceValue) String() string { @@ -496,25 +506,30 @@ func (p *parser) mergeMaps(left, right *map[string]any) (ret *map[string]any) { } func (p *parser) histogramsIncreaseSeries(base, inc *histogram.FloatHistogram, times uint64) ([]SequenceValue, error) { - return p.histogramsSeries(base, inc, times, func(a, b *histogram.FloatHistogram) (*histogram.FloatHistogram, error) { + // Capture the hint set flag immediately after inc histogram is built. + // The base histogram's hint set flag was already captured. 
+ hintSet := p.lastHistogramCounterResetHintSet + return p.histogramsSeries(base, inc, times, hintSet, func(a, b *histogram.FloatHistogram) (*histogram.FloatHistogram, error) { res, _, _, err := a.Add(b) return res, err }) } func (p *parser) histogramsDecreaseSeries(base, inc *histogram.FloatHistogram, times uint64) ([]SequenceValue, error) { - return p.histogramsSeries(base, inc, times, func(a, b *histogram.FloatHistogram) (*histogram.FloatHistogram, error) { + // Capture the hint set flag immediately after inc histogram is built. + hintSet := p.lastHistogramCounterResetHintSet + return p.histogramsSeries(base, inc, times, hintSet, func(a, b *histogram.FloatHistogram) (*histogram.FloatHistogram, error) { res, _, _, err := a.Sub(b) return res, err }) } -func (*parser) histogramsSeries(base, inc *histogram.FloatHistogram, times uint64, +func (*parser) histogramsSeries(base, inc *histogram.FloatHistogram, times uint64, counterResetHintSet bool, combine func(*histogram.FloatHistogram, *histogram.FloatHistogram) (*histogram.FloatHistogram, error), ) ([]SequenceValue, error) { ret := make([]SequenceValue, times+1) // Add an additional value (the base) for time 0, which we ignore in tests. - ret[0] = SequenceValue{Histogram: base} + ret[0] = SequenceValue{Histogram: base, CounterResetHintSet: counterResetHintSet} cur := base for i := uint64(1); i <= times; i++ { if cur.Schema > inc.Schema { @@ -526,7 +541,7 @@ func (*parser) histogramsSeries(base, inc *histogram.FloatHistogram, times uint6 if err != nil { return ret, err } - ret[i] = SequenceValue{Histogram: cur} + ret[i] = SequenceValue{Histogram: cur, CounterResetHintSet: counterResetHintSet} } return ret, nil @@ -535,6 +550,8 @@ func (*parser) histogramsSeries(base, inc *histogram.FloatHistogram, times uint6 // buildHistogramFromMap is used in the grammar to take then individual parts of the histogram and complete it. 
func (p *parser) buildHistogramFromMap(desc *map[string]any) *histogram.FloatHistogram { output := &histogram.FloatHistogram{} + // Reset the flag for each new histogram being built. + p.lastHistogramCounterResetHintSet = false val, ok := (*desc)["schema"] if ok { @@ -595,6 +612,8 @@ func (p *parser) buildHistogramFromMap(desc *map[string]any) *histogram.FloatHis val, ok = (*desc)["counter_reset_hint"] if ok { + // Mark that the counter reset hint was explicitly specified. + p.lastHistogramCounterResetHintSet = true resetHint, ok := val.(Item) if ok { @@ -626,6 +645,16 @@ func (p *parser) buildHistogramFromMap(desc *map[string]any) *histogram.FloatHis return output } +// newHistogramSequenceValue creates a SequenceValue for a histogram, +// setting CounterResetHintSet based on whether counter_reset_hint was +// explicitly specified in the histogram description. +func (p *parser) newHistogramSequenceValue(h *histogram.FloatHistogram) SequenceValue { + return SequenceValue{ + Histogram: h, + CounterResetHintSet: p.lastHistogramCounterResetHintSet, + } +} + func (p *parser) buildHistogramBucketsAndSpans(desc *map[string]any, bucketsKey, offsetKey string, ) (buckets []float64, spans []histogram.Span) { bucketCount := 0 diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index d1702ba61b..0170236587 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -1009,7 +1009,12 @@ func (ev *evalCmd) compareResult(result parser.Value) error { exp := ev.expected[hash] var expectedFloats []promql.FPoint - var expectedHistograms []promql.HPoint + // expectedHPoint wraps HPoint with CounterResetHintSet flag from SequenceValue. 
+ type expectedHPoint struct { + promql.HPoint + CounterResetHintSet bool + } + var expectedHistograms []expectedHPoint for i, e := range exp.vals { ts := ev.start.Add(time.Duration(i) * ev.step) @@ -1021,7 +1026,10 @@ func (ev *evalCmd) compareResult(result parser.Value) error { t := ts.UnixNano() / int64(time.Millisecond/time.Nanosecond) if e.Histogram != nil { - expectedHistograms = append(expectedHistograms, promql.HPoint{T: t, H: e.Histogram}) + expectedHistograms = append(expectedHistograms, expectedHPoint{ + HPoint: promql.HPoint{T: t, H: e.Histogram}, + CounterResetHintSet: e.CounterResetHintSet, + }) } else if !e.Omitted { expectedFloats = append(expectedFloats, promql.FPoint{T: t, F: e.Value}) } @@ -1050,7 +1058,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { return fmt.Errorf("expected histogram value at index %v for %s to have timestamp %v, but it had timestamp %v (result has %s)", i, ev.metrics[hash], expected.T, actual.T, formatSeriesResult(s)) } - if !compareNativeHistogram(expected.H.Compact(0), actual.H.Compact(0)) { + if !compareNativeHistogram(expected.H.Compact(0), actual.H.Compact(0), expected.CounterResetHintSet) { return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H.TestExpression(), actual.H.TestExpression(), formatSeriesResult(s)) } } @@ -1089,7 +1097,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { if expH != nil && v.H == nil { return fmt.Errorf("expected histogram %s for %s but got float value %v", HistogramTestExpression(expH), v.Metric, v.F) } - if expH != nil && !compareNativeHistogram(expH.Compact(0), v.H.Compact(0)) { + if expH != nil && !compareNativeHistogram(expH.Compact(0), v.H.Compact(0), exp0.CounterResetHintSet) { return fmt.Errorf("expected %v for %s but got %s", HistogramTestExpression(expH), v.Metric, HistogramTestExpression(v.H)) } if !almost.Equal(exp0.Value, v.F, defaultEpsilon) { @@ 
-1127,7 +1135,9 @@ func (ev *evalCmd) compareResult(result parser.Value) error { // compareNativeHistogram is helper function to compare two native histograms // which can tolerate some differ in the field of float type, such as Count, Sum. -func compareNativeHistogram(exp, cur *histogram.FloatHistogram) bool { +// The counterResetHintSet parameter indicates whether the counter reset hint was +// explicitly specified in the expected histogram (from the test file). +func compareNativeHistogram(exp, cur *histogram.FloatHistogram, counterResetHintSet bool) bool { if exp == nil || cur == nil { return false } @@ -1164,8 +1174,9 @@ func compareNativeHistogram(exp, cur *histogram.FloatHistogram) bool { } // Compare CounterResetHint only if explicitly specified in expected histogram. - // UnknownCounterReset (the default) means "don't care about the hint". - if exp.CounterResetHint != histogram.UnknownCounterReset { + // When counterResetHintSet is false, no hint was specified, meaning "don't care". + // When counterResetHintSet is true, the hint was explicitly specified and must match. 
+ if counterResetHintSet { if exp.CounterResetHint != cur.CounterResetHint { return false } From 035952bc8b34661b17a4889afe9437dc8cf97887 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EB=AF=BC=EC=98=81?= Date: Sun, 11 Jan 2026 00:29:23 +0900 Subject: [PATCH 04/46] refactor(ui): Remove explicit any from globals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 김민영 --- web/ui/react-app/src/globals.ts | 7 +++---- web/ui/react-app/src/types/index.d.ts | 5 +++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/web/ui/react-app/src/globals.ts b/web/ui/react-app/src/globals.ts index d2a5f1d50a..7a59bdbffd 100644 --- a/web/ui/react-app/src/globals.ts +++ b/web/ui/react-app/src/globals.ts @@ -1,6 +1,5 @@ import jquery from 'jquery'; +import moment from 'moment'; -// eslint-disable-next-line @typescript-eslint/no-explicit-any -(window as any).jQuery = jquery; -// eslint-disable-next-line @typescript-eslint/no-explicit-any -(window as any).moment = require('moment'); +window.jQuery = jquery; +window.moment = moment; diff --git a/web/ui/react-app/src/types/index.d.ts b/web/ui/react-app/src/types/index.d.ts index addf1cc702..9cf8fbd7cc 100644 --- a/web/ui/react-app/src/types/index.d.ts +++ b/web/ui/react-app/src/types/index.d.ts @@ -68,3 +68,8 @@ interface JQueryStatic { scale: () => Color; }; } + +interface Window { + jQuery: JQueryStatic; + moment: typeof import('moment'); +} From 5499260964b94e4a396f0ce2327388b174cb38f8 Mon Sep 17 00:00:00 2001 From: Rahulrairai59 Date: Tue, 13 Jan 2026 21:25:56 -0600 Subject: [PATCH 05/46] Update react-router version to v7.12.0 to fix CVE-2026-21884 in package-lock.json To fix CVE-2026-21884 HIGH severity vulnerability Signed-off-by: Rahulrairai59 --- web/ui/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index 764fd87820..a1f72ff228 100644 --- a/web/ui/package-lock.json +++ 
b/web/ui/package-lock.json @@ -7823,9 +7823,9 @@ } }, "node_modules/react-router": { - "version": "7.9.5", - "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.9.5.tgz", - "integrity": "sha512-JmxqrnBZ6E9hWmf02jzNn9Jm3UqyeimyiwzD69NjxGySG6lIz/1LVPsoTCwN7NBX2XjCEa1LIX5EMz1j2b6u6A==", + "version": "7.12.0", + "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.12.0.tgz", + "integrity": "sha512-kTPDYPFzDVGIIGNLS5VJykK0HfHLY5MF3b+xj0/tTyNYL1gF1qs7u67Z9jEhQk2sQ98SUaHxlG31g1JtF7IfVw==", "dependencies": { "cookie": "^1.0.1", "set-cookie-parser": "^2.6.0" From adaf1398261fe44c41c8226e1f3a03024a714284 Mon Sep 17 00:00:00 2001 From: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:41:50 +0100 Subject: [PATCH 06/46] Makefile.common: Push major version tags to registry In commit 74775d732 "Add major version tag (#8026)" from 2020, the docker-tag-latest target was updated to create major version tags (v2, v3, etc.) but these tags were never actually pushed to the registry. They existed locally only after tagging but were never published. This commit fixes the issue by: - Adding logic to docker-publish to push major version tags when DOCKER_IMAGE_TAG="latest" (triggered by promci during releases) - Adding logic to docker-manifest to create major version manifests when DOCKER_IMAGE_TAG="latest" Pre-release filtering is handled at the promci level, where the regex check ^v[0-9]+(\.[0-9]+){2}$ already ensures only stable releases trigger the "latest" tagging workflow. 
Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> --- Makefile.common | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Makefile.common b/Makefile.common index e3adb8958e..3e6118109e 100644 --- a/Makefile.common +++ b/Makefile.common @@ -286,6 +286,16 @@ $(PUBLISH_DOCKER_ARCHS): common-docker-publish-%: echo "Pushing default variant ($$variant_name) for linux-$*"; \ docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)"; \ fi; \ + if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Pushing $$variant_name variant version tags for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Pushing default variant version tag for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \ + fi; \ + fi; \ done DOCKER_MAJOR_VERSION_TAG = $(firstword $(subst ., ,$(shell cat VERSION))) @@ -322,6 +332,18 @@ common-docker-manifest: DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(SANITIZED_DOCKER_IMAGE_TAG)); \ DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)"; \ fi; \ + if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Creating manifest for $$variant_name variant version tag"; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name" $(foreach 
ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name); \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Creating default variant version tag manifest"; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):v$(DOCKER_MAJOR_VERSION_TAG)); \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)"; \ + fi; \ + fi; \ done .PHONY: promu From 21fb899c3292829ec49b5fef63b3291bdc8a519d Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Wed, 21 Jan 2026 16:25:31 +0100 Subject: [PATCH 07/46] fix(teststorage/appender.go): TODO and Sample staleness check (#17905) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(teststorage/appender.go): TODO and Sample staleness check Allow different order of consecutive stale samples between the expected and actual array for RequireEqual and RequireNotEqual by trying to swap the expected side until it matches. Also fix the definition of stale sample in the test, it's not only float, but defined for native histograms as well. 
Signed-off-by: György Krajcsovits * add unit tests Signed-off-by: György Krajcsovits --------- Signed-off-by: György Krajcsovits --- util/teststorage/appender.go | 83 +++++++++++++++-------- util/teststorage/appender_test.go | 105 ++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 26 deletions(-) diff --git a/util/teststorage/appender.go b/util/teststorage/appender.go index d88d905694..dc0825f98f 100644 --- a/util/teststorage/appender.go +++ b/util/teststorage/appender.go @@ -24,7 +24,6 @@ import ( "testing" "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "go.uber.org/atomic" @@ -97,37 +96,32 @@ func (s Sample) Equals(other Sample) bool { slices.EqualFunc(s.ES, other.ES, exemplar.Exemplar.Equals) } -var ( - sampleComparer = cmp.Comparer(func(a, b Sample) bool { - return a.Equals(b) - }) - byLabelSort = cmpopts.SortSlices(func(a, b Sample) int { - return labels.Compare(a.L, b.L) - }) -) - -func includeStaleNaNs(s []Sample) bool { - for _, e := range s { - if value.IsStaleNaN(e.V) { - return true - } +// IsStale returns whether the sample represents a stale sample, according to +// https://prometheus.io/docs/specs/native_histograms/#staleness-markers. +func (s Sample) IsStale() bool { + switch { + case s.FH != nil: + return value.IsStaleNaN(s.FH.Sum) + case s.H != nil: + return value.IsStaleNaN(s.H.Sum) + default: + return value.IsStaleNaN(s.V) } - return false } +var sampleComparer = cmp.Comparer(func(a, b Sample) bool { + return a.Equals(b) +}) + // RequireEqual is a special require equal that correctly compare Prometheus structures. // // In comparison to testutil.RequireEqual, this function adds special logic for comparing []Samples. // -// It also ignores ordering when expected slice contains at least one StaleNaN. This is because the -// scrape StaleNan samples are generated by iterating over a map, thus expectedly different. 
-// -// TODO(bwplotka): We should likely reorder only within a group of sequential NaNs or only in scrape package. +// It also ignores ordering between consecutive stale samples to avoid false +// negatives due to map iteration order in staleness tracking. func RequireEqual(t testing.TB, expected, got []Sample, msgAndArgs ...any) { opts := []cmp.Option{sampleComparer} - if includeStaleNaNs(expected) { - opts = append(opts, byLabelSort) - } + expected = reorderExpectedForStaleness(expected, got) testutil.RequireEqualWithOptions(t, expected, got, opts, msgAndArgs...) } @@ -136,9 +130,7 @@ func RequireNotEqual(t testing.TB, expected, got []Sample, msgAndArgs ...any) { t.Helper() opts := []cmp.Option{cmp.Comparer(labels.Equal), sampleComparer} - if includeStaleNaNs(expected) { - opts = append(opts, byLabelSort) - } + expected = reorderExpectedForStaleness(expected, got) if !cmp.Equal(expected, got, opts...) { return } @@ -147,6 +139,45 @@ func RequireNotEqual(t testing.TB, expected, got []Sample, msgAndArgs ...any) { "b: %s", expected, got), msgAndArgs...) } +func reorderExpectedForStaleness(expected, got []Sample) []Sample { + if len(expected) != len(got) || !includeStaleNaNs(expected) { + return expected + } + result := make([]Sample, len(expected)) + copy(result, expected) + + // Try to reorder only consecutive stale samples to avoid false negatives + // due to map iteration order in staleness tracking. + for i := range result { + if !result[i].IsStale() { + continue + } + if result[i].Equals(got[i]) { + continue + } + for j := i + 1; j < len(result); j++ { + if !result[j].IsStale() { + break + } + if result[j].Equals(got[i]) { + // Swap. + result[i], result[j] = result[j], result[i] + break + } + } + } + return result +} + +func includeStaleNaNs(s []Sample) bool { + for _, e := range s { + if e.IsStale() { + return true + } + } + return false +} + // Appendable is a storage.Appendable mock. 
// It allows recording all samples that were added through the appender and injecting errors. // Appendable will panic if more than one Appender is open. diff --git a/util/teststorage/appender_test.go b/util/teststorage/appender_test.go index bbd6b54125..41260ba43f 100644 --- a/util/teststorage/appender_test.go +++ b/util/teststorage/appender_test.go @@ -306,3 +306,108 @@ func TestConcurrentAppenderV2_ReturnsErrAppender(t *testing.T) { require.Error(t, app.Commit()) require.Error(t, app.Rollback()) } + +func TestReorderExpectedForStaleness(t *testing.T) { + testcases := []struct { + name string + inExpected []Sample + inGot []Sample + expected []Sample + }{ + { + name: "no staleness markers", + inExpected: []Sample{ + {L: labels.FromStrings("a", "1"), T: 1, V: 1}, + {L: labels.FromStrings("a", "2"), T: 1, V: 2}, + }, + inGot: []Sample{ + {L: labels.FromStrings("a", "2"), T: 1, V: 2}, + {L: labels.FromStrings("a", "1"), T: 1, V: 1}, + }, + }, + { + name: "with staleness markers", + inExpected: []Sample{ + {L: labels.FromStrings("a", "1"), T: 1, V: 1}, + {L: labels.FromStrings("a", "2"), T: 2, V: 2}, + {L: labels.FromStrings("a", "3"), T: 3, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings("a", "4"), T: 4, V: math.Float64frombits(value.StaleNaN)}, + }, + inGot: []Sample{ + {L: labels.FromStrings("a", "1"), T: 1, V: 1}, + {L: labels.FromStrings("a", "2"), T: 2, V: 2}, + {L: labels.FromStrings("a", "3"), T: 3, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings("a", "4"), T: 4, V: math.Float64frombits(value.StaleNaN)}, + }, + }, + { + name: "with staleness markers wrong order", + inExpected: []Sample{ + {L: labels.FromStrings("a", "1"), T: 1, V: 1}, + {L: labels.FromStrings("a", "2"), T: 2, V: 2}, + {L: labels.FromStrings("a", "3"), T: 3, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings("a", "4"), T: 4, V: math.Float64frombits(value.StaleNaN)}, + }, + inGot: []Sample{ + {L: labels.FromStrings("a", "2"), T: 2, V: 2}, + 
{L: labels.FromStrings("a", "1"), T: 1, V: 1}, + {L: labels.FromStrings("a", "4"), T: 4, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings("a", "3"), T: 3, V: math.Float64frombits(value.StaleNaN)}, + }, + expected: []Sample{ + {L: labels.FromStrings("a", "1"), T: 1, V: 1}, + {L: labels.FromStrings("a", "2"), T: 2, V: 2}, + {L: labels.FromStrings("a", "4"), T: 4, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings("a", "3"), T: 3, V: math.Float64frombits(value.StaleNaN)}, + }, + }, + { + name: "with staleness markers wrong order but not consecutive", + inExpected: []Sample{ + {L: labels.FromStrings("a", "1"), T: 1, V: 1}, + {L: labels.FromStrings("a", "3"), T: 3, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings("a", "2"), T: 2, V: 2}, + {L: labels.FromStrings("a", "4"), T: 4, V: math.Float64frombits(value.StaleNaN)}, + }, + inGot: []Sample{ + {L: labels.FromStrings("a", "2"), T: 2, V: 2}, + {L: labels.FromStrings("a", "1"), T: 1, V: 1}, + {L: labels.FromStrings("a", "4"), T: 4, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings("a", "3"), T: 3, V: math.Float64frombits(value.StaleNaN)}, + }, + expected: []Sample{ + {L: labels.FromStrings("a", "1"), T: 1, V: 1}, + {L: labels.FromStrings("a", "3"), T: 3, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings("a", "2"), T: 2, V: 2}, + {L: labels.FromStrings("a", "4"), T: 4, V: math.Float64frombits(value.StaleNaN)}, + }, + }, + } + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + if tc.expected == nil { + tc.expected = tc.inExpected + } + RequireEqual(t, tc.expected, reorderExpectedForStaleness(tc.inExpected, tc.inGot)) + }) + } +} + +func TestSampleIsStale(t *testing.T) { + s1 := Sample{V: 1} + require.False(t, s1.IsStale()) + s2 := Sample{V: math.Float64frombits(value.StaleNaN)} + require.True(t, s2.IsStale()) + h := tsdbutil.GenerateTestHistogram(0) + h1 := Sample{V: math.Float64frombits(value.StaleNaN), H: h} + 
require.False(t, h1.IsStale()) // Histogram takes precedence over V. + h.Sum = math.Float64frombits(value.StaleNaN) + h2 := Sample{V: 1, H: h} + require.True(t, h2.IsStale()) + fh := tsdbutil.GenerateTestFloatHistogram(0) + fh1 := Sample{V: math.Float64frombits(value.StaleNaN), H: h, FH: fh} + require.False(t, fh1.IsStale()) // FloatHistogram takes precedence over all. + fh.Sum = math.Float64frombits(value.StaleNaN) + fh2 := Sample{V: 1, H: tsdbutil.GenerateTestHistogram(1), FH: fh} + require.True(t, fh2.IsStale()) +} From 39e524088c110c3db25b98d452a42feb93931be9 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 22 Jan 2026 10:34:01 +0000 Subject: [PATCH 08/46] Fix(discovery/aws): Create Copies of Default Config (#17769) Signed-off-by: matt-gp --- discovery/aws/aws.go | 9 ++-- discovery/aws/aws_test.go | 106 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 3 deletions(-) diff --git a/discovery/aws/aws.go b/discovery/aws/aws.go index 1ac97b3c9e..be6b4dabbe 100644 --- a/discovery/aws/aws.go +++ b/discovery/aws/aws.go @@ -101,7 +101,8 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(any) error) error { switch c.Role { case RoleEC2: if c.EC2SDConfig == nil { - c.EC2SDConfig = &DefaultEC2SDConfig + ec2Config := DefaultEC2SDConfig + c.EC2SDConfig = &ec2Config } c.EC2SDConfig.HTTPClientConfig = c.HTTPClientConfig if c.Region != "" { @@ -133,7 +134,8 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(any) error) error { } case RoleECS: if c.ECSSDConfig == nil { - c.ECSSDConfig = &DefaultECSSDConfig + ecsConfig := DefaultECSSDConfig + c.ECSSDConfig = &ecsConfig } c.ECSSDConfig.HTTPClientConfig = c.HTTPClientConfig if c.Region != "" { @@ -165,7 +167,8 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(any) error) error { } case RoleLightsail: if c.LightsailSDConfig == nil { - c.LightsailSDConfig = &DefaultLightsailSDConfig + lightsailConfig := DefaultLightsailSDConfig + c.LightsailSDConfig = &lightsailConfig } c.LightsailSDConfig.HTTPClientConfig = 
c.HTTPClientConfig if c.Region != "" { diff --git a/discovery/aws/aws_test.go b/discovery/aws/aws_test.go index a2f03a8b99..9d3728911b 100644 --- a/discovery/aws/aws_test.go +++ b/discovery/aws/aws_test.go @@ -177,3 +177,109 @@ port: 9300`, }) } } + +// TestMultipleSDConfigsDoNotShareState verifies that multiple AWS SD configs +// don't share the same underlying configuration object. This was a bug where +// all configs pointed to the same global default, causing port and other +// settings from one job to overwrite settings in another job. +func TestMultipleSDConfigsDoNotShareState(t *testing.T) { + tests := []struct { + name string + yaml string + validateFunc func(t *testing.T, cfg1, cfg2 *SDConfig) + }{ + { + name: "EC2MultipleJobsDifferentPorts", + yaml: ` +- role: ec2 + region: us-west-2 + port: 9100 + filters: + - name: tag:Name + values: [host-1] +- role: ec2 + region: us-west-2 + port: 9101 + filters: + - name: tag:Name + values: [host-2]`, + validateFunc: func(t *testing.T, cfg1, cfg2 *SDConfig) { + require.Equal(t, RoleEC2, cfg1.Role) + require.Equal(t, RoleEC2, cfg2.Role) + require.NotNil(t, cfg1.EC2SDConfig) + require.NotNil(t, cfg2.EC2SDConfig) + + // Verify ports are different and not shared + require.Equal(t, 9100, cfg1.EC2SDConfig.Port) + require.Equal(t, 9101, cfg2.EC2SDConfig.Port) + + // Verify filters are different and not shared + require.Len(t, cfg1.EC2SDConfig.Filters, 1) + require.Len(t, cfg2.EC2SDConfig.Filters, 1) + require.Equal(t, []string{"host-1"}, cfg1.EC2SDConfig.Filters[0].Values) + require.Equal(t, []string{"host-2"}, cfg2.EC2SDConfig.Filters[0].Values) + + // Most importantly: verify they're not the same pointer + require.NotSame(t, cfg1.EC2SDConfig, cfg2.EC2SDConfig, + "EC2SDConfig objects should not share the same memory address") + }, + }, + { + name: "ECSMultipleJobsDifferentPorts", + yaml: ` +- role: ecs + region: us-east-1 + port: 8080 + clusters: [cluster-a] +- role: ecs + region: us-east-1 + port: 8081 + clusters: 
[cluster-b]`, + validateFunc: func(t *testing.T, cfg1, cfg2 *SDConfig) { + require.Equal(t, RoleECS, cfg1.Role) + require.Equal(t, RoleECS, cfg2.Role) + require.NotNil(t, cfg1.ECSSDConfig) + require.NotNil(t, cfg2.ECSSDConfig) + + require.Equal(t, 8080, cfg1.ECSSDConfig.Port) + require.Equal(t, 8081, cfg2.ECSSDConfig.Port) + require.Equal(t, []string{"cluster-a"}, cfg1.ECSSDConfig.Clusters) + require.Equal(t, []string{"cluster-b"}, cfg2.ECSSDConfig.Clusters) + + require.NotSame(t, cfg1.ECSSDConfig, cfg2.ECSSDConfig, + "ECSSDConfig objects should not share the same memory address") + }, + }, + { + name: "LightsailMultipleJobsDifferentPorts", + yaml: ` +- role: lightsail + region: eu-west-1 + port: 7070 +- role: lightsail + region: eu-west-1 + port: 7071`, + validateFunc: func(t *testing.T, cfg1, cfg2 *SDConfig) { + require.Equal(t, RoleLightsail, cfg1.Role) + require.Equal(t, RoleLightsail, cfg2.Role) + require.NotNil(t, cfg1.LightsailSDConfig) + require.NotNil(t, cfg2.LightsailSDConfig) + + require.Equal(t, 7070, cfg1.LightsailSDConfig.Port) + require.Equal(t, 7071, cfg2.LightsailSDConfig.Port) + + require.NotSame(t, cfg1.LightsailSDConfig, cfg2.LightsailSDConfig, + "LightsailSDConfig objects should not share the same memory address") + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var configs []SDConfig + require.NoError(t, yaml.Unmarshal([]byte(tt.yaml), &configs)) + require.Len(t, configs, 2) + tt.validateFunc(t, &configs[0], &configs[1]) + }) + } +} From d9ccd70ac1950e201e8391073b50ae79b5265957 Mon Sep 17 00:00:00 2001 From: Siavash Safi Date: Thu, 22 Jan 2026 12:24:35 +0100 Subject: [PATCH 09/46] fix(notify): flaky tests (#17899) Add a helper function to set up AlertmanagerSets. Fix all flaky tests. 
Signed-off-by: Siavash Safi --- notifier/manager_test.go | 364 +++++++++------------------------------ 1 file changed, 82 insertions(+), 282 deletions(-) diff --git a/notifier/manager_test.go b/notifier/manager_test.go index f82a7ad511..39fc35a409 100644 --- a/notifier/manager_test.go +++ b/notifier/manager_test.go @@ -90,6 +90,33 @@ func newTestHTTPServerBuilder(expected *[]*Alert, errc chan<- error, u, p string })) } +func newTestAlertmanagerSet( + cfg *config.AlertmanagerConfig, + client *http.Client, + opts *Options, + metrics *alertMetrics, + alertmanagerURLs ...string, +) *alertmanagerSet { + ams := make([]alertmanager, len(alertmanagerURLs)) + for i, am := range alertmanagerURLs { + ams[i] = alertmanagerMock{urlf: func() string { return am }} + } + logger := slog.New(slog.DiscardHandler) + sendLoops := make(map[string]*sendLoop) + for _, am := range alertmanagerURLs { + sendLoops[am] = newSendLoop(am, client, cfg, opts, logger, metrics) + } + return &alertmanagerSet{ + ams: ams, + cfg: cfg, + client: client, + logger: logger, + metrics: metrics, + opts: opts, + sendLoops: sendLoops, + } +} + func TestHandlerSendAll(t *testing.T) { var ( errc = make(chan error, 1) @@ -107,7 +134,8 @@ func TestHandlerSendAll(t *testing.T) { defer server2.Close() defer server3.Close() - h := NewManager(&Options{}, model.UTF8Validation, nil) + reg := prometheus.NewRegistry() + h := NewManager(&Options{Registerer: reg}, model.UTF8Validation, nil) authClient, _ := config_util.NewClientFromConfig( config_util.HTTPClientConfig{ @@ -129,53 +157,10 @@ func TestHandlerSendAll(t *testing.T) { am3Cfg.Timeout = model.Duration(time.Second) opts := &Options{Do: do, QueueCapacity: 10_000, MaxBatchSize: DefaultMaxBatchSize} - logger := slog.New(slog.DiscardHandler) - h.alertmanagers["1"] = &alertmanagerSet{ - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server1.URL }, - }, - }, - cfg: &am1Cfg, - client: authClient, - sendLoops: map[string]*sendLoop{ - server1.URL: 
newSendLoop(server1.URL, authClient, &am1Cfg, opts, logger, h.metrics), - }, - opts: opts, - metrics: h.metrics, - logger: logger, - } - - h.alertmanagers["2"] = &alertmanagerSet{ - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server2.URL }, - }, - alertmanagerMock{ - urlf: func() string { return server3.URL }, - }, - }, - cfg: &am2Cfg, - sendLoops: map[string]*sendLoop{ - server2.URL: newSendLoop(server2.URL, nil, &am2Cfg, opts, logger, h.metrics), - server3.URL: newSendLoop(server3.URL, nil, &am3Cfg, opts, logger, h.metrics), - }, - opts: opts, - metrics: h.metrics, - logger: logger, - } - - h.alertmanagers["3"] = &alertmanagerSet{ - ams: []alertmanager{}, // empty set - cfg: &am3Cfg, - sendLoops: map[string]*sendLoop{ - server3.URL: newSendLoop(server3.URL, nil, &am3Cfg, opts, logger, h.metrics), - }, - opts: opts, - metrics: h.metrics, - logger: logger, - } + h.alertmanagers["1"] = newTestAlertmanagerSet(&am1Cfg, authClient, opts, h.metrics, server1.URL) + h.alertmanagers["2"] = newTestAlertmanagerSet(&am2Cfg, nil, opts, h.metrics, server2.URL, server3.URL) + h.alertmanagers["3"] = newTestAlertmanagerSet(&am3Cfg, nil, opts, h.metrics) var alerts []*Alert for i := range DefaultMaxBatchSize { @@ -196,7 +181,7 @@ func TestHandlerSendAll(t *testing.T) { } } - // start send loops + // Start send loops. for _, ams := range h.alertmanagers { ams.startSendLoops(ams.ams) } @@ -212,32 +197,38 @@ func TestHandlerSendAll(t *testing.T) { }, time.Second*2, time.Millisecond*10) checkNoErr() - // the only am in set 1 is down + // The only am in set 1 is down. status1.Store(int32(http.StatusNotFound)) h.Send(alerts...) + // Wait for all send loops to process before changing any status. 
require.Eventually(t, func() bool { - return prom_testutil.ToFloat64(h.metrics.errors.WithLabelValues(server1.URL)) == DefaultMaxBatchSize + return prom_testutil.ToFloat64(h.metrics.errors.WithLabelValues(server1.URL)) == DefaultMaxBatchSize && + prom_testutil.ToFloat64(h.metrics.sent.WithLabelValues(server2.URL)) == DefaultMaxBatchSize*2 && + prom_testutil.ToFloat64(h.metrics.sent.WithLabelValues(server3.URL)) == DefaultMaxBatchSize*2 }, time.Second*2, time.Millisecond*10) checkNoErr() - // fix the am + // Fix the am. status1.Store(int32(http.StatusOK)) - // only one of the ams in set 2 is down + // Only one of the ams in set 2 is down. status2.Store(int32(http.StatusInternalServerError)) h.Send(alerts...) + // Wait for all send loops to either send or fail with errors depending on their status. require.Eventually(t, func() bool { - return prom_testutil.ToFloat64(h.metrics.errors.WithLabelValues(server2.URL)) == DefaultMaxBatchSize + return prom_testutil.ToFloat64(h.metrics.errors.WithLabelValues(server2.URL)) == DefaultMaxBatchSize && + prom_testutil.ToFloat64(h.metrics.sent.WithLabelValues(server1.URL)) == DefaultMaxBatchSize*2 && + prom_testutil.ToFloat64(h.metrics.sent.WithLabelValues(server3.URL)) == DefaultMaxBatchSize*3 }, time.Second*2, time.Millisecond*10) checkNoErr() - // both ams in set 2 are down + // Both ams in set 2 are down. status3.Store(int32(http.StatusInternalServerError)) h.Send(alerts...) 
require.Eventually(t, func() bool { return prom_testutil.ToFloat64(h.metrics.errors.WithLabelValues(server2.URL)) == DefaultMaxBatchSize*2 && prom_testutil.ToFloat64(h.metrics.errors.WithLabelValues(server3.URL)) == DefaultMaxBatchSize - }, time.Second*3, time.Millisecond*10) + }, time.Second*2, time.Millisecond*10) checkNoErr() } @@ -262,7 +253,8 @@ func TestHandlerSendAllRemapPerAm(t *testing.T) { defer server2.Close() defer server3.Close() - h := NewManager(&Options{}, model.UTF8Validation, nil) + reg := prometheus.NewRegistry() + h := NewManager(&Options{QueueCapacity: 10_000, Registerer: reg}, model.UTF8Validation, nil) h.alertmanagers = make(map[string]*alertmanagerSet) am1Cfg := config.DefaultAlertmanagerConfig @@ -290,65 +282,14 @@ func TestHandlerSendAllRemapPerAm(t *testing.T) { }, } - opts := &Options{Do: do, QueueCapacity: 10_000, MaxBatchSize: DefaultMaxBatchSize} - logger := slog.New(slog.DiscardHandler) - - h.alertmanagers = map[string]*alertmanagerSet{ - // Drop no alerts. - "1": { - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server1.URL }, - }, - }, - cfg: &am1Cfg, - sendLoops: map[string]*sendLoop{ - server1.URL: newSendLoop(server1.URL, nil, &am1Cfg, opts, logger, h.metrics), - }, - opts: opts, - metrics: h.metrics, - logger: logger, - }, - // Drop only alerts with the "alertnamedrop" label. - "2": { - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server2.URL }, - }, - }, - cfg: &am2Cfg, - sendLoops: map[string]*sendLoop{ - server2.URL: newSendLoop(server2.URL, nil, &am2Cfg, opts, logger, h.metrics), - }, - opts: opts, - metrics: h.metrics, - logger: logger, - }, - // Drop all alerts. 
- "3": { - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server3.URL }, - }, - }, - cfg: &am3Cfg, - sendLoops: map[string]*sendLoop{ - server3.URL: newSendLoop(server3.URL, nil, &am3Cfg, opts, logger, h.metrics), - }, - opts: opts, - metrics: h.metrics, - logger: logger, - }, - // Empty list of Alertmanager endpoints. - "4": { - ams: []alertmanager{}, - cfg: &config.DefaultAlertmanagerConfig, - sendLoops: make(map[string]*sendLoop), - opts: opts, - metrics: h.metrics, - logger: logger, - }, - } + // Drop no alerts. + h.alertmanagers["1"] = newTestAlertmanagerSet(&am1Cfg, nil, h.opts, h.metrics, server1.URL) + // Drop only alerts with the "alertnamedrop" label. + h.alertmanagers["2"] = newTestAlertmanagerSet(&am2Cfg, nil, h.opts, h.metrics, server2.URL) + // Drop all alerts. + h.alertmanagers["3"] = newTestAlertmanagerSet(&am3Cfg, nil, h.opts, h.metrics, server3.URL) + // Empty list of Alertmanager endpoints. + h.alertmanagers["4"] = newTestAlertmanagerSet(&config.DefaultAlertmanagerConfig, nil, h.opts, h.metrics) var alerts []*Alert for i := range make([]struct{}, DefaultMaxBatchSize/2) { @@ -383,38 +324,38 @@ func TestHandlerSendAllRemapPerAm(t *testing.T) { } } - // start send loops + // Start send loops. for _, ams := range h.alertmanagers { ams.startSendLoops(ams.ams) } defer func() { - // stop send loops. + // Stop send loops. for _, ams := range h.alertmanagers { ams.cleanSendLoops(ams.ams...) } }() - // all ams are up + // All ams are up. h.Send(alerts...) require.Eventually(t, func() bool { return prom_testutil.ToFloat64(h.metrics.sent.WithLabelValues(server1.URL)) == DefaultMaxBatchSize }, time.Second*2, time.Millisecond*10) checkNoErr() - // the only am in set 1 goes down + // The only am in set 1 goes down. status1.Store(int32(http.StatusInternalServerError)) h.Send(alerts...) - // wait for metrics to update + // Wait for metrics to update. 
require.Eventually(t, func() bool { return prom_testutil.ToFloat64(h.metrics.errors.WithLabelValues(server1.URL)) == DefaultMaxBatchSize }, time.Second*2, time.Millisecond*10) checkNoErr() - // reset set 1 + // Reset set 1. status1.Store(int32(http.StatusOK)) - // set 3 loses its only am, but all alerts were dropped - // so there was nothing to send, keeping sendAll true + // Set 3 loses its only am, but all alerts were dropped + // so there was nothing to send, keeping sendAll true. status3.Store(int32(http.StatusInternalServerError)) h.Send(alerts...) checkNoErr() @@ -441,12 +382,7 @@ func TestExternalLabels(t *testing.T) { cfg := config.DefaultAlertmanagerConfig h.alertmanagers = map[string]*alertmanagerSet{ - "test": { - cfg: &cfg, - sendLoops: map[string]*sendLoop{ - "test": newSendLoop("test", nil, &cfg, h.opts, slog.New(slog.DiscardHandler), h.metrics), - }, - }, + "test": newTestAlertmanagerSet(&cfg, nil, h.opts, h.metrics, "test"), } // This alert should get the external label attached. 
@@ -494,12 +430,7 @@ func TestHandlerRelabel(t *testing.T) { cfg := config.DefaultAlertmanagerConfig h.alertmanagers = map[string]*alertmanagerSet{ - "test": { - cfg: &cfg, - sendLoops: map[string]*sendLoop{ - "test": newSendLoop("test", nil, &cfg, h.opts, slog.New(slog.DiscardHandler), h.metrics), - }, - }, + "test": newTestAlertmanagerSet(&cfg, nil, h.opts, h.metrics, "test"), } // This alert should be dropped due to the configuration @@ -576,23 +507,12 @@ func TestHandlerQueuing(t *testing.T) { am1Cfg := config.DefaultAlertmanagerConfig am1Cfg.Timeout = model.Duration(time.Second) - - h.alertmanagers["1"] = &alertmanagerSet{ - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server.URL }, - }, - }, - cfg: &am1Cfg, - sendLoops: map[string]*sendLoop{ - server.URL: newSendLoop(server.URL, nil, &am1Cfg, h.opts, slog.New(slog.DiscardHandler), h.metrics), - }, - } + h.alertmanagers["1"] = newTestAlertmanagerSet(&am1Cfg, nil, h.opts, h.metrics, server.URL) go h.Run(nil) defer h.Stop() - // start send loops + // Start send loops. for _, ams := range h.alertmanagers { ams.startSendLoops(ams.ams) } @@ -619,13 +539,6 @@ func TestHandlerQueuing(t *testing.T) { } } - // If the batch is larger than the queue capacity, it should be truncated - // from the front. - h.Send(alerts[:4*DefaultMaxBatchSize]...) - for i := 1; i < 4; i++ { - assertAlerts(alerts[i*DefaultMaxBatchSize : (i+1)*DefaultMaxBatchSize]) - } - // Send one batch, wait for it to arrive and block the server so the queue fills up. h.Send(alerts[:DefaultMaxBatchSize]...) <-called @@ -633,7 +546,7 @@ func TestHandlerQueuing(t *testing.T) { // Send several batches while the server is still blocked so the queue // fills up to its maximum capacity (3*DefaultMaxBatchSize). Then check that the // queue is truncated in the front. - h.Send(alerts[1*DefaultMaxBatchSize : 2*DefaultMaxBatchSize]...) // this batch should be dropped. + h.Send(alerts[1*DefaultMaxBatchSize : 2*DefaultMaxBatchSize]...) 
// This batch should be dropped. h.Send(alerts[2*DefaultMaxBatchSize : 3*DefaultMaxBatchSize]...) h.Send(alerts[3*DefaultMaxBatchSize : 4*DefaultMaxBatchSize]...) @@ -854,24 +767,7 @@ func TestHangingNotifier(t *testing.T) { notifier.alertmanagers = make(map[string]*alertmanagerSet) amCfg := config.DefaultAlertmanagerConfig amCfg.Timeout = model.Duration(sendTimeout) - notifier.alertmanagers["config-0"] = &alertmanagerSet{ - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return faultyURL.String() }, - }, - alertmanagerMock{ - urlf: func() string { return functionalURL.String() }, - }, - }, - cfg: &amCfg, - metrics: notifier.metrics, - sendLoops: map[string]*sendLoop{ - faultyURL.String(): newSendLoop(faultyURL.String(), nil, &amCfg, notifier.opts, slog.New(slog.DiscardHandler), notifier.metrics), - functionalURL.String(): newSendLoop(functionalURL.String(), nil, &amCfg, notifier.opts, slog.New(slog.DiscardHandler), notifier.metrics), - }, - opts: &Options{Do: do, MaxBatchSize: DefaultMaxBatchSize}, - logger: slog.New(slog.DiscardHandler), - } + notifier.alertmanagers["config-0"] = newTestAlertmanagerSet(&amCfg, nil, notifier.opts, notifier.metrics, faultyURL.String(), functionalURL.String()) for _, ams := range notifier.alertmanagers { ams.startSendLoops(ams.ams) @@ -932,7 +828,7 @@ loop2: // The faulty alertmanager was dropped. if len(notifier.Alertmanagers()) == 1 { // The notifier should not wait until the alerts queue of the functional am is empty to apply the discovery changes. 
- require.NotEmpty(t, notifier.alertmanagers["config-0"].sendLoops[functionalURL.String()].queue) + require.NotZero(t, notifier.alertmanagers["config-0"].sendLoops[functionalURL.String()].queueLen()) break loop2 } } @@ -982,20 +878,7 @@ func TestStop_DrainingDisabled(t *testing.T) { am1Cfg := config.DefaultAlertmanagerConfig am1Cfg.Timeout = model.Duration(time.Second) - - m.alertmanagers["1"] = &alertmanagerSet{ - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server.URL }, - }, - }, - cfg: &am1Cfg, - sendLoops: map[string]*sendLoop{ - server.URL: newSendLoop(server.URL, nil, &am1Cfg, m.opts, slog.New(slog.DiscardHandler), m.metrics), - }, - opts: &Options{Do: do, MaxBatchSize: DefaultMaxBatchSize}, - logger: slog.New(slog.DiscardHandler), - } + m.alertmanagers["1"] = newTestAlertmanagerSet(&am1Cfg, nil, m.opts, m.metrics, server.URL) for _, ams := range m.alertmanagers { ams.startSendLoops(ams.ams) @@ -1080,21 +963,7 @@ func TestStop_DrainingEnabled(t *testing.T) { am1Cfg := config.DefaultAlertmanagerConfig am1Cfg.Timeout = model.Duration(time.Second) - - m.alertmanagers["1"] = &alertmanagerSet{ - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server.URL }, - }, - }, - cfg: &am1Cfg, - sendLoops: map[string]*sendLoop{ - server.URL: newSendLoop(server.URL, nil, &am1Cfg, m.opts, slog.New(slog.DiscardHandler), m.metrics), - }, - opts: &Options{Do: do, MaxBatchSize: DefaultMaxBatchSize}, - metrics: m.metrics, - logger: slog.New(slog.DiscardHandler), - } + m.alertmanagers["1"] = newTestAlertmanagerSet(&am1Cfg, nil, m.opts, m.metrics, server.URL) for _, ams := range m.alertmanagers { ams.startSendLoops(ams.ams) @@ -1145,29 +1014,12 @@ func TestQueuesDrainingOnApplyConfig(t *testing.T) { server := newImmediateAlertManager(alertSent) defer server.Close() - h := NewManager(&Options{}, model.UTF8Validation, nil) + h := NewManager(&Options{QueueCapacity: 10, DrainOnShutdown: drainOnShutDown}, model.UTF8Validation, nil) 
h.alertmanagers = make(map[string]*alertmanagerSet) amCfg := config.DefaultAlertmanagerConfig amCfg.Timeout = model.Duration(time.Second) - - opts := &Options{Do: do, QueueCapacity: 10, MaxBatchSize: DefaultMaxBatchSize, DrainOnShutdown: drainOnShutDown} - logger := slog.New(slog.DiscardHandler) - - h.alertmanagers["1"] = &alertmanagerSet{ - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server.URL }, - }, - }, - cfg: &amCfg, - sendLoops: map[string]*sendLoop{ - server.URL: newSendLoop(server.URL, nil, &amCfg, opts, logger, h.metrics), - }, - opts: opts, - metrics: h.metrics, - logger: logger, - } + h.alertmanagers["1"] = newTestAlertmanagerSet(&amCfg, nil, h.opts, h.metrics, server.URL) // The send loops were not started, nothing will be sent. h.Send([]*Alert{{Labels: labels.FromStrings("alertname", "foo")}}...) @@ -1313,7 +1165,7 @@ func TestAlerstRelabelingIsIsolated(t *testing.T) { defer server1.Close() defer server2.Close() - h := NewManager(&Options{}, model.UTF8Validation, nil) + h := NewManager(&Options{QueueCapacity: 10}, model.UTF8Validation, nil) h.alertmanagers = make(map[string]*alertmanagerSet) am1Cfg := config.DefaultAlertmanagerConfig @@ -1333,37 +1185,11 @@ func TestAlerstRelabelingIsIsolated(t *testing.T) { am2Cfg.Timeout = model.Duration(time.Second) h.alertmanagers = map[string]*alertmanagerSet{ - "am1": { - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server1.URL }, - }, - }, - cfg: &am1Cfg, - sendLoops: map[string]*sendLoop{ - server1.URL: newSendLoop(server1.URL, nil, &am1Cfg, &Options{}, h.logger, h.metrics), - }, - opts: &Options{}, - metrics: h.metrics, - logger: h.logger, - }, - "am2": { - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return server2.URL }, - }, - }, - cfg: &am2Cfg, - sendLoops: map[string]*sendLoop{ - server2.URL: newSendLoop(server2.URL, nil, &am2Cfg, &Options{}, h.logger, h.metrics), - }, - opts: &Options{}, - metrics: h.metrics, - logger: h.logger, 
- }, + "am1": newTestAlertmanagerSet(&am1Cfg, nil, h.opts, h.metrics, server1.URL), + "am2": newTestAlertmanagerSet(&am2Cfg, nil, h.opts, h.metrics, server2.URL), } - // start send loops + // Start send loops. for _, ams := range h.alertmanagers { ams.startSendLoops(ams.ams) } @@ -1381,7 +1207,7 @@ func TestAlerstRelabelingIsIsolated(t *testing.T) { Labels: labels.FromStrings("alertname", "test", "parasite", "yes"), }) - // am2 shouldn't get the parasite label. + // Am2 shouldn't get the parasite label. expected2 = append(expected2, &Alert{ Labels: labels.FromStrings("alertname", "test"), }) @@ -1431,34 +1257,8 @@ func TestNotifierQueueIndependentOfFailedAlertmanager(t *testing.T) { amCfg := config.DefaultAlertmanagerConfig amCfg.Timeout = model.Duration(time.Hour * 24 * 365) - - h.alertmanagers["1"] = &alertmanagerSet{ - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return blackHoleAM.URL }, - }, - }, - cfg: &amCfg, - opts: h.opts, - sendLoops: map[string]*sendLoop{ - blackHoleAM.URL: newSendLoop(blackHoleAM.URL, http.DefaultClient, &amCfg, h.opts, slog.New(slog.DiscardHandler), h.metrics), - }, - metrics: h.metrics, - } - - h.alertmanagers["2"] = &alertmanagerSet{ - ams: []alertmanager{ - alertmanagerMock{ - urlf: func() string { return immediateAM.URL }, - }, - }, - cfg: &amCfg, - opts: h.opts, - sendLoops: map[string]*sendLoop{ - immediateAM.URL: newSendLoop(immediateAM.URL, http.DefaultClient, &amCfg, h.opts, slog.New(slog.DiscardHandler), h.metrics), - }, - metrics: h.metrics, - } + h.alertmanagers["1"] = newTestAlertmanagerSet(&amCfg, http.DefaultClient, h.opts, h.metrics, blackHoleAM.URL) + h.alertmanagers["2"] = newTestAlertmanagerSet(&amCfg, http.DefaultClient, h.opts, h.metrics, immediateAM.URL) doneSendAll := make(chan struct{}) for _, ams := range h.alertmanagers { From 9a49316c59fbee99e1000e7962a50edea8a4ffac Mon Sep 17 00:00:00 2001 From: zenador Date: Thu, 22 Jan 2026 22:24:41 +0800 Subject: [PATCH 10/46] promql: info function: 
fix unit test for ignoring info metrics themselves (#17911) Signed-off-by: Jeanette Tan --- promql/promqltest/testdata/info.test | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/promql/promqltest/testdata/info.test b/promql/promqltest/testdata/info.test index a0bf97322f..9bc4ed0fbc 100644 --- a/promql/promqltest/testdata/info.test +++ b/promql/promqltest/testdata/info.test @@ -70,7 +70,11 @@ eval range from 0m to 10m step 5m info(metric, {__name__=~".+_info"}) metric{instance="a", job="1", label="value", build_data="build", data="info", another_data="another info"} 0 1 2 # Info metrics themselves are ignored when it comes to enriching with info metric data labels. -eval range from 0m to 10m step 5m info(build_info, {__name__=~".+_info", build_data=~".+"}) +eval range from 0m to 10m step 5m info(build_info, {__name__=~".+_info", another_data=~".+"}) + build_info{instance="a", job="1", build_data="build"} 1 1 1 + +# Info metrics themselves are ignored when it comes to enriching with info metric data labels. +eval range from 0m to 10m step 5m info(build_info, {__name__=~".+_info"}) build_info{instance="a", job="1", build_data="build"} 1 1 1 clear From 9b549fa1183b008d10f77cd7453c06a406832ada Mon Sep 17 00:00:00 2001 From: Ben Kochie Date: Thu, 22 Jan 2026 16:36:30 +0100 Subject: [PATCH 11/46] Update Go yaml v3 library (#17913) Replace archived `gopkg.in/yaml.v3` with supported `go.yaml.in/yaml/v3`. 
Fixes: https://github.com/prometheus/prometheus/issues/16415 Signed-off-by: SuperQ --- discovery/aws/aws_test.go | 2 +- go.mod | 4 ++-- model/rulefmt/rulefmt.go | 2 +- model/rulefmt/rulefmt_test.go | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/discovery/aws/aws_test.go b/discovery/aws/aws_test.go index 9d3728911b..dc1f2044ec 100644 --- a/discovery/aws/aws_test.go +++ b/discovery/aws/aws_test.go @@ -20,7 +20,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "gopkg.in/yaml.v3" + "go.yaml.in/yaml/v3" ) func TestRoleUnmarshalYAML(t *testing.T) { diff --git a/go.mod b/go.mod index ab3464f72a..afc3f2740d 100644 --- a/go.mod +++ b/go.mod @@ -84,6 +84,7 @@ require ( go.uber.org/automaxprocs v1.6.0 go.uber.org/goleak v1.3.0 go.yaml.in/yaml/v2 v2.4.3 + go.yaml.in/yaml/v3 v3.0.4 golang.org/x/oauth2 v0.34.0 golang.org/x/sync v0.19.0 golang.org/x/sys v0.39.0 @@ -92,7 +93,6 @@ require ( google.golang.org/genproto/googleapis/api v0.0.0-20251222181119-0a764e51fe1b google.golang.org/grpc v1.78.0 google.golang.org/protobuf v1.36.11 - gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.34.3 k8s.io/apimachinery v0.34.3 k8s.io/client-go v0.34.3 @@ -114,7 +114,7 @@ require ( github.com/go-openapi/swag/typeutils v0.25.4 // indirect github.com/go-openapi/swag/yamlutils v0.25.4 // indirect go.uber.org/multierr v1.11.0 // indirect - go.yaml.in/yaml/v3 v3.0.4 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect ) diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index 70541eb0d3..2cbfdf4cfc 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -24,7 +24,7 @@ import ( "time" "github.com/prometheus/common/model" - "gopkg.in/yaml.v3" + "go.yaml.in/yaml/v3" "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" diff --git a/model/rulefmt/rulefmt_test.go b/model/rulefmt/rulefmt_test.go index ec16052bc0..ea8d09af0d 100644 --- 
a/model/rulefmt/rulefmt_test.go +++ b/model/rulefmt/rulefmt_test.go @@ -21,7 +21,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "gopkg.in/yaml.v3" + "go.yaml.in/yaml/v3" ) func TestParseFileSuccess(t *testing.T) { From 2a890d6fcfe3233bae4e24f904fad6bec635440a Mon Sep 17 00:00:00 2001 From: Ben Kochie Date: Thu, 22 Jan 2026 16:36:54 +0100 Subject: [PATCH 12/46] Bump promci action (#17912) Update promci to pick up `latest` tag fix. Fixes: https://github.com/prometheus/prometheus/issues/16238 Signed-off-by: SuperQ --- .github/workflows/ci.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d25176252..d1f3a0c988 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - - uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3 + - uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4 - uses: ./.github/promci/actions/setup_environment with: enable_npm: true @@ -37,7 +37,7 @@ jobs: - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - - uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3 + - uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... - run: go test --tags=slicelabels -race ./cmd/prometheus ./model/textparse ./prompb/... 
@@ -81,7 +81,7 @@ jobs: - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - - uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3 + - uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4 - uses: ./.github/promci/actions/setup_environment with: enable_go: false @@ -146,7 +146,7 @@ jobs: - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - - uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3 + - uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4 - uses: ./.github/promci/actions/build with: promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" @@ -173,7 +173,7 @@ jobs: - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - - uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3 + - uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4 - uses: ./.github/promci/actions/build with: parallelism: 12 @@ -212,7 +212,7 @@ jobs: uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - - uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3 + - uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4 - uses: ./.github/promci/actions/setup_environment with: enable_npm: true @@ -270,7 +270,7 @@ jobs: - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - - uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3 + - uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4 - uses: ./.github/promci/actions/publish_main with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -289,7 +289,7 @@ jobs: - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 
# v6.0.1 with: persist-credentials: false - - uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3 + - uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4 - uses: ./.github/promci/actions/publish_release with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -306,7 +306,7 @@ jobs: uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false - - uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3 + - uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4 - name: Install nodejs uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0 with: From 2437977bffc82b76ec298622ddb2904bfd70b9df Mon Sep 17 00:00:00 2001 From: Siavash Safi Date: Thu, 22 Jan 2026 22:22:44 +0100 Subject: [PATCH 13/46] fix(notify): apply config sendloop cleanup fix (#17915) These bugs were discovered accidentally with code analysis: - https://app.devin.ai/review/prometheus/prometheus/pull/16355 Upon further inspection and performing more analysis, 3 potential bugs were found: 1. sendloops could continue running if corresponding AM changed position in the config 2. multiple configs with the same hash would share sendloops resulting in sets without sendloops 3. 
sendloops could continue running if the config hash was changed - `TestApplyConfigSendLoopsNotStoppedOnKeyChange`: Verifies sendLoops work when keys swap (no fix needed) - `TestApplyConfigDuplicateHashSharesSendLoops`: Verifies sendLoops are independent with duplicate hashes (bug fixed) - `TestApplyConfigHashChangeLeaksSendLoops`: Verifies sendLoops are cleaned up when hash changes (bug fixed) Signed-off-by: Siavash Safi --- notifier/manager.go | 26 +++- notifier/manager_test.go | 275 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+), 6 deletions(-) diff --git a/notifier/manager.go b/notifier/manager.go index e362f2bfd4..7eeed79b79 100644 --- a/notifier/manager.go +++ b/notifier/manager.go @@ -163,19 +163,33 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { if oldAmSet, ok := configToAlertmanagers[hash]; ok { ams.ams = oldAmSet.ams ams.droppedAms = oldAmSet.droppedAms - ams.sendLoops = oldAmSet.sendLoops + // Only transfer sendLoops to the first new config with this hash. + // Subsequent configs with the same hash should not share the sendLoops + // map reference, as that would cause shared mutable state between + // alertmanagerSets (cleanup in one would affect the other). + oldAmSet.mtx.Lock() + if oldAmSet.sendLoops != nil { + ams.mtx.Lock() + ams.sendLoops = oldAmSet.sendLoops + oldAmSet.sendLoops = nil + ams.mtx.Unlock() + } + oldAmSet.mtx.Unlock() } amSets[k] = ams } - // Clean up the send loops of sets that don't exist in the new config. - for k, oldAmSet := range n.alertmanagers { - if _, exists := amSets[k]; !exists { - oldAmSet.mtx.Lock() + // Clean up sendLoops that weren't transferred to new config. + // This happens when: (1) key was removed, or (2) key exists but hash changed. + // After the transfer loop above, any oldAmSet with non-nil sendLoops + // had its sendLoops NOT transferred (since we set it to nil on transfer). 
+ for _, oldAmSet := range n.alertmanagers { + oldAmSet.mtx.Lock() + if oldAmSet.sendLoops != nil { oldAmSet.cleanSendLoops(oldAmSet.ams...) - oldAmSet.mtx.Unlock() } + oldAmSet.mtx.Unlock() } n.alertmanagers = amSets diff --git a/notifier/manager_test.go b/notifier/manager_test.go index 39fc35a409..ed224462ff 100644 --- a/notifier/manager_test.go +++ b/notifier/manager_test.go @@ -1292,6 +1292,281 @@ func TestNotifierQueueIndependentOfFailedAlertmanager(t *testing.T) { } } +// TestApplyConfigSendLoopsNotStoppedOnKeyChange reproduces a bug where sendLoops +// are incorrectly stopped when the alertmanager config key changes but the config +// content (and thus its hash) remains the same. +// +// The bug scenario: +// 1. Old config has alertmanager set with key "config-0" and config hash X +// 2. New config has TWO alertmanager sets where the SECOND one ("config-1") has hash X +// 3. sendLoops are transferred from old "config-0" to new "config-1" (hash match) +// 4. Cleanup checks if key "config-0" exists in new config — it does (different config) +// 5. No cleanup happens for old "config-0", sendLoops work correctly +// +// However, there's a variant where the key disappears completely: +// 1. Old config: "config-0" with hash X, "config-1" with hash Y +// 2. New config: "config-0" with hash Y (was "config-1"), no "config-1" +// 3. sendLoops from old "config-0" (hash X) have nowhere to go +// 4. Cleanup sees "config-1" doesn't exist, tries to clean up old "config-1" +// +// This test verifies that when config keys change, sendLoops are correctly preserved. 
+func TestApplyConfigSendLoopsNotStoppedOnKeyChange(t *testing.T) { + alertReceived := make(chan struct{}, 10) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + select { + case alertReceived <- struct{}{}: + default: + } + })) + defer server.Close() + + targetURL := server.Listener.Addr().String() + targetGroup := &targetgroup.Group{ + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue(targetURL), + }, + }, + } + + n := NewManager(&Options{QueueCapacity: 10}, model.UTF8Validation, nil) + cfg := &config.Config{} + + // Initial config with TWO alertmanager configs. + // "config-0" uses file_sd_configs with foo.json (hash X) + // "config-1" uses file_sd_configs with bar.json (hash Y) + s := ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + - file_sd_configs: + - files: + - bar.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.NoError(t, n.ApplyConfig(cfg)) + + // Reload with target groups to discover alertmanagers. + tgs := map[string][]*targetgroup.Group{ + "config-0": {targetGroup}, + "config-1": {targetGroup}, + } + n.reload(tgs) + require.Len(t, n.Alertmanagers(), 2) + + // Verify sendLoops exist for both configs. + require.Len(t, n.alertmanagers["config-0"].sendLoops, 1) + require.Len(t, n.alertmanagers["config-1"].sendLoops, 1) + + // Start the send loops. + for _, ams := range n.alertmanagers { + ams.startSendLoops(ams.ams) + } + defer func() { + for _, ams := range n.alertmanagers { + ams.mtx.Lock() + ams.cleanSendLoops(ams.ams...) + ams.mtx.Unlock() + } + }() + + // Send an alert and verify it's received (twice, once per alertmanager set). + n.Send(&Alert{Labels: labels.FromStrings("alertname", "test1")}) + for range 2 { + select { + case <-alertReceived: + // Good, alert was sent. 
+ case <-time.After(2 * time.Second): + require.FailNow(t, "timeout waiting for first alert") + } + } + + // Apply a new config that REVERSES the order of alertmanager configs. + // Now "config-0" has hash Y (was bar.json) and "config-1" has hash X (was foo.json). + // The sendLoops should be transferred based on hash matching. + s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - bar.json + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.NoError(t, n.ApplyConfig(cfg)) + + // CRITICAL CHECK: After ApplyConfig but BEFORE reload, the sendLoops should + // have been transferred based on hash matching and NOT stopped. + // - Old "config-0" (foo.json, hash X) -> New "config-1" (foo.json, hash X) + // - Old "config-1" (bar.json, hash Y) -> New "config-0" (bar.json, hash Y) + // Both old keys exist in new config, so no cleanup should happen. + require.Len(t, n.alertmanagers["config-0"].sendLoops, 1, "sendLoops should be transferred to config-0") + require.Len(t, n.alertmanagers["config-1"].sendLoops, 1, "sendLoops should be transferred to config-1") + + // Reload with target groups for the new config. + tgs = map[string][]*targetgroup.Group{ + "config-0": {targetGroup}, + "config-1": {targetGroup}, + } + n.reload(tgs) + + // The alertmanagers should still be discoverable. + require.Len(t, n.Alertmanagers(), 2) + + // The critical test: send another alert and verify it's received by both. + n.Send(&Alert{Labels: labels.FromStrings("alertname", "test2")}) + for range 2 { + select { + case <-alertReceived: + // Good, alert was sent - sendLoops are still working. 
+ case <-time.After(2 * time.Second): + require.FailNow(t, "timeout waiting for second alert - sendLoops may have been incorrectly stopped") + } + } +} + +// TestApplyConfigDuplicateHashSharesSendLoops tests a bug where multiple new +// alertmanager configs with identical content (same hash) all receive the same +// sendLoops map reference, causing shared mutable state between alertmanagerSets. +// +// Bug scenario: +// 1. Old config: "config-0" with hash X +// 2. New config: "config-0" AND "config-1" both with hash X (identical configs) +// 3. Both new sets get `sendLoops = oldAmSet.sendLoops` (same map reference!) +// 4. Now config-0 and config-1 share the same sendLoops map +// 5. When config-1's alertmanager is removed via sync(), it cleans up the shared +// sendLoops, breaking config-0's ability to send alerts +func TestApplyConfigDuplicateHashSharesSendLoops(t *testing.T) { + n := NewManager(&Options{QueueCapacity: 10}, model.UTF8Validation, nil) + cfg := &config.Config{} + + // Initial config with ONE alertmanager. + s := ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.NoError(t, n.ApplyConfig(cfg)) + + targetGroup := &targetgroup.Group{ + Targets: []model.LabelSet{ + {"__address__": "alertmanager:9093"}, + }, + } + tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}} + n.reload(tgs) + + require.Len(t, n.alertmanagers["config-0"].sendLoops, 1) + + // Apply a new config with TWO IDENTICAL alertmanager configs. + // Both have the same hash, so both will receive sendLoops from the same old set. + s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.NoError(t, n.ApplyConfig(cfg)) + + // Reload with target groups for both configs - same alertmanager URL for both. 
+ tgs = map[string][]*targetgroup.Group{ + "config-0": {targetGroup}, + "config-1": {targetGroup}, + } + n.reload(tgs) + + // Both alertmanagerSets should have independent sendLoops. + sendLoops0 := n.alertmanagers["config-0"].sendLoops + sendLoops1 := n.alertmanagers["config-1"].sendLoops + + require.Len(t, sendLoops0, 1, "config-0 should have sendLoops") + require.Len(t, sendLoops1, 1, "config-1 should have sendLoops") + + // Verify that the two alertmanagerSets have INDEPENDENT sendLoops maps. + // They should NOT share the same sendLoop objects. + for k := range sendLoops0 { + if loop1, ok := sendLoops1[k]; ok { + require.NotSame(t, sendLoops0[k], loop1, + "config-0 and config-1 should have independent sendLoop instances, not shared references") + } + } +} + +// TestApplyConfigHashChangeLeaksSendLoops tests a bug where sendLoops goroutines +// are leaked when the config key remains the same but the config hash changes. +// +// Bug scenario: +// 1. Old config has "config-0" with hash H1 and running sendLoops +// 2. New config has "config-0" with hash H2 (modified config) +// 3. Since hash differs, sendLoops are NOT transferred to the new alertmanagerSet +// 4. Cleanup only checks if key exists in amSets - it does, so no cleanup +// 5. Old sendLoops goroutines continue running and are never stopped +func TestApplyConfigHashChangeLeaksSendLoops(t *testing.T) { + n := NewManager(&Options{QueueCapacity: 10}, model.UTF8Validation, nil) + cfg := &config.Config{} + + // Initial config with one alertmanager. + s := ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.NoError(t, n.ApplyConfig(cfg)) + + targetGroup := &targetgroup.Group{ + Targets: []model.LabelSet{ + {"__address__": "alertmanager:9093"}, + }, + } + tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}} + n.reload(tgs) + + // Capture the old sendLoop. 
+ oldSendLoops := n.alertmanagers["config-0"].sendLoops + require.Len(t, oldSendLoops, 1) + var oldSendLoop *sendLoop + for _, sl := range oldSendLoops { + oldSendLoop = sl + } + + // Apply a new config with DIFFERENT hash (added path_prefix). + s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + path_prefix: /changed +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.NoError(t, n.ApplyConfig(cfg)) + + // The old sendLoop should have been stopped since hash changed. + // Check that the stopped channel is closed. + select { + case <-oldSendLoop.stopped: + // Good - sendLoop was properly stopped + default: + t.Fatal("BUG: old sendLoop was not stopped when config hash changed - goroutine leak") + } +} + func newBlackHoleAlertmanager(stop <-chan struct{}) *httptest.Server { return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { // Do nothing, wait to be canceled. From 0d116b09944cf98822b7434ef5a132640ff437cf Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Fri, 23 Jan 2026 08:41:35 +0000 Subject: [PATCH 14/46] tests(teststorage): Close Storage in the helper (#17902) Signed-off-by: bwplotka --- cmd/promtool/main_test.go | 1 - cmd/promtool/tsdb_test.go | 2 -- promql/bench_test.go | 6 +---- promql/engine_test.go | 9 +------ promql/functions_test.go | 2 +- promql/promql_test.go | 2 +- rules/alerting_test.go | 9 ------- rules/manager_test.go | 48 ++++++++++++++----------------------- rules/recording_test.go | 3 --- scrape/scrape_test.go | 21 ---------------- storage/fanout_test.go | 8 ------- util/teststorage/storage.go | 8 +++++++ web/api/v1/api_test.go | 8 ++----- web/federate_test.go | 2 -- 14 files changed, 32 insertions(+), 97 deletions(-) diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 4f4ca3de71..9e6e7268f7 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -734,7 +734,6 @@ func TestTSDBDumpCommand(t *testing.T) { load 1m 
metric{foo="bar"} 1 2 3 `) - t.Cleanup(func() { storage.Close() }) for _, c := range []struct { name string diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go index 3a2a5aff72..859c521d64 100644 --- a/cmd/promtool/tsdb_test.go +++ b/cmd/promtool/tsdb_test.go @@ -97,7 +97,6 @@ func TestTSDBDump(t *testing.T) { heavy_metric{foo="bar"} 5 4 3 2 1 heavy_metric{foo="foo"} 5 4 3 2 1 `) - t.Cleanup(func() { storage.Close() }) tests := []struct { name string @@ -196,7 +195,6 @@ func TestTSDBDumpOpenMetrics(t *testing.T) { my_counter{foo="bar", baz="abc"} 1 2 3 4 5 my_gauge{bar="foo", abc="baz"} 9 8 0 4 7 `) - t.Cleanup(func() { storage.Close() }) tests := []struct { name string diff --git a/promql/bench_test.go b/promql/bench_test.go index f647b03600..2e70718b3b 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -338,7 +338,7 @@ func BenchmarkRangeQuery(b *testing.B) { }) stor := teststorage.New(b) stor.DisableCompactions() // Don't want auto-compaction disrupting timings. - defer stor.Close() + opts := promql.EngineOpts{ Logger: nil, Reg: nil, @@ -383,7 +383,6 @@ func BenchmarkRangeQuery(b *testing.B) { func BenchmarkJoinQuery(b *testing.B) { stor := teststorage.New(b) stor.DisableCompactions() // Don't want auto-compaction disrupting timings. 
- defer stor.Close() opts := promql.EngineOpts{ Logger: nil, @@ -445,7 +444,6 @@ func BenchmarkJoinQuery(b *testing.B) { func BenchmarkNativeHistograms(b *testing.B) { testStorage := teststorage.New(b) - defer testStorage.Close() app := testStorage.Appender(context.TODO()) if err := generateNativeHistogramSeries(app, 3000); err != nil { @@ -523,7 +521,6 @@ func BenchmarkNativeHistograms(b *testing.B) { func BenchmarkNativeHistogramsCustomBuckets(b *testing.B) { testStorage := teststorage.New(b) - defer testStorage.Close() app := testStorage.Appender(context.TODO()) if err := generateNativeHistogramCustomBucketsSeries(app, 3000); err != nil { @@ -594,7 +591,6 @@ func BenchmarkNativeHistogramsCustomBuckets(b *testing.B) { func BenchmarkInfoFunction(b *testing.B) { // Initialize test storage and generate test series data. testStorage := teststorage.New(b) - defer testStorage.Close() start := time.Unix(0, 0) end := start.Add(2 * time.Hour) diff --git a/promql/engine_test.go b/promql/engine_test.go index 0eff93af4c..ca1d5471c1 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -676,7 +676,6 @@ func TestEngineEvalStmtTimestamps(t *testing.T) { load 10s metric 1 2 `) - t.Cleanup(func() { storage.Close() }) cases := []struct { Query string @@ -789,7 +788,6 @@ load 10s metricWith3SampleEvery10Seconds{a="3",b="2"} 1+1x100 metricWith1HistogramEvery10Seconds {{schema:1 count:5 sum:20 buckets:[1 2 1 1]}}+{{schema:1 count:10 sum:5 buckets:[1 2 3 4]}}x100 `) - t.Cleanup(func() { storage.Close() }) cases := []struct { Query string @@ -1339,7 +1337,6 @@ load 10s bigmetric{a="1"} 1+1x100 bigmetric{a="2"} 1+1x100 `) - t.Cleanup(func() { storage.Close() }) // These test cases should be touching the limit exactly (hence no exceeding). // Exceeding the limit will be tested by doing -1 to the MaxSamples. 
@@ -1523,7 +1520,6 @@ func TestExtendedRangeSelectors(t *testing.T) { withreset 1+1x4 1+1x5 notregular 0 5 100 2 8 `) - t.Cleanup(func() { storage.Close() }) tc := []struct { query string @@ -1677,7 +1673,6 @@ load 10s load 1ms metric_ms 0+1x10000 `) - t.Cleanup(func() { storage.Close() }) lbls1 := labels.FromStrings("__name__", "metric", "job", "1") lbls2 := labels.FromStrings("__name__", "metric", "job", "2") @@ -2283,7 +2278,6 @@ func TestSubquerySelector(t *testing.T) { t.Run("", func(t *testing.T) { engine := newTestEngine(t) storage := promqltest.LoadedStorage(t, tst.loadString) - t.Cleanup(func() { storage.Close() }) for _, c := range tst.cases { t.Run(c.Query, func(t *testing.T) { @@ -3410,7 +3404,6 @@ metric 0 1 2 t.Run(c.name, func(t *testing.T) { engine := promqltest.NewTestEngine(t, false, c.engineLookback, promqltest.DefaultMaxSamplesPerQuery) storage := promqltest.LoadedStorage(t, load) - t.Cleanup(func() { storage.Close() }) opts := promql.NewPrometheusQueryOpts(false, c.queryLookback) qry, err := engine.NewInstantQuery(context.Background(), storage, opts, query, c.ts) @@ -3444,7 +3437,7 @@ func TestHistogramCopyFromIteratorRegression(t *testing.T) { histogram {{sum:4 count:4 buckets:[2 2]}} {{sum:6 count:6 buckets:[3 3]}} {{sum:1 count:1 buckets:[1]}} ` storage := promqltest.LoadedStorage(t, load) - t.Cleanup(func() { storage.Close() }) + engine := promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery) verify := func(t *testing.T, qry promql.Query, expected []histogram.FloatHistogram) { diff --git a/promql/functions_test.go b/promql/functions_test.go index 2566843092..023417bfc2 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -33,7 +33,7 @@ func TestDeriv(t *testing.T) { // This requires more precision than the usual test system offers, // so we test it by hand. 
storage := teststorage.New(t) - defer storage.Close() + opts := promql.EngineOpts{ Logger: nil, Reg: nil, diff --git a/promql/promql_test.go b/promql/promql_test.go index fc13f7e64f..a6bc437b6b 100644 --- a/promql/promql_test.go +++ b/promql/promql_test.go @@ -39,7 +39,7 @@ func TestEvaluations(t *testing.T) { // Run a lot of queries at the same time, to check for race conditions. func TestConcurrentRangeQueries(t *testing.T) { stor := teststorage.New(t) - defer stor.Close() + opts := promql.EngineOpts{ Logger: nil, Reg: nil, diff --git a/rules/alerting_test.go b/rules/alerting_test.go index caf32e6472..ec53d9086b 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -158,7 +158,6 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 stale `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests < 100`) require.NoError(t, err) @@ -264,7 +263,6 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests < 100`) require.NoError(t, err) @@ -359,7 +357,6 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests < 100`) require.NoError(t, err) @@ -454,7 +451,6 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests < 100`) require.NoError(t, err) @@ -510,7 +506,6 @@ func TestAlertingRuleQueryInTemplate(t *testing.T) { load 1m http_requests{job="app-server", instance="0"} 70 85 70 70 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`sum(http_requests) < 100`) 
require.NoError(t, err) @@ -584,7 +579,6 @@ func BenchmarkAlertingRuleAtomicField(b *testing.B) { func TestAlertingRuleDuplicate(t *testing.T) { storage := teststorage.New(t) - defer storage.Close() opts := promql.EngineOpts{ Logger: nil, @@ -621,7 +615,6 @@ func TestAlertingRuleLimit(t *testing.T) { metric{label="1"} 1 metric{label="2"} 1 `) - t.Cleanup(func() { storage.Close() }) tests := []struct { limit int @@ -805,7 +798,6 @@ func TestKeepFiringFor(t *testing.T) { load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 10x5 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests > 50`) require.NoError(t, err) @@ -916,7 +908,6 @@ func TestPendingAndKeepFiringFor(t *testing.T) { load 1m http_requests{job="app-server", instance="0"} 75 10x10 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests > 50`) require.NoError(t, err) diff --git a/rules/manager_test.go b/rules/manager_test.go index a716304b7a..3fcb90808e 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -62,7 +62,6 @@ func TestAlertingRule(t *testing.T) { http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) require.NoError(t, err) @@ -205,7 +204,6 @@ func TestForStateAddSamples(t *testing.T) { http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) require.NoError(t, err) @@ -367,7 +365,6 @@ func TestForStateRestore(t *testing.T) 
{ http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 50 0 0 25 0 0 40 0 120 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 125 90 60 0 0 25 0 0 40 0 130 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) require.NoError(t, err) @@ -538,7 +535,7 @@ func TestForStateRestore(t *testing.T) { func TestStaleness(t *testing.T) { for _, queryOffset := range []time.Duration{0, time.Minute} { st := teststorage.New(t) - defer st.Close() + engineOpts := promql.EngineOpts{ Logger: nil, Reg: nil, @@ -726,7 +723,7 @@ func TestCopyState(t *testing.T) { func TestDeletedRuleMarkedStale(t *testing.T) { st := teststorage.New(t) - defer st.Close() + oldGroup := &Group{ rules: []Rule{ NewRecordingRule("rule1", nil, labels.FromStrings("l1", "v1")), @@ -772,7 +769,7 @@ func TestUpdate(t *testing.T) { "test": labels.FromStrings("name", "value"), } st := teststorage.New(t) - defer st.Close() + opts := promql.EngineOpts{ Logger: nil, Reg: nil, @@ -910,7 +907,7 @@ func reloadAndValidate(rgs *rulefmt.RuleGroups, t *testing.T, tmpFile *os.File, func TestNotify(t *testing.T) { storage := teststorage.New(t) - defer storage.Close() + engineOpts := promql.EngineOpts{ Logger: nil, Reg: nil, @@ -984,7 +981,7 @@ func TestMetricsUpdate(t *testing.T) { } storage := teststorage.New(t) - defer storage.Close() + registry := prometheus.NewRegistry() opts := promql.EngineOpts{ Logger: nil, @@ -1057,7 +1054,7 @@ func TestGroupStalenessOnRemoval(t *testing.T) { sameFiles := []string{"fixtures/rules2_copy.yaml"} storage := teststorage.New(t) - defer storage.Close() + opts := promql.EngineOpts{ Logger: nil, Reg: nil, @@ -1135,7 +1132,7 @@ func TestMetricsStalenessOnManagerShutdown(t *testing.T) { files := []string{"fixtures/rules2.yaml"} storage := teststorage.New(t) - defer storage.Close() + opts := promql.EngineOpts{ Logger: nil, Reg: nil, @@ -1205,7 
+1202,7 @@ func TestRuleMovedBetweenGroups(t *testing.T) { storage := teststorage.New(t, func(opt *tsdb.Options) { opt.OutOfOrderTimeWindow = 600000 }) - defer storage.Close() + opts := promql.EngineOpts{ Logger: nil, Reg: nil, @@ -1287,7 +1284,7 @@ func TestGroupHasAlertingRules(t *testing.T) { func TestRuleHealthUpdates(t *testing.T) { st := teststorage.New(t) - defer st.Close() + engineOpts := promql.EngineOpts{ Logger: nil, Reg: nil, @@ -1348,7 +1345,6 @@ func TestRuleGroupEvalIterationFunc(t *testing.T) { load 5m http_requests{instance="0"} 75 85 50 0 0 25 0 0 40 0 120 `) - t.Cleanup(func() { storage.Close() }) expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) require.NoError(t, err) @@ -1463,7 +1459,6 @@ func TestRuleGroupEvalIterationFunc(t *testing.T) { func TestNativeHistogramsInRecordingRules(t *testing.T) { storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) // Add some histograms. db := storage.DB @@ -1525,9 +1520,6 @@ func TestNativeHistogramsInRecordingRules(t *testing.T) { func TestManager_LoadGroups_ShouldCheckWhetherEachRuleHasDependentsAndDependencies(t *testing.T) { storage := teststorage.New(t) - t.Cleanup(func() { - require.NoError(t, storage.Close()) - }) ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), @@ -2021,7 +2013,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { t.Run("synchronous evaluation with independent rules", func(t *testing.T) { t.Parallel() storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) + inflightQueries := atomic.Int32{} maxInflight := atomic.Int32{} @@ -2060,7 +2052,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { t.Run("asynchronous evaluation with independent and dependent rules", func(t *testing.T) { t.Parallel() storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) + inflightQueries := atomic.Int32{} maxInflight := atomic.Int32{} @@ -2099,7 +2091,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { 
t.Run("asynchronous evaluation of all independent rules, insufficient concurrency", func(t *testing.T) { t.Parallel() storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) + inflightQueries := atomic.Int32{} maxInflight := atomic.Int32{} @@ -2144,7 +2136,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { t.Run("asynchronous evaluation of all independent rules, sufficient concurrency", func(t *testing.T) { t.Parallel() storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) + inflightQueries := atomic.Int32{} maxInflight := atomic.Int32{} @@ -2192,7 +2184,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { t.Run("asynchronous evaluation of independent rules, with indeterminate. Should be synchronous", func(t *testing.T) { t.Parallel() storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) + inflightQueries := atomic.Int32{} maxInflight := atomic.Int32{} @@ -2231,7 +2223,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { t.Run("asynchronous evaluation of rules that benefit from reordering", func(t *testing.T) { t.Parallel() storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) + inflightQueries := atomic.Int32{} maxInflight := atomic.Int32{} @@ -2277,7 +2269,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { t.Run("attempted asynchronous evaluation of chained rules", func(t *testing.T) { t.Parallel() storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) + inflightQueries := atomic.Int32{} maxInflight := atomic.Int32{} @@ -2325,7 +2317,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { func TestNewRuleGroupRestoration(t *testing.T) { t.Parallel() store := teststorage.New(t) - t.Cleanup(func() { store.Close() }) + var ( inflightQueries atomic.Int32 maxInflight atomic.Int32 @@ -2389,7 +2381,7 @@ func TestNewRuleGroupRestoration(t *testing.T) { func TestNewRuleGroupRestorationWithRestoreNewGroupOption(t *testing.T) { t.Parallel() store := teststorage.New(t) - t.Cleanup(func() { store.Close() }) + var 
( inflightQueries atomic.Int32 maxInflight atomic.Int32 @@ -2459,7 +2451,6 @@ func TestNewRuleGroupRestorationWithRestoreNewGroupOption(t *testing.T) { func TestBoundedRuleEvalConcurrency(t *testing.T) { storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) var ( inflightQueries atomic.Int32 @@ -2514,7 +2505,6 @@ func TestUpdateWhenStopped(t *testing.T) { func TestGroup_Eval_RaceConditionOnStoppingGroupEvaluationWhileRulesAreEvaluatedConcurrently(t *testing.T) { storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) var ( inflightQueries atomic.Int32 @@ -2733,7 +2723,6 @@ func TestRuleDependencyController_AnalyseRules(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), @@ -2762,7 +2751,6 @@ func TestRuleDependencyController_AnalyseRules(t *testing.T) { func BenchmarkRuleDependencyController_AnalyseRules(b *testing.B) { storage := teststorage.New(b) - b.Cleanup(func() { storage.Close() }) ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), diff --git a/rules/recording_test.go b/rules/recording_test.go index 29208b6392..3a8bb9c2ff 100644 --- a/rules/recording_test.go +++ b/rules/recording_test.go @@ -121,7 +121,6 @@ func setUpRuleEvalTest(t testing.TB) *teststorage.TestStorage { func TestRuleEval(t *testing.T) { storage := setUpRuleEvalTest(t) - t.Cleanup(func() { storage.Close() }) ng := testEngine(t) for _, scenario := range ruleEvalTestScenarios { @@ -158,7 +157,6 @@ func BenchmarkRuleEval(b *testing.B) { // TestRuleEvalDuplicate tests for duplicate labels in recorded metrics, see #5529. 
func TestRuleEvalDuplicate(t *testing.T) { storage := teststorage.New(t) - defer storage.Close() opts := promql.EngineOpts{ Logger: nil, @@ -185,7 +183,6 @@ func TestRecordingRuleLimit(t *testing.T) { metric{label="1"} 1 metric{label="2"} 1 `) - t.Cleanup(func() { storage.Close() }) tests := []struct { limit int diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 9c12a31ab3..74fdf8a962 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -131,7 +131,6 @@ func testStorageHandlesOutOfOrderTimestamps(t *testing.T, appV2 bool) { // Test with default OutOfOrderTimeWindow (0) t.Run("Out-Of-Order Sample Disabled", func(t *testing.T) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) runScrapeLoopTest(t, appV2, s, false) }) @@ -140,7 +139,6 @@ func testStorageHandlesOutOfOrderTimestamps(t *testing.T, appV2 bool) { s := teststorage.New(t, func(opt *tsdb.Options) { opt.OutOfOrderTimeWindow = 600000 }) - t.Cleanup(func() { _ = s.Close() }) runScrapeLoopTest(t, appV2, s, true) }) @@ -1610,7 +1608,6 @@ func benchScrapeLoopAppend( opt.MaxExemplars = 1e5 } }) - b.Cleanup(func() { _ = s.Close() }) sl, _ := newTestScrapeLoop(b, withAppendable(s, appV2), func(sl *scrapeLoop) { sl.appendMetadataToWAL = appendMetadataToWAL @@ -1697,7 +1694,6 @@ func BenchmarkScrapeLoopScrapeAndReport(b *testing.B) { parsableText := readTextParseTestMetrics(b) s := teststorage.New(b) - b.Cleanup(func() { _ = s.Close() }) sl, scraper := newTestScrapeLoop(b, withAppendable(s, appV2), func(sl *scrapeLoop) { sl.fallbackScrapeProtocol = "application/openmetrics-text" @@ -1730,7 +1726,6 @@ func testSetOptionsHandlingStaleness(t *testing.T, appV2 bool) { s := teststorage.New(t, func(opt *tsdb.Options) { opt.OutOfOrderTimeWindow = 600000 }) - t.Cleanup(func() { _ = s.Close() }) signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) @@ -2001,7 +1996,6 @@ func TestScrapeLoopCache(t *testing.T) { func testScrapeLoopCache(t *testing.T, appV2 bool) { s 
:= teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) signal := make(chan struct{}, 1) @@ -2071,7 +2065,6 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { func testScrapeLoopCacheMemoryExhaustionProtection(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) signal := make(chan struct{}, 1) @@ -3881,7 +3874,6 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) { func testScrapeLoopRespectTimestamps(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) appTest := teststorage.NewAppendable().Then(s) sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) @@ -3910,7 +3902,6 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) { func testScrapeLoopDiscardTimestamps(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) appTest := teststorage.NewAppendable().Then(s) sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { @@ -3941,7 +3932,6 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { func testScrapeLoopDiscardDuplicateLabels(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) appTest := teststorage.NewAppendable().Then(s) sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) @@ -3983,7 +3973,6 @@ func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) { func testScrapeLoopDiscardUnnamedMetrics(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) appTest := teststorage.NewAppendable().Then(s) sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { @@ -4274,7 +4263,6 @@ func TestScrapeAddFast(t *testing.T) { func testScrapeAddFast(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) sl, _ := newTestScrapeLoop(t, withAppendable(s, appV2)) @@ -4357,7 +4345,6 @@ func TestScrapeReportSingleAppender(t *testing.T) { func 
testScrapeReportSingleAppender(t *testing.T, appV2 bool) { t.Parallel() s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) signal := make(chan struct{}, 1) @@ -4417,7 +4404,6 @@ func TestScrapeReportLimit(t *testing.T) { func testScrapeReportLimit(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) cfg := &config.ScrapeConfig{ JobName: "test", @@ -4480,7 +4466,6 @@ func TestScrapeUTF8(t *testing.T) { func testScrapeUTF8(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) cfg := &config.ScrapeConfig{ JobName: "test", @@ -4678,7 +4663,6 @@ func TestLeQuantileReLabel(t *testing.T) { func testLeQuantileReLabel(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) cfg := &config.ScrapeConfig{ JobName: "test", @@ -5205,7 +5189,6 @@ metric: < t.Run(fmt.Sprintf("%s with %s", name, metricsTextName), func(t *testing.T) { t.Parallel() s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) sl, _ := newTestScrapeLoop(t, withAppendable(s, appV2), func(sl *scrapeLoop) { sl.alwaysScrapeClassicHist = tc.alwaysScrapeClassicHistograms @@ -5293,7 +5276,6 @@ func TestTypeUnitReLabel(t *testing.T) { func testTypeUnitReLabel(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) cfg := &config.ScrapeConfig{ JobName: "test", @@ -5438,7 +5420,6 @@ func TestScrapeLoopCompression(t *testing.T) { func testScrapeLoopCompression(t *testing.T, appV2 bool) { s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) metricsText := makeTestGauges(10) @@ -5768,7 +5749,6 @@ scrape_configs: `, minBucketFactor, strings.ReplaceAll(metricsServer.URL, "http://", "")) s := teststorage.New(t) - t.Cleanup(func() { _ = s.Close() }) reg := prometheus.NewRegistry() mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond)}, nil, nil, s, reg) @@ -6464,7 +6444,6 @@ func testNewScrapeLoopHonorLabelsWiring(t 
*testing.T, appV2 bool) { require.NoError(t, err) s := teststorage.New(t) - defer s.Close() cfg := &config.ScrapeConfig{ JobName: "test", diff --git a/storage/fanout_test.go b/storage/fanout_test.go index 25f61341cd..948934d041 100644 --- a/storage/fanout_test.go +++ b/storage/fanout_test.go @@ -39,7 +39,6 @@ func TestFanout_SelectSorted(t *testing.T) { ctx := context.Background() priStorage := teststorage.New(t) - defer priStorage.Close() app1 := priStorage.Appender(ctx) app1.Append(0, inputLabel, 0, 0) inputTotalSize++ @@ -51,7 +50,6 @@ func TestFanout_SelectSorted(t *testing.T) { require.NoError(t, err) remoteStorage1 := teststorage.New(t) - defer remoteStorage1.Close() app2 := remoteStorage1.Appender(ctx) app2.Append(0, inputLabel, 3000, 3) inputTotalSize++ @@ -63,7 +61,6 @@ func TestFanout_SelectSorted(t *testing.T) { require.NoError(t, err) remoteStorage2 := teststorage.New(t) - defer remoteStorage2.Close() app3 := remoteStorage2.Appender(ctx) app3.Append(0, inputLabel, 6000, 6) @@ -142,7 +139,6 @@ func TestFanout_SelectSorted_AppenderV2(t *testing.T) { inputTotalSize := 0 priStorage := teststorage.New(t) - defer priStorage.Close() app1 := priStorage.AppenderV2(t.Context()) _, err := app1.Append(0, inputLabel, 0, 0, 0, nil, nil, storage.AOptions{}) require.NoError(t, err) @@ -156,7 +152,6 @@ func TestFanout_SelectSorted_AppenderV2(t *testing.T) { require.NoError(t, app1.Commit()) remoteStorage1 := teststorage.New(t) - defer remoteStorage1.Close() app2 := remoteStorage1.AppenderV2(t.Context()) _, err = app2.Append(0, inputLabel, 0, 3000, 3, nil, nil, storage.AOptions{}) require.NoError(t, err) @@ -170,8 +165,6 @@ func TestFanout_SelectSorted_AppenderV2(t *testing.T) { require.NoError(t, app2.Commit()) remoteStorage2 := teststorage.New(t) - defer remoteStorage2.Close() - app3 := remoteStorage2.AppenderV2(t.Context()) _, err = app3.Append(0, inputLabel, 0, 6000, 6, nil, nil, storage.AOptions{}) require.NoError(t, err) @@ -246,7 +239,6 @@ func 
TestFanout_SelectSorted_AppenderV2(t *testing.T) { func TestFanoutErrors(t *testing.T) { workingStorage := teststorage.New(t) - defer workingStorage.Close() cases := []struct { primary storage.Storage diff --git a/util/teststorage/storage.go b/util/teststorage/storage.go index dd83ff8763..055bf3ff22 100644 --- a/util/teststorage/storage.go +++ b/util/teststorage/storage.go @@ -32,14 +32,22 @@ type Option func(opt *tsdb.Options) // New returns a new TestStorage for testing purposes // that removes all associated files on closing. +// +// The caller does not need to close the TestStorage after use; closing is deferred via t.Cleanup. func New(t testing.TB, o ...Option) *TestStorage { s, err := NewWithError(o...) require.NoError(t, err) + + t.Cleanup(func() { + _ = s.Close() // Ignore errors, as it could be a double close. + }) return s } // NewWithError returns a new TestStorage for user facing tests, which reports // errors directly. +// +// It's the caller's responsibility to close the TestStorage after use. func NewWithError(o ...Option) (*TestStorage, error) { // Tests just load data for a series sequentially. Thus we // need a long appendable window. 
diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 39c1fa6080..87fe756544 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -324,7 +324,6 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { m.CreateAlertingRules() arules := m.AlertingRules() storage := teststorage.New(m.testing) - defer storage.Close() engineOpts := promql.EngineOpts{ Logger: nil, @@ -414,7 +413,6 @@ func TestEndpoints(t *testing.T) { test_metric5{"host.name"="localhost"} 1+0x100 test_metric5{"junk\n{},=: chars"="bar"} 1+0x100 `) - t.Cleanup(func() { storage.Close() }) start := time.Unix(0, 0) exemplars := []exemplar.QueryResult{ @@ -575,7 +573,7 @@ func TestGetSeries(t *testing.T) { test_metric2{foo="boo", xyz="qwerty"} 1+0x100 test_metric2{foo="baz", abc="qwerty"} 1+0x100 `) - t.Cleanup(func() { storage.Close() }) + api := &API{ Queryable: storage, } @@ -682,7 +680,6 @@ func TestQueryExemplars(t *testing.T) { test_metric4{foo="boo", dup="1"} 1+0x100 test_metric4{foo="boo"} 1+0x100 `) - t.Cleanup(func() { storage.Close() }) api := &API{ Queryable: storage, @@ -798,7 +795,7 @@ func TestLabelNames(t *testing.T) { test_metric2{foo="boo", xyz="qwerty"} 1+0x100 test_metric2{foo="baz", abc="qwerty"} 1+0x100 `) - t.Cleanup(func() { storage.Close() }) + api := &API{ Queryable: storage, } @@ -901,7 +898,6 @@ func (testStats) Builtin() (_ stats.BuiltinStats) { func TestStats(t *testing.T) { storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) api := &API{ Queryable: storage, diff --git a/web/federate_test.go b/web/federate_test.go index 932639e2e6..8e0a15d57b 100644 --- a/web/federate_test.go +++ b/web/federate_test.go @@ -212,7 +212,6 @@ func TestFederation(t *testing.T) { test_metric_stale 1+10x99 stale test_metric_old 1+10x98 `) - t.Cleanup(func() { storage.Close() }) h := &Handler{ localStorage: &dbAdapter{storage.DB}, @@ -303,7 +302,6 @@ func normalizeBody(body *bytes.Buffer) string { func TestFederationWithNativeHistograms(t *testing.T) { storage := 
teststorage.New(t) - t.Cleanup(func() { storage.Close() }) var expVec promql.Vector From bec70227f12c3f8614cf0cf7badf6a3ee7e4ea04 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Fri, 23 Jan 2026 09:04:05 +0000 Subject: [PATCH 15/46] feat(scrape)[PART5b]: Add AppenderV2 support to scrape.NewManager constructor (#17872) * feat(scrape)[PART5b]: Add AppenderV2 support to scrape.NewManager optionally to V1 Signed-off-by: bwplotka * Update scrape/manager.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Bartlomiej Plotka * fixes after rebase Signed-off-by: bwplotka * Apply suggestions from code review Co-authored-by: Arve Knudsen Signed-off-by: Bartlomiej Plotka --------- Signed-off-by: bwplotka Signed-off-by: Bartlomiej Plotka Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Arve Knudsen --- cmd/prometheus/main.go | 2 +- scrape/manager.go | 23 +++- scrape/manager_test.go | 40 +++---- scrape/scrape_test.go | 8 +- tsdb/head_append_v2_test.go | 205 +++++++++++++++++++++++++++++++++++- 5 files changed, 250 insertions(+), 28 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 210d3ddc4e..06d5540380 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -875,7 +875,7 @@ func main() { &cfg.scrape, logger.With("component", "scrape manager"), logging.NewJSONFileLogger, - fanoutStorage, + fanoutStorage, nil, // TODO(bwplotka): Switch to AppendableV2. prometheus.DefaultRegisterer, ) if err != nil { diff --git a/scrape/manager.go b/scrape/manager.go index ef226ad507..aafd8c1931 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -39,14 +39,32 @@ import ( "github.com/prometheus/prometheus/util/pool" ) -// NewManager is the Manager constructor using Appendable. 
-func NewManager(o *Options, logger *slog.Logger, newScrapeFailureLogger func(string) (*logging.JSONFileLogger, error), appendable storage.Appendable, registerer prometheus.Registerer) (*Manager, error) { +// NewManager is the Manager constructor using storage.Appendable or storage.AppendableV2. +// +// If unsure which one to use/implement, implement AppendableV2 as it significantly simplifies implementation and allows more +// (passing ST, always-on metadata, exemplars per sample). +// +// NewManager returns error if both appendable and appendableV2 are specified. +// +// Switch to AppendableV2 is in progress (https://github.com/prometheus/prometheus/issues/17632). +// storage.Appendable will be removed soon (ETA: Q2 2026). +func NewManager( + o *Options, + logger *slog.Logger, + newScrapeFailureLogger func(string) (*logging.JSONFileLogger, error), + appendable storage.Appendable, + appendableV2 storage.AppendableV2, + registerer prometheus.Registerer, +) (*Manager, error) { if o == nil { o = &Options{} } if logger == nil { logger = promslog.NewNopLogger() } + if appendable != nil && appendableV2 != nil { + return nil, errors.New("scrape.NewManager: appendable and appendableV2 cannot be provided at the same time") + } sm, err := newScrapeMetrics(registerer) if err != nil { @@ -55,6 +73,7 @@ func NewManager(o *Options, logger *slog.Logger, newScrapeFailureLogger func(str m := &Manager{ appendable: appendable, + appendableV2: appendableV2, opts: o, logger: logger, newScrapeFailureLogger: newScrapeFailureLogger, diff --git a/scrape/manager_test.go b/scrape/manager_test.go index 8b289cb7e2..17152e8eb1 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -522,7 +522,7 @@ scrape_configs: ) opts := Options{} - scrapeManager, err := NewManager(&opts, nil, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, nil, testRegistry) require.NoError(t, err) newLoop := func(scrapeLoopOptions) loop { ch <- struct{}{} @@ -578,7 +578,7 @@ 
scrape_configs: func TestManagerTargetsUpdates(t *testing.T) { opts := Options{} testRegistry := prometheus.NewRegistry() - m, err := NewManager(&opts, nil, nil, nil, testRegistry) + m, err := NewManager(&opts, nil, nil, nil, nil, testRegistry) require.NoError(t, err) ts := make(chan map[string][]*targetgroup.Group) @@ -631,7 +631,7 @@ global: opts := Options{} testRegistry := prometheus.NewRegistry() - scrapeManager, err := NewManager(&opts, nil, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, nil, testRegistry) require.NoError(t, err) // Load the first config. @@ -701,7 +701,7 @@ scrape_configs: } opts := Options{} - scrapeManager, err := NewManager(&opts, nil, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, nil, testRegistry) require.NoError(t, err) reload(scrapeManager, cfg1) @@ -735,6 +735,8 @@ func setupTestServer(t *testing.T, typ string, toWrite []byte) *httptest.Server } // TestManagerSTZeroIngestion tests scrape manager for various ST cases. +// NOTE(bwplotka): There is no AppenderV2 test for this STZeroIngestion feature as in V2 flow it's +// moved to AppenderV2 implementation (e.g. storage) and it's tested there, e.g. tsdb.TestHeadAppenderV2_Append_EnableSTAsZeroSample. func TestManagerSTZeroIngestion(t *testing.T) { t.Parallel() const ( @@ -766,7 +768,7 @@ func TestManagerSTZeroIngestion(t *testing.T) { discoveryManager, scrapeManager := runManagers(t, ctx, &Options{ EnableStartTimestampZeroIngestion: testSTZeroIngest, skipOffsetting: true, - }, app) + }, app, nil) defer scrapeManager.Stop() server := setupTestServer(t, config.ScrapeProtocolsHeaders[testFormat], encoded) @@ -905,6 +907,8 @@ func generateTestHistogram(i int) *dto.Histogram { return h } +// NOTE(bwplotka): There is no AppenderV2 test for this STZeroIngestion feature as in V2 flow it's +// moved to AppenderV2 implementation (e.g. storage) and it's tested there, e.g. tsdb.TestHeadAppenderV2_Append_EnableSTAsZeroSample. 
func TestManagerSTZeroIngestionHistogram(t *testing.T) { t.Parallel() const mName = "expected_histogram" @@ -950,7 +954,7 @@ func TestManagerSTZeroIngestionHistogram(t *testing.T) { discoveryManager, scrapeManager := runManagers(t, ctx, &Options{ EnableStartTimestampZeroIngestion: tc.enableSTZeroIngestion, skipOffsetting: true, - }, app) + }, app, nil) defer scrapeManager.Stop() once := sync.Once{} @@ -1030,7 +1034,7 @@ func TestUnregisterMetrics(t *testing.T) { // Check that all metrics can be unregistered, allowing a second manager to be created. for range 2 { opts := Options{} - manager, err := NewManager(&opts, nil, nil, nil, reg) + manager, err := NewManager(&opts, nil, nil, nil, nil, reg) require.NotNil(t, manager) require.NoError(t, err) // Unregister all metrics. @@ -1043,6 +1047,9 @@ func TestUnregisterMetrics(t *testing.T) { // This test addresses issue #17216 by ensuring the previously blocking check has been removed. // The test verifies that the presence of exemplars in the input does not cause errors, // although exemplars are not preserved during NHCB conversion (as documented below). +// +// NOTE(bwplotka): There is no AppenderV2 test for this STZeroIngestion feature as in V2 flow it's +// moved to AppenderV2 implementation (e.g. storage) and it's tested there, e.g. tsdb.TestHeadAppenderV2_Append_EnableSTAsZeroSample. 
func TestNHCBAndSTZeroIngestion(t *testing.T) { t.Parallel() @@ -1059,7 +1066,7 @@ func TestNHCBAndSTZeroIngestion(t *testing.T) { discoveryManager, scrapeManager := runManagers(t, ctx, &Options{ EnableStartTimestampZeroIngestion: true, skipOffsetting: true, - }, app) + }, app, nil) defer scrapeManager.Stop() once := sync.Once{} @@ -1153,16 +1160,13 @@ func applyConfig( require.NoError(t, discoveryManager.ApplyConfig(c)) } -func runManagers(t *testing.T, ctx context.Context, opts *Options, app storage.Appendable) (*discovery.Manager, *Manager) { +func runManagers(t *testing.T, ctx context.Context, opts *Options, app storage.Appendable, appV2 storage.AppendableV2) (*discovery.Manager, *Manager) { t.Helper() if opts == nil { opts = &Options{} } opts.DiscoveryReloadInterval = model.Duration(100 * time.Millisecond) - if app == nil { - app = teststorage.NewAppendable() - } reg := prometheus.NewRegistry() sdMetrics, err := discovery.RegisterSDMetrics(reg, discovery.NewRefreshMetrics(reg)) @@ -1178,7 +1182,7 @@ func runManagers(t *testing.T, ctx context.Context, opts *Options, app storage.A opts, nil, nil, - app, + app, appV2, prometheus.NewRegistry(), ) require.NoError(t, err) @@ -1251,7 +1255,7 @@ scrape_configs: - files: ['%s'] ` - discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil) + discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, nil) defer scrapeManager.Stop() applyConfig( @@ -1350,7 +1354,7 @@ scrape_configs: file_sd_configs: - files: ['%s', '%s'] ` - discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil) + discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, nil) defer scrapeManager.Stop() applyConfig( @@ -1409,7 +1413,7 @@ scrape_configs: file_sd_configs: - files: ['%s'] ` - discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil) + discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, nil) defer scrapeManager.Stop() applyConfig( @@ -1475,7 +1479,7 @@ scrape_configs: - targets: ['%s'] ` - 
discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil) + discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, nil) defer scrapeManager.Stop() // Apply the initial config with an existing file @@ -1559,7 +1563,7 @@ scrape_configs: cfg := loadConfiguration(t, cfgText) - m, err := NewManager(&Options{}, nil, nil, teststorage.NewAppendable(), prometheus.NewRegistry()) + m, err := NewManager(&Options{}, nil, nil, nil, nil, prometheus.NewRegistry()) require.NoError(t, err) defer m.Stop() require.NoError(t, m.ApplyConfig(cfg)) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 74fdf8a962..f9a0834bd1 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -5751,14 +5751,10 @@ scrape_configs: s := teststorage.New(t) reg := prometheus.NewRegistry() - mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond)}, nil, nil, s, reg) + sa := selectAppendable(s, appV2) + mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond)}, nil, nil, sa.V1(), sa.V2(), reg) require.NoError(t, err) - if appV2 { - mng.appendableV2 = s - mng.appendable = nil - } - cfg, err := config.Load(configStr, promslog.NewNopLogger()) require.NoError(t, err) require.NoError(t, mng.ApplyConfig(cfg)) diff --git a/tsdb/head_append_v2_test.go b/tsdb/head_append_v2_test.go index 91f6ba81cc..20401c16fe 100644 --- a/tsdb/head_append_v2_test.go +++ b/tsdb/head_append_v2_test.go @@ -4111,10 +4111,18 @@ func TestHeadAppenderV2_Append_EnableSTAsZeroSample(t *testing.T) { // Make sure counter resets hints are non-zero, so we can detect ST histogram samples. 
testHistogram := tsdbutil.GenerateTestHistogram(1) testHistogram.CounterResetHint = histogram.NotCounterReset + testFloatHistogram := tsdbutil.GenerateTestFloatHistogram(1) testFloatHistogram.CounterResetHint = histogram.NotCounterReset + + testNHCB := tsdbutil.GenerateTestCustomBucketsHistogram(1) + testNHCB.CounterResetHint = histogram.NotCounterReset + + testFloatNHCB := tsdbutil.GenerateTestCustomBucketsFloatHistogram(1) + testFloatNHCB.CounterResetHint = histogram.NotCounterReset + // TODO(beorn7): Once issue #15346 is fixed, the CounterResetHint of the - // following two zero histograms should be histogram.CounterReset. + // following zero histograms should be histogram.CounterReset. testZeroHistogram := &histogram.Histogram{ Schema: testHistogram.Schema, ZeroThreshold: testHistogram.ZeroThreshold, @@ -4131,6 +4139,19 @@ func TestHeadAppenderV2_Append_EnableSTAsZeroSample(t *testing.T) { PositiveBuckets: []float64{0, 0, 0, 0}, NegativeBuckets: []float64{0, 0, 0, 0}, } + testZeroNHCB := &histogram.Histogram{ + Schema: testNHCB.Schema, + PositiveSpans: testNHCB.PositiveSpans, + PositiveBuckets: []int64{0, 0, 0, 0}, + CustomValues: testNHCB.CustomValues, + } + testZeroFloatNHCB := &histogram.FloatHistogram{ + Schema: testFloatNHCB.Schema, + PositiveSpans: testFloatNHCB.PositiveSpans, + PositiveBuckets: []float64{0, 0, 0, 0}, + CustomValues: testFloatNHCB.CustomValues, + } + type appendableSamples struct { ts int64 fSample float64 @@ -4183,6 +4204,34 @@ func TestHeadAppenderV2_Append_EnableSTAsZeroSample(t *testing.T) { } }(), }, + { + name: "In order ct+normal sample/NHCB", + appendableSamples: []appendableSamples{ + {ts: 100, h: testNHCB, st: 1}, + {ts: 101, h: testNHCB, st: 1}, + }, + expectedSamples: func() []chunks.Sample { + return []chunks.Sample{ + sample{t: 1, h: testZeroNHCB}, + sample{t: 100, h: testNHCB}, + sample{t: 101, h: testNHCB}, + } + }(), + }, + { + name: "In order ct+normal sample/floatNHCB", + appendableSamples: []appendableSamples{ + {ts: 
100, fh: testFloatNHCB, st: 1}, + {ts: 101, fh: testFloatNHCB, st: 1}, + }, + expectedSamples: func() []chunks.Sample { + return []chunks.Sample{ + sample{t: 1, fh: testZeroFloatNHCB}, + sample{t: 100, fh: testFloatNHCB}, + sample{t: 101, fh: testFloatNHCB}, + } + }(), + }, { name: "Consecutive appends with same st ignore st/floatSample", appendableSamples: []appendableSamples{ @@ -4223,6 +4272,34 @@ func TestHeadAppenderV2_Append_EnableSTAsZeroSample(t *testing.T) { } }(), }, + { + name: "Consecutive appends with same st ignore st/NHCB", + appendableSamples: []appendableSamples{ + {ts: 100, h: testNHCB, st: 1}, + {ts: 101, h: testNHCB, st: 1}, + }, + expectedSamples: func() []chunks.Sample { + return []chunks.Sample{ + sample{t: 1, h: testZeroNHCB}, + sample{t: 100, h: testNHCB}, + sample{t: 101, h: testNHCB}, + } + }(), + }, + { + name: "Consecutive appends with same st ignore st/floatNHCB", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatNHCB, st: 1}, + {ts: 101, fh: testFloatNHCB, st: 1}, + }, + expectedSamples: func() []chunks.Sample { + return []chunks.Sample{ + sample{t: 1, fh: testZeroFloatNHCB}, + sample{t: 100, fh: testFloatNHCB}, + sample{t: 101, fh: testFloatNHCB}, + } + }(), + }, { name: "Consecutive appends with newer st do not ignore st/floatSample", appendableSamples: []appendableSamples{ @@ -4262,6 +4339,32 @@ func TestHeadAppenderV2_Append_EnableSTAsZeroSample(t *testing.T) { sample{t: 102, fh: testFloatHistogram}, }, }, + { + name: "Consecutive appends with newer st do not ignore st/NHCB", + appendableSamples: []appendableSamples{ + {ts: 100, h: testNHCB, st: 1}, + {ts: 102, h: testNHCB, st: 101}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, h: testZeroNHCB}, + sample{t: 100, h: testNHCB}, + sample{t: 101, h: testZeroNHCB}, + sample{t: 102, h: testNHCB}, + }, + }, + { + name: "Consecutive appends with newer st do not ignore st/floatNHCB", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatNHCB, st: 
1}, + {ts: 102, fh: testFloatNHCB, st: 101}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, fh: testZeroFloatNHCB}, + sample{t: 100, fh: testFloatNHCB}, + sample{t: 101, fh: testZeroFloatNHCB}, + sample{t: 102, fh: testFloatNHCB}, + }, + }, { name: "ST equals to previous sample timestamp is ignored/floatSample", appendableSamples: []appendableSamples{ @@ -4302,6 +4405,34 @@ func TestHeadAppenderV2_Append_EnableSTAsZeroSample(t *testing.T) { } }(), }, + { + name: "ST equals to previous sample timestamp is ignored/NHCB", + appendableSamples: []appendableSamples{ + {ts: 100, h: testNHCB, st: 1}, + {ts: 101, h: testNHCB, st: 100}, + }, + expectedSamples: func() []chunks.Sample { + return []chunks.Sample{ + sample{t: 1, h: testZeroNHCB}, + sample{t: 100, h: testNHCB}, + sample{t: 101, h: testNHCB}, + } + }(), + }, + { + name: "ST equals to previous sample timestamp is ignored/floatNHCB", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatNHCB, st: 1}, + {ts: 101, fh: testFloatNHCB, st: 100}, + }, + expectedSamples: func() []chunks.Sample { + return []chunks.Sample{ + sample{t: 1, fh: testZeroFloatNHCB}, + sample{t: 100, fh: testFloatNHCB}, + sample{t: 101, fh: testFloatNHCB}, + } + }(), + }, { name: "ST lower than minValidTime/float", appendableSamples: []appendableSamples{ @@ -4349,6 +4480,40 @@ func TestHeadAppenderV2_Append_EnableSTAsZeroSample(t *testing.T) { } }(), }, + { + name: "ST lower than minValidTime/NHCB", + appendableSamples: []appendableSamples{ + {ts: 100, h: testNHCB, st: -1}, + }, + // ST results ErrOutOfBounds, but ST append is best effort, so + // ST should be ignored, but sample appended. + expectedSamples: func() []chunks.Sample { + // NOTE: Without ST, on query, first histogram sample will get + // CounterReset adjusted to 0. 
+ firstSample := testNHCB.Copy() + firstSample.CounterResetHint = histogram.UnknownCounterReset + return []chunks.Sample{ + sample{t: 100, h: firstSample}, + } + }(), + }, + { + name: "ST lower than minValidTime/floatNHCB", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatNHCB, st: -1}, + }, + // ST results ErrOutOfBounds, but ST append is best effort, so + // ST should be ignored, but sample appended. + expectedSamples: func() []chunks.Sample { + // NOTE: Without ST, on query, first histogram sample will get + // CounterReset adjusted to 0. + firstSample := testFloatNHCB.Copy() + firstSample.CounterResetHint = histogram.UnknownCounterReset + return []chunks.Sample{ + sample{t: 100, fh: firstSample}, + } + }(), + }, { name: "ST duplicates an existing sample/float", appendableSamples: []appendableSamples{ @@ -4402,6 +4567,44 @@ func TestHeadAppenderV2_Append_EnableSTAsZeroSample(t *testing.T) { } }(), }, + { + name: "ST duplicates an existing sample/NHCB", + appendableSamples: []appendableSamples{ + {ts: 100, h: testNHCB}, + {ts: 200, h: testNHCB, st: 100}, + }, + // ST results ErrDuplicateSampleForTimestamp, but ST append is best effort, so + // ST should be ignored, but sample appended. + expectedSamples: func() []chunks.Sample { + // NOTE: Without ST, on query, first histogram sample will get + // CounterReset adjusted to 0. + firstSample := testNHCB.Copy() + firstSample.CounterResetHint = histogram.UnknownCounterReset + return []chunks.Sample{ + sample{t: 100, h: firstSample}, + sample{t: 200, h: testNHCB}, + } + }(), + }, + { + name: "ST duplicates an existing sample/floatNHCB", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatNHCB}, + {ts: 200, fh: testFloatNHCB, st: 100}, + }, + // ST results ErrDuplicateSampleForTimestamp, but ST append is best effort, so + // ST should be ignored, but sample appended. 
+ expectedSamples: func() []chunks.Sample { + // NOTE: Without ST, on query, first histogram sample will get + // CounterReset adjusted to 0. + firstSample := testFloatNHCB.Copy() + firstSample.CounterResetHint = histogram.UnknownCounterReset + return []chunks.Sample{ + sample{t: 100, fh: firstSample}, + sample{t: 200, fh: testFloatNHCB}, + } + }(), + }, } { t.Run(tc.name, func(t *testing.T) { opts := newTestHeadDefaultOptions(DefaultBlockDuration, false) From 9b444b57afcc72d5820e303f047a965366168a7e Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Fri, 23 Jan 2026 17:59:41 -0800 Subject: [PATCH 16/46] tsdb: Add StaleHead and GC for stale series in the Head block Signed-off-by: Ganesh Vernekar --- tsdb/head.go | 256 ++++++++++++++++++++++++++++++++++++++++++++-- tsdb/head_read.go | 107 +++++++++++++++++++ tsdb/head_test.go | 2 +- 3 files changed, 355 insertions(+), 10 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index 4410da407e..3d700944d9 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1203,6 +1203,36 @@ func (h *Head) truncateMemory(mint int64) (err error) { return h.truncateSeriesAndChunkDiskMapper("truncateMemory") } +// truncateStaleSeries removes the provided series as long as they are still stale. +func (h *Head) truncateStaleSeries(seriesRefs []storage.SeriesRef, maxt int64) error { + h.chunkSnapshotMtx.Lock() + defer h.chunkSnapshotMtx.Unlock() + + if h.MinTime() >= maxt { + return nil + } + + h.WaitForPendingReadersInTimeRange(h.MinTime(), maxt) + + deleted := h.gcStaleSeries(seriesRefs, maxt) + + // Record these stale series refs in the WAL so that we can ignore them during replay. 
+ if h.wal != nil { + stones := make([]tombstones.Stone, 0, len(seriesRefs)) + for ref := range deleted { + stones = append(stones, tombstones.Stone{ + Ref: ref, + Intervals: tombstones.Intervals{{Mint: math.MinInt64, Maxt: math.MaxInt64}}, + }) + } + var enc record.Encoder + if err := h.wal.Log(enc.Tombstones(stones, nil)); err != nil { + return err + } + } + return nil +} + // WaitForPendingReadersInTimeRange waits for queries overlapping with given range to finish querying. // The query timeout limits the max wait time of this function implicitly. // The mint is inclusive and maxt is the truncation time hence exclusive. @@ -1556,6 +1586,53 @@ func (h *RangeHead) String() string { return fmt.Sprintf("range head (mint: %d, maxt: %d)", h.MinTime(), h.MaxTime()) } +// StaleHead allows querying the stale series in the Head via an IndexReader, ChunkReader and tombstones.Reader. +// Used only for compactions. +type StaleHead struct { + RangeHead + staleSeriesRefs []storage.SeriesRef +} + +// NewStaleHead returns a *StaleHead. +func NewStaleHead(head *Head, mint, maxt int64, staleSeriesRefs []storage.SeriesRef) *StaleHead { + return &StaleHead{ + RangeHead: RangeHead{ + head: head, + mint: mint, + maxt: maxt, + }, + staleSeriesRefs: staleSeriesRefs, + } +} + +func (h *StaleHead) Index() (_ IndexReader, err error) { + return h.head.staleIndex(h.mint, h.maxt, h.staleSeriesRefs) +} + +func (h *StaleHead) NumSeries() uint64 { + return h.head.NumStaleSeries() +} + +var staleHeadULID = ulid.MustParse("0000000000XXXXXXXSTALEHEAD") + +func (h *StaleHead) Meta() BlockMeta { + return BlockMeta{ + MinTime: h.MinTime(), + MaxTime: h.MaxTime(), + ULID: staleHeadULID, + Stats: BlockStats{ + NumSeries: h.NumSeries(), + }, + } +} + +// String returns a human-readable representation of the stale head. It's important to +// keep this function in order to avoid the struct dump when the head is stringified in +// errors or logs. 
+func (h *StaleHead) String() string { + return fmt.Sprintf("stale head (mint: %d, maxt: %d)", h.MinTime(), h.MaxTime()) +} + // Delete all samples in the range of [mint, maxt] for series that satisfy the given // label matchers. func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Matcher) error { @@ -1625,13 +1702,14 @@ func (h *Head) gc() (actualInOrderMint, minOOOTime int64, minMmapFile int) { // Drop old chunks and remember series IDs and hashes if they can be // deleted entirely. - deleted, affected, chunksRemoved, actualInOrderMint, minOOOTime, minMmapFile := h.series.gc(mint, minOOOMmapRef, &h.numStaleSeries) + deleted, affected, chunksRemoved, staleSeriesDeleted, actualInOrderMint, minOOOTime, minMmapFile := h.series.gc(mint, minOOOMmapRef) seriesRemoved := len(deleted) h.metrics.seriesRemoved.Add(float64(seriesRemoved)) h.metrics.chunksRemoved.Add(float64(chunksRemoved)) h.metrics.chunks.Sub(float64(chunksRemoved)) h.numSeries.Sub(uint64(seriesRemoved)) + h.numStaleSeries.Sub(uint64(staleSeriesDeleted)) // Remove deleted series IDs from the postings lists. h.postings.Delete(deleted, affected) @@ -1948,13 +2026,14 @@ func newStripeSeries(stripeSize int, seriesCallback SeriesLifecycleCallback) *st // but the returned map goes into postings.Delete() which expects a map[storage.SeriesRef]struct // and there's no easy way to cast maps. // minMmapFile is the min mmap file number seen in the series (in-order and out-of-order) after gc'ing the series. 
-func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef, numStaleSeries *atomic.Uint64) (_ map[storage.SeriesRef]struct{}, _ map[labels.Label]struct{}, _ int, _, _ int64, minMmapFile int) { +func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (_ map[storage.SeriesRef]struct{}, _ map[labels.Label]struct{}, _, _ int, _, _ int64, minMmapFile int) { var ( - deleted = map[storage.SeriesRef]struct{}{} - affected = map[labels.Label]struct{}{} - rmChunks = 0 - actualMint int64 = math.MaxInt64 - minOOOTime int64 = math.MaxInt64 + deleted = map[storage.SeriesRef]struct{}{} + affected = map[labels.Label]struct{}{} + rmChunks = 0 + staleSeriesDeleted = 0 + actualMint int64 = math.MaxInt64 + minOOOTime int64 = math.MaxInt64 ) minMmapFile = math.MaxInt32 @@ -2009,7 +2088,7 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef, n if value.IsStaleNaN(series.lastValue) || (series.lastHistogramValue != nil && value.IsStaleNaN(series.lastHistogramValue.Sum)) || (series.lastFloatHistogramValue != nil && value.IsStaleNaN(series.lastFloatHistogramValue.Sum)) { - numStaleSeries.Dec() + staleSeriesDeleted++ } deleted[storage.SeriesRef(series.ref)] = struct{}{} @@ -2025,7 +2104,166 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef, n actualMint = mint } - return deleted, affected, rmChunks, actualMint, minOOOTime, minMmapFile + return deleted, affected, rmChunks, staleSeriesDeleted, actualMint, minOOOTime, minMmapFile +} + +// gcStaleSeries removes all the provided series as long as they are still stale +// and the series maxt is <= the given max. +// The returned references are the series that got deleted. +func (h *Head) gcStaleSeries(seriesRefs []storage.SeriesRef, maxt int64) map[storage.SeriesRef]struct{} { + // Drop old chunks and remember series IDs and hashes if they can be + // deleted entirely. 
+ deleted, affected, chunksRemoved := h.series.gcStaleSeries(seriesRefs, maxt) + seriesRemoved := len(deleted) + + h.metrics.seriesRemoved.Add(float64(seriesRemoved)) + h.metrics.chunksRemoved.Add(float64(chunksRemoved)) + h.metrics.chunks.Sub(float64(chunksRemoved)) + h.numSeries.Sub(uint64(seriesRemoved)) + h.numStaleSeries.Sub(uint64(seriesRemoved)) + + // Remove deleted series IDs from the postings lists. + h.postings.Delete(deleted, affected) + + // Remove tombstones referring to the deleted series. + h.tombstones.DeleteTombstones(deleted) + + if h.wal != nil { + _, last, _ := wlog.Segments(h.wal.Dir()) + h.walExpiriesMtx.Lock() + // Keep series records until we're past segment 'last' + // because the WAL will still have samples records with + // this ref ID. If we didn't keep these series records then + // on start up when we replay the WAL, or any other code + // that reads the WAL, wouldn't be able to use those + // samples since we would have no labels for that ref ID. + for ref := range deleted { + h.walExpiries[chunks.HeadSeriesRef(ref)] = int64(last) + } + h.walExpiriesMtx.Unlock() + } + + return deleted +} + +// deleteSeriesByID deletes the series with the given reference. +// Only used for WAL replay. +func (h *Head) deleteSeriesByID(refs []chunks.HeadSeriesRef) { + var ( + deleted = map[storage.SeriesRef]struct{}{} + affected = map[labels.Label]struct{}{} + staleSeriesDeleted = 0 + chunksRemoved = 0 + ) + + for _, ref := range refs { + refShard := int(ref) & (h.series.size - 1) + h.series.locks[refShard].Lock() + + // Copying getByID here to avoid locking and unlocking twice. 
+ series := h.series.series[refShard][ref] + if series == nil { + h.series.locks[refShard].Unlock() + continue + } + + if value.IsStaleNaN(series.lastValue) || + (series.lastHistogramValue != nil && value.IsStaleNaN(series.lastHistogramValue.Sum)) || + (series.lastFloatHistogramValue != nil && value.IsStaleNaN(series.lastFloatHistogramValue.Sum)) { + staleSeriesDeleted++ + } + + hash := series.lset.Hash() + hashShard := int(hash) & (h.series.size - 1) + + chunksRemoved += len(series.mmappedChunks) + if series.headChunks != nil { + chunksRemoved += series.headChunks.len() + } + + deleted[storage.SeriesRef(series.ref)] = struct{}{} + series.lset.Range(func(l labels.Label) { affected[l] = struct{}{} }) + h.series.hashes[hashShard].del(hash, series.ref) + delete(h.series.series[refShard], series.ref) + + h.series.locks[refShard].Unlock() + } + + h.metrics.seriesRemoved.Add(float64(len(deleted))) + h.metrics.chunksRemoved.Add(float64(chunksRemoved)) + h.metrics.chunks.Sub(float64(chunksRemoved)) + h.numSeries.Sub(uint64(len(deleted))) + h.numStaleSeries.Sub(uint64(staleSeriesDeleted)) + + // Remove deleted series IDs from the postings lists. + h.postings.Delete(deleted, affected) + + // Remove tombstones referring to the deleted series. + h.tombstones.DeleteTombstones(deleted) +} + +// gcStaleSeries removes all the stale series provided that they are still stale +// and the series maxt is <= the given max. 
+func (s *stripeSeries) gcStaleSeries(seriesRefs []storage.SeriesRef, maxt int64) (_ map[storage.SeriesRef]struct{}, _ map[labels.Label]struct{}, _ int) { + var ( + deleted = map[storage.SeriesRef]struct{}{} + affected = map[labels.Label]struct{}{} + rmChunks = 0 + ) + + staleSeriesMap := map[storage.SeriesRef]struct{}{} + for _, ref := range seriesRefs { + staleSeriesMap[ref] = struct{}{} + } + + check := func(hashShard int, hash uint64, series *memSeries, deletedForCallback map[chunks.HeadSeriesRef]labels.Labels) { + if _, exists := staleSeriesMap[storage.SeriesRef(series.ref)]; !exists { + // This series was not compacted. Skip it. + return + } + + series.Lock() + defer series.Unlock() + + if series.maxTime() > maxt { + return + } + + // Check if the series is still stale. + isStale := value.IsStaleNaN(series.lastValue) || + (series.lastHistogramValue != nil && value.IsStaleNaN(series.lastHistogramValue.Sum)) || + (series.lastFloatHistogramValue != nil && value.IsStaleNaN(series.lastFloatHistogramValue.Sum)) + + if !isStale { + return + } + + if series.headChunks != nil { + rmChunks += series.headChunks.len() + } + rmChunks += len(series.mmappedChunks) + + // The series is gone entirely. We need to keep the series lock + // and make sure we have acquired the stripe locks for hash and ID of the + // series alike. + // If we don't hold them all, there's a very small chance that a series receives + // samples again while we are half-way into deleting it. + refShard := int(series.ref) & (s.size - 1) + if hashShard != refShard { + s.locks[refShard].Lock() + defer s.locks[refShard].Unlock() + } + + deleted[storage.SeriesRef(series.ref)] = struct{}{} + series.lset.Range(func(l labels.Label) { affected[l] = struct{}{} }) + s.hashes[hashShard].del(hash, series.ref) + delete(s.series[refShard], series.ref) + deletedForCallback[series.ref] = series.lset // OK to access lset; series is locked at the top of this function. 
+ } + + s.iterForDeletion(check) + + return deleted, affected, rmChunks } // The iterForDeletion function iterates through all series, invoking the checkDeletedFunc for each. diff --git a/tsdb/head_read.go b/tsdb/head_read.go index 924b04bf0a..f0a1331fbb 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -22,6 +22,7 @@ import ( "sync" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunks" @@ -201,6 +202,112 @@ func (h *headIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchB return nil } +func (h *Head) staleIndex(mint, maxt int64, staleSeriesRefs []storage.SeriesRef) (*headStaleIndexReader, error) { + return &headStaleIndexReader{ + headIndexReader: h.indexRange(mint, maxt), + staleSeriesRefs: staleSeriesRefs, + }, nil +} + +// headStaleIndexReader gives the stale series that have no out-of-order data. +// This is only used for stale series compaction at the moment, that will only ask for all +// the series during compaction. So to make that efficient, this index reader requires the +// pre-calculated list of stale series refs that can be returned without re-reading the Head. +type headStaleIndexReader struct { + *headIndexReader + staleSeriesRefs []storage.SeriesRef +} + +func (h *headStaleIndexReader) Postings(ctx context.Context, name string, values ...string) (index.Postings, error) { + // If all postings are requested, return the precalculated list. 
+ k, v := index.AllPostingsKey() + if len(h.staleSeriesRefs) > 0 && name == k && len(values) == 1 && values[0] == v { + return index.NewListPostings(h.staleSeriesRefs), nil + } + seriesRefs, err := h.head.filterStaleSeriesAndSortPostings(h.head.postings.Postings(ctx, name, values...)) + if err != nil { + return index.ErrPostings(err), err + } + return index.NewListPostings(seriesRefs), nil +} + +func (h *headStaleIndexReader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) index.Postings { + // Unused for compaction, so we don't need to optimise. + seriesRefs, err := h.head.filterStaleSeriesAndSortPostings(h.head.postings.PostingsForLabelMatching(ctx, name, match)) + if err != nil { + return index.ErrPostings(err) + } + return index.NewListPostings(seriesRefs) +} + +func (h *headStaleIndexReader) PostingsForAllLabelValues(ctx context.Context, name string) index.Postings { + // Unused for compaction, so we don't need to optimise. + seriesRefs, err := h.head.filterStaleSeriesAndSortPostings(h.head.postings.PostingsForAllLabelValues(ctx, name)) + if err != nil { + return index.ErrPostings(err) + } + return index.NewListPostings(seriesRefs) +} + +// filterStaleSeriesAndSortPostings returns the stale series references from the given postings +// that also do not have any out-of-order data. +func (h *Head) filterStaleSeriesAndSortPostings(p index.Postings) ([]storage.SeriesRef, error) { + series := make([]*memSeries, 0, 1024) + + notFoundSeriesCount := 0 + for p.Next() { + s := h.series.getByID(chunks.HeadSeriesRef(p.At())) + if s == nil { + notFoundSeriesCount++ + continue + } + + s.Lock() + if s.ooo != nil { + // Has out-of-order data; skip it because we cannot determine if a series + // is stale when it's getting out-of-order data. 
+ s.Unlock() + continue + } + + if value.IsStaleNaN(s.lastValue) || + (s.lastHistogramValue != nil && value.IsStaleNaN(s.lastHistogramValue.Sum)) || + (s.lastFloatHistogramValue != nil && value.IsStaleNaN(s.lastFloatHistogramValue.Sum)) { + series = append(series, s) + } + s.Unlock() + } + if notFoundSeriesCount > 0 { + h.logger.Debug("Looked up stale series not found", "count", notFoundSeriesCount) + } + if err := p.Err(); err != nil { + return nil, fmt.Errorf("expand postings: %w", err) + } + + slices.SortFunc(series, func(a, b *memSeries) int { + return labels.Compare(a.labels(), b.labels()) + }) + + refs := make([]storage.SeriesRef, 0, len(series)) + for _, p := range series { + refs = append(refs, storage.SeriesRef(p.ref)) + } + return refs, nil +} + +// SortedPostings returns the postings as is, because any postings obtained via +// headStaleIndexReader are expected to be already sorted. +func (*headStaleIndexReader) SortedPostings(p index.Postings) index.Postings { + // All the postings functions above already return a sorted list of postings. + return p +} + +// SortedStaleSeriesRefsNoOOOData returns all the series refs of the stale series that do not have any out-of-order data. +func (h *Head) SortedStaleSeriesRefsNoOOOData(ctx context.Context) ([]storage.SeriesRef, error) { + k, v := index.AllPostingsKey() + return h.filterStaleSeriesAndSortPostings(h.postings.Postings(ctx, k, v)) +} + func appendSeriesChunks(s *memSeries, mint, maxt int64, chks []chunks.Meta) []chunks.Meta { for i, c := range s.mmappedChunks { // Do not expose chunks that are outside of the specified range. 
diff --git a/tsdb/head_test.go b/tsdb/head_test.go index e2b87b6f3f..493f938860 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -6519,7 +6519,7 @@ func TestStripeSeries_gc(t *testing.T) { s, ms1, ms2 := stripeSeriesWithCollidingSeries(t) hash := ms1.lset.Hash() - s.gc(0, 0, nil) + s.gc(0, 0) // Verify that we can get neither ms1 nor ms2 after gc-ing corresponding series got := s.getByHash(hash, ms1.lset) From 43e69388df9ffc6aaa5f390df810da0d2d8e1088 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Fri, 23 Jan 2026 18:00:08 -0800 Subject: [PATCH 17/46] tsdb: Add stale series compaction support in the DB Signed-off-by: Ganesh Vernekar --- tsdb/block.go | 16 ++++++++++++++++ tsdb/compact.go | 3 +++ tsdb/db.go | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/tsdb/block.go b/tsdb/block.go index 3f089b9da7..92638df164 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -228,6 +228,18 @@ func (bm *BlockMetaCompaction) FromOutOfOrder() bool { return slices.Contains(bm.Hints, CompactionHintFromOutOfOrder) } +func (bm *BlockMetaCompaction) SetStaleSeries() { + if bm.FromStaleSeries() { + return + } + bm.Hints = append(bm.Hints, CompactionHintFromStaleSeries) + slices.Sort(bm.Hints) +} + +func (bm *BlockMetaCompaction) FromStaleSeries() bool { + return slices.Contains(bm.Hints, CompactionHintFromStaleSeries) +} + const ( indexFilename = "index" metaFilename = "meta.json" @@ -236,6 +248,10 @@ const ( // CompactionHintFromOutOfOrder is a hint noting that the block // was created from out-of-order chunks. CompactionHintFromOutOfOrder = "from-out-of-order" + + // CompactionHintFromStaleSeries is a hint noting that the block + // was created from stale series. 
+ CompactionHintFromStaleSeries = "from-stale-series" ) func chunkDir(dir string) string { return filepath.Join(dir, "chunks") } diff --git a/tsdb/compact.go b/tsdb/compact.go index 7c21cbcc13..35e0a5b1fd 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -598,6 +598,9 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b if base.Compaction.FromOutOfOrder() { meta.Compaction.SetOutOfOrder() } + if base.Compaction.FromStaleSeries() { + meta.Compaction.SetStaleSeries() + } } err := c.write(dest, meta, DefaultBlockPopulator{}, b) diff --git a/tsdb/db.go b/tsdb/db.go index 3f8bf16209..e3b00a2d11 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -1583,6 +1583,52 @@ func (db *DB) compactHead(head *RangeHead) error { return nil } +func (db *DB) CompactStaleHead() error { + db.cmtx.Lock() + defer db.cmtx.Unlock() + + db.logger.Info("Starting stale series compaction") + start := time.Now() + + // We get the stale series reference first because this list can change during the compaction below. + // It is more efficient and easier to provide an index interface for the stale series when we have a static list. 
+ staleSeriesRefs, err := db.head.SortedStaleSeriesRefsNoOOOData(context.Background()) + if err != nil { + return err + } + meta := &BlockMeta{} + meta.Compaction.SetStaleSeries() + mint, maxt := db.head.opts.ChunkRange*(db.head.MinTime()/db.head.opts.ChunkRange), db.head.MaxTime() + for ; mint < maxt; mint += db.head.chunkRange.Load() { + staleHead := NewStaleHead(db.Head(), mint, mint+db.head.chunkRange.Load()-1, staleSeriesRefs) + + uids, err := db.compactor.Write(db.dir, staleHead, staleHead.MinTime(), staleHead.BlockMaxTime(), meta) + if err != nil { + return fmt.Errorf("persist stale head: %w", err) + } + + db.logger.Info("Stale series block created", "ulids", fmt.Sprintf("%v", uids), "min_time", mint, "max_time", maxt) + + if err := db.reloadBlocks(); err != nil { + errs := []error{fmt.Errorf("reloadBlocks blocks: %w", err)} + for _, uid := range uids { + if errRemoveAll := os.RemoveAll(filepath.Join(db.dir, uid.String())); errRemoveAll != nil { + errs = append(errs, fmt.Errorf("delete persisted stale head block after failed db reloadBlocks:%s: %w", uid, errRemoveAll)) + } + } + return errors.Join(errs...) + } + } + + if err := db.head.truncateStaleSeries(staleSeriesRefs, maxt); err != nil { + return fmt.Errorf("head truncate: %w", err) + } + db.head.RebuildSymbolTable(db.logger) + + db.logger.Info("Ending stale series compaction", "num_series", meta.Stats.NumSeries, "duration", time.Since(start)) + return nil +} + // compactBlocks compacts all the eligible on-disk blocks. // The db.cmtx should be held before calling this method. func (db *DB) compactBlocks() (err error) { @@ -2042,7 +2088,7 @@ func (db *DB) inOrderBlocksMaxTime() (maxt int64, ok bool) { maxt, ok = int64(math.MinInt64), false // If blocks are overlapping, last block might not have the max time. So check all blocks. 
for _, b := range db.Blocks() { - if !b.meta.Compaction.FromOutOfOrder() && b.meta.MaxTime > maxt { + if !b.meta.Compaction.FromOutOfOrder() && !b.meta.Compaction.FromStaleSeries() && b.meta.MaxTime > maxt { ok = true maxt = b.meta.MaxTime } From 43dc23afe77db51070736eafb98fa87cd7ebd707 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Fri, 23 Jan 2026 18:02:45 -0800 Subject: [PATCH 18/46] tsdb: Clear stale series from the Head during WAL replay Signed-off-by: Ganesh Vernekar --- tsdb/head_wal.go | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index bbcad9d855..b323f0dbf6 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -308,7 +308,21 @@ Outer: } h.wlReplaySamplesPool.Put(v) case []tombstones.Stone: + // Tombstone records will be fairly rare, so not trying to optimise the allocations here. + deleteSeriesShards := make([][]chunks.HeadSeriesRef, concurrency) for _, s := range v { + if len(s.Intervals) == 1 && s.Intervals[0].Mint == math.MinInt64 && s.Intervals[0].Maxt == math.MaxInt64 { + // This series was fully deleted at this point. This record is only done for stale series at the moment. + mod := uint64(s.Ref) % uint64(concurrency) + deleteSeriesShards[mod] = append(deleteSeriesShards[mod], chunks.HeadSeriesRef(s.Ref)) + + // If the series is with a different reference, try deleting that. 
+ if r, ok := multiRef[chunks.HeadSeriesRef(s.Ref)]; ok { + mod := uint64(r) % uint64(concurrency) + deleteSeriesShards[mod] = append(deleteSeriesShards[mod], r) + } + continue + } for _, itv := range s.Intervals { if itv.Maxt < h.minValidTime.Load() { continue @@ -326,6 +340,14 @@ Outer: h.tombstones.AddInterval(s.Ref, itv) } } + + for i := range concurrency { + if len(deleteSeriesShards[i]) > 0 { + processors[i].input <- walSubsetProcessorInputItem{deletedSeriesRefs: deleteSeriesShards[i]} + deleteSeriesShards[i] = nil + } + } + h.wlReplaytStonesPool.Put(v) case []record.RefExemplar: for _, e := range v { @@ -558,10 +580,11 @@ type walSubsetProcessor struct { } type walSubsetProcessorInputItem struct { - samples []record.RefSample - histogramSamples []histogramRecord - existingSeries *memSeries - walSeriesRef chunks.HeadSeriesRef + samples []record.RefSample + histogramSamples []histogramRecord + existingSeries *memSeries + walSeriesRef chunks.HeadSeriesRef + deletedSeriesRefs []chunks.HeadSeriesRef } func (wp *walSubsetProcessor) setup() { @@ -712,6 +735,10 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp case wp.histogramsOutput <- in.histogramSamples: default: } + + if len(in.deletedSeriesRefs) > 0 { + h.deleteSeriesByID(in.deletedSeriesRefs) + } } h.updateMinMaxTime(mint, maxt) From 4f3de8da29f5b57b02f325311a8e540aa50b24a9 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Fri, 23 Jan 2026 18:07:34 -0800 Subject: [PATCH 19/46] tsdb: Add unit tests for stale series compaction Signed-off-by: Ganesh Vernekar --- storage/series.go | 12 ++- tsdb/db_test.go | 242 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 251 insertions(+), 3 deletions(-) diff --git a/storage/series.go b/storage/series.go index ebc5a16c07..bf6df7db3e 100644 --- a/storage/series.go +++ b/storage/series.go @@ -447,7 +447,17 @@ func (e errChunksIterator) Err() error { return e.err } // ExpandSamples iterates over all samples in the iterator, 
buffering all in slice. // Optionally it takes samples constructor, useful when you want to compare sample slices with different // sample implementations. if nil, sample type from this package will be used. +// For float samples, NaN values are replaced with -42. func ExpandSamples(iter chunkenc.Iterator, newSampleFn func(st, t int64, f float64, h *histogram.Histogram, fh *histogram.FloatHistogram) chunks.Sample) ([]chunks.Sample, error) { + return expandSamples(iter, true, newSampleFn) +} + +// ExpandSamplesWithoutReplacingNaNs is the same as ExpandSamples, but it leaves float sample NaN values untouched. +func ExpandSamplesWithoutReplacingNaNs(iter chunkenc.Iterator, newSampleFn func(st, t int64, f float64, h *histogram.Histogram, fh *histogram.FloatHistogram) chunks.Sample) ([]chunks.Sample, error) { + return expandSamples(iter, false, newSampleFn) +} + +func expandSamples(iter chunkenc.Iterator, replaceNaN bool, newSampleFn func(st, t int64, f float64, h *histogram.Histogram, fh *histogram.FloatHistogram) chunks.Sample) ([]chunks.Sample, error) { if newSampleFn == nil { newSampleFn = func(st, t int64, f float64, h *histogram.Histogram, fh *histogram.FloatHistogram) chunks.Sample { switch { @@ -470,7 +480,7 @@ func ExpandSamples(iter chunkenc.Iterator, newSampleFn func(st, t int64, f float t, f := iter.At() st := iter.AtST() // NaNs can't be compared normally, so substitute for another value. 
- if math.IsNaN(f) { + if replaceNaN && math.IsNaN(f) { f = -42 } result = append(result, newSampleFn(st, t, f, nil, nil)) diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 5e57982b5d..2dbcb11645 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -52,6 +52,7 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" @@ -145,6 +146,16 @@ func TestDBClose_AfterClose(t *testing.T) { // query runs a matcher query against the querier and fully expands its data. func query(t testing.TB, q storage.Querier, matchers ...*labels.Matcher) map[string][]chunks.Sample { + return queryHelper(t, q, true, matchers...) +} + +// queryWithoutReplacingNaNs runs a matcher query against the querier and fully expands its data. +func queryWithoutReplacingNaNs(t testing.TB, q storage.Querier, matchers ...*labels.Matcher) map[string][]chunks.Sample { + return queryHelper(t, q, false, matchers...) +} + +// queryHelper runs a matcher query against the querier and fully expands its data. +func queryHelper(t testing.TB, q storage.Querier, withNaNReplacement bool, matchers ...*labels.Matcher) map[string][]chunks.Sample { ss := q.Select(context.Background(), false, nil, matchers...) 
defer func() { require.NoError(t, q.Close()) @@ -156,7 +167,13 @@ func query(t testing.TB, q storage.Querier, matchers ...*labels.Matcher) map[str series := ss.At() it = series.Iterator(it) - samples, err := storage.ExpandSamples(it, newSample) + var samples []chunks.Sample + var err error + if withNaNReplacement { + samples, err = storage.ExpandSamples(it, newSample) + } else { + samples, err = storage.ExpandSamplesWithoutReplacingNaNs(it, newSample) + } require.NoError(t, err) require.NoError(t, it.Err()) @@ -2610,7 +2627,7 @@ func TestDBReadOnly_FlushWAL(t *testing.T) { db.DisableCompactions() app := db.Appender(ctx) maxt = 1000 - for i := 0; i < maxt; i++ { + for i := range maxt { _, err := app.Append(0, labels.FromStrings(defaultLabelName, "flush"), int64(i), 1.0) require.NoError(t, err) } @@ -9323,3 +9340,224 @@ func TestBlockReloadInterval(t *testing.T) { }) } } + +func TestStaleSeriesCompaction(t *testing.T) { + opts := DefaultOptions() + opts.MinBlockDuration = 1000 + opts.MaxBlockDuration = 1000 + db := newTestDB(t, withOpts(opts)) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + var ( + nonStaleSeries, staleSeries, + nonStaleHist, staleHist, + nonStaleFHist, staleFHist, + staleSeriesCrossingBoundary, staleHistCrossingBoundary, staleFHistCrossingBoundary []labels.Labels + numSeriesPerCategory = 1 + ) + for i := range numSeriesPerCategory { + nonStaleSeries = append(nonStaleSeries, labels.FromStrings("name", fmt.Sprintf("series%d", 1000+i))) + nonStaleHist = append(nonStaleHist, labels.FromStrings("name", fmt.Sprintf("series%d", 2000+i))) + nonStaleFHist = append(nonStaleFHist, labels.FromStrings("name", fmt.Sprintf("series%d", 3000+i))) + + staleSeries = append(staleSeries, labels.FromStrings("name", fmt.Sprintf("series%d", 4000+i))) + staleHist = append(staleHist, labels.FromStrings("name", fmt.Sprintf("series%d", 5000+i))) + staleFHist = append(staleFHist, labels.FromStrings("name", fmt.Sprintf("series%d", 
6000+i))) + + staleSeriesCrossingBoundary = append(staleSeriesCrossingBoundary, labels.FromStrings("name", fmt.Sprintf("series%d", 7000+i))) + staleHistCrossingBoundary = append(staleHistCrossingBoundary, labels.FromStrings("name", fmt.Sprintf("series%d", 8000+i))) + staleFHistCrossingBoundary = append(staleFHistCrossingBoundary, labels.FromStrings("name", fmt.Sprintf("series%d", 9000+i))) + } + + var ( + v = 10.0 + staleV = math.Float64frombits(value.StaleNaN) + h = tsdbutil.GenerateTestHistograms(1)[0] + fh = tsdbutil.GenerateTestFloatHistograms(1)[0] + staleH = &histogram.Histogram{Sum: staleV} + staleFH = &histogram.FloatHistogram{Sum: staleV} + ) + + addNormalSamples := func(ts int64, floatSeries, histSeries, floatHistSeries []labels.Labels) { + app := db.Appender(context.Background()) + for i := range len(floatSeries) { + _, err := app.Append(0, floatSeries[i], ts, v) + require.NoError(t, err) + _, err = app.AppendHistogram(0, histSeries[i], ts, h, nil) + require.NoError(t, err) + _, err = app.AppendHistogram(0, floatHistSeries[i], ts, nil, fh) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + } + addStaleSamples := func(ts int64, floatSeries, histSeries, floatHistSeries []labels.Labels) { + app := db.Appender(context.Background()) + for i := range len(floatSeries) { + _, err := app.Append(0, floatSeries[i], ts, staleV) + require.NoError(t, err) + _, err = app.AppendHistogram(0, histSeries[i], ts, staleH, nil) + require.NoError(t, err) + _, err = app.AppendHistogram(0, floatHistSeries[i], ts, nil, staleFH) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + } + + // Normal sample for all. + addNormalSamples(100, nonStaleSeries, nonStaleHist, nonStaleFHist) + addNormalSamples(100, staleSeries, staleHist, staleFHist) + + // Stale sample for the stale series. Normal sample for the non-stale series. 
+ addNormalSamples(200, nonStaleSeries, nonStaleHist, nonStaleFHist) + addStaleSamples(200, staleSeries, staleHist, staleFHist) + + // Normal samples for the non-stale series later + addNormalSamples(300, nonStaleSeries, nonStaleHist, nonStaleFHist) + + require.Equal(t, uint64(6*numSeriesPerCategory), db.Head().NumSeries()) + require.Equal(t, uint64(3*numSeriesPerCategory), db.Head().NumStaleSeries()) + + // Series crossing block boundary and gets stale. + addNormalSamples(300, staleSeriesCrossingBoundary, staleHistCrossingBoundary, staleFHistCrossingBoundary) + addNormalSamples(700, staleSeriesCrossingBoundary, staleHistCrossingBoundary, staleFHistCrossingBoundary) + addNormalSamples(1100, staleSeriesCrossingBoundary, staleHistCrossingBoundary, staleFHistCrossingBoundary) + addStaleSamples(1200, staleSeriesCrossingBoundary, staleHistCrossingBoundary, staleFHistCrossingBoundary) + + require.NoError(t, db.CompactStaleHead()) + + require.Equal(t, uint64(3*numSeriesPerCategory), db.Head().NumSeries()) + require.Equal(t, uint64(0), db.Head().NumStaleSeries()) + + require.Len(t, db.Blocks(), 2) + m := db.Blocks()[0].Meta() + require.Equal(t, int64(0), m.MinTime) + require.Equal(t, int64(1000), m.MaxTime) + require.Truef(t, m.Compaction.FromStaleSeries(), "stale series info not found in block meta") + m = db.Blocks()[1].Meta() + require.Equal(t, int64(1000), m.MinTime) + require.Equal(t, int64(2000), m.MaxTime) + require.Truef(t, m.Compaction.FromStaleSeries(), "stale series info not found in block meta") + + // To make sure that Head is not truncated based on stale series block. + require.NoError(t, db.reload()) + + nonFirstH := h.Copy() + nonFirstH.CounterResetHint = histogram.NotCounterReset + nonFirstFH := fh.Copy() + nonFirstFH.CounterResetHint = histogram.NotCounterReset + + // Verify head block. 
+ verifyHeadBlock := func() { + require.Equal(t, uint64(3), db.head.NumSeries()) + require.Equal(t, uint64(0), db.head.NumStaleSeries()) + + expHeadQuery := make(map[string][]chunks.Sample) + for i := range numSeriesPerCategory { + expHeadQuery[fmt.Sprintf(`{name="%s"}`, nonStaleSeries[i].Get("name"))] = []chunks.Sample{ + sample{t: 100, f: v}, sample{t: 200, f: v}, sample{t: 300, f: v}, + } + expHeadQuery[fmt.Sprintf(`{name="%s"}`, nonStaleHist[i].Get("name"))] = []chunks.Sample{ + sample{t: 100, h: h}, sample{t: 200, h: nonFirstH}, sample{t: 300, h: nonFirstH}, + } + expHeadQuery[fmt.Sprintf(`{name="%s"}`, nonStaleFHist[i].Get("name"))] = []chunks.Sample{ + sample{t: 100, fh: fh}, sample{t: 200, fh: nonFirstFH}, sample{t: 300, fh: nonFirstFH}, + } + } + + querier, err := NewBlockQuerier(NewRangeHead(db.head, 0, 300), 0, 300) + require.NoError(t, err) + t.Cleanup(func() { + querier.Close() + }) + seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchRegexp, "name", "series.*")) + require.Equal(t, expHeadQuery, seriesSet) + } + + verifyHeadBlock() + + // Verify blocks from stale series. 
+ { + expBlockQuery := make(map[string][]chunks.Sample) + for i := range numSeriesPerCategory { + expBlockQuery[fmt.Sprintf(`{name="%s"}`, staleSeries[i].Get("name"))] = []chunks.Sample{ + sample{t: 100, f: v}, sample{t: 200, f: staleV}, + } + expBlockQuery[fmt.Sprintf(`{name="%s"}`, staleHist[i].Get("name"))] = []chunks.Sample{ + sample{t: 100, h: h}, sample{t: 200, h: staleH}, + } + expBlockQuery[fmt.Sprintf(`{name="%s"}`, staleFHist[i].Get("name"))] = []chunks.Sample{ + sample{t: 100, fh: fh}, sample{t: 200, fh: staleFH}, + } + expBlockQuery[fmt.Sprintf(`{name="%s"}`, staleSeriesCrossingBoundary[i].Get("name"))] = []chunks.Sample{ + sample{t: 300, f: v}, sample{t: 700, f: v}, sample{t: 1100, f: v}, sample{t: 1200, f: staleV}, + } + expBlockQuery[fmt.Sprintf(`{name="%s"}`, staleHistCrossingBoundary[i].Get("name"))] = []chunks.Sample{ + sample{t: 300, h: h}, sample{t: 700, h: nonFirstH}, sample{t: 1100, h: h}, sample{t: 1200, h: staleH}, + } + expBlockQuery[fmt.Sprintf(`{name="%s"}`, staleFHistCrossingBoundary[i].Get("name"))] = []chunks.Sample{ + sample{t: 300, fh: fh}, sample{t: 700, fh: nonFirstFH}, sample{t: 1100, fh: fh}, sample{t: 1200, fh: staleFH}, + } + } + + querier, err := NewBlockQuerier(db.Blocks()[0], 0, 1000) + require.NoError(t, err) + t.Cleanup(func() { + querier.Close() + }) + seriesSet := queryWithoutReplacingNaNs(t, querier, labels.MustNewMatcher(labels.MatchRegexp, "name", "series.*")) + + querier, err = NewBlockQuerier(db.Blocks()[1], 1000, 2000) + require.NoError(t, err) + t.Cleanup(func() { + querier.Close() + }) + seriesSet2 := queryWithoutReplacingNaNs(t, querier, labels.MustNewMatcher(labels.MatchRegexp, "name", "series.*")) + for k, v := range seriesSet2 { + seriesSet[k] = append(seriesSet[k], v...) + } + + require.Len(t, seriesSet, len(expBlockQuery)) + + // Compare all the samples except the stale value that needs special handling. 
+ for _, category := range [][]labels.Labels{ + staleSeries, staleHist, staleFHist, + staleSeriesCrossingBoundary, staleHistCrossingBoundary, staleFHistCrossingBoundary, + } { + for i := range numSeriesPerCategory { + seriesKey := fmt.Sprintf(`{name="%s"}`, category[i].Get("name")) + samples := expBlockQuery[seriesKey] + actSamples, exists := seriesSet[seriesKey] + require.Truef(t, exists, "series not found in result %s", seriesKey) + require.Len(t, actSamples, len(samples)) + + for i := range len(samples) - 1 { + require.Equal(t, samples[i], actSamples[i]) + } + + l := len(samples) - 1 + require.Equal(t, samples[l].T(), actSamples[l].T()) + switch { + case value.IsStaleNaN(samples[l].F()): + require.True(t, value.IsStaleNaN(actSamples[l].F())) + case samples[l].H() != nil: + require.True(t, value.IsStaleNaN(actSamples[l].H().Sum)) + default: + require.True(t, value.IsStaleNaN(actSamples[l].FH().Sum)) + } + } + } + } + + { + // Restart DB and verify that stale series were discarded from WAL replay. 
+ require.NoError(t, db.Close()) + var err error + db, err = Open(db.Dir(), db.logger, db.registerer, db.opts, nil) + require.NoError(t, err) + + verifyHeadBlock() + } +} From 3e4a094dbb3e2b60cd5a4cab2c83a56213c321ce Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Fri, 23 Jan 2026 18:12:34 -0800 Subject: [PATCH 20/46] Add stale_series_compaction_threshold config file option Signed-off-by: Ganesh Vernekar --- cmd/prometheus/main.go | 3 +++ config/config.go | 4 ++++ tsdb/db.go | 42 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 06d5540380..e4f15f5cb8 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -692,6 +692,7 @@ func main() { } if cfgFile.StorageConfig.TSDBConfig != nil { cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow + cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold if cfgFile.StorageConfig.TSDBConfig.Retention != nil { if cfgFile.StorageConfig.TSDBConfig.Retention.Time > 0 { cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time @@ -1943,6 +1944,7 @@ type tsdbOptions struct { UseUncachedIO bool BlockCompactionExcludeFunc tsdb.BlockExcludeFilterFunc BlockReloadInterval model.Duration + StaleSeriesCompactionThreshold float64 } func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { @@ -1969,6 +1971,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { BlockCompactionExcludeFunc: opts.BlockCompactionExcludeFunc, BlockReloadInterval: time.Duration(opts.BlockReloadInterval), FeatureRegistry: features.DefaultRegistry, + StaleSeriesCompactionThreshold: opts.StaleSeriesCompactionThreshold, } } diff --git a/config/config.go b/config/config.go index 0b9b059ab2..d721d7fb86 100644 --- a/config/config.go +++ b/config/config.go @@ -1107,6 +1107,10 @@ type TSDBConfig struct { // This should not be used directly and must 
be converted into OutOfOrderTimeWindow. OutOfOrderTimeWindowFlag model.Duration `yaml:"out_of_order_time_window,omitempty"` + // StaleSeriesCompactionThreshold is a number between 0.0 and 1.0 indicating the fraction of stale series in + // the in-memory Head block. If the fraction of stale series reaches or exceeds this threshold, stale series compaction is run immediately. + StaleSeriesCompactionThreshold float64 `yaml:"stale_series_compaction_threshold,omitempty"` + Retention *TSDBRetentionConfig `yaml:"retention,omitempty"` } diff --git a/tsdb/db.go b/tsdb/db.go index e3b00a2d11..1dd524a76a 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -100,6 +100,10 @@ func DefaultOptions() *Options { // Options of the DB storage. type Options struct { + // staleSeriesCompactionThreshold is the same as the exported option of the same name below, but is atomic so that we can do live updates without locks. + // This is the one that must be used by the code. + staleSeriesCompactionThreshold atomic.Float64 + // Segments (wal files) max size. // WALSegmentSize = 0, segment size is default size. // WALSegmentSize > 0, segment size is WALSegmentSize. @@ -245,6 +249,10 @@ type Options struct { // FeatureRegistry is used to register TSDB features. FeatureRegistry features.Collector + + // StaleSeriesCompactionThreshold is a number between 0.0 and 1.0 indicating the fraction of stale series in + // the in-memory Head block. If the fraction of stale series reaches or exceeds this threshold, stale series compaction is run immediately. + StaleSeriesCompactionThreshold float64 } type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) @@ -305,6 +313,10 @@ type DB struct { // out-of-order compaction and vertical queries. oooWasEnabled atomic.Bool + // lastHeadCompactionTime is the last wall clock time when the head block compaction was started, + // irrespective of success or failure. This does not include out-of-order compaction and stale series compaction.
+ lastHeadCompactionTime time.Time + writeNotified wlog.WriteNotified registerer prometheus.Registerer @@ -857,6 +869,8 @@ func validateOpts(opts *Options, rngs []int64) (*Options, []int64) { // configured maximum block duration. rngs = ExponentialBlockRanges(opts.MinBlockDuration, 10, 3) } + + opts.staleSeriesCompactionThreshold.Store(opts.StaleSeriesCompactionThreshold) return opts, rngs } @@ -1151,6 +1165,28 @@ func (db *DB) run(ctx context.Context) { } // We attempt mmapping of head chunks regularly. db.head.mmapHeadChunks() + + numStaleSeries, numSeries := db.Head().NumStaleSeries(), db.Head().NumSeries() + staleSeriesRatio := float64(numStaleSeries) / float64(numSeries) + if db.autoCompact && db.opts.staleSeriesCompactionThreshold.Load() > 0 && + staleSeriesRatio >= db.opts.staleSeriesCompactionThreshold.Load() { + nextCompactionIsSoon := false + if !db.lastHeadCompactionTime.IsZero() { + compactionInterval := time.Duration(db.head.chunkRange.Load()) * time.Millisecond + nextEstimatedCompactionTime := db.lastHeadCompactionTime.Add(compactionInterval) + if time.Now().Add(10 * time.Minute).After(nextEstimatedCompactionTime) { + // Next compaction is starting within next 10 mins. + nextCompactionIsSoon = true + } + } + + if !nextCompactionIsSoon { + if err := db.CompactStaleHead(); err != nil { + db.logger.Error("immediate stale series compaction failed", "err", err) + } + } + } + case <-db.compactc: db.metrics.compactionsTriggered.Inc() @@ -1203,7 +1239,7 @@ func (db *DB) ApplyConfig(conf *config.Config) error { oooTimeWindow := int64(0) if conf.StorageConfig.TSDBConfig != nil { oooTimeWindow = conf.StorageConfig.TSDBConfig.OutOfOrderTimeWindow - + db.opts.staleSeriesCompactionThreshold.Store(conf.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold) // Update retention configuration if provided. 
if conf.StorageConfig.TSDBConfig.Retention != nil { db.retentionMtx.Lock() @@ -1217,6 +1253,8 @@ func (db *DB) ApplyConfig(conf *config.Config) error { } db.retentionMtx.Unlock() } + } else { + db.opts.staleSeriesCompactionThreshold.Store(0) } if oooTimeWindow < 0 { oooTimeWindow = 0 @@ -1560,6 +1598,8 @@ func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID // compactHead compacts the given RangeHead. // The db.cmtx should be held before calling this method. func (db *DB) compactHead(head *RangeHead) error { + db.lastHeadCompactionTime = time.Now() + uids, err := db.compactor.Write(db.dir, head, head.MinTime(), head.BlockMaxTime(), nil) if err != nil { return fmt.Errorf("persist head block: %w", err) From 65f8482335ebc31862cf4d1a4857ae99bd63abc8 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 24 Dec 2025 11:52:37 +0100 Subject: [PATCH 21/46] fix(promql): prevent panic in trimStringByBytes on invalid UTF-8 Add bounds check to prevent index out of range panic when trimStringByBytes receives a string containing only UTF-8 continuation bytes (0x80-0xBF). Previously, the loop would decrement size below 0 when no valid rune start byte was found, causing a panic. A malicious query string with only continuation bytes could crash the Prometheus server via the ActiveQueryTracker before the query was parsed or validated. 
Signed-off-by: Arve Knudsen --- promql/query_logger.go | 2 +- promql/query_logger_test.go | 41 +++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/promql/query_logger.go b/promql/query_logger.go index 954f8b1a5b..0c4b218828 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -164,7 +164,7 @@ func trimStringByBytes(str string, size int) string { trimIndex := len(bytesStr) if size < len(bytesStr) { - for !utf8.RuneStart(bytesStr[size]) { + for size > 0 && !utf8.RuneStart(bytesStr[size]) { size-- } trimIndex = size diff --git a/promql/query_logger_test.go b/promql/query_logger_test.go index 8c88757bd7..edd3baad12 100644 --- a/promql/query_logger_test.go +++ b/promql/query_logger_test.go @@ -127,6 +127,47 @@ func TestMMapFile(t *testing.T) { require.Equal(t, []byte(data), bytes[:2], "Mmap failed") } +func TestTrimStringByBytes(t *testing.T) { + for _, tc := range []struct { + name string + input string + size int + expected string + }{ + { + name: "normal ASCII string", + input: "hello", + size: 3, + expected: "hel", + }, + { + name: "no trimming needed", + input: "hi", + size: 10, + expected: "hi", + }, + { + name: "UTF-8 multibyte character boundary", + input: "日本", // 6 bytes (3 bytes per character) + size: 4, + expected: "日", // trims back to complete character boundary + }, + { + name: "invalid UTF-8 continuation-only bytes", + input: string([]byte{0x80, 0x81, 0x82, 0x83, 0x84}), // only continuation bytes + size: 4, + expected: "", + }, + } { + t.Run(tc.name, func(t *testing.T) { + require.NotPanics(t, func() { + result := trimStringByBytes(tc.input, tc.size) + require.Equal(t, tc.expected, result) + }) + }) + } +} + func TestParseBrokenJSON(t *testing.T) { for _, tc := range []struct { b []byte From c8ff2d739beb7a56901f5e1fe79dd6800b963342 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 24 Dec 2025 13:47:54 +0100 Subject: [PATCH 22/46] enhancement(tsdb): add test for LeveledCompactor stopping 
after excluding block Add a test for `LeveledCompactor.Plan()` stopping after a block matches the `BlockExcludeFilter`, as a sub-test `TestLeveledCompactor/Plan/BlockExcludeFilter stops iteration`. Also moving `TestLeveledCompactor_plan` to a sub-test of `TestLeveledCompactor`, for consistency. Signed-off-by: Arve Knudsen --- tsdb/compact.go | 7 + tsdb/compact_test.go | 434 ++++++++++++++++++++++++------------------- 2 files changed, 254 insertions(+), 187 deletions(-) diff --git a/tsdb/compact.go b/tsdb/compact.go index 7c21cbcc13..973515888e 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -263,6 +263,13 @@ func (c *LeveledCompactor) Plan(dir string) ([]string, error) { return nil, err } if c.blockExcludeFunc != nil && c.blockExcludeFunc(meta) { + // Compactions work from oldest to newest, uploads do the same (usually). + // If you continue here you'll skip compactions on this one block, but: + // * all further blocks are NOT yet uploaded + // * some or all further blocks are uploaded + // + // If we continue and there are newer blocks to pick from, + // then you will compact in a non-continuous way, leaving gaps of individual un-compacted blocks. break } dms = append(dms, dirMeta{dir, meta}) diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index 6d2fbad91f..fcb659d040 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -173,214 +173,274 @@ func TestNoPanicFor0Tombstones(t *testing.T) { c.plan(metas) } -func TestLeveledCompactor_plan(t *testing.T) { - // This mimics our default ExponentialBlockRanges with min block size equals to 20. - compactor, err := NewLeveledCompactor(context.Background(), nil, nil, []int64{ - 20, - 60, - 180, - 540, - 1620, - }, nil, nil) - require.NoError(t, err) +func TestLeveledCompactor(t *testing.T) { + // Tests for the private plan() method. + t.Run("plan", func(t *testing.T) { + // This mimics our default ExponentialBlockRanges with min block size equals to 20. 
+ compactor, err := NewLeveledCompactor(context.Background(), nil, nil, []int64{ + 20, + 60, + 180, + 540, + 1620, + }, nil, nil) + require.NoError(t, err) - cases := map[string]struct { - metas []dirMeta - expected []string - }{ - "Outside Range": { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), + cases := map[string]struct { + metas []dirMeta + expected []string + }{ + "Outside Range": { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + }, + expected: nil, }, - expected: nil, - }, - "We should wait for four blocks of size 20 to appear before compacting.": { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), - metaRange("2", 20, 40, nil), + "We should wait for four blocks of size 20 to appear before compacting.": { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + metaRange("2", 20, 40, nil), + }, + expected: nil, }, - expected: nil, - }, - `We should wait for a next block of size 20 to appear before compacting - the existing ones. We have three, but we ignore the fresh one from WAl`: { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), - metaRange("2", 20, 40, nil), - metaRange("3", 40, 60, nil), + `We should wait for a next block of size 20 to appear before compacting + the existing ones. 
We have three, but we ignore the fresh one from WAl`: { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + metaRange("2", 20, 40, nil), + metaRange("3", 40, 60, nil), + }, + expected: nil, }, - expected: nil, - }, - "Block to fill the entire parent range appeared – should be compacted": { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), - metaRange("2", 20, 40, nil), - metaRange("3", 40, 60, nil), - metaRange("4", 60, 80, nil), + "Block to fill the entire parent range appeared – should be compacted": { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + metaRange("2", 20, 40, nil), + metaRange("3", 40, 60, nil), + metaRange("4", 60, 80, nil), + }, + expected: []string{"1", "2", "3"}, }, - expected: []string{"1", "2", "3"}, - }, - `Block for the next parent range appeared with gap with size 20. Nothing will happen in the first one - anymore but we ignore fresh one still, so no compaction`: { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), - metaRange("2", 20, 40, nil), - metaRange("3", 60, 80, nil), + `Block for the next parent range appeared with gap with size 20. Nothing will happen in the first one + anymore but we ignore fresh one still, so no compaction`: { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + metaRange("2", 20, 40, nil), + metaRange("3", 60, 80, nil), + }, + expected: nil, }, - expected: nil, - }, - `Block for the next parent range appeared, and we have a gap with size 20 between second and third block. - We will not get this missed gap anymore and we should compact just these two.`: { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), - metaRange("2", 20, 40, nil), - metaRange("3", 60, 80, nil), - metaRange("4", 80, 100, nil), + `Block for the next parent range appeared, and we have a gap with size 20 between second and third block. 
+ We will not get this missed gap anymore and we should compact just these two.`: { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + metaRange("2", 20, 40, nil), + metaRange("3", 60, 80, nil), + metaRange("4", 80, 100, nil), + }, + expected: []string{"1", "2"}, }, - expected: []string{"1", "2"}, - }, - "We have 20, 20, 20, 60, 60 range blocks. '5' is marked as fresh one": { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), - metaRange("2", 20, 40, nil), - metaRange("3", 40, 60, nil), - metaRange("4", 60, 120, nil), - metaRange("5", 120, 180, nil), + "We have 20, 20, 20, 60, 60 range blocks. '5' is marked as fresh one": { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + metaRange("2", 20, 40, nil), + metaRange("3", 40, 60, nil), + metaRange("4", 60, 120, nil), + metaRange("5", 120, 180, nil), + }, + expected: []string{"1", "2", "3"}, }, - expected: []string{"1", "2", "3"}, - }, - "We have 20, 60, 20, 60, 240 range blocks. We can compact 20 + 60 + 60": { - metas: []dirMeta{ - metaRange("2", 20, 40, nil), - metaRange("4", 60, 120, nil), - metaRange("5", 960, 980, nil), // Fresh one. - metaRange("6", 120, 180, nil), - metaRange("7", 720, 960, nil), + "We have 20, 60, 20, 60, 240 range blocks. We can compact 20 + 60 + 60": { + metas: []dirMeta{ + metaRange("2", 20, 40, nil), + metaRange("4", 60, 120, nil), + metaRange("5", 960, 980, nil), // Fresh one. 
+ metaRange("6", 120, 180, nil), + metaRange("7", 720, 960, nil), + }, + expected: []string{"2", "4", "6"}, }, - expected: []string{"2", "4", "6"}, - }, - "Do not select large blocks that have many tombstones when there is no fresh block": { - metas: []dirMeta{ - metaRange("1", 0, 540, &BlockStats{ - NumSeries: 10, - NumTombstones: 3, - }), + "Do not select large blocks that have many tombstones when there is no fresh block": { + metas: []dirMeta{ + metaRange("1", 0, 540, &BlockStats{ + NumSeries: 10, + NumTombstones: 3, + }), + }, + expected: nil, }, - expected: nil, - }, - "Select large blocks that have many tombstones when fresh appears": { - metas: []dirMeta{ - metaRange("1", 0, 540, &BlockStats{ - NumSeries: 10, - NumTombstones: 3, - }), - metaRange("2", 540, 560, nil), + "Select large blocks that have many tombstones when fresh appears": { + metas: []dirMeta{ + metaRange("1", 0, 540, &BlockStats{ + NumSeries: 10, + NumTombstones: 3, + }), + metaRange("2", 540, 560, nil), + }, + expected: []string{"1"}, }, - expected: []string{"1"}, - }, - "For small blocks, do not compact tombstones, even when fresh appears.": { - metas: []dirMeta{ - metaRange("1", 0, 60, &BlockStats{ - NumSeries: 10, - NumTombstones: 3, - }), - metaRange("2", 60, 80, nil), + "For small blocks, do not compact tombstones, even when fresh appears.": { + metas: []dirMeta{ + metaRange("1", 0, 60, &BlockStats{ + NumSeries: 10, + NumTombstones: 3, + }), + metaRange("2", 60, 80, nil), + }, + expected: nil, }, - expected: nil, - }, - `Regression test: we were stuck in a compact loop where we always recompacted - the same block when tombstones and series counts were zero`: { - metas: []dirMeta{ - metaRange("1", 0, 540, &BlockStats{ - NumSeries: 0, - NumTombstones: 0, - }), - metaRange("2", 540, 560, nil), + `Regression test: we were stuck in a compact loop where we always recompacted + the same block when tombstones and series counts were zero`: { + metas: []dirMeta{ + metaRange("1", 0, 540, 
&BlockStats{ + NumSeries: 0, + NumTombstones: 0, + }), + metaRange("2", 540, 560, nil), + }, + expected: nil, }, - expected: nil, - }, - `Regression test: we were wrongly assuming that new block is fresh from WAL when its ULID is newest. - We need to actually look on max time instead. + `Regression test: we were wrongly assuming that new block is fresh from WAL when its ULID is newest. + We need to actually look on max time instead. - With previous, wrong approach "8" block was ignored, so we were wrongly compacting 5 and 7 and introducing - block overlaps`: { - metas: []dirMeta{ - metaRange("5", 0, 360, nil), - metaRange("6", 540, 560, nil), // Fresh one. - metaRange("7", 360, 420, nil), - metaRange("8", 420, 540, nil), + With previous, wrong approach "8" block was ignored, so we were wrongly compacting 5 and 7 and introducing + block overlaps`: { + metas: []dirMeta{ + metaRange("5", 0, 360, nil), + metaRange("6", 540, 560, nil), // Fresh one. + metaRange("7", 360, 420, nil), + metaRange("8", 420, 540, nil), + }, + expected: []string{"7", "8"}, }, - expected: []string{"7", "8"}, - }, - // |--------------| - // |----------------| - // |--------------| - "Overlapping blocks 1": { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), - metaRange("2", 19, 40, nil), - metaRange("3", 40, 60, nil), + // |--------------| + // |----------------| + // |--------------| + "Overlapping blocks 1": { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + metaRange("2", 19, 40, nil), + metaRange("3", 40, 60, nil), + }, + expected: []string{"1", "2"}, }, - expected: []string{"1", "2"}, - }, - // |--------------| - // |--------------| - // |--------------| - "Overlapping blocks 2": { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), - metaRange("2", 20, 40, nil), - metaRange("3", 30, 50, nil), + // |--------------| + // |--------------| + // |--------------| + "Overlapping blocks 2": { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + metaRange("2", 20, 40, nil), + metaRange("3", 30, 
50, nil), + }, + expected: []string{"2", "3"}, }, - expected: []string{"2", "3"}, - }, - // |--------------| - // |---------------------| - // |--------------| - "Overlapping blocks 3": { - metas: []dirMeta{ - metaRange("1", 0, 20, nil), - metaRange("2", 10, 40, nil), - metaRange("3", 30, 50, nil), + // |--------------| + // |---------------------| + // |--------------| + "Overlapping blocks 3": { + metas: []dirMeta{ + metaRange("1", 0, 20, nil), + metaRange("2", 10, 40, nil), + metaRange("3", 30, 50, nil), + }, + expected: []string{"1", "2", "3"}, }, - expected: []string{"1", "2", "3"}, - }, - // |--------------| - // |--------------------------------| - // |--------------| - // |--------------| - "Overlapping blocks 4": { - metas: []dirMeta{ - metaRange("5", 0, 360, nil), - metaRange("6", 340, 560, nil), - metaRange("7", 360, 420, nil), - metaRange("8", 420, 540, nil), + // |--------------| + // |--------------------------------| + // |--------------| + // |--------------| + "Overlapping blocks 4": { + metas: []dirMeta{ + metaRange("5", 0, 360, nil), + metaRange("6", 340, 560, nil), + metaRange("7", 360, 420, nil), + metaRange("8", 420, 540, nil), + }, + expected: []string{"5", "6", "7", "8"}, }, - expected: []string{"5", "6", "7", "8"}, - }, - // |--------------| - // |--------------| - // |--------------| - // |--------------| - "Overlapping blocks 5": { - metas: []dirMeta{ - metaRange("1", 0, 10, nil), - metaRange("2", 9, 20, nil), - metaRange("3", 30, 40, nil), - metaRange("4", 39, 50, nil), + // |--------------| + // |--------------| + // |--------------| + // |--------------| + "Overlapping blocks 5": { + metas: []dirMeta{ + metaRange("1", 0, 10, nil), + metaRange("2", 9, 20, nil), + metaRange("3", 30, 40, nil), + metaRange("4", 39, 50, nil), + }, + expected: []string{"1", "2"}, }, - expected: []string{"1", "2"}, - }, - } - - for title, c := range cases { - if !t.Run(title, func(t *testing.T) { - res, err := compactor.plan(c.metas) - require.NoError(t, err) 
- require.Equal(t, c.expected, res) - }) { - return } - } + + for title, c := range cases { + if !t.Run(title, func(t *testing.T) { + res, err := compactor.plan(c.metas) + require.NoError(t, err) + require.Equal(t, c.expected, res) + }) { + return + } + } + }) + + // Tests for the public Plan() method. + t.Run("Plan", func(t *testing.T) { + // Verify that when a BlockExcludeFilter excludes a block in the middle of + // the list, subsequent blocks are not processed. + t.Run("BlockExcludeFilter stops iteration", func(t *testing.T) { + dir := t.TempDir() + + // Create 4 blocks with sequential ULIDs. + block1ULID := ulid.MustNew(1, nil) + block2ULID := ulid.MustNew(2, nil) + block3ULID := ulid.MustNew(3, nil) + block4ULID := ulid.MustNew(4, nil) + + for i, uid := range []ulid.ULID{block1ULID, block2ULID, block3ULID, block4ULID} { + blockDir := filepath.Join(dir, uid.String()) + require.NoError(t, os.MkdirAll(blockDir, 0o777)) + + meta := &BlockMeta{ + ULID: uid, + MinTime: int64(i * 10), + MaxTime: int64((i + 1) * 10), + } + meta.Compaction.Level = 1 + _, err := writeMetaFile(promslog.NewNopLogger(), blockDir, meta) + require.NoError(t, err) + } + + // Track which blocks were evaluated by the exclude function. + var evaluatedBlocks []ulid.ULID + excludeFunc := func(meta *BlockMeta) bool { + evaluatedBlocks = append(evaluatedBlocks, meta.ULID) + return meta.ULID == block2ULID + } + + c, err := NewLeveledCompactorWithOptions( + context.Background(), + nil, + promslog.NewNopLogger(), + []int64{20}, + chunkenc.NewPool(), + LeveledCompactorOptions{ + BlockExcludeFilter: excludeFunc, + EnableOverlappingCompaction: true, + }, + ) + require.NoError(t, err) + + // Plan should stop evaluating blocks at the first one matched by the exclude filter.
+ _, err = c.Plan(dir) + require.NoError(t, err) + + require.Len(t, evaluatedBlocks, 2, "Expected only 2 blocks to be evaluated") + require.Contains(t, evaluatedBlocks, block1ULID) + require.Contains(t, evaluatedBlocks, block2ULID) + }) + }) } func TestRangeWithFailedCompactionWontGetSelected(t *testing.T) { From 2332962c4bb9de4c8181bee27bef04665ff32ce3 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Sat, 24 Jan 2026 16:44:08 +0100 Subject: [PATCH 23/46] otlptranslator: filter __name__ from OTLP attributes to prevent duplicates (#17917) * otlptranslator: filter __name__ from OTLP attributes to prevent duplicates OTLP metrics can have a __name__ attribute which, when combined with the metric name passed via extras, creates duplicate __name__ labels. This commit implements filtering out of any __name__ metric attribute from OTLP. Also rename TestCreateAttributes to TestPrometheusConverter_createAttributes for consistency, and add test cases for __name__, __type__, and __unit__ OTLP metric attributes. --------- Signed-off-by: Arve Knudsen --- .../prometheusremotewrite/helper.go | 11 +- .../prometheusremotewrite/helper_test.go | 117 +++++++++++++++++- .../prometheusremotewrite/histograms.go | 4 +- .../number_data_points.go | 4 +- 4 files changed, 129 insertions(+), 7 deletions(-) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 11f2eec6fd..669e10e0a7 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -61,6 +61,13 @@ const ( defaultLookbackDelta = 5 * time.Minute ) +// reservedLabelNames contains label names that should be filtered from +// OTLP attributes because they are set separately (via extras parameter). +// Allowing these through could create duplicate labels. 
+var reservedLabelNames = []string{ + model.MetricNameLabel, // "__name__" - set from metric name +} + // createAttributes creates a slice of Prometheus Labels with OTLP attributes and pairs of string values. // Unpaired string values are ignored. String pairs overwrite OTLP labels if collisions happen and // if logOnOverwrite is true, the overwrite is logged. Resulting label names are sanitized. @@ -214,7 +221,7 @@ func (c *PrometheusConverter) addHistogramDataPoints(ctx context.Context, dataPo pt := dataPoints.At(x) timestamp := convertTimeStamp(pt.Timestamp()) startTimestamp := convertTimeStamp(pt.StartTimestamp()) - baseLabels, err := c.createAttributes(pt.Attributes(), settings, nil, false, meta) + baseLabels, err := c.createAttributes(pt.Attributes(), settings, reservedLabelNames, false, meta) if err != nil { return err } @@ -416,7 +423,7 @@ func (c *PrometheusConverter) addSummaryDataPoints(ctx context.Context, dataPoin pt := dataPoints.At(x) timestamp := convertTimeStamp(pt.Timestamp()) startTimestamp := convertTimeStamp(pt.StartTimestamp()) - baseLabels, err := c.createAttributes(pt.Attributes(), settings, nil, false, meta) + baseLabels, err := c.createAttributes(pt.Attributes(), settings, reservedLabelNames, false, meta) if err != nil { return err } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go index c549667dde..b86b8cb3ea 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go @@ -31,11 +31,12 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/metadata" "github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/util/testutil" ) -func TestCreateAttributes(t *testing.T) { +func TestPrometheusConverter_createAttributes(t *testing.T) { resourceAttrs := 
map[string]string{ "service.name": "service name", "service.instance.id": "service ID", @@ -386,6 +387,18 @@ func TestCreateAttributes(t *testing.T) { "metric_multi", "multi metric", ), }, + { + name: "__name__ attribute is filtered when passed in ignoreAttrs", + promoteResourceAttributes: nil, + ignoreAttrs: []string{model.MetricNameLabel}, + expectedLabels: labels.FromStrings( + "__name__", "test_metric", + "instance", "service ID", + "job", "service name", + "metric_attr", "metric value", + "metric_attr_other", "metric value other", + ), + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { @@ -423,6 +436,108 @@ func TestCreateAttributes(t *testing.T) { testutil.RequireEqual(t, tc.expectedLabels, lbls) }) } + + // Test that __name__ attributes in OTLP data are filtered out to prevent + // duplicate labels. + t.Run("__name__ attribute in OTLP data is filtered", func(t *testing.T) { + resource := pcommon.NewResource() + resource.Attributes().PutStr("service.name", "test-service") + resource.Attributes().PutStr("service.instance.id", "test-instance") + + // Create attributes with __name__ to simulate problematic OTLP data. + attrsWithNameLabel := pcommon.NewMap() + attrsWithNameLabel.PutStr("__name__", "wrong_metric_name") + attrsWithNameLabel.PutStr("other_attr", "value") + + mockAppender := &mockCombinedAppender{} + c := NewPrometheusConverter(mockAppender) + settings := Settings{} + + require.NoError(t, c.setResourceContext(resource, settings)) + require.NoError(t, c.setScopeContext(scope{}, settings)) + + // Call createAttributes with reservedLabelNames to filter __name__. + lbls, err := c.createAttributes( + attrsWithNameLabel, + settings, + reservedLabelNames, + true, + Metadata{}, + model.MetricNameLabel, "correct_metric_name", + ) + require.NoError(t, err) + + // Verify there's exactly one __name__ label with the correct value. 
+ nameCount := 0 + var nameValue string + lbls.Range(func(l labels.Label) { + if l.Name == model.MetricNameLabel { + nameCount++ + nameValue = l.Value + } + }) + + require.Equal(t, 1, nameCount) + require.Equal(t, "correct_metric_name", nameValue) + require.Equal(t, "value", lbls.Get("other_attr")) + }) + + // Test that __type__ and __unit__ attributes in OTLP data are overwritten + // by auto-generated labels from metadata when EnableTypeAndUnitLabels is true. + t.Run("__type__ and __unit__ attributes are overwritten by metadata", func(t *testing.T) { + resource := pcommon.NewResource() + resource.Attributes().PutStr("service.name", "test-service") + resource.Attributes().PutStr("service.instance.id", "test-instance") + + // Create attributes with __type__ and __unit__ to simulate problematic OTLP data. + attrsWithTypeAndUnit := pcommon.NewMap() + attrsWithTypeAndUnit.PutStr(model.MetricTypeLabel, "wrong_type") + attrsWithTypeAndUnit.PutStr(model.MetricUnitLabel, "wrong_unit") + attrsWithTypeAndUnit.PutStr("other_attr", "value") + + mockAppender := &mockCombinedAppender{} + c := NewPrometheusConverter(mockAppender) + settings := Settings{EnableTypeAndUnitLabels: true} + + require.NoError(t, c.setResourceContext(resource, settings)) + require.NoError(t, c.setScopeContext(scope{}, settings)) + + // Call createAttributes with Metadata containing correct Type and Unit. + lbls, err := c.createAttributes( + attrsWithTypeAndUnit, + settings, + reservedLabelNames, + true, + Metadata{Metadata: metadata.Metadata{Type: model.MetricTypeGauge, Unit: "seconds"}}, + model.MetricNameLabel, "test_metric", + ) + require.NoError(t, err) + + // Verify there's exactly one __type__ label with the correct value (from metadata). 
+ typeCount := 0 + var typeValue string + lbls.Range(func(l labels.Label) { + if l.Name == model.MetricTypeLabel { + typeCount++ + typeValue = l.Value + } + }) + require.Equal(t, 1, typeCount) + require.Equal(t, "gauge", typeValue) + + // Verify there's exactly one __unit__ label with the correct value (from metadata). + unitCount := 0 + var unitValue string + lbls.Range(func(l labels.Label) { + if l.Name == model.MetricUnitLabel { + unitCount++ + unitValue = l.Value + } + }) + require.Equal(t, 1, unitCount) + require.Equal(t, "seconds", unitValue) + require.Equal(t, "value", lbls.Get("other_attr")) + }) } func Test_convertTimeStamp(t *testing.T) { diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index dd873c41bd..e2537b5cec 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -53,7 +53,7 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(ctx context.Cont lbls, err := c.createAttributes( pt.Attributes(), settings, - nil, + reservedLabelNames, true, meta, model.MetricNameLabel, @@ -269,7 +269,7 @@ func (c *PrometheusConverter) addCustomBucketsHistogramDataPoints(ctx context.Co lbls, err := c.createAttributes( pt.Attributes(), settings, - nil, + reservedLabelNames, true, meta, model.MetricNameLabel, diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go index d3860cb5d5..65d4fd70b2 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -38,7 +38,7 @@ func (c *PrometheusConverter) addGaugeNumberDataPoints(ctx context.Context, data labels, err := c.createAttributes( pt.Attributes(), settings, - nil, + reservedLabelNames, true, meta, 
model.MetricNameLabel, @@ -79,7 +79,7 @@ func (c *PrometheusConverter) addSumNumberDataPoints(ctx context.Context, dataPo lbls, err := c.createAttributes( pt.Attributes(), settings, - nil, + reservedLabelNames, true, meta, model.MetricNameLabel, From 68b7aaaf15c79e92cd9bff1fdf5d432a070ff38b Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Sat, 24 Jan 2026 16:32:44 -0800 Subject: [PATCH 24/46] Add test case for loading stale_series_compaction_threshold config Signed-off-by: Ganesh Vernekar --- config/config_test.go | 5 +++-- config/testdata/conf.good.yml | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/config/config_test.go b/config/config_test.go index 08aa0b4f06..968b563e1e 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -1733,8 +1733,9 @@ var expectedConf = &Config{ }, StorageConfig: StorageConfig{ TSDBConfig: &TSDBConfig{ - OutOfOrderTimeWindow: 30 * time.Minute.Milliseconds(), - OutOfOrderTimeWindowFlag: model.Duration(30 * time.Minute), + OutOfOrderTimeWindow: 30 * time.Minute.Milliseconds(), + OutOfOrderTimeWindowFlag: model.Duration(30 * time.Minute), + StaleSeriesCompactionThreshold: 0.5, Retention: &TSDBRetentionConfig{ Time: model.Duration(24 * time.Hour), Size: 1 * units.GiB, diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 7aa53b3b74..96bf9e2b33 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -453,6 +453,7 @@ alerting: storage: tsdb: out_of_order_time_window: 30m + stale_series_compaction_threshold: 0.5 retention: time: 1d size: 1GB From 5e66c9305fae5f68961db308170d5d480e09aac3 Mon Sep 17 00:00:00 2001 From: Aditya Prakash <64980881+prakashaditya02@users.noreply.github.com> Date: Tue, 27 Jan 2026 14:27:40 +0530 Subject: [PATCH 25/46] scrape: clarify test channel name in manager_test (#17929) Signed-off-by: Nova --- scrape/manager_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrape/manager_test.go 
b/scrape/manager_test.go index 17152e8eb1..288f1d678d 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -581,8 +581,8 @@ func TestManagerTargetsUpdates(t *testing.T) { m, err := NewManager(&opts, nil, nil, nil, nil, testRegistry) require.NoError(t, err) - ts := make(chan map[string][]*targetgroup.Group) - go m.Run(ts) + targetSetsCh := make(chan map[string][]*targetgroup.Group) + go m.Run(targetSetsCh) defer m.Stop() tgSent := make(map[string][]*targetgroup.Group) @@ -594,7 +594,7 @@ func TestManagerTargetsUpdates(t *testing.T) { } select { - case ts <- tgSent: + case targetSetsCh <- tgSent: case <-time.After(10 * time.Millisecond): require.Fail(t, "Scrape manager's channel remained blocked after the set threshold.") } From a5f86c3fb6c394ba1c3e4d9934e4af5601ba33a4 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 27 Jan 2026 10:02:16 +0100 Subject: [PATCH 26/46] cmd/prometheus: fix flaky TestQueryLog race condition (#17933) Add waitForQueryLog helper that polls for query log entries to appear before asserting, rather than reading the file immediately after making a query. This fixes a race condition where the query log wasn't flushed to disk before the test read the file. The helper uses a 5 second timeout with 100ms polling intervals, which is generous enough to handle slow CI environments while keeping the test responsive. Signed-off-by: Arve Knudsen --- cmd/prometheus/query_log_test.go | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index 5e5a9ac3b7..e410f836a9 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -334,7 +334,8 @@ func (p *queryLogTest) run(t *testing.T) { p.query(t) - ql := readQueryLog(t, queryLogFile.Name()) + // Wait for query log entry to be written (avoid race with file I/O). 
+ ql := waitForQueryLog(t, queryLogFile.Name(), 1) qc := len(ql) if p.exactQueryCount() { require.Equal(t, 1, qc) @@ -361,7 +362,8 @@ func (p *queryLogTest) run(t *testing.T) { p.query(t) qc++ - ql = readQueryLog(t, queryLogFile.Name()) + // Wait for query log entry to be written (avoid race with file I/O). + ql = waitForQueryLog(t, queryLogFile.Name(), qc) if p.exactQueryCount() { require.Len(t, ql, qc) } else { @@ -392,7 +394,8 @@ func (p *queryLogTest) run(t *testing.T) { qc++ - ql = readQueryLog(t, newFile.Name()) + // Wait for query log entry to be written (avoid race with file I/O). + ql = waitForQueryLog(t, newFile.Name(), qc) if p.exactQueryCount() { require.Len(t, ql, qc) } else { @@ -404,7 +407,8 @@ func (p *queryLogTest) run(t *testing.T) { p.query(t) - ql = readQueryLog(t, queryLogFile.Name()) + // Wait for query log entry to be written (avoid race with file I/O). + ql = waitForQueryLog(t, queryLogFile.Name(), 1) qc = len(ql) if p.exactQueryCount() { require.Equal(t, 1, qc) @@ -446,6 +450,18 @@ func readQueryLog(t *testing.T, path string) []queryLogLine { return ql } +// waitForQueryLog waits for the query log to contain at least minEntries entries, +// polling at regular intervals until the timeout is reached. 
+func waitForQueryLog(t *testing.T, path string, minEntries int) []queryLogLine { + t.Helper() + var ql []queryLogLine + require.Eventually(t, func() bool { + ql = readQueryLog(t, path) + return len(ql) >= minEntries + }, 5*time.Second, 100*time.Millisecond, "timed out waiting for query log to have at least %d entries, got %d", minEntries, len(ql)) + return ql +} + func TestQueryLog(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") From d25c8476e7e6f230c91da54ebb5881ab18a03934 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Tue, 27 Jan 2026 11:06:11 +0100 Subject: [PATCH 27/46] chore(sd-ownership): add default-maintainers as default code owner (#17940) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(sd-ownership): add default-maintainers as default code owner In accordance with dev summit decision. At the same time I've set up auto assignment for code review, meaning that not everybody will get notified for all PRs. If there's already a maintainer assigned, you don't get notified. Otherwise the assignment is round-robin, 1 at a time. Also you can opt out. Signed-off-by: György Krajcsovits * Remove code owner without write access Signed-off-by: György Krajcsovits --------- Signed-off-by: György Krajcsovits --- CODEOWNERS | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index f28cdbf832..4982838376 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -2,25 +2,29 @@ # Please keep this file in sync with the MAINTAINERS.md file! # +# Prometheus team members are members of the "default maintainers" github team. +# They are code owners by default for the whole repo. +* @prometheus/default-maintainers + # Subsystems. 
-/Makefile @simonpasquier @SuperQ -/cmd/promtool @dgl -/documentation/prometheus-mixin @metalmatze -/model/histogram @beorn7 @krajorama -/web/ui @juliusv -/web/ui/module @juliusv @nexucis -/promql @roidelapluie -/storage/remote @cstyan @bwplotka @tomwilkie @npazosmendez @alexgreenbank -/storage/remote/otlptranslator @aknuds1 @jesusvazquez @ArthurSens -/tsdb @jesusvazquez @codesome @bwplotka @krajorama +/Makefile @prometheus/default-maintainers @simonpasquier @SuperQ +/cmd/promtool @prometheus/default-maintainers @dgl +/documentation/prometheus-mixin @prometheus/default-maintainers @metalmatze +/model/histogram @prometheus/default-maintainers @beorn7 @krajorama +/web/ui @prometheus/default-maintainers @juliusv +/web/ui/module @prometheus/default-maintainers @juliusv @nexucis +/promql @prometheus/default-maintainers @roidelapluie +/storage/remote @prometheus/default-maintainers @cstyan @bwplotka @tomwilkie @alexgreenbank +/storage/remote/otlptranslator @prometheus/default-maintainers @aknuds1 @jesusvazquez @ArthurSens +/tsdb @prometheus/default-maintainers @jesusvazquez @codesome @bwplotka @krajorama # Service discovery. 
-/discovery/kubernetes @brancz -/discovery/stackit @jkroepke +/discovery/kubernetes @prometheus/default-maintainers @brancz +/discovery/stackit @prometheus/default-maintainers @jkroepke # Pending # https://github.com/prometheus/prometheus/pull/17105#issuecomment-3248209452 -# /discovery/aws/ @matt-gp @sysadmind +# /discovery/aws/ @prometheus/default-maintainers @matt-gp @sysadmind # https://github.com/prometheus/prometheus/pull/15212#issuecomment-3575225179 -# /discovery/aliyun @KeyOfSpectator +# /discovery/aliyun @prometheus/default-maintainers @KeyOfSpectator # https://github.com/prometheus/prometheus/pull/14108#issuecomment-2639515421 -# /discovery/nomad @jaloren @jrasell +# /discovery/nomad @prometheus/default-maintainers @jaloren @jrasell From aa0f00efdf7fc4c2ca0982063fdd031f95d7b79b Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Tue, 27 Jan 2026 10:07:56 +0000 Subject: [PATCH 28/46] tests(teststorage, api_test): Fix leaking readers; kill fake exemplar storage (#17906) * tests(teststorage): Fix leaking readers; use TSDB exemplar storage instead of fake Signed-off-by: bwplotka * switched to v1 exemplar flow for now Signed-off-by: bwplotka --------- Signed-off-by: bwplotka --- util/teststorage/storage.go | 32 ++--------- web/api/v1/api_test.go | 108 ++++++++++++++++++------------------ 2 files changed, 60 insertions(+), 80 deletions(-) diff --git a/util/teststorage/storage.go b/util/teststorage/storage.go index 055bf3ff22..65c2f87e21 100644 --- a/util/teststorage/storage.go +++ b/util/teststorage/storage.go @@ -19,12 +19,8 @@ import ( "testing" "time" - "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" - "github.com/prometheus/prometheus/model/exemplar" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb" ) @@ -57,6 +53,10 @@ func NewWithError(o ...Option) (*TestStorage, error) { opts.RetentionDuration = 0 opts.OutOfOrderTimeWindow = 0 + 
// Enable exemplars storage by default. + opts.EnableExemplarStorage = true + opts.MaxExemplars = 1e5 + for _, opt := range o { opt(opts) } @@ -70,20 +70,12 @@ func NewWithError(o ...Option) (*TestStorage, error) { if err != nil { return nil, fmt.Errorf("opening test storage: %w", err) } - reg := prometheus.NewRegistry() - eMetrics := tsdb.NewExemplarMetrics(reg) - - es, err := tsdb.NewCircularExemplarStorage(10, eMetrics, opts.OutOfOrderTimeWindow) - if err != nil { - return nil, fmt.Errorf("opening test exemplar storage: %w", err) - } - return &TestStorage{DB: db, exemplarStorage: es, dir: dir}, nil + return &TestStorage{DB: db, dir: dir}, nil } type TestStorage struct { *tsdb.DB - exemplarStorage tsdb.ExemplarStorage - dir string + dir string } func (s TestStorage) Close() error { @@ -92,15 +84,3 @@ func (s TestStorage) Close() error { } return os.RemoveAll(s.dir) } - -func (s TestStorage) ExemplarAppender() storage.ExemplarAppender { - return s -} - -func (s TestStorage) ExemplarQueryable() storage.ExemplarQueryable { - return s.exemplarStorage -} - -func (s TestStorage) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { - return ref, s.exemplarStorage.AddExemplar(l, e) -} diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 87fe756544..797182ce88 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -187,15 +187,12 @@ func (testTargetRetriever) ScrapePoolConfig(_ string) (*config.ScrapeConfig, err func (t *testTargetRetriever) SetMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error { targets, ok := t.activeTargets[identifier] - if !ok { - return errors.New("targets not found") + return fmt.Errorf("no active target for %v", identifier) } - for _, at := range targets { at.SetMetadataStore(metadata) } - return nil } @@ -323,7 +320,8 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { func (m *rulesRetrieverMock) CreateRuleGroups() { 
m.CreateAlertingRules() arules := m.AlertingRules() - storage := teststorage.New(m.testing) + // Create separate storage for recordings to not pollute the main one. + s := teststorage.New(m.testing) engineOpts := promql.EngineOpts{ Logger: nil, @@ -333,8 +331,8 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { } engine := promqltest.NewTestEngineWithOpts(m.testing, engineOpts) opts := &rules.ManagerOptions{ - QueryFunc: rules.EngineQueryFunc(engine, storage), - Appendable: storage, + QueryFunc: rules.EngineQueryFunc(engine, s), + Appendable: s, Context: context.Background(), Logger: promslog.NewNopLogger(), NotifyFunc: func(context.Context, string, ...*rules.Alert) {}, @@ -399,8 +397,23 @@ var sampleFlagMap = map[string]string{ "flag2": "value2", } +func appendExemplars(t testing.TB, s storage.Storage, ex []exemplar.QueryResult) { + t.Helper() + + // TODO(bwplotka): Use AppenderV2.AppendExemplar per series flow + // once its implemented: https://github.com/prometheus/prometheus/issues/17632#issuecomment-3759315095 + app := s.Appender(t.Context()) + for _, ed := range ex { + for _, e := range ed.Exemplars { + _, err := app.AppendExemplar(0, ed.SeriesLabels, e) + require.NoError(t, err) + } + } + require.NoError(t, app.Commit()) +} + func TestEndpoints(t *testing.T) { - storage := promqltest.LoadedStorage(t, ` + s := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar"} 0+100x100 test_metric1{foo="boo"} 1+0x100 @@ -414,6 +427,7 @@ func TestEndpoints(t *testing.T) { test_metric5{"junk\n{},=: chars"="bar"} 1+0x100 `) + // Add exemplar testdata here, given promqltest does not support exemplars. 
start := time.Unix(0, 0) exemplars := []exemplar.QueryResult{ { @@ -457,15 +471,10 @@ func TestEndpoints(t *testing.T) { }, }, } - for _, ed := range exemplars { - _, err := storage.AppendExemplar(0, ed.SeriesLabels, ed.Exemplars[0]) - require.NoError(t, err, "failed to add exemplar: %+v", ed.Exemplars[0]) - } + appendExemplars(t, s, exemplars) now := time.Now() - ng := testEngine(t) - t.Run("local", func(t *testing.T) { algr := rulesRetrieverMock{testing: t} @@ -478,9 +487,9 @@ func TestEndpoints(t *testing.T) { testTargetRetriever := setupTestTargetRetriever(t) api := &API{ - Queryable: storage, + Queryable: s, QueryEngine: ng, - ExemplarQueryable: storage.ExemplarQueryable(), + ExemplarQueryable: s, targetRetriever: testTargetRetriever.toFactory(), alertmanagerRetriever: testAlertmanagerRetriever{}.toFactory(), flagsMap: sampleFlagMap, @@ -489,14 +498,14 @@ func TestEndpoints(t *testing.T) { ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, rulesRetriever: algr.toFactory(), } - testEndpoints(t, api, testTargetRetriever, storage, true) + testEndpoints(t, api, testTargetRetriever, true) }) // Run all the API tests against an API that is wired to forward queries via // the remote read client to a test server, which in turn sends them to the // data from the test storage. 
t.Run("remote", func(t *testing.T) { - server := setupRemote(storage) + server := setupRemote(s) defer server.Close() u, err := url.Parse(server.URL) @@ -518,6 +527,7 @@ func TestEndpoints(t *testing.T) { remote := remote.NewStorage(promslog.New(&promslogConfig), prometheus.DefaultRegisterer, func() (int64, error) { return 0, nil }, dbDir, 1*time.Second, nil, false) + t.Cleanup(func() { _ = remote.Close() }) err = remote.ApplyConfig(&config.Config{ RemoteReadConfigs: []*config.RemoteReadConfig{ @@ -543,7 +553,7 @@ func TestEndpoints(t *testing.T) { api := &API{ Queryable: remote, QueryEngine: ng, - ExemplarQueryable: storage.ExemplarQueryable(), + ExemplarQueryable: s, targetRetriever: testTargetRetriever.toFactory(), alertmanagerRetriever: testAlertmanagerRetriever{}.toFactory(), flagsMap: sampleFlagMap, @@ -552,7 +562,7 @@ func TestEndpoints(t *testing.T) { ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, rulesRetriever: algr.toFactory(), } - testEndpoints(t, api, testTargetRetriever, storage, false) + testEndpoints(t, api, testTargetRetriever, false) }) } @@ -565,7 +575,7 @@ func (b byLabels) Less(i, j int) bool { return labels.Compare(b[i], b[j]) < 0 } func TestGetSeries(t *testing.T) { // TestEndpoints doesn't have enough label names to test api.labelNames // endpoint properly. Hence we test it separately. 
- storage := promqltest.LoadedStorage(t, ` + s := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo1="bar", baz="abc"} 0+100x100 test_metric1{foo2="boo"} 1+0x100 @@ -575,7 +585,7 @@ func TestGetSeries(t *testing.T) { `) api := &API{ - Queryable: storage, + Queryable: s, } request := func(method string, matchers ...string) (*http.Request, error) { u, err := url.Parse("http://example.com") @@ -669,7 +679,7 @@ func TestGetSeries(t *testing.T) { func TestQueryExemplars(t *testing.T) { start := time.Unix(0, 0) - storage := promqltest.LoadedStorage(t, ` + s := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar"} 0+100x100 test_metric1{foo="boo"} 1+0x100 @@ -682,9 +692,9 @@ func TestQueryExemplars(t *testing.T) { `) api := &API{ - Queryable: storage, + Queryable: s, QueryEngine: testEngine(t), - ExemplarQueryable: storage.ExemplarQueryable(), + ExemplarQueryable: s, } request := func(method string, qs url.Values) (*http.Request, error) { @@ -762,15 +772,10 @@ func TestQueryExemplars(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - es := storage + es := s ctx := context.Background() - for _, te := range tc.exemplars { - for _, e := range te.Exemplars { - _, err := es.AppendExemplar(0, te.SeriesLabels, e) - require.NoError(t, err) - } - } + appendExemplars(t, es, tc.exemplars) req, err := request(http.MethodGet, tc.query) require.NoError(t, err) @@ -787,7 +792,7 @@ func TestQueryExemplars(t *testing.T) { func TestLabelNames(t *testing.T) { // TestEndpoints doesn't have enough label names to test api.labelNames // endpoint properly. Hence we test it separately. 
- storage := promqltest.LoadedStorage(t, ` + s := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo1="bar", baz="abc"} 0+100x100 test_metric1{foo2="boo"} 1+0x100 @@ -797,7 +802,7 @@ func TestLabelNames(t *testing.T) { `) api := &API{ - Queryable: storage, + Queryable: s, } request := func(method, limit string, matchers ...string) (*http.Request, error) { u, err := url.Parse("http://example.com") @@ -897,10 +902,10 @@ func (testStats) Builtin() (_ stats.BuiltinStats) { } func TestStats(t *testing.T) { - storage := teststorage.New(t) + s := teststorage.New(t) api := &API{ - Queryable: storage, + Queryable: s, QueryEngine: testEngine(t), now: func() time.Time { return time.Unix(123, 0) @@ -1115,7 +1120,7 @@ func setupRemote(s storage.Storage) *httptest.Server { return httptest.NewServer(handler) } -func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.ExemplarStorage, testLabelAPI bool) { +func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, testLabelAPI bool) { start := time.Unix(0, 0) type targetMetadata struct { @@ -1135,7 +1140,6 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E errType errorType sorter func(any) metadata []targetMetadata - exemplars []exemplar.QueryResult zeroFunc func(any) } @@ -2043,8 +2047,8 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, sorter: func(m any) { sort.Slice(m.([]metricMetadata), func(i, j int) bool { - s := m.([]metricMetadata) - return s[i].MetricFamily < s[j].MetricFamily + mm := m.([]metricMetadata) + return mm[i].MetricFamily < mm[j].MetricFamily }) }, }, @@ -3758,17 +3762,16 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E tr.ResetMetadataStore() for _, tm := range test.metadata { - tr.SetMetadataStoreForTargets(tm.identifier, &testMetaStore{Metadata: tm.metadata}) - } - - for _, te := range test.exemplars { - for _, e := range te.Exemplars { - _, err := es.AppendExemplar(0, 
te.SeriesLabels, e) - require.NoError(t, err) - } + // TODO: Check error and fixed broken test/bug. + // TestEndpoints/local/run_60_metricMetadata_"limit=1&limit_per_metric=1"/GET fails if we check the error. + _ = tr.SetMetadataStoreForTargets(tm.identifier, &testMetaStore{Metadata: tm.metadata}) } res := test.endpoint(req.WithContext(ctx)) + if res.finalizer != nil { + // Finalizers were added to ensure closed readers on API panics, ensure they are closed here too. + res.finalizer() + } assertAPIError(t, res.err, test.errType) if test.sorter != nil { @@ -4766,13 +4769,10 @@ func TestExtractQueryOpts(t *testing.T) { // Test query timeout parameter. func TestQueryTimeout(t *testing.T) { - storage := promqltest.LoadedStorage(t, ` + s := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar"} 0+100x100 `) - t.Cleanup(func() { - _ = storage.Close() - }) now := time.Now() @@ -4792,9 +4792,9 @@ func TestQueryTimeout(t *testing.T) { t.Run(tc.name, func(t *testing.T) { engine := &fakeEngine{} api := &API{ - Queryable: storage, + Queryable: s, QueryEngine: engine, - ExemplarQueryable: storage.ExemplarQueryable(), + ExemplarQueryable: s, alertmanagerRetriever: testAlertmanagerRetriever{}.toFactory(), flagsMap: sampleFlagMap, now: func() time.Time { return now }, From 2262ae2542a2850dc5d4457dfe4474d339fe8df8 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 27 Jan 2026 12:49:50 +0100 Subject: [PATCH 29/46] PR template: add a concrete release notes example (#17721) Signed-off-by: Arve Knudsen --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ec4eef8dae..7873822f26 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -28,6 +28,7 @@ If no, just write "NONE" in the release-notes block below. Otherwise, please describe what should be mentioned in the CHANGELOG. 
Use the following prefixes: [FEATURE] [ENHANCEMENT] [PERF] [BUGFIX] [SECURITY] [CHANGE] Refer to the existing CHANGELOG for inspiration: https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md +A concrete example may look as follows (be sure to leave out the surrounding quotes): "[FEATURE] API: Add /api/v1/features for clients to understand which features are supported". If you need help formulating your entries, consult the reviewer(s). --> ```release-notes From 8721871cf76c713192609c144ebd7b42f5bc846b Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Tue, 27 Jan 2026 03:52:41 -0800 Subject: [PATCH 30/46] docs: Document the stale_series_compaction_threshold config file option (#17928) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ganesh Vernekar Signed-off-by: Björn Rabenstein Co-authored-by: Björn Rabenstein --- docs/configuration/configuration.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 4079daae02..1f2f9931e8 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -3496,6 +3496,19 @@ with this feature. # to the timestamp of the last appended sample for the same series. [ out_of_order_time_window: | default = 0s ] +# Configures the trigger point for compacting the stale series from the memory into persistent blocks +# and remove those stale series from the memory. +# +# The threshold is a number between 0.0 and 1.0. It represents the ratio of stale series in the memory +# to the total series in the memory. The stale series compaction is triggered when this ratio crosses +# the configured threshold. It may not trigger the stale series compaction if the usual head compaction +# is about to happen soon. +# +# If set to 0, stale series compaction is disabled. +# +# This is an experimental feature, this behaviour could change or be removed in the future. 
+[ stale_series_compaction_threshold: | default = 0 ] + # Configures data retention settings for TSDB. # From b812c6457d3ff4432c5a6f3f9d86839f9d87f364 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 27 Jan 2026 14:51:40 +0100 Subject: [PATCH 31/46] promqltest: Document testing for counter reset hints in histograms Signed-off-by: beorn7 --- promql/promqltest/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/promql/promqltest/README.md b/promql/promqltest/README.md index d26c01c6f1..b4efd9c128 100644 --- a/promql/promqltest/README.md +++ b/promql/promqltest/README.md @@ -110,6 +110,15 @@ eval range from to step * ` ""` (optional) for matching a string literal * `` and `` specify the expected values, and follow the same syntax as for `load` above +### Special handling of counter reset hints in native histograms + +Native histograms as part of `` may or may not contain an explicit +`counter_reset_hint` property. If a `counter_reset_hint` is provided +explicitly, the counter reset hint of the histogram is tested to have the +provided value (`unknown`, `reset`, `not_reset`, or `gauge`). However, if no +`counter_reset_hint` is specified, the `counter_reset_hint` is not tested at +all (rather than testing for the usual default value `unknown`). + ### `expect string` This can be used to specify that a string literal is the expected result. From 7a1cda057a9235da688cfa09835282baea692089 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Tue, 27 Jan 2026 15:33:57 +0100 Subject: [PATCH 32/46] chore(maintainers): remove Nico from remote write maintainers (#17945) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We discussed IRL.Nico no longer has time to contribute. This also syncs the file with CODEOWNERS. 
Signed-off-by: György Krajcsovits --- MAINTAINERS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS.md b/MAINTAINERS.md index f23c7fbd63..71734ce705 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -18,7 +18,7 @@ Maintainers for specific parts of the codebase: * `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7), George Krajcsovits ( / @krajorama) * `storage` - * `remote`: Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( / @npazosmendez), Alex Greenbank ( / @alexgreenbank) + * `remote`: Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Alex Greenbank ( / @alexgreenbank) * `otlptranslator`: Arthur Silva Sens ( / @ArthurSens), Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez) * `tsdb`: Ganesh Vernekar ( / @codesome), Bartłomiej Płotka ( / @bwplotka), Jesús Vázquez ( / @jesusvazquez), George Krajcsovits ( / @krajorama) * `web` From ade3f08eca384af91661f889c038707f21fd48d3 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 27 Jan 2026 17:06:46 +0100 Subject: [PATCH 33/46] notifier: fix flaky TestHangingNotifier race condition (#17934) * notifier: fix flaky TestHangingNotifier race condition Make deterministic through `synctest.Test()`. 
--------- Signed-off-by: Arve Knudsen --- notifier/manager_test.go | 241 +++++++++++++++++++-------------------- 1 file changed, 118 insertions(+), 123 deletions(-) diff --git a/notifier/manager_test.go b/notifier/manager_test.go index ed224462ff..ba1d578d99 100644 --- a/notifier/manager_test.go +++ b/notifier/manager_test.go @@ -14,6 +14,7 @@ package notifier import ( + "bytes" "context" "encoding/json" "fmt" @@ -23,6 +24,7 @@ import ( "net/http/httptest" "net/url" "strconv" + "strings" "testing" "time" @@ -41,6 +43,7 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/util/testutil/synctest" ) func alertsEqual(a, b []*Alert) error { @@ -698,141 +701,133 @@ func makeInputTargetGroup() *targetgroup.Group { // queued alerts. This test reproduces the issue described in https://github.com/prometheus/prometheus/issues/13676. // and https://github.com/prometheus/prometheus/issues/8768. func TestHangingNotifier(t *testing.T) { - const ( - batches = 100 - alertsCount = DefaultMaxBatchSize * batches - ) + synctest.Test(t, func(t *testing.T) { + const ( + batches = 100 + alertsCount = DefaultMaxBatchSize * batches - var ( - sendTimeout = 100 * time.Millisecond - sdUpdatert = sendTimeout / 2 + faultyURL = "http://faulty:9093/api/v2/alerts" + functionalURL = "http://functional:9093/api/v2/alerts" + ) - done = make(chan struct{}) - ) + var ( + sendTimeout = 100 * time.Millisecond + sdUpdatert = sendTimeout / 2 + ) - // Set up a faulty Alertmanager. - var faultyCalled atomic.Bool - faultyServer := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) { - faultyCalled.Store(true) - select { - case <-done: - case <-time.After(time.Hour): - } - })) - defer func() { - close(done) - }() + // Track which alertmanagers have been called. 
+ var faultyCalled, functionalCalled atomic.Bool - faultyURL, err := url.Parse(faultyServer.URL) - require.NoError(t, err) - faultyURL.Path = "/api/v2/alerts" - - // Set up a functional Alertmanager. - var functionalCalled atomic.Bool - functionalServer := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) { - functionalCalled.Store(true) - })) - defer functionalServer.Close() - functionalURL, err := url.Parse(functionalServer.URL) - require.NoError(t, err) - functionalURL.Path = "/api/v2/alerts" - - // Initialize the discovery manager - // This is relevant as the updates aren't sent continually in real life, but only each updatert. - // The old implementation of TestHangingNotifier didn't take that into account. - ctx, cancelSdManager := context.WithCancel(t.Context()) - defer cancelSdManager() - reg := prometheus.NewRegistry() - sdMetrics, err := discovery.RegisterSDMetrics(reg, discovery.NewRefreshMetrics(reg)) - require.NoError(t, err) - sdManager := discovery.NewManager( - ctx, - promslog.NewNopLogger(), - reg, - sdMetrics, - discovery.Name("sd-manager"), - discovery.Updatert(sdUpdatert), - ) - go sdManager.Run() - - // Set up the notifier with both faulty and functional Alertmanagers. - notifier := NewManager( - &Options{ - QueueCapacity: alertsCount, - Registerer: reg, - }, - model.UTF8Validation, - nil, - ) - notifier.alertmanagers = make(map[string]*alertmanagerSet) - amCfg := config.DefaultAlertmanagerConfig - amCfg.Timeout = model.Duration(sendTimeout) - notifier.alertmanagers["config-0"] = newTestAlertmanagerSet(&amCfg, nil, notifier.opts, notifier.metrics, faultyURL.String(), functionalURL.String()) - - for _, ams := range notifier.alertmanagers { - ams.startSendLoops(ams.ams) - } - - go notifier.Run(sdManager.SyncCh()) - defer notifier.Stop() - - require.Len(t, notifier.Alertmanagers(), 2) - - // Enqueue the alerts. 
- var alerts []*Alert - for i := range make([]struct{}, alertsCount) { - alerts = append(alerts, &Alert{ - Labels: labels.FromStrings("alertname", strconv.Itoa(i)), - }) - } - notifier.Send(alerts...) - - // Wait for the Alertmanagers to start receiving alerts. - // 10*sdUpdatert is used as an arbitrary timeout here. - timeout := time.After(10 * sdUpdatert) -loop1: - for { - select { - case <-timeout: - t.Fatalf("Timeout waiting for the alertmanagers to be reached for the first time.") - default: - if faultyCalled.Load() && functionalCalled.Load() { - break loop1 + // Fake Do function that simulates alertmanager behavior in-process. + // This runs within the synctest bubble, so time.Sleep uses fake time. + fakeDo := func(ctx context.Context, _ *http.Client, req *http.Request) (*http.Response, error) { + url := req.URL.String() + if strings.Contains(url, "faulty") { + faultyCalled.Store(true) + // Faulty alertmanager hangs until context is canceled (by timeout). + <-ctx.Done() + return nil, ctx.Err() } + // Functional alertmanager responds successfully. + // Sleep simulates network latency that real HTTP would have—without it, + // the queue drains instantly and the final queueLen() assertion fails. + functionalCalled.Store(true) + time.Sleep(sendTimeout / 2) + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(nil)), + }, nil } - } - // Request to remove the faulty Alertmanager. - c := map[string]discovery.Configs{ - "config-0": { - discovery.StaticConfig{ - &targetgroup.Group{ - Targets: []model.LabelSet{ - { - model.AddressLabel: model.LabelValue(functionalURL.Host), + // Initialize the discovery manager + // This is relevant as the updates aren't sent continually in real life, but only each updatert. + // The old implementation of TestHangingNotifier didn't take that into account. 
+ ctx, cancelSdManager := context.WithCancel(t.Context()) + defer cancelSdManager() + reg := prometheus.NewRegistry() + sdMetrics, err := discovery.RegisterSDMetrics(reg, discovery.NewRefreshMetrics(reg)) + require.NoError(t, err) + sdManager := discovery.NewManager( + ctx, + promslog.NewNopLogger(), + reg, + sdMetrics, + discovery.Name("sd-manager"), + discovery.Updatert(sdUpdatert), + ) + go sdManager.Run() + + // Set up the notifier with both faulty and functional Alertmanagers. + notifier := NewManager( + &Options{ + QueueCapacity: alertsCount, + Registerer: reg, + Do: fakeDo, + }, + model.UTF8Validation, + nil, + ) + + notifier.alertmanagers = make(map[string]*alertmanagerSet) + amCfg := config.DefaultAlertmanagerConfig + amCfg.Timeout = model.Duration(sendTimeout) + notifier.alertmanagers["config-0"] = newTestAlertmanagerSet(&amCfg, nil, notifier.opts, notifier.metrics, faultyURL, functionalURL) + + for _, ams := range notifier.alertmanagers { + ams.startSendLoops(ams.ams) + } + + go notifier.Run(sdManager.SyncCh()) + t.Cleanup(func() { + notifier.Stop() + // Advance time so in-flight request timeouts fire. + time.Sleep(sendTimeout * 2) + }) + + require.Len(t, notifier.Alertmanagers(), 2) + + // Enqueue the alerts. + var alerts []*Alert + for i := range make([]struct{}, alertsCount) { + alerts = append(alerts, &Alert{ + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), + }) + } + notifier.Send(alerts...) + + // Wait for the Alertmanagers to start receiving alerts. + // Use a polling loop since we need to wait for goroutines to process. + for !faultyCalled.Load() || !functionalCalled.Load() { + time.Sleep(sdUpdatert) + synctest.Wait() + } + + // Request to remove the faulty Alertmanager. 
+ c := map[string]discovery.Configs{ + "config-0": { + discovery.StaticConfig{ + &targetgroup.Group{ + Targets: []model.LabelSet{ + { + model.AddressLabel: "functional:9093", + }, }, }, }, }, - }, - } - require.NoError(t, sdManager.ApplyConfig(c)) - - timeout = time.After(batches * sendTimeout) -loop2: - for { - select { - case <-timeout: - t.Fatalf("Timeout, the faulty alertmanager not removed on time.") - default: - // The faulty alertmanager was dropped. - if len(notifier.Alertmanagers()) == 1 { - // The notifier should not wait until the alerts queue of the functional am is empty to apply the discovery changes. - require.NotZero(t, notifier.alertmanagers["config-0"].sendLoops[functionalURL.String()].queueLen()) - break loop2 - } } - } + require.NoError(t, sdManager.ApplyConfig(c)) + + // Wait for the discovery update to be processed. + // Advance time to trigger the discovery manager's update interval. + // The faulty alertmanager should be dropped without waiting for its queue to drain. + for len(notifier.Alertmanagers()) != 1 { + time.Sleep(sdUpdatert) + synctest.Wait() + } + // The notifier should not wait until the alerts queue of the functional am is empty to apply the discovery changes. 
+ require.NotZero(t, notifier.alertmanagers["config-0"].sendLoops[functionalURL].queueLen()) + }) } func TestStop_DrainingDisabled(t *testing.T) { From df31bfd59d6ea1814862e69a29e1ebecddb8b43b Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Wed, 28 Jan 2026 02:48:07 +0800 Subject: [PATCH 34/46] Update docs about ignoring info metrics Signed-off-by: Jeanette Tan --- docs/querying/functions.md | 2 ++ web/ui/mantine-ui/src/promql/functionDocs.tsx | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/docs/querying/functions.md b/docs/querying/functions.md index 0cae149dd7..3a9b7025f8 100644 --- a/docs/querying/functions.md +++ b/docs/querying/functions.md @@ -568,6 +568,8 @@ While `info` normally automatically finds all matching info series, it's possibl restrict them by providing a `__name__` label matcher, e.g. `{__name__="target_info"}`. +Note that if there are any time series in `v` that match the `data-label-selector` (or the default `target_info` if that argument is not specified), they will be treated as info series and will be returned unchanged. + ### Limitations In its current iteration, `info` defaults to considering only info series with diff --git a/web/ui/mantine-ui/src/promql/functionDocs.tsx b/web/ui/mantine-ui/src/promql/functionDocs.tsx index a9d9ca53a9..4cc70a39e6 100644 --- a/web/ui/mantine-ui/src/promql/functionDocs.tsx +++ b/web/ui/mantine-ui/src/promql/functionDocs.tsx @@ -1756,6 +1756,12 @@ const funcDocs: Record = { .

+

+ Note that if there are any time series in v that match the data-label-selector (or the + default target_info if that argument is not specified), they will be treated as info series and + will be returned unchanged. +

+

Limitations

From 8047b05b3c85b3fbb8d70f5b443ca5e3d25d9e69 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Wed, 28 Jan 2026 08:20:06 +0100 Subject: [PATCH 35/46] chore(codeowners): promote matt-gp external github user to owner of AWS SD (#17946) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Branch protection means they cannot merge PRs to main/release branches. Branch protection means they cannot approve things outside their area for PRs to main/release branches. Also add sysadmind (Joe) as owner of aws, to make sure he gets notified. Signed-off-by: György Krajcsovits --- CODEOWNERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 4982838376..2c5dedbffa 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -21,9 +21,8 @@ # Service discovery. /discovery/kubernetes @prometheus/default-maintainers @brancz /discovery/stackit @prometheus/default-maintainers @jkroepke +/discovery/aws/ @prometheus/default-maintainers @matt-gp @sysadmind # Pending -# https://github.com/prometheus/prometheus/pull/17105#issuecomment-3248209452 -# /discovery/aws/ @prometheus/default-maintainers @matt-gp @sysadmind # https://github.com/prometheus/prometheus/pull/15212#issuecomment-3575225179 # /discovery/aliyun @prometheus/default-maintainers @KeyOfSpectator # https://github.com/prometheus/prometheus/pull/14108#issuecomment-2639515421 From 2597a120801f5e9bd573d43010325478c868a214 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Wed, 28 Jan 2026 09:05:54 +0000 Subject: [PATCH 36/46] st: Add a hidden 'st-storage' feature flag for PROM-60 (#17907) Signed-off-by: bwplotka Signed-off-by: Bartlomiej Plotka --- cmd/prometheus/main.go | 57 ++++++++++++++++++++++++++++++++++-------- tsdb/agent/db.go | 5 ++++ tsdb/db.go | 5 ++++ 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index e4f15f5cb8..02808bd652 100644 --- a/cmd/prometheus/main.go +++ 
b/cmd/prometheus/main.go @@ -265,13 +265,26 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error { case "ooo-native-histograms": logger.Warn("This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", o) case "created-timestamp-zero-ingestion": + // NOTE(bwplotka): Once AppendableV1 is removed, there will be only the TSDB and agent flags. c.scrape.EnableStartTimestampZeroIngestion = true c.web.STZeroIngestionEnabled = true + c.tsdb.EnableSTAsZeroSample = true c.agent.EnableSTAsZeroSample = true + // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. + // This is to widen the ST support surface. config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols - logger.Info("Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) + logger.Info("Experimental start timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) + case "st-storage": + // TODO(bwplotka): Implement ST Storage as per PROM-60 and document this hidden feature flag. + c.tsdb.EnableSTStorage = true + c.agent.EnableSTStorage = true + + // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. This is to widen the ST support surface. + config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols + config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols + logger.Info("Experimental start timestamp storage enabled. 
Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) case "delayed-compaction": c.tsdb.EnableDelayedCompaction = true logger.Info("Experimental delayed compaction is enabled.") @@ -872,16 +885,29 @@ func main() { os.Exit(1) } - scrapeManager, err := scrape.NewManager( - &cfg.scrape, - logger.With("component", "scrape manager"), - logging.NewJSONFileLogger, - fanoutStorage, nil, // TODO(bwplotka): Switch to AppendableV2. - prometheus.DefaultRegisterer, - ) - if err != nil { - logger.Error("failed to create a scrape manager", "err", err) - os.Exit(1) + var scrapeManager *scrape.Manager + { + // TODO(bwplotka): Switch to AppendableV2 by default. + // See: https://github.com/prometheus/prometheus/issues/17632 + var ( + scrapeAppendable storage.Appendable = fanoutStorage + scrapeAppendableV2 storage.AppendableV2 + ) + if cfg.tsdb.EnableSTStorage { + scrapeAppendable = nil + scrapeAppendableV2 = fanoutStorage + } + scrapeManager, err = scrape.NewManager( + &cfg.scrape, + logger.With("component", "scrape manager"), + logging.NewJSONFileLogger, + scrapeAppendable, scrapeAppendableV2, + prometheus.DefaultRegisterer, + ) + if err != nil { + logger.Error("failed to create a scrape manager", "err", err) + os.Exit(1) + } } var ( @@ -1368,6 +1394,8 @@ func main() { "WALSegmentSize", cfg.tsdb.WALSegmentSize, "WALCompressionType", cfg.tsdb.WALCompressionType, "BlockReloadInterval", cfg.tsdb.BlockReloadInterval, + "EnableSTAsZeroSample", cfg.tsdb.EnableSTAsZeroSample, + "EnableSTStorage", cfg.tsdb.EnableSTStorage, ) startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000) @@ -1425,6 +1453,7 @@ func main() { "MaxWALTime", cfg.agent.MaxWALTime, "OutOfOrderTimeWindow", cfg.agent.OutOfOrderTimeWindow, "EnableSTAsZeroSample", cfg.agent.EnableSTAsZeroSample, + "EnableSTStorage", cfg.tsdb.EnableSTStorage, ) localStorage.Set(db, 0) @@ -1944,6 +1973,8 @@ 
type tsdbOptions struct { UseUncachedIO bool BlockCompactionExcludeFunc tsdb.BlockExcludeFilterFunc BlockReloadInterval model.Duration + EnableSTAsZeroSample bool + EnableSTStorage bool StaleSeriesCompactionThreshold float64 } @@ -1971,6 +2002,8 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { BlockCompactionExcludeFunc: opts.BlockCompactionExcludeFunc, BlockReloadInterval: time.Duration(opts.BlockReloadInterval), FeatureRegistry: features.DefaultRegistry, + EnableSTAsZeroSample: opts.EnableSTAsZeroSample, + EnableSTStorage: opts.EnableSTStorage, StaleSeriesCompactionThreshold: opts.StaleSeriesCompactionThreshold, } } @@ -1986,6 +2019,7 @@ type agentOptions struct { NoLockfile bool OutOfOrderTimeWindow int64 // TODO(bwplotka): Unused option, fix it or remove. EnableSTAsZeroSample bool + EnableSTStorage bool } func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Options { @@ -2002,6 +2036,7 @@ func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Option NoLockfile: opts.NoLockfile, OutOfOrderTimeWindow: outOfOrderTimeWindow, EnableSTAsZeroSample: opts.EnableSTAsZeroSample, + EnableSTStorage: opts.EnableSTStorage, } } diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 1b29b223d7..460ceb7c04 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -92,6 +92,11 @@ type Options struct { // NOTE(bwplotka): This feature might be deprecated and removed once PROM-60 // is implemented. EnableSTAsZeroSample bool + + // EnableSTStorage determines whether agent DB should write a Start Timestamp (ST) + // per sample to WAL. + // TODO(bwplotka): Implement this option as per PROM-60, currently it's noop. + EnableSTStorage bool } // DefaultOptions used for the WAL storage. They are reasonable for setups using diff --git a/tsdb/db.go b/tsdb/db.go index 1dd524a76a..e8ab300397 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -235,6 +235,11 @@ type Options struct { // is implemented. 
EnableSTAsZeroSample bool + // EnableSTStorage determines whether TSDB should write a Start Timestamp (ST) + // per sample to WAL. + // TODO(bwplotka): Implement this option as per PROM-60, currently it's noop. + EnableSTStorage bool + // EnableMetadataWALRecords represents 'metadata-wal-records' feature flag. // NOTE(bwplotka): This feature might be deprecated and removed once PROM-60 // is implemented. From 97e7ef802cd2789aeb0feb42cb67a1579ec44b1a Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 28 Jan 2026 03:47:34 -0800 Subject: [PATCH 37/46] remote write: simplify readability of timeseries filtering by using the slices package (#14318) * simplify readability of timeseries filtering by using the slices package Signed-off-by: Callum Styan * ensure that BenchmarkBuildTimeSeries doesn't account for the building of the actual proto in the benchmark results, we only care about the buildTimeSeries call Signed-off-by: Callum Styan --------- Signed-off-by: Callum Styan --- storage/remote/queue_manager.go | 18 ++++++------------ storage/remote/queue_manager_test.go | 10 ++++++++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index 2b26179e58..63cdfb36f4 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -19,6 +19,7 @@ import ( "fmt" "log/slog" "math" + "slices" "strconv" "sync" "time" @@ -2105,12 +2106,11 @@ func setAtomicToNewer(value *atomic.Int64, newValue int64) (previous int64, upda func buildTimeSeries(timeSeries []prompb.TimeSeries, filter func(prompb.TimeSeries) bool) ([]prompb.TimeSeries, *timeSeriesStats) { stats := newTimeSeriesStats() - keepIdx := 0 - for i, ts := range timeSeries { + timeSeries = slices.DeleteFunc(timeSeries, func(ts prompb.TimeSeries) bool { if filter != nil && filter(ts) { stats.recordDropped(len(ts.Samples) > 0, len(ts.Exemplars) > 0, len(ts.Histograms) > 0) - continue + return true } // At the moment we only ever 
append a TimeSeries with a single sample or exemplar in it. @@ -2123,16 +2123,10 @@ func buildTimeSeries(timeSeries []prompb.TimeSeries, filter func(prompb.TimeSeri if len(ts.Histograms) > 0 { stats.updateTimestamp(ts.Histograms[0].Timestamp) } + return false + }) - if i != keepIdx { - // We have to swap the kept timeseries with the one which should be dropped. - // Copying any elements within timeSeries could cause data corruptions when reusing the slice in a next batch (shards.populateTimeSeries). - timeSeries[keepIdx], timeSeries[i] = timeSeries[i], timeSeries[keepIdx] - } - keepIdx++ - } - - return timeSeries[:keepIdx], stats + return timeSeries, stats } func buildWriteRequest(logger *slog.Logger, timeSeries []prompb.TimeSeries, metadata []prompb.MetricMetadata, pBuf *proto.Buffer, filter func(prompb.TimeSeries) bool, buf compression.EncodeBuffer, compr compression.Type) (_ []byte, highest, lowest int64, _ error) { diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index f1462b4406..a4b05d387a 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -871,7 +871,7 @@ func createTimeseries(numSamples, numSeries int, extraLabels ...labels.Label) ([ return samples, series } -func createProtoTimeseriesWithOld(numSamples, baseTs int64, _ ...labels.Label) []prompb.TimeSeries { +func createProtoTimeseriesWithOld(numSamples, baseTs int64) []prompb.TimeSeries { samples := make([]prompb.TimeSeries, numSamples) // use a fixed rand source so tests are consistent r := rand.New(rand.NewSource(99)) @@ -2365,8 +2365,14 @@ func BenchmarkBuildTimeSeries(b *testing.B) { // Send one sample per series, which is the typical remote_write case const numSamples = 10000 filter := func(ts prompb.TimeSeries) bool { return filterTsLimit(99, ts) } + originalSamples := createProtoTimeseriesWithOld(numSamples, 100) + + b.ReportAllocs() for b.Loop() { - samples := createProtoTimeseriesWithOld(numSamples, 100, 
extraLabels...) + b.StopTimer() + samples := make([]prompb.TimeSeries, len(originalSamples)) + copy(samples, originalSamples) + b.StartTimer() result, _ := buildTimeSeries(samples, filter) require.NotNil(b, result) } From dc34b90f93bf0265187ccec1dabdcc3db1a87ce0 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 28 Jan 2026 13:58:50 +0100 Subject: [PATCH 38/46] otlptranslator: fix silently swallowed error in addSumNumberDataPoints (#17954) The createAttributes error was incorrectly returning nil instead of err, causing errors to be silently discarded. This could lead to silent data loss for sum metrics during OTLP ingestion. Fixes #17953 Signed-off-by: Arve Knudsen --- .../otlptranslator/prometheusremotewrite/number_data_points.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go index 65d4fd70b2..e681bb352b 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -86,7 +86,7 @@ func (c *PrometheusConverter) addSumNumberDataPoints(ctx context.Context, dataPo meta.MetricFamilyName, ) if err != nil { - return nil + return err } var val float64 switch pt.ValueType() { From 00a7faa2e3cbef625faff9236cdabd877243f35b Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Thu, 29 Jan 2026 08:06:00 +0100 Subject: [PATCH 39/46] tsdb: fix division by zero in stale series compaction (#17952) Guard the stale series ratio calculation by checking numSeries > 0 before computing the ratio. This prevents division by zero when the head has no series. 
Fixes #17949 Signed-off-by: Arve Knudsen --- tsdb/db.go | 29 +++++++++++++++-------------- tsdb/db_test.go | 24 ++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/tsdb/db.go b/tsdb/db.go index e8ab300397..c5da5b54a6 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -1172,22 +1172,23 @@ func (db *DB) run(ctx context.Context) { db.head.mmapHeadChunks() numStaleSeries, numSeries := db.Head().NumStaleSeries(), db.Head().NumSeries() - staleSeriesRatio := float64(numStaleSeries) / float64(numSeries) - if db.autoCompact && db.opts.staleSeriesCompactionThreshold.Load() > 0 && - staleSeriesRatio >= db.opts.staleSeriesCompactionThreshold.Load() { - nextCompactionIsSoon := false - if !db.lastHeadCompactionTime.IsZero() { - compactionInterval := time.Duration(db.head.chunkRange.Load()) * time.Millisecond - nextEstimatedCompactionTime := db.lastHeadCompactionTime.Add(compactionInterval) - if time.Now().Add(10 * time.Minute).After(nextEstimatedCompactionTime) { - // Next compaction is starting within next 10 mins. - nextCompactionIsSoon = true + if db.autoCompact && numSeries > 0 && db.opts.staleSeriesCompactionThreshold.Load() > 0 { + staleSeriesRatio := float64(numStaleSeries) / float64(numSeries) + if staleSeriesRatio >= db.opts.staleSeriesCompactionThreshold.Load() { + nextCompactionIsSoon := false + if !db.lastHeadCompactionTime.IsZero() { + compactionInterval := time.Duration(db.head.chunkRange.Load()) * time.Millisecond + nextEstimatedCompactionTime := db.lastHeadCompactionTime.Add(compactionInterval) + if time.Now().Add(10 * time.Minute).After(nextEstimatedCompactionTime) { + // Next compaction is starting within next 10 mins. 
+ nextCompactionIsSoon = true + } } - } - if !nextCompactionIsSoon { - if err := db.CompactStaleHead(); err != nil { - db.logger.Error("immediate stale series compaction failed", "err", err) + if !nextCompactionIsSoon { + if err := db.CompactStaleHead(); err != nil { + db.logger.Error("immediate stale series compaction failed", "err", err) + } } } } diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 2dbcb11645..403ce3636a 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -9561,3 +9561,27 @@ func TestStaleSeriesCompaction(t *testing.T) { verifyHeadBlock() } } + +// TestStaleSeriesCompactionWithZeroSeries verifies that CompactStaleHead handles +// an empty head (0 series) gracefully without division by zero or incorrectly +// triggering compaction. This is a regression test for issue #17949. +func TestStaleSeriesCompactionWithZeroSeries(t *testing.T) { + opts := DefaultOptions() + opts.MinBlockDuration = 1000 + opts.MaxBlockDuration = 1000 + db := newTestDB(t, withOpts(opts)) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + // Verify the head is empty. + require.Equal(t, uint64(0), db.Head().NumSeries()) + require.Equal(t, uint64(0), db.Head().NumStaleSeries()) + + // CompactStaleHead should handle zero series gracefully (no panic, no error). + require.NoError(t, db.CompactStaleHead()) + + // Should still have no blocks since there was nothing to compact. + require.Empty(t, db.Blocks()) +} From 020a0b30a0817e0027770ef324a8f4f30a577ba8 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Thu, 29 Jan 2026 08:07:32 +0100 Subject: [PATCH 40/46] notifier: fix flaky TestStop_DrainingEnabled and TestStop_DrainingDisabled race conditions (#17938) Fix flaky TestStop_DrainingEnabled and TestStop_DrainingDisabled tests. The tests used real HTTP servers and real time, making them susceptible to race conditions and timing-dependent failures. The solution is to convert both tests to use synctest for deterministic fake time. 
--------- Signed-off-by: Arve Knudsen --- notifier/manager_test.go | 242 ++++++++++++++++++--------------------- 1 file changed, 112 insertions(+), 130 deletions(-) diff --git a/notifier/manager_test.go b/notifier/manager_test.go index ba1d578d99..d7108c1628 100644 --- a/notifier/manager_test.go +++ b/notifier/manager_test.go @@ -831,171 +831,153 @@ func TestHangingNotifier(t *testing.T) { } func TestStop_DrainingDisabled(t *testing.T) { - releaseReceiver := make(chan struct{}) - receiverReceivedRequest := make(chan struct{}, 2) - alertsReceived := atomic.NewInt64(0) + synctest.Test(t, func(t *testing.T) { + const alertmanagerURL = "http://alertmanager:9093/api/v2/alerts" - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // Let the test know we've received a request. - receiverReceivedRequest <- struct{}{} + handlerStarted := make(chan struct{}) + alertsReceived := atomic.NewInt64(0) - var alerts []*Alert + // Fake Do function that simulates a hanging alertmanager that times out. + fakeDo := func(ctx context.Context, _ *http.Client, req *http.Request) (*http.Response, error) { + var alerts []*Alert + b, err := io.ReadAll(req.Body) + if err != nil { + return nil, fmt.Errorf("read request body: %w", err) + } + if err := json.Unmarshal(b, &alerts); err != nil { + return nil, fmt.Errorf("unmarshal request body: %w", err) + } + alertsReceived.Add(int64(len(alerts))) - b, err := io.ReadAll(r.Body) - require.NoError(t, err) + // Signal arrival, then block until context times out. + handlerStarted <- struct{}{} + <-ctx.Done() - err = json.Unmarshal(b, &alerts) - require.NoError(t, err) + return nil, ctx.Err() + } - alertsReceived.Add(int64(len(alerts))) + reg := prometheus.NewRegistry() + m := NewManager( + &Options{ + QueueCapacity: 10, + DrainOnShutdown: false, + Registerer: reg, + Do: fakeDo, + }, + model.UTF8Validation, + nil, + ) - // Wait for the test to release us. 
- <-releaseReceiver + m.alertmanagers = make(map[string]*alertmanagerSet) - w.WriteHeader(http.StatusOK) - })) - defer func() { - server.Close() - }() + am1Cfg := config.DefaultAlertmanagerConfig + am1Cfg.Timeout = model.Duration(time.Second) + m.alertmanagers["1"] = newTestAlertmanagerSet(&am1Cfg, nil, m.opts, m.metrics, alertmanagerURL) - reg := prometheus.NewRegistry() - m := NewManager( - &Options{ - QueueCapacity: 10, - DrainOnShutdown: false, - Registerer: reg, - }, - model.UTF8Validation, - nil, - ) + for _, ams := range m.alertmanagers { + ams.startSendLoops(ams.ams) + } - m.alertmanagers = make(map[string]*alertmanagerSet) + // This will be waited on automatically when synctest.Test exits. + go m.Run(nil) - am1Cfg := config.DefaultAlertmanagerConfig - am1Cfg.Timeout = model.Duration(time.Second) - m.alertmanagers["1"] = newTestAlertmanagerSet(&am1Cfg, nil, m.opts, m.metrics, server.URL) + // Queue two alerts. The first should be immediately sent to the receiver, which should block until we release it later. + m.Send(&Alert{Labels: labels.FromStrings(labels.AlertName, "alert-1")}) - for _, ams := range m.alertmanagers { - ams.startSendLoops(ams.ams) - } + // Wait for receiver to get the request. + <-handlerStarted - notificationManagerStopped := make(chan struct{}) + m.Send(&Alert{Labels: labels.FromStrings(labels.AlertName, "alert-2")}) - go func() { - defer close(notificationManagerStopped) - m.Run(nil) - }() + // Stop the notification manager, then advance time to trigger the request timeout. + m.Stop() + time.Sleep(time.Second) - // Queue two alerts. The first should be immediately sent to the receiver, which should block until we release it later. - m.Send(&Alert{Labels: labels.FromStrings(labels.AlertName, "alert-1")}) + // Allow goroutines to finish. + synctest.Wait() - select { - case <-receiverReceivedRequest: - // Nothing more to do. 
- case <-time.After(time.Second): - require.FailNow(t, "gave up waiting for receiver to receive notification of first alert") - } - - m.Send(&Alert{Labels: labels.FromStrings(labels.AlertName, "alert-2")}) - - // Stop the notification manager, pause to allow the shutdown to be observed, and then allow the receiver to proceed. - m.Stop() - time.Sleep(time.Second) - close(releaseReceiver) - - // Wait for the notification manager to stop and confirm only the first notification was sent. - // The second notification should be dropped. - select { - case <-notificationManagerStopped: - // Nothing more to do. - case <-time.After(time.Second): - require.FailNow(t, "gave up waiting for notification manager to stop") - } - - require.Equal(t, int64(1), alertsReceived.Load()) + // Confirm only the first notification was sent. The second notification should be dropped. + require.Equal(t, int64(1), alertsReceived.Load()) + }) } func TestStop_DrainingEnabled(t *testing.T) { - releaseReceiver := make(chan struct{}) - receiverReceivedRequest := make(chan struct{}, 2) - alertsReceived := atomic.NewInt64(0) + synctest.Test(t, func(t *testing.T) { + const alertmanagerURL = "http://alertmanager:9093/api/v2/alerts" - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - var alerts []*Alert + handlerStarted := make(chan struct{}, 1) + alertsReceived := atomic.NewInt64(0) - // Let the test know we've received a request. - receiverReceivedRequest <- struct{}{} + // Fake Do function that simulates alertmanager responding slowly but successfully. 
+ fakeDo := func(_ context.Context, _ *http.Client, req *http.Request) (*http.Response, error) { + var alerts []*Alert + b, err := io.ReadAll(req.Body) + if err != nil { + return nil, fmt.Errorf("read request body: %w", err) + } + if err := json.Unmarshal(b, &alerts); err != nil { + return nil, fmt.Errorf("unmarshal request body: %w", err) + } + alertsReceived.Add(int64(len(alerts))) - b, err := io.ReadAll(r.Body) - require.NoError(t, err) + // Signal arrival. + handlerStarted <- struct{}{} - err = json.Unmarshal(b, &alerts) - require.NoError(t, err) + // Block to allow for alert-2 to be queued while this request is in-flight. + time.Sleep(100 * time.Millisecond) - alertsReceived.Add(int64(len(alerts))) + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(nil)), + }, nil + } - // Wait for the test to release us. - <-releaseReceiver + reg := prometheus.NewRegistry() + m := NewManager( + &Options{ + QueueCapacity: 10, + DrainOnShutdown: true, + Registerer: reg, + Do: fakeDo, + }, + model.UTF8Validation, + nil, + ) - w.WriteHeader(http.StatusOK) - })) - defer func() { - server.Close() - }() + m.alertmanagers = make(map[string]*alertmanagerSet) - reg := prometheus.NewRegistry() - m := NewManager( - &Options{ - QueueCapacity: 10, - DrainOnShutdown: true, - Registerer: reg, - }, - model.UTF8Validation, - nil, - ) + am1Cfg := config.DefaultAlertmanagerConfig + am1Cfg.Timeout = model.Duration(time.Second) + m.alertmanagers["1"] = newTestAlertmanagerSet(&am1Cfg, nil, m.opts, m.metrics, alertmanagerURL) - m.alertmanagers = make(map[string]*alertmanagerSet) + for _, ams := range m.alertmanagers { + ams.startSendLoops(ams.ams) + } - am1Cfg := config.DefaultAlertmanagerConfig - am1Cfg.Timeout = model.Duration(time.Second) - m.alertmanagers["1"] = newTestAlertmanagerSet(&am1Cfg, nil, m.opts, m.metrics, server.URL) + go m.Run(nil) - for _, ams := range m.alertmanagers { - ams.startSendLoops(ams.ams) - } + // Queue two alerts. 
The first should be immediately sent to the receiver. + m.Send(&Alert{Labels: labels.FromStrings(labels.AlertName, "alert-1")}) - notificationManagerStopped := make(chan struct{}) + // Wait for receiver to get the first request. + <-handlerStarted - go func() { - defer close(notificationManagerStopped) - m.Run(nil) - }() + // Send second alert while first is still being processed (fakeDo has 100ms delay). + m.Send(&Alert{Labels: labels.FromStrings(labels.AlertName, "alert-2")}) - // Queue two alerts. The first should be immediately sent to the receiver, which should block until we release it later. - m.Send(&Alert{Labels: labels.FromStrings(labels.AlertName, "alert-1")}) + // Stop the notification manager. With DrainOnShutdown=true, this should wait + // for the queue to drain, ensuring both alerts are sent. + m.Stop() - select { - case <-receiverReceivedRequest: - // Nothing more to do. - case <-time.After(time.Second): - require.FailNow(t, "gave up waiting for receiver to receive notification of first alert") - } + // Advance time so in-flight requests complete. + time.Sleep(time.Second) - m.Send(&Alert{Labels: labels.FromStrings(labels.AlertName, "alert-2")}) + // Allow goroutines to finish. + synctest.Wait() - // Stop the notification manager and allow the receiver to proceed. - m.Stop() - close(releaseReceiver) - - // Wait for the notification manager to stop and confirm both notifications were sent. - select { - case <-notificationManagerStopped: - // Nothing more to do. - case <-time.After(200 * time.Millisecond): - require.FailNow(t, "gave up waiting for notification manager to stop") - } - - require.Equal(t, int64(2), alertsReceived.Load()) + // Confirm both notifications were sent. 
+ require.Equal(t, int64(2), alertsReceived.Load()) + }) } // TestQueuesDrainingOnApplyConfig ensures that when an alertmanagerSet disappears after an ApplyConfig(), its From 36ea75d20336ef5b54a85e957c223c47e6f5783f Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Thu, 29 Jan 2026 10:50:17 +0000 Subject: [PATCH 41/46] scrape: fix flaky appender test (#17962) Fixes https://github.com/prometheus/prometheus/issues/17941 Signed-off-by: bwplotka --- scrape/manager.go | 3 +++ scrape/manager_test.go | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/scrape/manager.go b/scrape/manager.go index aafd8c1931..24a63b056b 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -65,6 +65,9 @@ func NewManager( if appendable != nil && appendableV2 != nil { return nil, errors.New("scrape.NewManager: appendable and appendableV2 cannot be provided at the same time") } + if appendable == nil && appendableV2 == nil { + return nil, errors.New("scrape.NewManager: provide either appendable or appendableV2") + } sm, err := newScrapeMetrics(registerer) if err != nil { diff --git a/scrape/manager_test.go b/scrape/manager_test.go index 288f1d678d..395cc98a82 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -522,7 +522,7 @@ scrape_configs: ) opts := Options{} - scrapeManager, err := NewManager(&opts, nil, nil, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, teststorage.NewAppendable(), testRegistry) require.NoError(t, err) newLoop := func(scrapeLoopOptions) loop { ch <- struct{}{} @@ -578,7 +578,7 @@ scrape_configs: func TestManagerTargetsUpdates(t *testing.T) { opts := Options{} testRegistry := prometheus.NewRegistry() - m, err := NewManager(&opts, nil, nil, nil, nil, testRegistry) + m, err := NewManager(&opts, nil, nil, nil, teststorage.NewAppendable(), testRegistry) require.NoError(t, err) targetSetsCh := make(chan map[string][]*targetgroup.Group) @@ -631,7 +631,7 @@ global: opts := Options{} 
testRegistry := prometheus.NewRegistry() - scrapeManager, err := NewManager(&opts, nil, nil, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, teststorage.NewAppendable(), testRegistry) require.NoError(t, err) // Load the first config. @@ -701,7 +701,7 @@ scrape_configs: } opts := Options{} - scrapeManager, err := NewManager(&opts, nil, nil, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, teststorage.NewAppendable(), testRegistry) require.NoError(t, err) reload(scrapeManager, cfg1) @@ -1034,7 +1034,7 @@ func TestUnregisterMetrics(t *testing.T) { // Check that all metrics can be unregistered, allowing a second manager to be created. for range 2 { opts := Options{} - manager, err := NewManager(&opts, nil, nil, nil, nil, reg) + manager, err := NewManager(&opts, nil, nil, nil, teststorage.NewAppendable(), reg) require.NotNil(t, manager) require.NoError(t, err) // Unregister all metrics. @@ -1255,7 +1255,7 @@ scrape_configs: - files: ['%s'] ` - discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, nil) + discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, teststorage.NewAppendable()) defer scrapeManager.Stop() applyConfig( @@ -1354,7 +1354,7 @@ scrape_configs: file_sd_configs: - files: ['%s', '%s'] ` - discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, nil) + discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, teststorage.NewAppendable()) defer scrapeManager.Stop() applyConfig( @@ -1413,7 +1413,7 @@ scrape_configs: file_sd_configs: - files: ['%s'] ` - discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, nil) + discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, teststorage.NewAppendable()) defer scrapeManager.Stop() applyConfig( @@ -1479,7 +1479,7 @@ scrape_configs: - targets: ['%s'] ` - discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, nil) + discoveryManager, scrapeManager := runManagers(t, ctx, nil, nil, 
teststorage.NewAppendable()) defer scrapeManager.Stop() // Apply the initial config with an existing file @@ -1563,7 +1563,7 @@ scrape_configs: cfg := loadConfiguration(t, cfgText) - m, err := NewManager(&Options{}, nil, nil, nil, nil, prometheus.NewRegistry()) + m, err := NewManager(&Options{}, nil, nil, nil, teststorage.NewAppendable(), prometheus.NewRegistry()) require.NoError(t, err) defer m.Stop() require.NoError(t, m.ApplyConfig(cfg)) From 79b553499ac0d5a0b155e30dcb322a8a7ed2550e Mon Sep 17 00:00:00 2001 From: 1seal Date: Thu, 29 Jan 2026 11:59:35 +0000 Subject: [PATCH 42/46] web/api: compute relabel_steps in single pass Signed-off-by: 1seal --- web/api/v1/api.go | 16 ++++++++---- web/api/v1/api_test.go | 56 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index f32fee19f8..07ce482a40 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -1346,13 +1346,19 @@ func (api *API) targetRelabelSteps(r *http.Request) apiFuncResult { rules := scrapeConfig.RelabelConfigs steps := make([]RelabelStep, len(rules)) + lb := labels.NewBuilder(lbls) + keep := true for i, rule := range rules { - outLabels, keep := relabel.Process(lbls, rules[:i+1]...) 
- steps[i] = RelabelStep{ - Rule: rule, - Output: outLabels, - Keep: keep, + if keep { + keep = relabel.ProcessBuilder(lb, rule) } + + outLabels := labels.EmptyLabels() + if keep { + outLabels = lb.Labels() + } + + steps[i] = RelabelStep{Rule: rule, Output: outLabels, Keep: keep} } return apiFuncResult{&RelabelStepsResponse{Steps: steps}, nil, nil, nil} diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 797182ce88..96d1cec531 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -166,8 +166,8 @@ func (t testTargetRetriever) TargetsDroppedCounts() map[string]int { return r } -func (testTargetRetriever) ScrapePoolConfig(_ string) (*config.ScrapeConfig, error) { - return &config.ScrapeConfig{ +func (testTargetRetriever) ScrapePoolConfig(pool string) (*config.ScrapeConfig, error) { + cfg := &config.ScrapeConfig{ RelabelConfigs: []*relabel.Config{ { Action: relabel.Replace, @@ -182,7 +182,16 @@ func (testTargetRetriever) ScrapePoolConfig(_ string) (*config.ScrapeConfig, err Regex: relabel.MustNewRegexp(`example\.com:.*`), }, }, - }, nil + } + if pool == "testpool3" { + cfg.RelabelConfigs = append(cfg.RelabelConfigs, &relabel.Config{ + Action: relabel.Replace, + TargetLabel: "job", + Regex: relabel.MustNewRegexp(".*"), + Replacement: "should_not_apply", + }) + } + return cfg, nil } func (t *testTargetRetriever) SetMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error { @@ -1937,6 +1946,47 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, testLabelAPI }, }, }, + { + endpoint: api.targetRelabelSteps, + query: url.Values{"scrapePool": []string{"testpool3"}, "labels": []string{`{"job":"test","__address__":"localhost:9090"}`}}, + response: &RelabelStepsResponse{ + Steps: []RelabelStep{ + { + Rule: &relabel.Config{ + Action: relabel.Replace, + Replacement: "example.com:443", + TargetLabel: "__address__", + Regex: relabel.MustNewRegexp(""), + NameValidationScheme: model.LegacyValidation, + }, + 
Output: labels.FromMap(map[string]string{ + "job": "test", + "__address__": "example.com:443", + }), + Keep: true, + }, + { + Rule: &relabel.Config{ + Action: relabel.Drop, + SourceLabels: []model.LabelName{"__address__"}, + Regex: relabel.MustNewRegexp(`example\.com:.*`), + }, + Output: labels.EmptyLabels(), + Keep: false, + }, + { + Rule: &relabel.Config{ + Action: relabel.Replace, + TargetLabel: "job", + Regex: relabel.MustNewRegexp(".*"), + Replacement: "should_not_apply", + }, + Output: labels.EmptyLabels(), + Keep: false, + }, + }, + }, + }, // With a matching metric. { endpoint: api.targetMetadata, From 75f94903b36d11b7b47dc3e0ebabab1ce3acabc7 Mon Sep 17 00:00:00 2001 From: Julien <291750+roidelapluie@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:36:13 +0100 Subject: [PATCH 43/46] Add OpenAPI 3.2 specification generation for Prometheus HTTP API (#17825) * Add OpenAPI 3.2 specification generation for Prometheus HTTP API This commit introduces an OpenAPI specification for the Prometheus API. After testing multiple code-generation servers with built-in APIs, this implementation uses an independent spec file outside of the critical path. This spec file is tested with a framework present in this pull request. The specification helps clients know which parameters they can use and is served at /api/v1/openapi.yaml. The spec file will evolve with the Prometheus API and has the same version number. Downstream projects can tune the APIs presented in the spec file with configuration options using the IncludePaths setting for path filtering. In the future, there is room to generate a server from this spec file (e.g. with interfaces), but this is out of scope for this pull request. 
Architecture: - Core OpenAPI infrastructure (openapi.go): Dynamic spec building, caching, and thread-safe spec generation - Schema definitions (openapi_schemas.go): Complete type definitions for all API request and response types - Path specifications (openapi_paths.go): Endpoint definitions with parameters, request bodies, and response schemas - Examples (openapi_examples.go): Realistic request/response examples - Helper functions (openapi_helpers.go): Reusable builders for common OpenAPI structures Testing: - Comprehensive test suite with golden file validation - Test helpers package for API testing infrastructure - OpenAPI compliance validation utilities The golden file captures the complete specification for snapshot testing. Update with: go test -run TestOpenAPIGolden -update-openapi-spec REVIEWERS: The most important thing to check would be the OpenAPI golden file (web/api/v1/testdata/openapi_golden.yaml). Test scenarios are important as they test the actual OpenAPI spec validity. Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> * Add OpenAPI 3.1 support with version selection Add support for both OpenAPI 3.1 and 3.2 specifications with version selection via openapi_version query parameter. Defaults to 3.1 for broader compatibility Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> * Enhance OpenAPI examples and add helper functions - Add timestampExamples helper for consistent time formatting - Add exampleMap helper to simplify example creation - Improve example summaries with query details - Add matrix result example for range vector queries Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> * web/api: Add AtST method to test helper iterators Implement the AtST() method required by chunkenc.Iterator interface for FakeSeriesIterator and FakeHistogramSeriesIterator test helpers. 
The method returns 0 as these test helpers don't use start timestamps. Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> * OpenAPI: Add minimum coverage test Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> * OpenAPI: Improve examples handling Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> --------- Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> --- .gitattributes | 1 + .golangci.yml | 2 + .yamllint | 1 + docs/querying/api.md | 16 + documentation/examples/remote_storage/go.mod | 2 +- go.mod | 15 +- go.sum | 48 + go.work | 2 +- internal/tools/go.mod | 2 +- web/api/testhelpers/api.go | 244 + web/api/testhelpers/assertions.go | 252 + web/api/testhelpers/fixtures.go | 178 + web/api/testhelpers/mocks.go | 534 +++ web/api/testhelpers/openapi.go | 204 + web/api/testhelpers/request.go | 145 + web/api/v1/api.go | 8 +- web/api/v1/api_scenarios_test.go | 419 ++ web/api/v1/errors_test.go | 1 + web/api/v1/openapi.go | 320 ++ web/api/v1/openapi_coverage_test.go | 258 + web/api/v1/openapi_examples.go | 1013 ++++ web/api/v1/openapi_golden_test.go | 176 + web/api/v1/openapi_helpers.go | 343 ++ web/api/v1/openapi_paths.go | 626 +++ web/api/v1/openapi_schemas.go | 1223 +++++ web/api/v1/openapi_test.go | 289 ++ web/api/v1/test_helpers.go | 157 + web/api/v1/testdata/openapi_3.1_golden.yaml | 4401 +++++++++++++++++ web/api/v1/testdata/openapi_3.2_golden.yaml | 4452 ++++++++++++++++++ web/ui/mantine-ui/src/promql/tools/go.mod | 2 +- web/web.go | 9 + web/web_test.go | 2 + 32 files changed, 15337 insertions(+), 8 deletions(-) create mode 100644 .gitattributes create mode 100644 web/api/testhelpers/api.go create mode 100644 web/api/testhelpers/assertions.go create mode 100644 web/api/testhelpers/fixtures.go create mode 100644 web/api/testhelpers/mocks.go create mode 100644 web/api/testhelpers/openapi.go create mode 100644 web/api/testhelpers/request.go create mode 100644
web/api/v1/api_scenarios_test.go create mode 100644 web/api/v1/openapi.go create mode 100644 web/api/v1/openapi_coverage_test.go create mode 100644 web/api/v1/openapi_examples.go create mode 100644 web/api/v1/openapi_golden_test.go create mode 100644 web/api/v1/openapi_helpers.go create mode 100644 web/api/v1/openapi_paths.go create mode 100644 web/api/v1/openapi_schemas.go create mode 100644 web/api/v1/openapi_test.go create mode 100644 web/api/v1/test_helpers.go create mode 100644 web/api/v1/testdata/openapi_3.1_golden.yaml create mode 100644 web/api/v1/testdata/openapi_3.2_golden.yaml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..432caee6f7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +web/api/v1/testdata/openapi_golden.yaml linguist-generated diff --git a/.golangci.yml b/.golangci.yml index 0c866611e9..599a5e2b49 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -124,6 +124,8 @@ linters: # Disable this check for now since it introduces too many changes in our existing codebase. # See https://pkg.go.dev/golang.org/x/tools/go/analysis/passes/modernize#hdr-Analyzer_omitzero for more details. - omitzero + # Disable waitgroup check until we really move to Go 1.25. + - waitgroup perfsprint: # Optimizes even if it requires an int or uint type cast. int-conversion: true diff --git a/.yamllint b/.yamllint index 8d09c375fd..b329f464fb 100644 --- a/.yamllint +++ b/.yamllint @@ -2,6 +2,7 @@ extends: default ignore: | **/node_modules + web/api/v1/testdata/openapi_*_golden.yaml rules: braces: diff --git a/docs/querying/api.md b/docs/querying/api.md index 4891db8980..7324669699 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -6,6 +6,22 @@ sort_rank: 7 The current stable HTTP API is reachable under `/api/v1` on a Prometheus server. Any non-breaking additions will be added under that endpoint. +## OpenAPI Specification + +An OpenAPI specification for the HTTP API is available at `/api/v1/openapi.yaml`. 
+By default, it returns OpenAPI 3.1 for broader compatibility. Use `?openapi_version=3.2` +for OpenAPI 3.2, which includes advanced features and endpoints like `/api/v1/notifications/live`. + +This machine-readable specification describes all available endpoints, request parameters, +response formats, and schemas. + +The OpenAPI specification can be used to: + +- Generate client libraries in various programming languages. +- Validate API requests and responses. +- Generate interactive API documentation. +- Test API endpoints. + ## Format overview The API response format is JSON. Every successful API request returns a `2xx` diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod index 17076faddd..5f2cd98037 100644 --- a/documentation/examples/remote_storage/go.mod +++ b/documentation/examples/remote_storage/go.mod @@ -1,6 +1,6 @@ module github.com/prometheus/prometheus/documentation/examples/remote_storage -go 1.24.0 +go 1.25.0 require ( github.com/alecthomas/kingpin/v2 v2.4.0 diff --git a/go.mod b/go.mod index afc3f2740d..0aa3658177 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/prometheus/prometheus -go 1.24.0 +go 1.25.0 require ( github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 @@ -54,6 +54,8 @@ require ( github.com/oklog/ulid/v2 v2.1.1 github.com/open-telemetry/opentelemetry-collector-contrib/processor/deltatocumulativeprocessor v0.142.0 github.com/ovh/go-ovh v1.9.0 + github.com/pb33f/libopenapi v0.31.1 + github.com/pb33f/libopenapi-validator v0.10.0 github.com/prometheus/alertmanager v0.30.0 github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_golang/exp v0.0.0-20260101091701-2cd067eb23c9 @@ -85,6 +87,7 @@ require ( go.uber.org/goleak v1.3.0 go.yaml.in/yaml/v2 v2.4.3 go.yaml.in/yaml/v3 v3.0.4 + go.yaml.in/yaml/v4 v4.0.0-rc.3 golang.org/x/oauth2 v0.34.0 golang.org/x/sync v0.19.0 golang.org/x/sys v0.39.0 @@ -93,6 +96,7 @@ require ( 
google.golang.org/genproto/googleapis/api v0.0.0-20251222181119-0a764e51fe1b google.golang.org/grpc v1.78.0 google.golang.org/protobuf v1.36.11 + gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.34.3 k8s.io/apimachinery v0.34.3 k8s.io/client-go v0.34.3 @@ -102,6 +106,9 @@ require ( require ( github.com/aws/aws-sdk-go-v2/service/signin v1.0.4 // indirect + github.com/bahlo/generic-list-go v0.2.0 // indirect + github.com/basgys/goxml2json v1.1.1-0.20231018121955-e66ee54ceaad // indirect + github.com/buger/jsonparser v1.1.1 // indirect github.com/go-openapi/swag/cmdutils v0.25.4 // indirect github.com/go-openapi/swag/conv v0.25.4 // indirect github.com/go-openapi/swag/fileutils v0.25.4 // indirect @@ -113,8 +120,10 @@ require ( github.com/go-openapi/swag/stringutils v0.25.4 // indirect github.com/go-openapi/swag/typeutils v0.25.4 // indirect github.com/go-openapi/swag/yamlutils v0.25.4 // indirect + github.com/pb33f/jsonpath v0.7.0 // indirect + github.com/pb33f/ordered-map/v2 v2.3.0 // indirect + github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect go.uber.org/multierr v1.11.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect ) @@ -237,7 +246,7 @@ require ( gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v2 v2.4.0 gotest.tools/v3 v3.0.3 // indirect k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect diff --git a/go.sum b/go.sum index 6ac2105275..280724445a 100644 --- a/go.sum +++ b/go.sum @@ -81,6 +81,10 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.41.5 h1:SciGFVNZ4mHdm7gpD1dgZYnCuVdX github.com/aws/aws-sdk-go-v2/service/sts v1.41.5/go.mod h1:iW40X4QBmUxdP+fZNOpfmkdMZqsovezbAeO+Ubiv2pk= github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk= github.com/aws/smithy-go v1.24.0/go.mod 
h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= +github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= +github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= +github.com/basgys/goxml2json v1.1.1-0.20231018121955-e66ee54ceaad h1:3swAvbzgfaI6nKuDDU7BiKfZRdF+h2ZwKgMHd8Ha4t8= +github.com/basgys/goxml2json v1.1.1-0.20231018121955-e66ee54ceaad/go.mod h1:9+nBLYNWkvPcq9ep0owWUsPTLgL9ZXTsZWcCSVGGLJ0= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3/go.mod h1:CIWtjkly68+yqLPbvwwR/fjNJA/idrtULjZWh2v1ys0= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -88,6 +92,10 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/bitly/go-simplejson v0.5.1 h1:xgwPbetQScXt1gh9BmoJ6j9JMr3TElvuIyjR8pgdoow= +github.com/bitly/go-simplejson v0.5.1/go.mod h1:YOPVLzCfwK14b4Sff3oP1AmGhI9T9Vsg84etUnlyp+Q= +github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= +github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -116,6 +124,8 @@ github.com/digitalocean/godo v1.171.0 h1:QwpkwWKr3v7yxc8D4NQG973NoR9APCEWjYnLOQe github.com/digitalocean/godo v1.171.0/go.mod h1:xQsWpVCCbkDrWisHA72hPzPlnC+4W5w/McZY5ij9uvU= 
github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ= +github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM= github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= @@ -437,6 +447,14 @@ github.com/ovh/go-ovh v1.9.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pb33f/jsonpath v0.7.0 h1:3oG6yu1RqNoMZpqnRjBMqi8fSIXWoDAKDrsB0QGTcoU= +github.com/pb33f/jsonpath v0.7.0/go.mod h1:/+JlSIjWA2ijMVYGJ3IQPF4Q1nLMYbUTYNdk0exCDPQ= +github.com/pb33f/libopenapi v0.31.1 h1:smGr45U2Y+hHWYKiEV13oS2tP9IUnscqNb5qsvT9+YI= +github.com/pb33f/libopenapi v0.31.1/go.mod h1:oaebeA5l58AFbZ7qRKTtMnu15JEiPlaBas1vLDcw9vs= +github.com/pb33f/libopenapi-validator v0.10.0 h1:9XhgxW2jTDd+1aDMuIjGUsWaeUaPi5ql2z1Y+WBltiE= +github.com/pb33f/libopenapi-validator v0.10.0/go.mod h1:hW3wIpg4YCxLrJxyTrfrzP9Mtt9FvbD/nm0yemUcjSs= +github.com/pb33f/ordered-map/v2 v2.3.0 h1:k2OhVEQkhTCQMhAicQ3Z6iInzoZNQ7L9MVomwKBZ5WQ= +github.com/pb33f/ordered-map/v2 v2.3.0/go.mod h1:oe5ue+6ZNhy7QN9cPZvPA23Hx0vMHnNVeMg4fGdCANw= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= 
github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= @@ -491,6 +509,8 @@ github.com/puzpuzpuz/xsync/v3 v3.5.1/go.mod h1:VjzYrABPabuM4KyBh1Ftq6u8nhwY5tBPK github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= github.com/scaleway/scaleway-sdk-go v1.0.0-beta.36 h1:ObX9hZmK+VmijreZO/8x9pQ8/P/ToHD/bdSb4Eg4tUo= github.com/scaleway/scaleway-sdk-go v1.0.0-beta.36/go.mod h1:LEsDu4BubxK7/cWhtlQWfuxwL4rf/2UEpxXz1o1EMtM= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= @@ -517,6 +537,7 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= @@ -533,6 +554,7 @@ github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8 github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= github.com/yuin/goldmark 
v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.mongodb.org/mongo-driver v1.17.6 h1:87JUG1wZfWsr6rIz3ZmpH90rL5tea7O3IHuSwHUpsss= go.mongodb.org/mongo-driver v1.17.6/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= @@ -620,12 +642,16 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio= golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= golang.org/x/exp v0.0.0-20250808145144-a408d31f581a h1:Y+7uR/b1Mw2iSXZ3G//1haIiSElDQZ8KWh0h+sZPG90= golang.org/x/exp v0.0.0-20250808145144-a408d31f581a/go.mod h1:rT6SFzZ7oxADUDx58pcaKFTcZ+inxAa9fTrYx/uVYwg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk= golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc= 
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -638,6 +664,10 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.13.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= @@ -648,6 +678,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -667,23 
+699,37 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= @@ -694,6 +740,8 @@ golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod 
h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= golang.org/x/tools/godoc v0.1.0-deprecated h1:o+aZ1BOj6Hsx/GBdJO/s815sqftjSnrZZwyYTHODvtk= diff --git a/go.work b/go.work index fbb73655e9..c5ba5dfad6 100644 --- a/go.work +++ b/go.work @@ -1,4 +1,4 @@ -go 1.24.0 +go 1.25.0 use ( . diff --git a/internal/tools/go.mod b/internal/tools/go.mod index c8b62b5ca7..5238fca024 100644 --- a/internal/tools/go.mod +++ b/internal/tools/go.mod @@ -1,6 +1,6 @@ module github.com/prometheus/prometheus/internal/tools -go 1.24.0 +go 1.25.0 require ( github.com/bufbuild/buf v1.62.1 diff --git a/web/api/testhelpers/api.go b/web/api/testhelpers/api.go new file mode 100644 index 0000000000..07d7003b5c --- /dev/null +++ b/web/api/testhelpers/api.go @@ -0,0 +1,244 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package testhelpers provides utilities for testing the Prometheus HTTP API. +// This file contains helper functions for creating test API instances and managing test lifecycles. 
+package testhelpers + +import ( + "context" + "log/slog" + "net/http" + "net/url" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" + + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" + "github.com/prometheus/prometheus/rules" + "github.com/prometheus/prometheus/scrape" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb" + "github.com/prometheus/prometheus/util/notifications" +) + +// RulesRetriever provides a list of active rules and alerts. +type RulesRetriever interface { + RuleGroups() []*rules.Group + AlertingRules() []*rules.AlertingRule +} + +// TargetRetriever provides the list of active/dropped targets to scrape or not. +type TargetRetriever interface { + TargetsActive() map[string][]*scrape.Target + TargetsDropped() map[string][]*scrape.Target + TargetsDroppedCounts() map[string]int + ScrapePoolConfig(string) (*config.ScrapeConfig, error) +} + +// ScrapePoolsRetriever provide the list of all scrape pools. +type ScrapePoolsRetriever interface { + ScrapePools() []string +} + +// AlertmanagerRetriever provides a list of all/dropped AlertManager URLs. +type AlertmanagerRetriever interface { + Alertmanagers() []*url.URL + DroppedAlertmanagers() []*url.URL +} + +// TSDBAdminStats provides TSDB admin statistics. +type TSDBAdminStats interface { + CleanTombstones() error + Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Matcher) error + Snapshot(dir string, withHead bool) error + Stats(statsByLabelName string, limit int) (*tsdb.Stats, error) + WALReplayStatus() (tsdb.WALReplayStatus, error) + BlockMetas() ([]tsdb.BlockMeta, error) +} + +// APIConfig holds configuration for creating a test API instance. +type APIConfig struct { + // Core dependencies. 
+ QueryEngine *LazyLoader[promql.QueryEngine] + Queryable *LazyLoader[storage.SampleAndChunkQueryable] + ExemplarQueryable *LazyLoader[storage.ExemplarQueryable] + + // Retrievers. + RulesRetriever *LazyLoader[RulesRetriever] + TargetRetriever *LazyLoader[TargetRetriever] + ScrapePoolsRetriever *LazyLoader[ScrapePoolsRetriever] + AlertmanagerRetriever *LazyLoader[AlertmanagerRetriever] + + // Admin. + TSDBAdmin *LazyLoader[TSDBAdminStats] + DBDir string + + // Optional overrides. + Config func() config.Config + FlagsMap map[string]string + Now func() time.Time +} + +// APIWrapper wraps the API and provides a handler for testing. +type APIWrapper struct { + Handler http.Handler +} + +// PrometheusVersion contains build information about Prometheus. +type PrometheusVersion struct { + Version string `json:"version"` + Revision string `json:"revision"` + Branch string `json:"branch"` + BuildUser string `json:"buildUser"` + BuildDate string `json:"buildDate"` + GoVersion string `json:"goVersion"` +} + +// RuntimeInfo contains runtime information about Prometheus. +type RuntimeInfo struct { + StartTime time.Time `json:"startTime"` + CWD string `json:"CWD"` + Hostname string `json:"hostname"` + ServerTime time.Time `json:"serverTime"` + ReloadConfigSuccess bool `json:"reloadConfigSuccess"` + LastConfigTime time.Time `json:"lastConfigTime"` + CorruptionCount int64 `json:"corruptionCount"` + GoroutineCount int `json:"goroutineCount"` + GOMAXPROCS int `json:"GOMAXPROCS"` + GOMEMLIMIT int64 `json:"GOMEMLIMIT"` + GOGC string `json:"GOGC"` + GODEBUG string `json:"GODEBUG"` + StorageRetention string `json:"storageRetention"` +} + +// NewAPIParams holds all the parameters needed to create a v1.API instance. 
+type NewAPIParams struct { + QueryEngine promql.QueryEngine + Queryable storage.SampleAndChunkQueryable + ExemplarQueryable storage.ExemplarQueryable + ScrapePoolsRetriever func(context.Context) ScrapePoolsRetriever + TargetRetriever func(context.Context) TargetRetriever + AlertmanagerRetriever func(context.Context) AlertmanagerRetriever + ConfigFunc func() config.Config + FlagsMap map[string]string + ReadyFunc func(http.HandlerFunc) http.HandlerFunc + TSDBAdmin TSDBAdminStats + DBDir string + Logger *slog.Logger + RulesRetriever func(context.Context) RulesRetriever + RuntimeInfoFunc func() (RuntimeInfo, error) + BuildInfo *PrometheusVersion + NotificationsGetter func() []notifications.Notification + NotificationsSub func() (<-chan notifications.Notification, func(), bool) + Gatherer prometheus.Gatherer + Registerer prometheus.Registerer +} + +// PrepareAPI creates a NewAPIParams with sensible defaults for testing. +func PrepareAPI(t *testing.T, cfg APIConfig) NewAPIParams { + t.Helper() + + // Create defaults for unset lazy loaders. 
+ if cfg.QueryEngine == nil { + cfg.QueryEngine = NewLazyLoader(func() promql.QueryEngine { + return promqltest.NewTestEngineWithOpts(t, promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10000, + Timeout: 100 * time.Second, + NoStepSubqueryIntervalFn: func(int64) int64 { return 60 * 1000 }, + EnableAtModifier: true, + EnableNegativeOffset: true, + EnablePerStepStats: true, + }) + }) + } + + if cfg.Queryable == nil { + cfg.Queryable = NewLazyLoader(NewEmptyQueryable) + } + + if cfg.ExemplarQueryable == nil { + cfg.ExemplarQueryable = NewLazyLoader(NewEmptyExemplarQueryable) + } + + if cfg.RulesRetriever == nil { + cfg.RulesRetriever = NewLazyLoader(func() RulesRetriever { + return NewEmptyRulesRetriever() + }) + } + + if cfg.TargetRetriever == nil { + cfg.TargetRetriever = NewLazyLoader(func() TargetRetriever { + return NewEmptyTargetRetriever() + }) + } + + if cfg.ScrapePoolsRetriever == nil { + cfg.ScrapePoolsRetriever = NewLazyLoader(func() ScrapePoolsRetriever { + return NewEmptyScrapePoolsRetriever() + }) + } + + if cfg.AlertmanagerRetriever == nil { + cfg.AlertmanagerRetriever = NewLazyLoader(func() AlertmanagerRetriever { + return NewEmptyAlertmanagerRetriever() + }) + } + + if cfg.TSDBAdmin == nil { + cfg.TSDBAdmin = NewLazyLoader(func() TSDBAdminStats { + return NewEmptyTSDBAdminStats() + }) + } + + if cfg.Config == nil { + cfg.Config = func() config.Config { return config.Config{} } + } + + if cfg.FlagsMap == nil { + cfg.FlagsMap = map[string]string{} + } + + if cfg.DBDir == "" { + cfg.DBDir = t.TempDir() + } + + return NewAPIParams{ + QueryEngine: cfg.QueryEngine.Get(), + Queryable: cfg.Queryable.Get(), + ExemplarQueryable: cfg.ExemplarQueryable.Get(), + ScrapePoolsRetriever: func(context.Context) ScrapePoolsRetriever { return cfg.ScrapePoolsRetriever.Get() }, + TargetRetriever: func(context.Context) TargetRetriever { return cfg.TargetRetriever.Get() }, + AlertmanagerRetriever: func(context.Context) AlertmanagerRetriever { return 
cfg.AlertmanagerRetriever.Get() }, + ConfigFunc: cfg.Config, + FlagsMap: cfg.FlagsMap, + ReadyFunc: func(f http.HandlerFunc) http.HandlerFunc { return f }, + TSDBAdmin: cfg.TSDBAdmin.Get(), + DBDir: cfg.DBDir, + Logger: promslog.NewNopLogger(), + RulesRetriever: func(context.Context) RulesRetriever { return cfg.RulesRetriever.Get() }, + RuntimeInfoFunc: func() (RuntimeInfo, error) { return RuntimeInfo{}, nil }, + BuildInfo: &PrometheusVersion{}, + NotificationsGetter: func() []notifications.Notification { return nil }, + NotificationsSub: func() (<-chan notifications.Notification, func(), bool) { return nil, func() {}, false }, + Gatherer: prometheus.NewRegistry(), + Registerer: prometheus.NewRegistry(), + } +} diff --git a/web/api/testhelpers/assertions.go b/web/api/testhelpers/assertions.go new file mode 100644 index 0000000000..53010b08b5 --- /dev/null +++ b/web/api/testhelpers/assertions.go @@ -0,0 +1,252 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file provides assertion helpers for validating API responses in tests. +package testhelpers + +import ( + "fmt" + "slices" + "strings" + + "github.com/stretchr/testify/require" +) + +// RequireSuccess asserts that the response has status "success" and returns the response for chaining. 
+func (r *Response) RequireSuccess() *Response { + r.t.Helper() + require.NotNil(r.t, r.JSON, "response body is not JSON") + require.Equal(r.t, "success", r.JSON["status"], "expected status to be 'success'") + return r +} + +// RequireError asserts that the response has status "error" and returns the response for chaining. +func (r *Response) RequireError() *Response { + r.t.Helper() + require.NotNil(r.t, r.JSON, "response body is not JSON") + require.Equal(r.t, "error", r.JSON["status"], "expected status to be 'error'") + return r +} + +// RequireStatusCode asserts that the response has the given HTTP status code and returns the response for chaining. +func (r *Response) RequireStatusCode(expectedCode int) *Response { + r.t.Helper() + require.Equal(r.t, expectedCode, r.StatusCode, "unexpected HTTP status code") + return r +} + +// RequireJSONPathExists asserts that a JSON path exists and returns the response for chaining. +func (r *Response) RequireJSONPathExists(path string) *Response { + r.t.Helper() + require.NotNil(r.t, r.JSON, "response body is not JSON") + + value := getJSONPath(r.JSON, path) + require.NotNil(r.t, value, "JSON path %q does not exist", path) + return r +} + +// RequireEquals asserts that a JSON path equals the expected value and returns the response for chaining. +func (r *Response) RequireEquals(path string, expected any) *Response { + r.t.Helper() + require.NotNil(r.t, r.JSON, "response body is not JSON") + + value := getJSONPath(r.JSON, path) + require.NotNil(r.t, value, "JSON path %q does not exist", path) + require.Equal(r.t, expected, value, "JSON path %q has unexpected value", path) + return r +} + +// RequireJSONArray asserts that a JSON path contains an array and returns the response for chaining. 
+func (r *Response) RequireJSONArray(path string) *Response { + r.t.Helper() + require.NotNil(r.t, r.JSON, "response body is not JSON") + + value := getJSONPath(r.JSON, path) + require.NotNil(r.t, value, "JSON path %q does not exist", path) + _, ok := value.([]any) + require.True(r.t, ok, "JSON path %q is not an array", path) + return r +} + +// RequireLenAtLeast asserts that a JSON path contains an array with at least minLen elements and returns the response for chaining. +func (r *Response) RequireLenAtLeast(path string, minLen int) *Response { + r.t.Helper() + require.NotNil(r.t, r.JSON, "response body is not JSON") + + value := getJSONPath(r.JSON, path) + require.NotNil(r.t, value, "JSON path %q does not exist", path) + arr, ok := value.([]any) + require.True(r.t, ok, "JSON path %q is not an array", path) + require.GreaterOrEqual(r.t, len(arr), minLen, "JSON path %q has fewer than %d elements", path, minLen) + return r +} + +// RequireArrayContains asserts that a JSON path contains an array with the expected element and returns the response for chaining. +func (r *Response) RequireArrayContains(path string, expected any) *Response { + r.t.Helper() + require.NotNil(r.t, r.JSON, "response body is not JSON") + + value := getJSONPath(r.JSON, path) + require.NotNil(r.t, value, "JSON path %q does not exist", path) + arr, ok := value.([]any) + require.True(r.t, ok, "JSON path %q is not an array", path) + + found := slices.Contains(arr, expected) + require.True(r.t, found, "JSON path %q does not contain expected value %v", path, expected) + return r +} + +// RequireSome asserts that at least one element in an array satisfies the predicate and returns the response for chaining. 
+func (r *Response) RequireSome(path string, predicate func(any) bool) *Response { + r.t.Helper() + require.NotNil(r.t, r.JSON, "response body is not JSON") + + value := getJSONPath(r.JSON, path) + require.NotNil(r.t, value, "JSON path %q does not exist", path) + arr, ok := value.([]any) + require.True(r.t, ok, "JSON path %q is not an array", path) + + found := slices.ContainsFunc(arr, predicate) + require.True(r.t, found, "no element in JSON path %q satisfies the predicate", path) + return r +} + +// getJSONPath extracts a value from a JSON object using a simple path notation. +// Supports paths like "$.data", "$.data.groups", "$.data.groups[0]". +func getJSONPath(data map[string]any, path string) any { + // Remove leading "$." if present. + path = strings.TrimPrefix(path, "$.") + + if path == "" { + return data + } + + parts := strings.Split(path, ".") + current := any(data) + + for _, part := range parts { + // Handle array indexing (e.g., "groups[0]"). + if strings.Contains(part, "[") { + // Not implementing array indexing for simplicity. + // Tests should use direct field access or RequireSome. + return nil + } + + // Navigate to the next level. + m, ok := current.(map[string]any) + if !ok { + return nil + } + current = m[part] + } + + return current +} + +// RequireVectorResult is a convenience helper for checking vector query results. +func (r *Response) RequireVectorResult() *Response { + r.t.Helper() + return r.RequireSuccess().RequireEquals("$.data.resultType", "vector") +} + +// RequireMatrixResult is a convenience helper for checking matrix query results. +func (r *Response) RequireMatrixResult() *Response { + r.t.Helper() + return r.RequireSuccess().RequireEquals("$.data.resultType", "matrix") +} + +// RequireScalarResult is a convenience helper for checking scalar query results. 
+func (r *Response) RequireScalarResult() *Response { + r.t.Helper() + return r.RequireSuccess().RequireEquals("$.data.resultType", "scalar") +} + +// RequireRulesGroupNamed asserts that a rules response contains a group with the given name. +func (r *Response) RequireRulesGroupNamed(name string) *Response { + r.t.Helper() + return r.RequireSuccess().RequireSome("$.data.groups", func(group any) bool { + if g, ok := group.(map[string]any); ok { + return g["name"] == name + } + return false + }) +} + +// RequireTargetCount asserts that a targets response contains at least n targets. +func (r *Response) RequireTargetCount(minCount int) *Response { + r.t.Helper() + r.RequireSuccess() + + // The targets endpoint returns activeTargets as an array of targets. + value := getJSONPath(r.JSON, "$.data.activeTargets") + require.NotNil(r.t, value, "JSON path $.data.activeTargets does not exist") + + arr, ok := value.([]any) + require.True(r.t, ok, "$.data.activeTargets is not an array") + require.GreaterOrEqual(r.t, len(arr), minCount, "expected at least %d targets, got %d", minCount, len(arr)) + return r +} + +// DebugJSON is a helper for debugging JSON responses in tests. +func (r *Response) DebugJSON() *Response { + r.t.Helper() + r.t.Logf("Response status code: %d", r.StatusCode) + r.t.Logf("Response body: %s", r.Body) + if r.JSON != nil { + r.t.Logf("Response JSON: %+v", r.JSON) + } + return r +} + +// RequireContainsSubstring asserts that the response body contains the given substring. +func (r *Response) RequireContainsSubstring(substring string) *Response { + r.t.Helper() + require.Contains(r.t, r.Body, substring, "response body does not contain expected substring") + return r +} + +// RequireField asserts that a field exists at the given path and returns its value. +// Note: This method cannot be chained further since it returns the field value, not the Response. 
+func (r *Response) RequireField(path string) any { + r.t.Helper() + require.NotNil(r.t, r.JSON, "response body is not JSON") + + value := getJSONPath(r.JSON, path) + require.NotNil(r.t, value, "JSON path %q does not exist", path) + return value +} + +// RequireFieldType asserts that a field exists and has the expected type. +func (r *Response) RequireFieldType(path, expectedType string) *Response { + r.t.Helper() + value := r.RequireField(path) + + var actualType string + switch value.(type) { + case string: + actualType = "string" + case float64: + actualType = "number" + case bool: + actualType = "bool" + case []any: + actualType = "array" + case map[string]any: + actualType = "object" + default: + actualType = fmt.Sprintf("%T", value) + } + + require.Equal(r.t, expectedType, actualType, "JSON path %q has unexpected type", path) + return r +} diff --git a/web/api/testhelpers/fixtures.go b/web/api/testhelpers/fixtures.go new file mode 100644 index 0000000000..caa5afd59d --- /dev/null +++ b/web/api/testhelpers/fixtures.go @@ -0,0 +1,178 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file provides test fixture data for API tests. 
+package testhelpers + +import ( + "time" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/rules" + "github.com/prometheus/prometheus/storage" +) + +// FixtureSeries creates a simple series with the "up" metric. +func FixtureSeries() []storage.Series { + // Use timestamps relative to "now" so queries work. + now := time.Now().UnixMilli() + return []storage.Series{ + &FakeSeries{ + labels: labels.FromStrings("__name__", "up", "job", "prometheus", "instance", "localhost:9090"), + samples: []promql.FPoint{ + {T: now - 120000, F: 1}, + {T: now - 60000, F: 1}, + {T: now, F: 1}, + }, + }, + } +} + +// FixtureMultipleSeries creates multiple series for testing. +func FixtureMultipleSeries() []storage.Series { + // Use timestamps relative to "now" so queries work. + now := time.Now().UnixMilli() + return []storage.Series{ + &FakeSeries{ + labels: labels.FromStrings("__name__", "up", "job", "prometheus", "instance", "localhost:9090"), + samples: []promql.FPoint{ + {T: now - 60000, F: 1}, + {T: now, F: 1}, + }, + }, + &FakeSeries{ + labels: labels.FromStrings("__name__", "up", "job", "node", "instance", "localhost:9100"), + samples: []promql.FPoint{ + {T: now - 60000, F: 1}, + {T: now, F: 0}, + }, + }, + &FakeSeries{ + labels: labels.FromStrings("__name__", "http_requests_total", "job", "api", "instance", "localhost:8080"), + samples: []promql.FPoint{ + {T: now - 60000, F: 100}, + {T: now, F: 150}, + }, + }, + } +} + +// FixtureRuleGroups creates a simple set of rule groups for testing. +func FixtureRuleGroups() []*rules.Group { + // Create a simple recording rule. + expr, _ := parser.ParseExpr("up == 1") + recordingRule := rules.NewRecordingRule( + "job:up:sum", + expr, + labels.EmptyLabels(), + ) + + // Create a simple alerting rule. 
+ alertExpr, _ := parser.ParseExpr("up == 0") + alertingRule := rules.NewAlertingRule( + "InstanceDown", + alertExpr, + time.Minute, + 0, + labels.FromStrings("severity", "critical"), + labels.EmptyLabels(), + labels.EmptyLabels(), + "Instance {{ $labels.instance }} is down", + true, + nil, + ) + + // Create a rule group. + group := rules.NewGroup(rules.GroupOptions{ + Name: "example", + File: "example.rules", + Interval: time.Minute, + Rules: []rules.Rule{ + recordingRule, + alertingRule, + }, + }) + + return []*rules.Group{group} +} + +// FixtureEmptyRuleGroups returns an empty set of rule groups. +func FixtureEmptyRuleGroups() []*rules.Group { + return []*rules.Group{} +} + +// FixtureSingleSeries creates a single series for simple tests. +func FixtureSingleSeries(metricName string, value float64) []storage.Series { + return []storage.Series{ + &FakeSeries{ + labels: labels.FromStrings("__name__", metricName), + samples: []promql.FPoint{ + {T: 0, F: value}, + }, + }, + } +} + +// FixtureHistogramSeries creates a series with native histogram data. +func FixtureHistogramSeries() []storage.Series { + // Use timestamps relative to "now" so queries work. 
+ now := time.Now().UnixMilli() + return []storage.Series{ + &FakeHistogramSeries{ + labels: labels.FromStrings("__name__", "test_histogram", "job", "prometheus", "instance", "localhost:9090"), + histograms: []promql.HPoint{ + { + T: now - 60000, + H: &histogram.FloatHistogram{ + Schema: 2, + ZeroThreshold: 0.001, + ZeroCount: 5, + Count: 50, + Sum: 100, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + NegativeSpans: []histogram.Span{ + {Offset: 0, Length: 1}, + }, + PositiveBuckets: []float64{5, 10, 8, 7}, + NegativeBuckets: []float64{3}, + }, + }, + { + T: now, + H: &histogram.FloatHistogram{ + Schema: 2, + ZeroThreshold: 0.001, + ZeroCount: 8, + Count: 60, + Sum: 120, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + NegativeSpans: []histogram.Span{ + {Offset: 0, Length: 1}, + }, + PositiveBuckets: []float64{6, 12, 10, 9}, + NegativeBuckets: []float64{4}, + }, + }, + }, + }, + } +} diff --git a/web/api/testhelpers/mocks.go b/web/api/testhelpers/mocks.go new file mode 100644 index 0000000000..527febb727 --- /dev/null +++ b/web/api/testhelpers/mocks.go @@ -0,0 +1,534 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains mock implementations of API dependencies for testing. 
package testhelpers

import (
	"context"
	"net/url"
	"slices"

	"github.com/prometheus/prometheus/config"
	"github.com/prometheus/prometheus/model/exemplar"
	"github.com/prometheus/prometheus/model/histogram"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/promql"
	"github.com/prometheus/prometheus/rules"
	"github.com/prometheus/prometheus/scrape"
	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/chunkenc"
	"github.com/prometheus/prometheus/tsdb/chunks"
	"github.com/prometheus/prometheus/util/annotations"
)

// LazyLoader allows lazy initialization of mocks per test.
type LazyLoader[T any] struct {
	loader func() T
	value  *T
}

// NewLazyLoader creates a new LazyLoader with the given loader function.
func NewLazyLoader[T any](loader func() T) *LazyLoader[T] {
	return &LazyLoader[T]{loader: loader}
}

// Get returns the loaded value, initializing it if necessary.
func (l *LazyLoader[T]) Get() T {
	if l.value == nil {
		v := l.loader()
		l.value = &v
	}
	return *l.value
}

// FakeQueryable implements storage.SampleAndChunkQueryable with configurable behavior.
type FakeQueryable struct {
	series []storage.Series
}

func (f *FakeQueryable) Querier(_, _ int64) (storage.Querier, error) {
	return &FakeQuerier{series: f.series}, nil
}

func (f *FakeQueryable) ChunkQuerier(_, _ int64) (storage.ChunkQuerier, error) {
	return &FakeChunkQuerier{series: f.series}, nil
}

// FakeQuerier implements storage.Querier.
+type FakeQuerier struct { + series []storage.Series +} + +func (f *FakeQuerier) Select(_ context.Context, _ bool, _ *storage.SelectHints, _ ...*labels.Matcher) storage.SeriesSet { + return &FakeSeriesSet{series: f.series, idx: -1} +} + +func (f *FakeQuerier) LabelValues(_ context.Context, name string, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, annotations.Annotations, error) { + valuesMap := make(map[string]struct{}) + for _, s := range f.series { + lbls := s.Labels() + if val := lbls.Get(name); val != "" { + valuesMap[val] = struct{}{} + } + } + values := make([]string, 0, len(valuesMap)) + for v := range valuesMap { + values = append(values, v) + } + return values, nil, nil +} + +func (f *FakeQuerier) LabelNames(_ context.Context, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, annotations.Annotations, error) { + namesMap := make(map[string]struct{}) + for _, s := range f.series { + lbls := s.Labels() + lbls.Range(func(l labels.Label) { + namesMap[l.Name] = struct{}{} + }) + } + names := make([]string, 0, len(namesMap)) + for n := range namesMap { + names = append(names, n) + } + return names, nil, nil +} + +func (*FakeQuerier) Close() error { + return nil +} + +// FakeChunkQuerier implements storage.ChunkQuerier. 
+type FakeChunkQuerier struct { + series []storage.Series +} + +func (f *FakeChunkQuerier) Select(_ context.Context, _ bool, _ *storage.SelectHints, _ ...*labels.Matcher) storage.ChunkSeriesSet { + return &FakeChunkSeriesSet{series: f.series, idx: -1} +} + +func (f *FakeChunkQuerier) LabelValues(_ context.Context, name string, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, annotations.Annotations, error) { + valuesMap := make(map[string]struct{}) + for _, s := range f.series { + lbls := s.Labels() + if val := lbls.Get(name); val != "" { + valuesMap[val] = struct{}{} + } + } + values := make([]string, 0, len(valuesMap)) + for v := range valuesMap { + values = append(values, v) + } + return values, nil, nil +} + +func (f *FakeChunkQuerier) LabelNames(_ context.Context, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, annotations.Annotations, error) { + namesMap := make(map[string]struct{}) + for _, s := range f.series { + lbls := s.Labels() + lbls.Range(func(l labels.Label) { + namesMap[l.Name] = struct{}{} + }) + } + names := make([]string, 0, len(namesMap)) + for n := range namesMap { + names = append(names, n) + } + return names, nil, nil +} + +func (*FakeChunkQuerier) Close() error { + return nil +} + +// FakeSeriesSet implements storage.SeriesSet. +type FakeSeriesSet struct { + series []storage.Series + idx int +} + +func (f *FakeSeriesSet) Next() bool { + f.idx++ + return f.idx < len(f.series) +} + +func (f *FakeSeriesSet) At() storage.Series { + return f.series[f.idx] +} + +func (*FakeSeriesSet) Err() error { + return nil +} + +func (*FakeSeriesSet) Warnings() annotations.Annotations { + return nil +} + +// FakeChunkSeriesSet implements storage.ChunkSeriesSet. 
+type FakeChunkSeriesSet struct { + series []storage.Series + idx int +} + +func (f *FakeChunkSeriesSet) Next() bool { + f.idx++ + return f.idx < len(f.series) +} + +func (f *FakeChunkSeriesSet) At() storage.ChunkSeries { + return &FakeChunkSeries{series: f.series[f.idx]} +} + +func (*FakeChunkSeriesSet) Err() error { + return nil +} + +func (*FakeChunkSeriesSet) Warnings() annotations.Annotations { + return nil +} + +// FakeChunkSeries implements storage.ChunkSeries. +type FakeChunkSeries struct { + series storage.Series +} + +func (f *FakeChunkSeries) Labels() labels.Labels { + return f.series.Labels() +} + +func (*FakeChunkSeries) Iterator(_ chunks.Iterator) chunks.Iterator { + return &FakeChunkSeriesIterator{} +} + +// FakeChunkSeriesIterator implements chunks.Iterator. +type FakeChunkSeriesIterator struct{} + +func (*FakeChunkSeriesIterator) Next() bool { + return false +} + +func (*FakeChunkSeriesIterator) At() chunks.Meta { + return chunks.Meta{} +} + +func (*FakeChunkSeriesIterator) Err() error { + return nil +} + +// FakeSeries implements storage.Series. +type FakeSeries struct { + labels labels.Labels + samples []promql.FPoint +} + +func (f *FakeSeries) Labels() labels.Labels { + return f.labels +} + +func (f *FakeSeries) Iterator(chunkenc.Iterator) chunkenc.Iterator { + return &FakeSeriesIterator{samples: f.samples, idx: -1} +} + +// FakeSeriesIterator implements chunkenc.Iterator. 
+type FakeSeriesIterator struct { + samples []promql.FPoint + idx int +} + +func (f *FakeSeriesIterator) Next() chunkenc.ValueType { + f.idx++ + if f.idx < len(f.samples) { + return chunkenc.ValFloat + } + return chunkenc.ValNone +} + +func (f *FakeSeriesIterator) Seek(t int64) chunkenc.ValueType { + for f.idx < len(f.samples)-1 { + f.idx++ + if f.samples[f.idx].T >= t { + return chunkenc.ValFloat + } + } + return chunkenc.ValNone +} + +func (f *FakeSeriesIterator) At() (int64, float64) { + s := f.samples[f.idx] + return s.T, s.F +} + +func (*FakeSeriesIterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) { + panic("not implemented") +} + +func (*FakeSeriesIterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + panic("not implemented") +} + +func (f *FakeSeriesIterator) AtT() int64 { + return f.samples[f.idx].T +} + +func (*FakeSeriesIterator) AtST() int64 { + return 0 +} + +func (*FakeSeriesIterator) Err() error { + return nil +} + +// FakeHistogramSeries implements storage.Series for histogram data. +type FakeHistogramSeries struct { + labels labels.Labels + histograms []promql.HPoint +} + +func (f *FakeHistogramSeries) Labels() labels.Labels { + return f.labels +} + +func (f *FakeHistogramSeries) Iterator(chunkenc.Iterator) chunkenc.Iterator { + return &FakeHistogramSeriesIterator{histograms: f.histograms, idx: -1} +} + +// FakeHistogramSeriesIterator implements chunkenc.Iterator for histogram data. 
+type FakeHistogramSeriesIterator struct { + histograms []promql.HPoint + idx int +} + +func (f *FakeHistogramSeriesIterator) Next() chunkenc.ValueType { + f.idx++ + if f.idx < len(f.histograms) { + return chunkenc.ValFloatHistogram + } + return chunkenc.ValNone +} + +func (f *FakeHistogramSeriesIterator) Seek(t int64) chunkenc.ValueType { + for f.idx < len(f.histograms)-1 { + f.idx++ + if f.histograms[f.idx].T >= t { + return chunkenc.ValFloatHistogram + } + } + return chunkenc.ValNone +} + +func (*FakeHistogramSeriesIterator) At() (int64, float64) { + panic("not a float value") +} + +func (*FakeHistogramSeriesIterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) { + panic("not implemented") +} + +func (f *FakeHistogramSeriesIterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + h := f.histograms[f.idx] + return h.T, h.H +} + +func (f *FakeHistogramSeriesIterator) AtT() int64 { + return f.histograms[f.idx].T +} + +func (*FakeHistogramSeriesIterator) AtST() int64 { + return 0 +} + +func (*FakeHistogramSeriesIterator) Err() error { + return nil +} + +// FakeExemplarQueryable implements storage.ExemplarQueryable. +type FakeExemplarQueryable struct{} + +func (*FakeExemplarQueryable) ExemplarQuerier(_ context.Context) (storage.ExemplarQuerier, error) { + return &FakeExemplarQuerier{}, nil +} + +// FakeExemplarQuerier implements storage.ExemplarQuerier. +type FakeExemplarQuerier struct{} + +func (*FakeExemplarQuerier) Select(_, _ int64, _ ...[]*labels.Matcher) ([]exemplar.QueryResult, error) { + return nil, nil +} + +// FakeRulesRetriever implements v1.RulesRetriever. 
+type FakeRulesRetriever struct { + groups []*rules.Group +} + +func (f *FakeRulesRetriever) RuleGroups() []*rules.Group { + return f.groups +} + +func (f *FakeRulesRetriever) AlertingRules() []*rules.AlertingRule { + var alertingRules []*rules.AlertingRule + for _, g := range f.groups { + for _, r := range g.Rules() { + if ar, ok := r.(*rules.AlertingRule); ok { + alertingRules = append(alertingRules, ar) + } + } + } + return alertingRules +} + +// FakeTargetRetriever implements v1.TargetRetriever. +type FakeTargetRetriever struct { + active map[string][]*scrape.Target + dropped map[string][]*scrape.Target + droppedCounts map[string]int + scrapeConfig map[string]*config.ScrapeConfig +} + +func (f *FakeTargetRetriever) TargetsActive() map[string][]*scrape.Target { + if f.active == nil { + return make(map[string][]*scrape.Target) + } + return f.active +} + +func (f *FakeTargetRetriever) TargetsDropped() map[string][]*scrape.Target { + if f.dropped == nil { + return make(map[string][]*scrape.Target) + } + return f.dropped +} + +func (f *FakeTargetRetriever) TargetsDroppedCounts() map[string]int { + if f.droppedCounts == nil { + return make(map[string]int) + } + return f.droppedCounts +} + +func (f *FakeTargetRetriever) ScrapePoolConfig(name string) (*config.ScrapeConfig, error) { + if f.scrapeConfig == nil { + return nil, nil + } + return f.scrapeConfig[name], nil +} + +// FakeScrapePoolsRetriever implements v1.ScrapePoolsRetriever. +type FakeScrapePoolsRetriever struct { + pools []string +} + +func (f *FakeScrapePoolsRetriever) ScrapePools() []string { + if f.pools == nil { + return []string{} + } + return f.pools +} + +// FakeAlertmanagerRetriever implements v1.AlertmanagerRetriever. +type FakeAlertmanagerRetriever struct{} + +func (*FakeAlertmanagerRetriever) Alertmanagers() []*url.URL { + return nil +} + +func (*FakeAlertmanagerRetriever) DroppedAlertmanagers() []*url.URL { + return nil +} + +// FakeTSDBAdminStats implements v1.TSDBAdminStats. 
+type FakeTSDBAdminStats struct{} + +func (*FakeTSDBAdminStats) CleanTombstones() error { + return nil +} + +func (*FakeTSDBAdminStats) Delete(_ context.Context, _, _ int64, _ ...*labels.Matcher) error { + return nil +} + +func (*FakeTSDBAdminStats) Snapshot(_ string, _ bool) error { + return nil +} + +func (*FakeTSDBAdminStats) Stats(_ string, _ int) (*tsdb.Stats, error) { + return &tsdb.Stats{}, nil +} + +func (*FakeTSDBAdminStats) WALReplayStatus() (tsdb.WALReplayStatus, error) { + return tsdb.WALReplayStatus{}, nil +} + +func (*FakeTSDBAdminStats) BlockMetas() ([]tsdb.BlockMeta, error) { + return []tsdb.BlockMeta{}, nil +} + +// NewEmptyQueryable returns a queryable with no series. +func NewEmptyQueryable() storage.SampleAndChunkQueryable { + return &FakeQueryable{series: []storage.Series{}} +} + +// NewQueryableWithSeries returns a queryable with the given series. +func NewQueryableWithSeries(series []storage.Series) storage.SampleAndChunkQueryable { + return &FakeQueryable{series: series} +} + +// TSDBNotReadyQueryable implements storage.SampleAndChunkQueryable that returns tsdb.ErrNotReady. +type TSDBNotReadyQueryable struct{} + +func (*TSDBNotReadyQueryable) Querier(_, _ int64) (storage.Querier, error) { + return nil, tsdb.ErrNotReady +} + +func (*TSDBNotReadyQueryable) ChunkQuerier(_, _ int64) (storage.ChunkQuerier, error) { + return nil, tsdb.ErrNotReady +} + +// NewTSDBNotReadyQueryable returns a queryable that always returns tsdb.ErrNotReady. +func NewTSDBNotReadyQueryable() storage.SampleAndChunkQueryable { + return &TSDBNotReadyQueryable{} +} + +// NewEmptyExemplarQueryable returns an exemplar queryable with no exemplars. +func NewEmptyExemplarQueryable() storage.ExemplarQueryable { + return &FakeExemplarQueryable{} +} + +// NewEmptyRulesRetriever returns a rules retriever with no rules. 
+func NewEmptyRulesRetriever() *FakeRulesRetriever { + return &FakeRulesRetriever{groups: []*rules.Group{}} +} + +// NewRulesRetrieverWithGroups returns a rules retriever with the given groups. +func NewRulesRetrieverWithGroups(groups []*rules.Group) *FakeRulesRetriever { + return &FakeRulesRetriever{groups: groups} +} + +// NewEmptyTargetRetriever returns a target retriever with no targets. +func NewEmptyTargetRetriever() *FakeTargetRetriever { + return &FakeTargetRetriever{} +} + +// NewEmptyScrapePoolsRetriever returns a scrape pools retriever with no pools. +func NewEmptyScrapePoolsRetriever() *FakeScrapePoolsRetriever { + return &FakeScrapePoolsRetriever{pools: []string{}} +} + +// NewEmptyAlertmanagerRetriever returns an alertmanager retriever with no alertmanagers. +func NewEmptyAlertmanagerRetriever() *FakeAlertmanagerRetriever { + return &FakeAlertmanagerRetriever{} +} + +// NewEmptyTSDBAdminStats returns a TSDB admin stats with no-op implementations. +func NewEmptyTSDBAdminStats() *FakeTSDBAdminStats { + return &FakeTSDBAdminStats{} +} diff --git a/web/api/testhelpers/openapi.go b/web/api/testhelpers/openapi.go new file mode 100644 index 0000000000..d2e88943d2 --- /dev/null +++ b/web/api/testhelpers/openapi.go @@ -0,0 +1,204 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file provides OpenAPI-specific test utilities for validating spec compliance. 
+package testhelpers
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+
+	"github.com/pb33f/libopenapi"
+	validator "github.com/pb33f/libopenapi-validator"
+	valerrors "github.com/pb33f/libopenapi-validator/errors"
+	"github.com/stretchr/testify/require"
+)
+
+// Validators are loaded once per test binary and shared by all tests.
+var (
+	openAPIValidator31   validator.Validator
+	openAPIValidator32   validator.Validator
+	openAPIValidatorOnce sync.Once
+	openAPIValidatorErr  error
+)
+
+// loadValidatorFromGolden builds a validator from the golden spec file
+// testdata/openapi_<version>_golden.yaml. The path is relative to the working
+// directory of the running test.
+func loadValidatorFromGolden(version string) (validator.Validator, error) {
+	goldenPath := filepath.Join("testdata", "openapi_"+version+"_golden.yaml")
+	specBytes, err := os.ReadFile(goldenPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read OpenAPI %s spec from %s: %w", version, goldenPath, err)
+	}
+
+	doc, err := libopenapi.NewDocument(specBytes)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse OpenAPI %s document: %w", version, err)
+	}
+
+	v, errs := validator.NewValidator(doc)
+	if len(errs) > 0 {
+		return nil, fmt.Errorf("failed to create OpenAPI %s validator: %v", version, errs)
+	}
+	return v, nil
+}
+
+// loadOpenAPIValidators loads and caches both OpenAPI 3.1 and 3.2 validators from golden files.
+// The first failure is cached as well, so every later call reports the same error.
+func loadOpenAPIValidators() (v31, v32 validator.Validator, err error) {
+	openAPIValidatorOnce.Do(func() {
+		// Stop at the first failure: the 3.2 validator is only loaded once
+		// the 3.1 validator is available.
+		openAPIValidator31, openAPIValidatorErr = loadValidatorFromGolden("3.1")
+		if openAPIValidatorErr != nil {
+			return
+		}
+		openAPIValidator32, openAPIValidatorErr = loadValidatorFromGolden("3.2")
+	})
+
+	if openAPIValidatorErr != nil {
+		return nil, nil, openAPIValidatorErr
+	}
+
+	return openAPIValidator31, openAPIValidator32, nil
+}
+
+// ValidateOpenAPI validates the request and response against both OpenAPI 3.1 and 3.2 specifications.
+// This ensures API endpoints are compatible with both OpenAPI versions.
+// It fails the test when the Response carries no originating request, because
+// response validation needs the request's method and path.
+// Returns the response for chaining.
+func (r *Response) ValidateOpenAPI() *Response {
+	r.t.Helper()
+
+	// Response validation dereferences the request, so a missing request must
+	// be reported as a test failure rather than a nil-pointer panic.
+	require.NotNil(r.t, r.request, "ValidateOpenAPI requires the originating request")
+
+	// Load both validators (cached after first call).
+	v31, v32, err := loadOpenAPIValidators()
+	require.NoError(r.t, err, "failed to load OpenAPI validators")
+
+	// Validate against OpenAPI 3.1 spec.
+	r.validateRequestWithVersion(v31, "3.1")
+	r.validateResponseWithVersion(v31, "3.1")
+
+	// Validate against OpenAPI 3.2 spec.
+	r.validateRequestWithVersion(v32, "3.2")
+	r.validateResponseWithVersion(v32, "3.2")
+
+	return r
+}
+
+// isExpectedMissingPath reports whether the validation errors only reflect that
+// the requested endpoint is intentionally absent from the given spec version.
+// Some endpoints (like /notifications/live) only exist in 3.2, not 3.1.
+func (r *Response) isExpectedMissingPath(errs []*valerrors.ValidationError, version string) bool {
+	return version == "3.1" &&
+		strings.Contains(r.request.URL.Path, "/notifications/live") &&
+		isPathNotFoundError(errs)
+}
+
+// validationErrorMessages renders each validation error as a string.
+func validationErrorMessages(errs []*valerrors.ValidationError) []string {
+	messages := make([]string, 0, len(errs))
+	for _, e := range errs {
+		messages = append(messages, e.Error())
+	}
+	return messages
+}
+
+// validateRequestWithVersion validates the HTTP request against a specific OpenAPI version's spec.
+func (r *Response) validateRequestWithVersion(v validator.Validator, version string) {
+	r.t.Helper()
+
+	// Create a validation request from the original request. The body is
+	// rebuilt from the captured bytes because the original body has already
+	// been consumed.
+	validationReq := &http.Request{
+		Method: r.request.Method,
+		URL:    r.request.URL,
+		Header: r.request.Header,
+		Body:   io.NopCloser(bytes.NewReader(r.requestBody)),
+	}
+
+	// Validate the request.
+	valid, errs := v.ValidateHttpRequest(validationReq)
+	if valid || r.isExpectedMissingPath(errs, version) {
+		return
+	}
+
+	require.Fail(r.t, fmt.Sprintf("OpenAPI %s request validation failed", version),
+		"Request to %s %s failed OpenAPI %s validation:\n%v",
+		r.request.Method, r.request.URL.Path, version, validationErrorMessages(errs))
+}
+
+// validateResponseWithVersion validates the HTTP response against a specific OpenAPI version's spec.
+func (r *Response) validateResponseWithVersion(v validator.Validator, version string) {
+	r.t.Helper()
+
+	// Create a validation request (needed for response validation context).
+	validationReq := &http.Request{
+		Method: r.request.Method,
+		URL:    r.request.URL,
+		Header: r.request.Header,
+	}
+
+	// Create a response for validation.
+	validationResp := &http.Response{
+		StatusCode: r.StatusCode,
+		Header:     r.responseHeader,
+		Body:       io.NopCloser(strings.NewReader(r.Body)),
+		Request:    validationReq,
+	}
+
+	// Validate the response.
+	valid, errs := v.ValidateHttpResponse(validationReq, validationResp)
+	if valid || r.isExpectedMissingPath(errs, version) {
+		return
+	}
+
+	require.Fail(r.t, fmt.Sprintf("OpenAPI %s response validation failed", version),
+		"Response from %s %s (status %d) failed OpenAPI %s validation:\n%v",
+		r.request.Method, r.request.URL.Path, r.StatusCode, version, validationErrorMessages(errs))
+}
+
+// isPathNotFoundError checks if the validation errors indicate a path was not found in the spec.
+// The check is a substring match on the error text, so it is a heuristic.
+func isPathNotFoundError(errors []*valerrors.ValidationError) bool {
+	for _, err := range errors {
+		errStr := err.Error()
+		// Check for common "path not found" error messages from libopenapi-validator.
+		if strings.Contains(errStr, "path") && (strings.Contains(errStr, "not found") || strings.Contains(errStr, "does not exist")) {
+			return true
+		}
+		if strings.Contains(errStr, "GET /notifications/live") || strings.Contains(errStr, "/notifications/live not found") {
+			return true
+		}
+	}
+	return false
+}
diff --git a/web/api/testhelpers/request.go b/web/api/testhelpers/request.go
new file mode 100644
index 0000000000..81650e4c49
--- /dev/null
+++ b/web/api/testhelpers/request.go
@@ -0,0 +1,145 @@
+// Copyright The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file provides HTTP request builders for testing API endpoints.
+package testhelpers
+
+import (
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"strings"
+	"testing"
+)
+
+// Response wraps an HTTP response with parsed JSON data.
+// It supports method chaining for assertions.
+//
+// Example usage:
+//
+//	testhelpers.GET(t, api, "/api/v1/query", "query", "up").
+//		ValidateOpenAPI().
+//		RequireSuccess().
+//		RequireEquals("$.data.resultType", "vector").
+//		RequireLenAtLeast("$.data.result", 1)
+//
+//	testhelpers.POST(t, api, "/api/v1/query", "query", "up").
+//		ValidateOpenAPI().
+//		RequireSuccess().
+//		RequireArrayContains("$.data.result", expectedValue)
+type Response struct {
+	// StatusCode is the HTTP status code of the response.
+	StatusCode int
+	// Body is the raw response body.
+	Body string
+	// JSON is the decoded body; it stays nil when the response is not JSON
+	// or cannot be decoded into an object.
+	JSON map[string]any
+
+	t              *testing.T
+	request        *http.Request
+	requestBody    []byte
+	responseHeader http.Header
+}
+
+// pairsToValues converts a flat list of alternating keys and values into
+// url.Values. It fails the test when the list has an odd length; argName is
+// the parameter name used in that failure message.
+func pairsToValues(t *testing.T, argName string, pairs []string) url.Values {
+	t.Helper()
+
+	if len(pairs)%2 != 0 {
+		t.Fatal(argName + " must be key-value pairs")
+	}
+
+	values := make(url.Values, len(pairs)/2)
+	for i := 0; i < len(pairs); i += 2 {
+		values.Add(pairs[i], pairs[i+1])
+	}
+	return values
+}
+
+// GET sends a GET request to the API and returns a Response with parsed JSON.
+// queryParams should be pairs of key-value strings. They are appended to path,
+// also when path already carries a query string.
+func GET(t *testing.T, api *APIWrapper, path string, queryParams ...string) *Response {
+	t.Helper()
+
+	// Build query string.
+	values := pairsToValues(t, "queryParams", queryParams)
+
+	fullPath := path
+	if len(values) > 0 {
+		// Extend an existing query string instead of adding a second "?".
+		separator := "?"
+		if strings.Contains(path, "?") {
+			separator = "&"
+		}
+		fullPath = path + separator + values.Encode()
+	}
+
+	req := httptest.NewRequest(http.MethodGet, fullPath, nil)
+	return executeRequest(t, api, req)
+}
+
+// POST sends a POST request to the API with the given body and returns a Response with parsed JSON.
+// bodyParams should be pairs of key-value strings for form data.
+func POST(t *testing.T, api *APIWrapper, path string, bodyParams ...string) *Response {
+	t.Helper()
+
+	// Build form data.
+	values := pairsToValues(t, "bodyParams", bodyParams)
+
+	req := httptest.NewRequest(http.MethodPost, path, strings.NewReader(values.Encode()))
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	return executeRequest(t, api, req)
+}
+
+// executeRequest executes an HTTP request and parses the response as JSON.
+// The request body is captured before the handler consumes it, so that it can
+// be replayed later for validation.
+func executeRequest(t *testing.T, api *APIWrapper, req *http.Request) *Response {
+	t.Helper()
+
+	// Capture the request body for validation.
+	var requestBody []byte
+	if req.Body != nil {
+		var err error
+		requestBody, err = io.ReadAll(req.Body)
+		if err != nil {
+			t.Fatalf("failed to read request body: %v", err)
+		}
+		// Restore the body for the actual request.
+		req.Body = io.NopCloser(strings.NewReader(string(requestBody)))
+	}
+
+	recorder := httptest.NewRecorder()
+	api.Handler.ServeHTTP(recorder, req)
+
+	result := recorder.Result()
+	defer result.Body.Close()
+
+	bodyBytes, err := io.ReadAll(result.Body)
+	if err != nil {
+		t.Fatalf("failed to read response body: %v", err)
+	}
+
+	resp := &Response{
+		StatusCode:     result.StatusCode,
+		Body:           string(bodyBytes),
+		t:              t,
+		request:        req,
+		requestBody:    requestBody,
+		responseHeader: result.Header,
+	}
+
+	// Try to parse as JSON. A substring match also accepts values with
+	// parameters, such as "application/json; charset=utf-8".
+	if strings.Contains(result.Header.Get("Content-Type"), "application/json") {
+		var jsonData map[string]any
+		// If JSON parsing fails, leave JSON as nil.
+		// This allows tests to handle non-JSON responses.
+		if err := json.Unmarshal(bodyBytes, &jsonData); err == nil {
+			resp.JSON = jsonData
+		}
+	}
+
+	return resp
+}
diff --git a/web/api/v1/api.go b/web/api/v1/api.go index f32fee19f8..456bafc97d 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -258,6 +258,7 @@ type API struct { codecs []Codec featureRegistry features.Collector + openAPIBuilder *OpenAPIBuilder } // NewAPI returns an initialized API type. @@ -299,6 +300,7 @@ func NewAPI( appendMetadata bool, overrideErrorCode OverrideErrorCode, featureRegistry features.Collector, + openAPIOptions OpenAPIOptions, ) *API { a := &API{ QueryEngine: qe, @@ -329,6 +331,7 @@ func NewAPI( notificationsSub: notificationsSub, overrideErrorCode: overrideErrorCode, featureRegistry: featureRegistry, + openAPIBuilder: NewOpenAPIBuilder(openAPIOptions, logger), remoteReadHandler: remote.NewReadHandler(logger, registerer, q, configFunc, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame), } @@ -400,7 +403,7 @@ func (api *API) Register(r *route.Router) { w.WriteHeader(http.StatusNoContent) }) return api.ready(httputil.CompressionHandler{ - Handler: hf, + Handler: api.openAPIBuilder.WrapHandler(hf), }.ServeHTTP) } @@ -469,6 +472,9 @@ func (api *API) Register(r *route.Router) { r.Put("/admin/tsdb/delete_series", wrapAgent(api.deleteSeries)) r.Put("/admin/tsdb/clean_tombstones", wrapAgent(api.cleanTombstones)) r.Put("/admin/tsdb/snapshot", wrapAgent(api.snapshot)) + + // OpenAPI endpoint. + r.Get("/openapi.yaml", api.ready(api.openAPIBuilder.ServeOpenAPI)) } type QueryData struct {
diff --git a/web/api/v1/api_scenarios_test.go b/web/api/v1/api_scenarios_test.go
new file mode 100644
index 0000000000..a707680c57
--- /dev/null
+++ b/web/api/v1/api_scenarios_test.go
@@ -0,0 +1,419 @@
+// Copyright The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v1
+
+import (
+	"strconv"
+	"testing"
+	"time"
+
+	"github.com/prometheus/prometheus/storage"
+	"github.com/prometheus/prometheus/web/api/testhelpers"
+)
+
+// TODO: Generate automated tests from OpenAPI spec to validate API responses.
+
+// TestAPIEmpty tests the API with no metrics and no rules.
+// Each subtest issues one request and checks the status and the shape of the
+// JSON payload.
+func TestAPIEmpty(t *testing.T) {
+	// Create an API with empty defaults (no series, no rules).
+	api := newTestAPI(t, testhelpers.APIConfig{})
+
+	t.Run("GET /api/v1/labels returns success with empty array", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/labels").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONArray("$.data")
+	})
+
+	t.Run("GET /api/v1/query?query=up returns success (empty result ok)", func(t *testing.T) {
+		// NOTE(review): this subtest calls ValidateOpenAPI before RequireSuccess,
+		// the reverse of the other subtests — confirm the order is irrelevant.
+		testhelpers.GET(t, api, "/api/v1/query", "query", "up").
+			ValidateOpenAPI().
+			RequireSuccess().
+			RequireEquals("$.data.resultType", "vector")
+	})
+
+	t.Run("GET /api/v1/query_range?query=up returns success", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/query_range",
+			"query", "up",
+			"start", "0",
+			"end", "100",
+			"step", "10").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "matrix")
+	})
+
+	t.Run("GET /api/v1/series returns success with empty result", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/series",
+			"match[]", "up",
+			"start", "0",
+			"end", "100").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONArray("$.data")
+	})
+
+	t.Run("GET /api/v1/label/__name__/values returns success with empty array", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/label/__name__/values").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONArray("$.data")
+	})
+
+	t.Run("GET /api/v1/targets returns success", func(t *testing.T) {
+		// NOTE(review): unlike the other subtests, this one does not call
+		// ValidateOpenAPI — confirm whether that is intentional.
+		testhelpers.GET(t, api, "/api/v1/targets").
+			RequireSuccess().
+			RequireJSONPathExists("$.data.activeTargets")
+	})
+
+	t.Run("GET /api/v1/rules returns success with empty groups", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/rules").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data.groups")
+	})
+
+	t.Run("GET /api/v1/alerts returns success with empty alerts", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/alerts").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data.alerts")
+	})
+
+	t.Run("GET /api/v1/alertmanagers returns success", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/alertmanagers").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data.activeAlertmanagers")
+	})
+
+	t.Run("GET /api/v1/metadata returns success", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/metadata").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data")
+	})
+
+	t.Run("GET /api/v1/status/config returns success", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/status/config").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data.yaml")
+	})
+
+	t.Run("GET /api/v1/status/flags returns success", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/status/flags").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data")
+	})
+
+	t.Run("GET /api/v1/status/runtimeinfo returns success", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/status/runtimeinfo").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data")
+	})
+
+	t.Run("GET /api/v1/status/buildinfo returns success", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/status/buildinfo").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data")
+	})
+
+	t.Run("POST /api/v1/query with form data returns success", func(t *testing.T) {
+		testhelpers.POST(t, api, "/api/v1/query", "query", "up").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "vector")
+	})
+}
+
+// TestAPIWithSeries tests the API with metrics/series data.
+// The series come from testhelpers.FixtureMultipleSeries.
+func TestAPIWithSeries(t *testing.T) {
+	// Create an API with sample series data.
+	api := newTestAPI(t, testhelpers.APIConfig{
+		Queryable: testhelpers.NewLazyLoader(func() storage.SampleAndChunkQueryable {
+			return testhelpers.NewQueryableWithSeries(testhelpers.FixtureMultipleSeries())
+		}),
+	})
+
+	t.Run("GET /api/v1/query returns vector with >= 1 sample", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/query", "query", "up").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "vector").
+			RequireLenAtLeast("$.data.result", 1)
+	})
+
+	t.Run("GET /api/v1/query_range returns matrix result type", func(t *testing.T) {
+		// Use relative timestamps to match our fixtures.
+		now := time.Now().Unix()
+		testhelpers.GET(t, api, "/api/v1/query_range",
+			"query", "up",
+			"start", strconv.FormatInt(now-120, 10),
+			"end", strconv.FormatInt(now, 10),
+			"step", "60").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "matrix")
+		// Note: Result may be empty if timestamps don't align perfectly with samples.
+	})
+
+	t.Run("GET /api/v1/labels returns non-empty array", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/labels").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONArray("$.data").
+			RequireLenAtLeast("$.data", 1)
+	})
+
+	t.Run("GET /api/v1/label/__name__/values contains expected metric names", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/label/__name__/values").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireArrayContains("$.data", "up").
+			RequireArrayContains("$.data", "http_requests_total")
+	})
+
+	t.Run("GET /api/v1/label/job/values contains expected jobs", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/label/job/values").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONArray("$.data").
+			RequireArrayContains("$.data", "prometheus").
+			RequireArrayContains("$.data", "node").
+			RequireArrayContains("$.data", "api")
+	})
+
+	t.Run("GET /api/v1/series with match returns results", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/series",
+			"match[]", "up",
+			"start", "0",
+			"end", "120").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONArray("$.data").
+			RequireLenAtLeast("$.data", 1)
+	})
+
+	t.Run("GET /api/v1/query with specific job returns filtered results", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/query", "query", `up{job="prometheus"}`).
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "vector").
+			RequireLenAtLeast("$.data.result", 1)
+	})
+
+	t.Run("GET /api/v1/query with aggregation returns result", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/query", "query", "sum(up)").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "vector")
+	})
+
+	t.Run("POST /api/v1/query returns vector with data", func(t *testing.T) {
+		testhelpers.POST(t, api, "/api/v1/query", "query", "up").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "vector").
+			RequireLenAtLeast("$.data.result", 1)
+	})
+}
+
+// TestAPIWithRules tests the API with rules configured.
+// The rule groups come from testhelpers.FixtureRuleGroups.
+func TestAPIWithRules(t *testing.T) {
+	// Create an API with rule groups.
+	api := newTestAPI(t, testhelpers.APIConfig{
+		RulesRetriever: testhelpers.NewLazyLoader(func() testhelpers.RulesRetriever {
+			return testhelpers.NewRulesRetrieverWithGroups(testhelpers.FixtureRuleGroups())
+		}),
+	})
+
+	t.Run("GET /api/v1/rules returns groups with rules", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/rules").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data.groups").
+			RequireLenAtLeast("$.data.groups", 1).
+			// First predicate: a group named "example" is present.
+			RequireSome("$.data.groups", func(group any) bool {
+				if g, ok := group.(map[string]any); ok {
+					return g["name"] == "example"
+				}
+				return false
+			}).
+			// Second predicate: that group carries at least one rule.
+			RequireSome("$.data.groups", func(group any) bool {
+				if g, ok := group.(map[string]any); ok {
+					if g["name"] == "example" {
+						// Check that the group has rules.
+						if rules, ok := g["rules"].([]any); ok {
+							return len(rules) > 0
+						}
+					}
+				}
+				return false
+			})
+	})
+
+	t.Run("GET /api/v1/alerts returns alerts array", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/alerts").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data.alerts").
+			RequireJSONArray("$.data.alerts")
+	})
+
+	t.Run("GET /api/v1/rules with rule_name filter", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/rules", "rule_name[]", "InstanceDown").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONPathExists("$.data.groups")
+	})
+}
+
+// TestAPITSDBNotReady tests the API when TSDB is not ready (e.g., during WAL replay).
+// TSDB not ready errors are converted to errorUnavailable by setUnavailStatusOnTSDBNotReady,
+// which returns HTTP 500 Internal Server Error (the default for errorUnavailable).
+// NOTE(review): the status mapping lives outside this file — confirm that
+// errorUnavailable maps to 500 and not to 503 Service Unavailable.
+func TestAPITSDBNotReady(t *testing.T) {
+	// Create an API with a queryable that returns tsdb.ErrNotReady.
+	api := newTestAPI(t, testhelpers.APIConfig{
+		Queryable: testhelpers.NewLazyLoader(testhelpers.NewTSDBNotReadyQueryable),
+	})
+
+	t.Run("GET /api/v1/query returns 500 when TSDB not ready", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/query", "query", "up").
+			RequireStatusCode(500).
+			ValidateOpenAPI().
+			RequireError()
+	})
+
+	t.Run("POST /api/v1/query returns 500 when TSDB not ready", func(t *testing.T) {
+		testhelpers.POST(t, api, "/api/v1/query", "query", "up").
+			RequireStatusCode(500).
+			ValidateOpenAPI().
+			RequireError()
+	})
+
+	t.Run("GET /api/v1/query_range returns 500 when TSDB not ready", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/query_range",
+			"query", "up",
+			"start", "0",
+			"end", "100",
+			"step", "10").
+			RequireStatusCode(500).
+			ValidateOpenAPI().
+			RequireError()
+	})
+
+	t.Run("GET /api/v1/series returns 500 when TSDB not ready", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/series",
+			"match[]", "up",
+			"start", "0",
+			"end", "100").
+			RequireStatusCode(500).
+			ValidateOpenAPI().
+			RequireError()
+	})
+
+	t.Run("GET /api/v1/labels returns 500 when TSDB not ready", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/labels").
+			RequireStatusCode(500).
+			ValidateOpenAPI().
+			RequireError()
+	})
+
+	t.Run("GET /api/v1/label/{name}/values returns 500 when TSDB not ready", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/label/__name__/values").
+			RequireStatusCode(500).
+			ValidateOpenAPI().
+			RequireError()
+	})
+}
+
+// TestAPIWithNativeHistograms tests the API with native histogram data.
+// The series come from testhelpers.FixtureHistogramSeries.
+func TestAPIWithNativeHistograms(t *testing.T) {
+	// Create an API with histogram series data.
+	api := newTestAPI(t, testhelpers.APIConfig{
+		Queryable: testhelpers.NewLazyLoader(func() storage.SampleAndChunkQueryable {
+			return testhelpers.NewQueryableWithSeries(testhelpers.FixtureHistogramSeries())
+		}),
+	})
+
+	t.Run("GET /api/v1/query returns vector with native histogram", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/query", "query", "test_histogram").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "vector").
+			RequireLenAtLeast("$.data.result", 1).
+			RequireSome("$.data.result", func(item any) bool {
+				sample, ok := item.(map[string]any)
+				if !ok {
+					return false
+				}
+				// Check that the sample has a histogram field (not a value field).
+				_, hasHistogram := sample["histogram"]
+				return hasHistogram
+			})
+	})
+
+	t.Run("POST /api/v1/query returns vector with native histogram", func(t *testing.T) {
+		testhelpers.POST(t, api, "/api/v1/query", "query", "test_histogram").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "vector").
+			RequireLenAtLeast("$.data.result", 1).
+			RequireSome("$.data.result", func(item any) bool {
+				sample, ok := item.(map[string]any)
+				if !ok {
+					return false
+				}
+				// Check that the sample has a histogram field (not a value field).
+				_, hasHistogram := sample["histogram"]
+				return hasHistogram
+			})
+	})
+
+	t.Run("GET /api/v1/query_range returns matrix with native histogram", func(t *testing.T) {
+		// Use relative timestamps to match our fixtures.
+		now := time.Now().Unix()
+		testhelpers.GET(t, api, "/api/v1/query_range",
+			"query", "test_histogram",
+			"start", strconv.FormatInt(now-120, 10),
+			"end", strconv.FormatInt(now, 10),
+			"step", "60").
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "matrix")
+	})
+
+	t.Run("GET /api/v1/query with histogram selector", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/query", "query", `test_histogram{job="prometheus"}`).
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireEquals("$.data.resultType", "vector").
+			RequireLenAtLeast("$.data.result", 1)
+	})
+
+	t.Run("GET /api/v1/series returns histogram metric series", func(t *testing.T) {
+		testhelpers.GET(t, api, "/api/v1/series",
+			"match[]", "test_histogram",
+			"start", "0",
+			"end", strconv.FormatInt(time.Now().Unix(), 10)).
+			RequireSuccess().
+			ValidateOpenAPI().
+			RequireJSONArray("$.data").
+			RequireLenAtLeast("$.data", 1)
+	})
+}
diff --git a/web/api/v1/errors_test.go b/web/api/v1/errors_test.go index 6e55089e16..850bedef17 100644 --- a/web/api/v1/errors_test.go +++ b/web/api/v1/errors_test.go @@ -169,6 +169,7 @@ func createPrometheusAPI(t *testing.T, q storage.SampleAndChunkQueryable, overri false, overrideErrorCode, nil, + OpenAPIOptions{}, ) promRouter := route.New().WithPrefix("/api/v1")
diff --git a/web/api/v1/openapi.go b/web/api/v1/openapi.go
new file mode 100644
index 0000000000..59fa8969ef
--- /dev/null
+++ b/web/api/v1/openapi.go
@@ -0,0 +1,320 @@
+// Copyright The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file implements OpenAPI 3.1 and 3.2 specification generation for the Prometheus HTTP API.
+// It provides dynamic spec building with optional path filtering.
+package v1
+
+import (
+	"log/slog"
+	"net/http"
+	"net/url"
+	"path"
+	"strings"
+	"sync"
+
+	"github.com/pb33f/libopenapi/datamodel/high/base"
+	v3 "github.com/pb33f/libopenapi/datamodel/high/v3"
+	"github.com/pb33f/libopenapi/orderedmap"
+)
+
+const (
+	// OpenAPI 3.1.0 is the default version with broader compatibility.
+	openAPIVersion31 = "3.1.0"
+	// OpenAPI 3.2.0 supports advanced features like itemSchema for SSE streams.
+	openAPIVersion32 = "3.2.0"
+)
+
+// OpenAPIOptions configures the OpenAPI spec builder.
+type OpenAPIOptions struct {
+	// IncludePaths filters which paths to include in the spec.
+	// If empty, all paths are included.
+	// Paths are matched by prefix (e.g., "/query" matches "/query" and "/query_range").
+	IncludePaths []string
+
+	// ExternalURL is the external URL of the Prometheus server (e.g., "http://prometheus.example.com:9090").
+	ExternalURL string
+
+	// Version is the API version to include in the OpenAPI spec.
+	// If empty, defaults to "0.0.1-undefined".
+	Version string
+}
+
+// OpenAPIBuilder builds and caches OpenAPI specifications.
+type OpenAPIBuilder struct {
+	// mu guards the cached specs.
+	mu           sync.RWMutex
+	cachedYAML31 []byte // Cached OpenAPI 3.1 spec.
+	cachedYAML32 []byte // Cached OpenAPI 3.2 spec.
+	options      OpenAPIOptions
+	logger       *slog.Logger
+}
+
+// NewOpenAPIBuilder creates a new OpenAPI builder with the given options.
+// Both spec versions are rendered eagerly, so serving them later is only a
+// cache read.
+func NewOpenAPIBuilder(opts OpenAPIOptions, logger *slog.Logger) *OpenAPIBuilder {
+	b := &OpenAPIBuilder{
+		options: opts,
+		logger:  logger,
+	}
+
+	b.rebuild()
+	return b
+}
+
+// rebuild constructs the OpenAPI specs for both 3.1 and 3.2 versions based on current options.
+// The two versions are rendered independently: a failure to render one is
+// logged and leaves its cache untouched, but does not stop the other from
+// being built.
+func (b *OpenAPIBuilder) rebuild() {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	// Build OpenAPI 3.1 spec.
+	if yamlBytes31, err := b.buildDocument(openAPIVersion31).Render(); err != nil {
+		b.logger.Error("failed to render OpenAPI 3.1 spec - this is a bug, please report it", "err", err)
+	} else {
+		b.cachedYAML31 = yamlBytes31
+	}
+
+	// Build OpenAPI 3.2 spec.
+	if yamlBytes32, err := b.buildDocument(openAPIVersion32).Render(); err != nil {
+		b.logger.Error("failed to render OpenAPI 3.2 spec - this is a bug, please report it", "err", err)
+	} else {
+		b.cachedYAML32 = yamlBytes32
+	}
+}
+
+// ServeOpenAPI returns the OpenAPI specification as YAML.
+// By default, serves OpenAPI 3.1.0. Use ?openapi_version=3.2 for OpenAPI 3.2.0.
+// Unknown version values fall back to 3.1.0. When the requested spec could not
+// be rendered, the handler answers 500 instead of an empty 200 response.
+func (b *OpenAPIBuilder) ServeOpenAPI(w http.ResponseWriter, r *http.Request) {
+	// Parse query parameter to determine which version to serve.
+	requestedVersion := r.URL.Query().Get("openapi_version")
+
+	b.mu.RLock()
+	// Default to OpenAPI 3.1.0 for broader compatibility.
+	yamlData := b.cachedYAML31
+	if requestedVersion == "3.2" || requestedVersion == "3.2.0" {
+		yamlData = b.cachedYAML32
+	}
+	b.mu.RUnlock()
+
+	// An empty cache means rendering failed in rebuild (already logged there).
+	if len(yamlData) == 0 {
+		http.Error(w, "OpenAPI specification is unavailable", http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "application/yaml; charset=utf-8")
+	w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate")
+	w.WriteHeader(http.StatusOK)
+	// The status line has already been sent, so a write failure (typically a
+	// client that went away) can only be logged.
+	if _, err := w.Write(yamlData); err != nil {
+		b.logger.Debug("failed to write OpenAPI spec response", "err", err)
+	}
+}
+
+// WrapHandler returns the handler unchanged (no validation).
+func (*OpenAPIBuilder) WrapHandler(next http.HandlerFunc) http.HandlerFunc {
+	return next
+}
+
+// shouldIncludePath checks if a path should be included based on options.
+// With no IncludePaths configured every path is included; otherwise the path
+// must start with at least one configured entry.
+func (b *OpenAPIBuilder) shouldIncludePath(path string) bool {
+	if len(b.options.IncludePaths) == 0 {
+		return true
+	}
+	for _, include := range b.options.IncludePaths {
+		// A prefix match also covers an exact match.
+		if strings.HasPrefix(path, include) {
+			return true
+		}
+	}
+	return false
+}
+
+// shouldIncludePathForVersion checks if a path should be included for a specific OpenAPI version.
+func (b *OpenAPIBuilder) shouldIncludePathForVersion(path, version string) bool { + // First check IncludePaths filter. + if !b.shouldIncludePath(path) { + return false + } + + // OpenAPI 3.1 excludes paths that require 3.2 features. + // The /notifications/live endpoint uses itemSchema which is a 3.2-only feature. + if version == openAPIVersion31 && path == "/notifications/live" { + return false + } + + return true +} + +// buildDocument creates the OpenAPI document for the specified version using high-level structs. +func (b *OpenAPIBuilder) buildDocument(version string) *v3.Document { + return &v3.Document{ + Version: version, + Info: b.buildInfo(), + Servers: b.buildServers(), + Tags: b.buildTags(version), + Paths: b.buildPaths(version), + Components: b.buildComponents(), + } +} + +// buildInfo constructs the info section. +func (b *OpenAPIBuilder) buildInfo() *base.Info { + apiVersion := b.options.Version + if apiVersion == "" { + apiVersion = "0.0.1-undefined" + } + return &base.Info{ + Title: "Prometheus API", + Description: "Prometheus is an Open-Source monitoring system with a dimensional data model, flexible query language, efficient time series database and modern alerting approach.", + Version: apiVersion, + Contact: &base.Contact{ + Name: "Prometheus Community", + URL: "https://prometheus.io/community/", + }, + } +} + +// buildServers constructs the servers section. +func (b *OpenAPIBuilder) buildServers() []*v3.Server { + // ExternalURL is always set by computeExternalURL in main.go. + // It includes scheme, host, port, and optional path prefix (without trailing slash). + serverURL := "/api/v1" + if b.options.ExternalURL != "" { + baseURL, err := url.Parse(b.options.ExternalURL) + if err == nil { + // Use path.Join to properly append /api/v1 to the existing path. + // Then use ResolveReference to construct the full URL. 
+ baseURL.Path = path.Join(baseURL.Path, "/api/v1") + serverURL = baseURL.String() + } + } + return []*v3.Server{ + {URL: serverURL}, + } +} + +// buildTags constructs the global tags list. +// Tag summary is an OpenAPI 3.2 feature, excluded from 3.1. +// Tag description is supported in both 3.1 and 3.2. +func (*OpenAPIBuilder) buildTags(version string) []*base.Tag { + // Define tags with all metadata. + tagData := []struct { + name string + summary string + description string + }{ + {"query", "Query", "Query and evaluate PromQL expressions."}, + {"metadata", "Metadata", "Retrieve metric metadata such as type and unit."}, + {"labels", "Labels", "Query label names and values."}, + {"series", "Series", "Query and manage time series."}, + {"targets", "Targets", "Retrieve target and scrape pool information."}, + {"rules", "Rules", "Query recording and alerting rules."}, + {"alerts", "Alerts", "Query active alerts and alertmanager discovery."}, + {"status", "Status", "Retrieve server status and configuration."}, + {"admin", "Admin", "Administrative operations for TSDB management."}, + {"features", "Features", "Query enabled features."}, + {"remote", "Remote Storage", "Remote read and write endpoints."}, + {"otlp", "OTLP", "OpenTelemetry Protocol metrics ingestion."}, + {"notifications", "Notifications", "Server notifications and events."}, + } + + tags := make([]*base.Tag, 0, len(tagData)) + for _, td := range tagData { + tag := &base.Tag{ + Name: td.name, + Description: td.description, // Description is supported in both 3.1 and 3.2. + } + + // Summary is an OpenAPI 3.2 feature only. + if version == openAPIVersion32 { + tag.Summary = td.summary + } + + tags = append(tags, tag) + } + + return tags +} + +// buildPaths constructs all API path definitions. 
+func (b *OpenAPIBuilder) buildPaths(version string) *v3.Paths { + pathItems := orderedmap.New[string, *v3.PathItem]() + + allPaths := b.getAllPathDefinitions() + for pair := allPaths.First(); pair != nil; pair = pair.Next() { + if b.shouldIncludePathForVersion(pair.Key(), version) { + pathItems.Set(pair.Key(), pair.Value()) + } + } + + return &v3.Paths{PathItems: pathItems} +} + +// getAllPathDefinitions returns all path definitions. +func (b *OpenAPIBuilder) getAllPathDefinitions() *orderedmap.Map[string, *v3.PathItem] { + paths := orderedmap.New[string, *v3.PathItem]() + + // Query endpoints. + paths.Set("/query", b.queryPath()) + paths.Set("/query_range", b.queryRangePath()) + paths.Set("/query_exemplars", b.queryExemplarsPath()) + paths.Set("/format_query", b.formatQueryPath()) + paths.Set("/parse_query", b.parseQueryPath()) + + // Label endpoints. + paths.Set("/labels", b.labelsPath()) + paths.Set("/label/{name}/values", b.labelValuesPath()) + + // Series endpoints. + paths.Set("/series", b.seriesPath()) + + // Metadata endpoints. + paths.Set("/metadata", b.metadataPath()) + + // Target endpoints. + paths.Set("/scrape_pools", b.scrapePoolsPath()) + paths.Set("/targets", b.targetsPath()) + paths.Set("/targets/metadata", b.targetsMetadataPath()) + paths.Set("/targets/relabel_steps", b.targetsRelabelStepsPath()) + + // Rules and alerts endpoints. + paths.Set("/rules", b.rulesPath()) + paths.Set("/alerts", b.alertsPath()) + paths.Set("/alertmanagers", b.alertmanagersPath()) + + // Status endpoints. + paths.Set("/status/config", b.statusConfigPath()) + paths.Set("/status/runtimeinfo", b.statusRuntimeInfoPath()) + paths.Set("/status/buildinfo", b.statusBuildInfoPath()) + paths.Set("/status/flags", b.statusFlagsPath()) + paths.Set("/status/tsdb", b.statusTSDBPath()) + paths.Set("/status/tsdb/blocks", b.statusTSDBBlocksPath()) + paths.Set("/status/walreplay", b.statusWALReplayPath()) + + // Admin endpoints. 
+ paths.Set("/admin/tsdb/delete_series", b.adminDeleteSeriesPath()) + paths.Set("/admin/tsdb/clean_tombstones", b.adminCleanTombstonesPath()) + paths.Set("/admin/tsdb/snapshot", b.adminSnapshotPath()) + + // Remote endpoints. + paths.Set("/read", b.remoteReadPath()) + paths.Set("/write", b.remoteWritePath()) + paths.Set("/otlp/v1/metrics", b.otlpWritePath()) + + // Notifications endpoints. + paths.Set("/notifications", b.notificationsPath()) + paths.Set("/notifications/live", b.notificationsLivePath()) + + // Features endpoint. + paths.Set("/features", b.featuresPath()) + + return paths +} diff --git a/web/api/v1/openapi_coverage_test.go b/web/api/v1/openapi_coverage_test.go new file mode 100644 index 0000000000..103f82e08e --- /dev/null +++ b/web/api/v1/openapi_coverage_test.go @@ -0,0 +1,258 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + _ "embed" + "go/ast" + "go/parser" + "go/token" + "strconv" + "strings" + "testing" + + v3 "github.com/pb33f/libopenapi/datamodel/high/v3" + "github.com/prometheus/common/promslog" + "github.com/stretchr/testify/require" +) + +//go:embed api.go +var apiGoSource string + +// routeInfo represents a route extracted from the Register function. +type routeInfo struct { + method string + path string +} + +// extractRoutesFromRegister parses the api.go source and extracts all routes +// registered in the (*API) Register function using AST. 
+func extractRoutesFromRegister(t *testing.T, source string) []routeInfo { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "api.go", source, parser.ParseComments) + require.NoError(t, err, "failed to parse api.go") + + var registerFunc *ast.FuncDecl + + // Find the Register method on *API. + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok || fn.Body == nil { + return true + } + + if fn.Name.Name != "Register" { + return true + } + + // Ensure it's a method on *API. + if fn.Recv == nil || len(fn.Recv.List) != 1 { + return true + } + + star, ok := fn.Recv.List[0].Type.(*ast.StarExpr) + if !ok { + return true + } + + ident, ok := star.X.(*ast.Ident) + if !ok || ident.Name != "API" { + return true + } + + registerFunc = fn + return false // Stop walking once found. + }) + + require.NotNil(t, registerFunc, "Register method not found") + + var routes []routeInfo + + // Extract all r.Get, r.Post, r.Put, r.Delete, r.Options calls. + ast.Inspect(registerFunc.Body, func(n ast.Node) bool { + call, ok := n.(*ast.CallExpr) + if !ok { + return true + } + + sel, ok := call.Fun.(*ast.SelectorExpr) + if !ok { + return true + } + + // Check if it's a router method call. + method := sel.Sel.Name + if method != "Get" && method != "Post" && method != "Put" && method != "Delete" && method != "Del" && method != "Options" { + return true + } + + // Ensure the receiver is 'r'. + if x, ok := sel.X.(*ast.Ident); !ok || x.Name != "r" { + return true + } + + if len(call.Args) == 0 { + return true + } + + // Extract the path from the first argument. + lit, ok := call.Args[0].(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + return true + } + + path, err := strconv.Unquote(lit.Value) + if err != nil { + return true + } + + // Normalize Del to DELETE. 
+ if method == "Del" { + method = "Delete" + } + + routes = append(routes, routeInfo{ + method: strings.ToUpper(method), + path: path, + }) + return true + }) + + return routes +} + +// normalizePathForOpenAPI converts route paths with colon parameters to OpenAPI format. +// e.g., "/label/:name/values" -> "/label/{name}/values". +func normalizePathForOpenAPI(path string) string { + // Replace :param with {param}. + parts := strings.Split(path, "/") + for i, part := range parts { + if trimmed, ok := strings.CutPrefix(part, ":"); ok { + parts[i] = "{" + trimmed + "}" + } + } + return strings.Join(parts, "/") +} + +// TestOpenAPICoverage verifies that all routes registered in the Register function +// are documented in the OpenAPI specification. +func TestOpenAPICoverage(t *testing.T) { + // Extract routes from api.go using AST. + routes := extractRoutesFromRegister(t, apiGoSource) + require.NotEmpty(t, routes, "no routes found in Register function") + + // Build OpenAPI spec. + builder := NewOpenAPIBuilder(OpenAPIOptions{}, promslog.NewNopLogger()) + allPaths := builder.getAllPathDefinitions() + + // Create a map of OpenAPI paths for quick lookup. + // Key is the normalized path, value is the PathItem. + openAPIPaths := make(map[string]bool) + for pair := allPaths.First(); pair != nil; pair = pair.Next() { + pathItem := pair.Value() + path := pair.Key() + + // Track which methods are defined for this path. + if pathItem.Get != nil { + openAPIPaths[path+":GET"] = true + } + if pathItem.Post != nil { + openAPIPaths[path+":POST"] = true + } + if pathItem.Put != nil { + openAPIPaths[path+":PUT"] = true + } + if pathItem.Delete != nil { + openAPIPaths[path+":DELETE"] = true + } + if pathItem.Options != nil { + openAPIPaths[path+":OPTIONS"] = true + } + } + + // Check coverage for each route. + var missingRoutes []string + ignoredRoutes := map[string]bool{ + "/*path:OPTIONS": true, // Wildcard OPTIONS handler. + "/openapi.yaml:GET": true, // Self-referential endpoint. 
+ "/notifications/live:GET": true, // SSE endpoint (version-specific). + } + + for _, route := range routes { + normalizedPath := normalizePathForOpenAPI(route.path) + key := normalizedPath + ":" + route.method + + // Skip ignored routes. + if ignoredRoutes[key] { + continue + } + + if !openAPIPaths[key] { + missingRoutes = append(missingRoutes, key) + } + } + + if len(missingRoutes) > 0 { + t.Errorf("The following routes are registered but not documented in OpenAPI spec:\n%s", + strings.Join(missingRoutes, "\n")) + } +} + +// TestOpenAPIHasNoExtraRoutes verifies that the OpenAPI spec doesn't document +// routes that aren't actually registered. +func TestOpenAPIHasNoExtraRoutes(t *testing.T) { + // Extract routes from api.go using AST. + routes := extractRoutesFromRegister(t, apiGoSource) + require.NotEmpty(t, routes, "no routes found in Register function") + + // Create a map of registered routes. + registeredRoutes := make(map[string]bool) + for _, route := range routes { + normalizedPath := normalizePathForOpenAPI(route.path) + key := normalizedPath + ":" + route.method + registeredRoutes[key] = true + } + + // Build OpenAPI spec. + builder := NewOpenAPIBuilder(OpenAPIOptions{}, promslog.NewNopLogger()) + allPaths := builder.getAllPathDefinitions() + + // Check if any OpenAPI paths are not registered. 
+ var extraRoutes []string + + for pair := allPaths.First(); pair != nil; pair = pair.Next() { + pathItem := pair.Value() + path := pair.Key() + + checkMethod := func(method string, op *v3.Operation) { + if op != nil { + key := path + ":" + method + if !registeredRoutes[key] { + extraRoutes = append(extraRoutes, key) + } + } + } + + checkMethod("GET", pathItem.Get) + checkMethod("POST", pathItem.Post) + checkMethod("PUT", pathItem.Put) + checkMethod("DELETE", pathItem.Delete) + checkMethod("OPTIONS", pathItem.Options) + } + + if len(extraRoutes) > 0 { + t.Errorf("The following routes are documented in OpenAPI but not registered:\n%s", + strings.Join(extraRoutes, "\n")) + } +} diff --git a/web/api/v1/openapi_examples.go b/web/api/v1/openapi_examples.go new file mode 100644 index 0000000000..50e155b184 --- /dev/null +++ b/web/api/v1/openapi_examples.go @@ -0,0 +1,1013 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains example request bodies and response data for OpenAPI documentation. +// Examples are included in the generated spec to provide realistic usage scenarios for API consumers. +package v1 + +import ( + "github.com/pb33f/libopenapi/datamodel/high/base" + "github.com/pb33f/libopenapi/orderedmap" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" +) + +// Example builders for request bodies. 
+ +func queryPostExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("simpleQuery", &base.Example{ + Summary: "Simple instant query", + Value: createYAMLNode(map[string]any{"query": "up"}), + }) + + examples.Set("queryWithTime", &base.Example{ + Summary: "Query with specific timestamp", + Value: createYAMLNode(map[string]any{ + "query": "up{job=\"prometheus\"}", + "time": "2026-01-02T13:37:00.000Z", + }), + }) + + examples.Set("queryWithLimit", &base.Example{ + Summary: "Query with limit and statistics", + Value: createYAMLNode(map[string]any{ + "query": "rate(prometheus_http_requests_total{handler=\"/api/v1/query\"}[5m])", + "limit": 100, + "stats": "all", + }), + }) + + return examples +} + +// queryRangePostExamples returns examples for POST /query_range endpoint. +func queryRangePostExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("basicRange", &base.Example{ + Summary: "Basic range query", + Value: createYAMLNode(map[string]any{ + "query": "up", + "start": "2026-01-02T12:37:00.000Z", + "end": "2026-01-02T13:37:00.000Z", + "step": "15s", + }), + }) + + examples.Set("rateQuery", &base.Example{ + Summary: "Rate calculation over time range", + Value: createYAMLNode(map[string]any{ + "query": "rate(prometheus_http_requests_total{handler=\"/api/v1/query\"}[5m])", + "start": "2026-01-02T12:37:00.000Z", + "end": "2026-01-02T13:37:00.000Z", + "step": "30s", + "timeout": "30s", + }), + }) + + return examples +} + +// queryExemplarsPostExamples returns examples for POST /query_exemplars endpoint. 
+func queryExemplarsPostExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("basicExemplar", &base.Example{ + Summary: "Query exemplars for a metric", + Value: createYAMLNode(map[string]any{"query": "prometheus_http_requests_total"}), + }) + + examples.Set("exemplarWithTimeRange", &base.Example{ + Summary: "Exemplars within specific time range", + Value: createYAMLNode(map[string]any{ + "query": "prometheus_http_requests_total{job=\"prometheus\"}", + "start": "2026-01-02T12:37:00.000Z", + "end": "2026-01-02T13:37:00.000Z", + }), + }) + + return examples +} + +// formatQueryPostExamples returns examples for POST /format_query endpoint. +func formatQueryPostExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("simpleFormat", &base.Example{ + Summary: "Format a simple query", + Value: createYAMLNode(map[string]any{"query": "up{job=\"prometheus\"}"}), + }) + + examples.Set("complexFormat", &base.Example{ + Summary: "Format a complex query", + Value: createYAMLNode(map[string]any{"query": "sum(rate(http_requests_total[5m])) by (job, status)"}), + }) + + return examples +} + +// parseQueryPostExamples returns examples for POST /parse_query endpoint. +func parseQueryPostExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("simpleParse", &base.Example{ + Summary: "Parse a simple query", + Value: createYAMLNode(map[string]any{"query": "up"}), + }) + + examples.Set("complexParse", &base.Example{ + Summary: "Parse a complex query", + Value: createYAMLNode(map[string]any{"query": "rate(http_requests_total{job=\"api\"}[5m])"}), + }) + + return examples +} + +// labelsPostExamples returns examples for POST /labels endpoint. 
+func labelsPostExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("allLabels", &base.Example{ + Summary: "Get all label names", + Value: createYAMLNode(map[string]any{}), + }) + + examples.Set("labelsWithTimeRange", &base.Example{ + Summary: "Get label names within time range", + Value: createYAMLNode(map[string]any{ + "start": "2026-01-02T12:37:00.000Z", + "end": "2026-01-02T13:37:00.000Z", + }), + }) + + examples.Set("labelsWithMatch", &base.Example{ + Summary: "Get label names matching series selector", + Value: createYAMLNode(map[string]any{ + "match[]": []string{"up", "process_start_time_seconds{job=\"prometheus\"}"}, + }), + }) + + return examples +} + +// seriesPostExamples returns examples for POST /series endpoint. +func seriesPostExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("seriesMatch", &base.Example{ + Summary: "Find series by label matchers", + Value: createYAMLNode(map[string]any{ + "match[]": []string{"up"}, + }), + }) + + examples.Set("seriesWithTimeRange", &base.Example{ + Summary: "Find series with time range", + Value: createYAMLNode(map[string]any{ + "match[]": []string{"up", "process_cpu_seconds_total{job=\"prometheus\"}"}, + "start": "2026-01-02T12:37:00.000Z", + "end": "2026-01-02T13:37:00.000Z", + }), + }) + + return examples +} + +// Example builders for response bodies. + +// queryResponseExamples returns examples for /query response. 
+func queryResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + vectorResult := promql.Vector{ + promql.Sample{ + Metric: labels.FromStrings("__name__", "up", "job", "prometheus", "instance", "demo.prometheus.io:9090"), + T: 1767436620000, + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings("__name__", "up", "env", "demo", "job", "alertmanager", "instance", "demo.prometheus.io:9093"), + T: 1767436620000, + F: 1, + }, + } + + examples.Set("vectorResult", &base.Example{ + Summary: "Instant vector query: up", + Value: vectorExample(vectorResult), + }) + + examples.Set("scalarResult", &base.Example{ + Summary: "Scalar query: scalar(42)", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "resultType": "scalar", + "result": []any{1767436620, "42"}, + }, + }), + }) + + matrixResult := promql.Matrix{ + promql.Series{ + Metric: labels.FromStrings("__name__", "up", "job", "prometheus", "instance", "demo.prometheus.io:9090"), + Floats: []promql.FPoint{ + {T: 1767436320000, F: 1}, + {T: 1767436620000, F: 1}, + }, + }, + } + + examples.Set("matrixResult", &base.Example{ + Summary: "Range vector query: up[5m]", + Value: matrixExample(matrixResult), + }) + + // TODO: Add native histogram example. + + return examples +} + +// queryRangeResponseExamples returns examples for /query_range response. 
+func queryRangeResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + matrixResult := promql.Matrix{ + promql.Series{ + Metric: labels.FromStrings("__name__", "up", "job", "prometheus", "instance", "demo.prometheus.io:9090"), + Floats: []promql.FPoint{ + {T: 1767433020000, F: 1}, + {T: 1767434820000, F: 1}, + {T: 1767436620000, F: 1}, + }, + }, + } + + examples.Set("matrixResult", &base.Example{ + Summary: "Range query: rate(prometheus_http_requests_total[5m])", + Value: matrixExample(matrixResult), + }) + + // TODO: Add native histogram example. + + return examples +} + +// labelsResponseExamples returns examples for /labels response. +func labelsResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("labelNames", &base.Example{ + Summary: "List of label names", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": []string{ + "__name__", "active", "address", "alertmanager", "alertname", "alertstate", + "backend", "branch", "code", "collector", "component", "device", + "env", "endpoint", "fstype", "handler", "instance", "job", + "le", "method", "mode", "name", + }, + }), + }) + + return examples +} + +// seriesResponseExamples returns examples for /series response. 
+func seriesResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("seriesList", &base.Example{ + Summary: "List of series matching the selector", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": []map[string]string{ + { + "__name__": "up", + "env": "demo", + "instance": "demo.prometheus.io:8080", + "job": "cadvisor", + }, + { + "__name__": "up", + "env": "demo", + "instance": "demo.prometheus.io:9093", + "job": "alertmanager", + }, + { + "__name__": "up", + "env": "demo", + "instance": "demo.prometheus.io:9100", + "job": "node", + }, + { + "__name__": "up", + "instance": "demo.prometheus.io:3000", + "job": "grafana", + }, + { + "__name__": "up", + "instance": "demo.prometheus.io:8996", + "job": "random", + }, + }, + }), + }) + + return examples +} + +// targetsResponseExamples returns examples for /targets response. +func targetsResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("targetsList", &base.Example{ + Summary: "Active and dropped targets", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "activeTargets": []map[string]any{ + { + "discoveredLabels": map[string]string{ + "__address__": "demo.prometheus.io:9093", + "__meta_filepath": "/etc/prometheus/file_sd/alertmanager.yml", + "__metrics_path__": "/metrics", + "__scheme__": "http", + "env": "demo", + "job": "alertmanager", + }, + "labels": map[string]string{ + "env": "demo", + "instance": "demo.prometheus.io:9093", + "job": "alertmanager", + }, + "scrapePool": "alertmanager", + "scrapeUrl": "http://demo.prometheus.io:9093/metrics", + "globalUrl": "http://demo.prometheus.io:9093/metrics", + "lastError": "", + "lastScrape": "2026-01-02T13:36:40.200Z", + "lastScrapeDuration": 0.006576866, + "health": "up", + "scrapeInterval": "15s", + "scrapeTimeout": "10s", + }, + }, + "droppedTargets": 
[]map[string]any{}, + "droppedTargetCounts": map[string]int{ + "alertmanager": 0, + "blackbox": 0, + "caddy": 0, + "cadvisor": 0, + "grafana": 0, + "node": 0, + "prometheus": 0, + "random": 0, + }, + }, + }), + }) + + return examples +} + +// rulesResponseExamples returns examples for /rules response. +func rulesResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("ruleGroups", &base.Example{ + Summary: "Alerting and recording rules", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "groups": []map[string]any{ + { + "name": "ansible managed alert rules", + "file": "/etc/prometheus/rules/ansible_managed.yml", + "interval": 15, + "limit": 0, + "rules": []map[string]any{ + { + "state": "firing", + "name": "Watchdog", + "query": "vector(1)", + "duration": 600, + "keepFiringFor": 0, + "labels": map[string]string{"severity": "warning"}, + "annotations": map[string]string{"description": "This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the \"DeadMansSnitch\" integration in PagerDuty.", "summary": "Ensure entire alerting pipeline is functional"}, + "health": "ok", + "evaluationTime": 0.000356688, + "lastEvaluation": "2026-01-02T13:36:56.874Z", + "type": "alerting", + }, + }, + "evaluationTime": 0.000561635, + "lastEvaluation": "2026-01-02T13:36:56.874Z", + }, + }, + }, + }), + }) + + return examples +} + +// alertsResponseExamples returns examples for /alerts response. 
+func alertsResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("activeAlerts", &base.Example{ + Summary: "Currently active alerts", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "alerts": []map[string]any{ + { + "labels": map[string]string{ + "alertname": "Watchdog", + "severity": "warning", + }, + "annotations": map[string]string{ + "description": "This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the \"DeadMansSnitch\" integration in PagerDuty.", + "summary": "Ensure entire alerting pipeline is functional", + }, + "state": "firing", + "activeAt": "2026-01-02T13:30:00.000Z", + "value": "1e+00", + }, + }, + }, + }), + }) + + return examples +} + +// queryExemplarsResponseExamples returns examples for /query_exemplars response. +func queryExemplarsResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("exemplarsResult", &base.Example{ + Summary: "Exemplars for a metric with trace IDs", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": []map[string]any{ + { + "seriesLabels": map[string]string{ + "__name__": "http_requests_total", + "job": "api-server", + "method": "GET", + }, + "exemplars": []map[string]any{ + { + "labels": map[string]string{ + "traceID": "abc123def456", + }, + "value": "1.5", + "timestamp": 1689956451.781, + }, + }, + }, + }, + }), + }) + + return examples +} + +// formatQueryResponseExamples returns examples for /format_query response. 
+func formatQueryResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("formattedQuery", &base.Example{ + Summary: "Formatted PromQL query", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": "sum by(job, status) (rate(http_requests_total[5m]))", + }), + }) + + return examples +} + +// parseQueryResponseExamples returns examples for /parse_query response. +func parseQueryResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("parsedQuery", &base.Example{ + Summary: "Parsed PromQL expression tree", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "resultType": "vector", + }, + }), + }) + + return examples +} + +// labelValuesResponseExamples returns examples for /label/{name}/values response. +func labelValuesResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("labelValues", &base.Example{ + Summary: "List of values for a label", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": []string{"alertmanager", "blackbox", "caddy", "cadvisor", "grafana", "node", "prometheus", "random"}, + }), + }) + + return examples +} + +// metadataResponseExamples returns examples for /metadata response. 
+func metadataResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("metricMetadata", &base.Example{ + Summary: "Metadata for metrics", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string][]map[string]any{ + "prometheus_rule_group_iterations_missed_total": { + { + "type": "counter", + "help": "The total number of rule group evaluations missed due to slow rule group evaluation.", + "unit": "", + }, + }, + "prometheus_sd_updates_total": { + { + "type": "counter", + "help": "Total number of update events sent to the SD consumers.", + "unit": "", + }, + }, + "go_gc_stack_starting_size_bytes": { + { + "type": "gauge", + "help": "The stack size of new goroutines. Sourced from /gc/stack/starting-size:bytes.", + "unit": "", + }, + }, + }, + }), + }) + + return examples +} + +// scrapePoolsResponseExamples returns examples for /scrape_pools response. +func scrapePoolsResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("scrapePoolsList", &base.Example{ + Summary: "List of scrape pool names", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "scrapePools": []string{"alertmanager", "blackbox", "caddy", "cadvisor", "grafana", "node", "prometheus", "random"}, + }, + }), + }) + + return examples +} + +// targetsMetadataResponseExamples returns examples for /targets/metadata response. 
+func targetsMetadataResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("targetMetadata", &base.Example{ + Summary: "Metadata for targets", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": []map[string]any{ + { + "target": map[string]string{ + "instance": "localhost:9090", + "job": "prometheus", + }, + "type": "gauge", + "help": "The current health status of the target", + "unit": "", + "metric": "up", + }, + }, + }), + }) + + return examples +} + +// targetsRelabelStepsResponseExamples returns examples for /targets/relabel_steps response. +func targetsRelabelStepsResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("relabelSteps", &base.Example{ + Summary: "Relabel steps for a target", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "steps": []map[string]any{ + { + "rule": map[string]any{ + "source_labels": []string{"__address__"}, + "target_label": "instance", + "action": "replace", + "regex": "(.*)", + "replacement": "$1", + }, + "output": map[string]string{ + "__address__": "localhost:9090", + "instance": "localhost:9090", + "job": "prometheus", + }, + "keep": true, + }, + }, + }, + }), + }) + + return examples +} + +// alertmanagersResponseExamples returns examples for /alertmanagers response. 
+func alertmanagersResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("alertmanagerDiscovery", &base.Example{ + Summary: "Alertmanager discovery results", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "activeAlertmanagers": []map[string]any{ + { + "url": "http://demo.prometheus.io:9093/api/v2/alerts", + }, + }, + "droppedAlertmanagers": []map[string]any{}, + }, + }), + }) + + return examples +} + +// statusConfigResponseExamples returns examples for /status/config response. +func statusConfigResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("configYAML", &base.Example{ + Summary: "Prometheus configuration", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "yaml": "global:\n scrape_interval: 15s\n scrape_timeout: 10s\n evaluation_interval: 15s\n external_labels:\n environment: demo-prometheus-io\nalerting:\n alertmanagers:\n - scheme: http\n static_configs:\n - targets:\n - demo.prometheus.io:9093\nrule_files:\n- /etc/prometheus/rules/*.yml\n", + }, + }), + }) + + return examples +} + +// statusRuntimeInfoResponseExamples returns examples for /status/runtimeinfo response. 
+func statusRuntimeInfoResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("runtimeInfo", &base.Example{ + Summary: "Runtime information", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "startTime": "2026-01-01T13:37:00.000Z", + "CWD": "/", + "hostname": "demo-prometheus-io", + "serverTime": "2026-01-02T13:37:00.000Z", + "reloadConfigSuccess": true, + "lastConfigTime": "2026-01-01T13:37:00.000Z", + "corruptionCount": 0, + "goroutineCount": 88, + "GOMAXPROCS": 2, + "GOMEMLIMIT": int64(3703818240), + "GOGC": "75", + "GODEBUG": "", + "storageRetention": "31d", + }, + }), + }) + + return examples +} + +// statusBuildInfoResponseExamples returns examples for /status/buildinfo response. +func statusBuildInfoResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("buildInfo", &base.Example{ + Summary: "Build information", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "version": "3.7.3", + "revision": "0a41f0000705c69ab8e0f9a723fc73e39ed62b07", + "branch": "HEAD", + "buildUser": "root@08c890a84441", + "buildDate": "20251030-07:26:10", + "goVersion": "go1.25.3", + }, + }), + }) + + return examples +} + +// statusFlagsResponseExamples returns examples for /status/flags response. 
+func statusFlagsResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("flags", &base.Example{ + Summary: "Command-line flags", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]string{ + "agent": "false", + "alertmanager.notification-queue-capacity": "10000", + "config.file": "/etc/prometheus/prometheus.yml", + "enable-feature": "exemplar-storage,native-histograms", + "query.max-concurrency": "20", + "query.timeout": "2m", + "storage.tsdb.path": "/prometheus", + "storage.tsdb.retention.time": "15d", + "web.console.libraries": "/usr/share/prometheus/console_libraries", + "web.console.templates": "/usr/share/prometheus/consoles", + "web.enable-admin-api": "true", + "web.enable-lifecycle": "true", + "web.listen-address": "0.0.0.0:9090", + "web.page-title": "Prometheus Time Series Collection and Processing Server", + }, + }), + }) + + return examples +} + +// statusTSDBResponseExamples returns examples for /status/tsdb response. 
+func statusTSDBResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("tsdbStats", &base.Example{ + Summary: "TSDB statistics", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "headStats": map[string]any{ + "numSeries": 9925, + "numLabelPairs": 2512, + "chunkCount": 37525, + "minTime": int64(1767362400712), + "maxTime": int64(1767436620000), + }, + "seriesCountByMetricName": []map[string]any{ + { + "name": "up", + "value": 100, + }, + { + "name": "http_requests_total", + "value": 500, + }, + }, + "labelValueCountByLabelName": []map[string]any{ + { + "name": "__name__", + "value": 5, + }, + { + "name": "job", + "value": 3, + }, + }, + "memoryInBytesByLabelName": []map[string]any{ + { + "name": "__name__", + "value": 1024, + }, + { + "name": "job", + "value": 512, + }, + }, + "seriesCountByLabelValuePair": []map[string]any{ + { + "name": "job=prometheus", + "value": 100, + }, + { + "name": "instance=localhost:9090", + "value": 100, + }, + }, + }, + }), + }) + + return examples +} + +// statusTSDBBlocksResponseExamples returns examples for /status/tsdb/blocks response. 
+func statusTSDBBlocksResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("tsdbBlocks", &base.Example{ + Summary: "TSDB block information", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "blocks": []map[string]any{ + { + "ulid": "01KC4D6GXQA4CRHYKV78NEBVAE", + "minTime": int64(1764568801099), + "maxTime": int64(1764763200000), + "stats": map[string]any{ + "numSamples": 129505582, + "numSeries": 10661, + "numChunks": 1073962, + }, + "compaction": map[string]any{ + "level": 4, + "sources": []string{ + "01KBCJ7TR8A4QAJ3AA1J651P5S", + "01KBCS3J0E34567YPB8Y5W0E24", + "01KBCZZ9KRTYGG3E7HVQFGC3S3", + }, + }, + "version": 1, + }, + }, + }, + }), + }) + + return examples +} + +// statusWALReplayResponseExamples returns examples for /status/walreplay response. +func statusWALReplayResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("walReplay", &base.Example{ + Summary: "WAL replay status", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "min": 3209, + "max": 3214, + "current": 3214, + }, + }), + }) + + return examples +} + +// deleteSeriesResponseExamples returns examples for /admin/tsdb/delete_series response. +func deleteSeriesResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("deletionSuccess", &base.Example{ + Summary: "Successful series deletion", + Value: createYAMLNode(map[string]any{ + "status": "success", + }), + }) + + return examples +} + +// cleanTombstonesResponseExamples returns examples for /admin/tsdb/clean_tombstones response. 
+func cleanTombstonesResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("tombstonesCleaned", &base.Example{ + Summary: "Tombstones cleaned successfully", + Value: createYAMLNode(map[string]any{ + "status": "success", + }), + }) + + return examples +} + +// seriesDeleteResponseExamples returns examples for DELETE /series response. +func seriesDeleteResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("seriesDeleted", &base.Example{ + Summary: "Series marked for deletion", + Value: createYAMLNode(map[string]any{ + "status": "success", + }), + }) + + return examples +} + +// snapshotResponseExamples returns examples for /admin/tsdb/snapshot response. +func snapshotResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("snapshotCreated", &base.Example{ + Summary: "Snapshot created successfully", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": map[string]any{ + "name": "20260102T133700Z-a1b2c3d4e5f67890", + }, + }), + }) + + return examples +} + +// notificationsResponseExamples returns examples for /notifications response. +func notificationsResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("notifications", &base.Example{ + Summary: "Server notifications", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": []map[string]any{ + { + "text": "Configuration reload has failed.", + "date": "2026-01-02T16:14:50.046Z", + "active": true, + }, + }, + }), + }) + + return examples +} + +// notificationLiveExamples provides example SSE messages for the live notifications endpoint. 
+func notificationLiveExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("activeNotification", &base.Example{ + Summary: "Active notification SSE message", + Description: "An SSE message containing an active server notification.", + Value: createYAMLNode(map[string]any{ + "data": "{\"text\":\"Configuration reload has failed.\",\"date\":\"2026-01-02T16:14:50.046Z\",\"active\":true}", + }), + }) + + return examples +} + +// featuresResponseExamples returns examples for /features response. +func featuresResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("enabledFeatures", &base.Example{ + Summary: "Enabled feature flags", + Value: createYAMLNode(map[string]any{ + "status": "success", + "data": []string{"exemplar-storage", "remote-write-receiver"}, + }), + }) + + return examples +} + +// errorResponseExamples returns examples for error responses. +func errorResponseExamples() *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + + examples.Set("tsdbNotReady", &base.Example{ + Summary: "TSDB not ready", + Value: createYAMLNode(map[string]any{ + "status": "error", + "errorType": "internal", + "error": "TSDB not ready", + }), + }) + + return examples +} diff --git a/web/api/v1/openapi_golden_test.go b/web/api/v1/openapi_golden_test.go new file mode 100644 index 0000000000..6207fda81b --- /dev/null +++ b/web/api/v1/openapi_golden_test.go @@ -0,0 +1,176 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + "flag" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v3" + + "github.com/prometheus/prometheus/web/api/testhelpers" +) + +var updateOpenAPISpec = flag.Bool("update-openapi-spec", false, "update openapi golden files with the current specs") + +// TestOpenAPIGolden_3_1 verifies that the OpenAPI 3.1 spec matches the golden file. +func TestOpenAPIGolden_3_1(t *testing.T) { + // Create an API instance to serve the OpenAPI spec. + api := newTestAPI(t, testhelpers.APIConfig{}) + + // Fetch the OpenAPI 3.1 spec from the API (default, no query param). + resp := testhelpers.GET(t, api, "/api/v1/openapi.yaml") + require.Equal(t, 200, resp.StatusCode, "expected HTTP 200 for OpenAPI spec endpoint") + require.NotEmpty(t, resp.Body, "OpenAPI spec should not be empty") + + goldenPath := filepath.Join("testdata", "openapi_3.1_golden.yaml") + + if *updateOpenAPISpec { + // Update mode: write the current spec to the golden file. + t.Logf("Updating golden file: %s", goldenPath) + + // Ensure the testdata directory exists. + err := os.MkdirAll(filepath.Dir(goldenPath), 0o755) + require.NoError(t, err, "failed to create testdata directory") + + // Write the golden file. + err = os.WriteFile(goldenPath, []byte(resp.Body), 0o644) + require.NoError(t, err, "failed to write golden file") + + t.Logf("Golden file updated successfully") + return + } + + // Comparison mode: verify the spec matches the golden file. 
+ goldenData, err := os.ReadFile(goldenPath) + require.NoError(t, err, "failed to read golden file (run with -update-openapi-spec to generate it)") + + require.Equal(t, string(goldenData), resp.Body, + "OpenAPI 3.1 spec does not match golden file. Run 'go test -update-openapi-spec' to update.") + + // Verify version field is 3.1.0. + var spec map[string]any + err = yaml.Unmarshal([]byte(resp.Body), &spec) + require.NoError(t, err) + require.Equal(t, "3.1.0", spec["openapi"], "OpenAPI version should be 3.1.0") + + // Verify /notifications/live is NOT present in 3.1 spec. + paths := spec["paths"].(map[string]any) + _, found := paths["/notifications/live"] + require.False(t, found, "/notifications/live should not be in OpenAPI 3.1 spec") +} + +// TestOpenAPIGolden_3_2 verifies that the OpenAPI 3.2 spec matches the golden file. +func TestOpenAPIGolden_3_2(t *testing.T) { + // Create an API instance to serve the OpenAPI spec. + api := newTestAPI(t, testhelpers.APIConfig{}) + + // Fetch the OpenAPI 3.2 spec from the API with query parameter. + resp := testhelpers.GET(t, api, "/api/v1/openapi.yaml?openapi_version=3.2") + require.Equal(t, 200, resp.StatusCode, "expected HTTP 200 for OpenAPI spec endpoint") + require.NotEmpty(t, resp.Body, "OpenAPI spec should not be empty") + + goldenPath := filepath.Join("testdata", "openapi_3.2_golden.yaml") + + if *updateOpenAPISpec { + // Update mode: write the current spec to the golden file. + t.Logf("Updating golden file: %s", goldenPath) + + // Ensure the testdata directory exists. + err := os.MkdirAll(filepath.Dir(goldenPath), 0o755) + require.NoError(t, err, "failed to create testdata directory") + + // Write the golden file. + err = os.WriteFile(goldenPath, []byte(resp.Body), 0o644) + require.NoError(t, err, "failed to write golden file") + + t.Logf("Golden file updated successfully") + return + } + + // Comparison mode: verify the spec matches the golden file. 
+ goldenData, err := os.ReadFile(goldenPath) + require.NoError(t, err, "failed to read golden file (run with -update-openapi-spec to generate it)") + + require.Equal(t, string(goldenData), resp.Body, + "OpenAPI 3.2 spec does not match golden file. Run 'go test -update-openapi-spec' to update.") + + // Verify version field is 3.2.0. + var spec map[string]any + err = yaml.Unmarshal([]byte(resp.Body), &spec) + require.NoError(t, err) + require.Equal(t, "3.2.0", spec["openapi"], "OpenAPI version should be 3.2.0") + + // Verify /notifications/live IS present in 3.2 spec. + paths := spec["paths"].(map[string]any) + _, found := paths["/notifications/live"] + require.True(t, found, "/notifications/live should be in OpenAPI 3.2 spec") +} + +// TestOpenAPIVersionSelection verifies version query parameter handling. +func TestOpenAPIVersionSelection(t *testing.T) { + api := newTestAPI(t, testhelpers.APIConfig{}) + + tests := []struct { + name string + url string + expectedVersion string + expectLivePath bool + }{ + { + name: "default to 3.1.0", + url: "/api/v1/openapi.yaml", + expectedVersion: "3.1.0", + expectLivePath: false, + }, + { + name: "explicit 3.1", + url: "/api/v1/openapi.yaml?openapi_version=3.1", + expectedVersion: "3.1.0", + expectLivePath: false, + }, + { + name: "explicit 3.2", + url: "/api/v1/openapi.yaml?openapi_version=3.2", + expectedVersion: "3.2.0", + expectLivePath: true, + }, + { + name: "invalid version defaults to 3.1.0", + url: "/api/v1/openapi.yaml?openapi_version=4.0", + expectedVersion: "3.1.0", + expectLivePath: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + resp := testhelpers.GET(t, api, tc.url) + require.Equal(t, 200, resp.StatusCode) + + var spec map[string]any + err := yaml.Unmarshal([]byte(resp.Body), &spec) + require.NoError(t, err) + + require.Equal(t, tc.expectedVersion, spec["openapi"]) + + paths := spec["paths"].(map[string]any) + _, found := paths["/notifications/live"] + require.Equal(t, 
tc.expectLivePath, found) + }) + } +} diff --git a/web/api/v1/openapi_helpers.go b/web/api/v1/openapi_helpers.go new file mode 100644 index 0000000000..76f6001693 --- /dev/null +++ b/web/api/v1/openapi_helpers.go @@ -0,0 +1,343 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + "time" + + jsoniter "github.com/json-iterator/go" + "github.com/pb33f/libopenapi/datamodel/high/base" + v3 "github.com/pb33f/libopenapi/datamodel/high/v3" + "github.com/pb33f/libopenapi/orderedmap" + yaml "go.yaml.in/yaml/v4" + + "github.com/prometheus/prometheus/promql" +) + +// Helper functions for building common structures. + +// exampleTime is a reference time used for timestamp examples. +var exampleTime = time.Date(2026, 1, 2, 13, 37, 0, 0, time.UTC) + +func boolPtr(b bool) *bool { + return &b +} + +func int64Ptr(i int64) *int64 { + return &i +} + +type example struct { + name string + value any +} + +// exampleMap creates an Examples map from the provided examples. 
+func exampleMap(exs []example) *orderedmap.Map[string, *base.Example] { + examples := orderedmap.New[string, *base.Example]() + for _, ex := range exs { + examples.Set(ex.name, &base.Example{ + Value: createYAMLNode(ex.value), + }) + } + return examples +} + +func schemaRef(ref string) *base.SchemaProxy { + return base.CreateSchemaProxyRef(ref) +} + +func schemaFromType(t string) *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{Type: []string{t}}) +} + +func stringSchema() *base.SchemaProxy { + return schemaFromType("string") +} + +func integerSchema() *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"integer"}, + Format: "int64", + }) +} + +func stringSchemaWithDescription(description string) *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"string"}, + Description: description, + }) +} + +func stringSchemaWithDescriptionAndExample(description string, example any) *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"string"}, + Description: description, + Example: createYAMLNode(example), + }) +} + +func integerSchemaWithDescription(description string) *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"integer"}, + Format: "int64", + Description: description, + }) +} + +func integerSchemaWithDescriptionAndExample(description string, example any) *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"integer"}, + Format: "int64", + Description: description, + Example: createYAMLNode(example), + }) +} + +func stringArraySchemaWithDescription(description string) *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + Description: description, + }) +} + +func stringArraySchemaWithDescriptionAndExample(description string, example any) *base.SchemaProxy { + return 
base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + Description: description, + Example: createYAMLNode(example), + }) +} + +func statusSchema() *base.SchemaProxy { + successNode := &yaml.Node{Kind: yaml.ScalarNode, Value: "success"} + errorNode := &yaml.Node{Kind: yaml.ScalarNode, Value: "error"} + exampleNode := &yaml.Node{Kind: yaml.ScalarNode, Value: "success"} + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"string"}, + Enum: []*yaml.Node{successNode, errorNode}, + Description: "Response status.", + Example: exampleNode, + }) +} + +func warningsSchema() *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + Description: "Only set if there were warnings while executing the request. There will still be data in the data field.", + }) +} + +func infosSchema() *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + Description: "Only set if there were info-level annotations while executing the request.", + }) +} + +func timestampSchema() *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + OneOf: []*base.SchemaProxy{ + base.CreateSchemaProxy(&base.Schema{ + Type: []string{"string"}, + Format: "date-time", + Description: "RFC3339 timestamp.", + }), + base.CreateSchemaProxy(&base.Schema{ + Type: []string{"number"}, + Format: "unixtime", + Description: "Unix timestamp in seconds.", + }), + }, + Description: "Timestamp in RFC3339 format or Unix timestamp in seconds.", + }) +} + +func stringSchemaWithConstValue(value string) *base.SchemaProxy { + node := &yaml.Node{Kind: yaml.ScalarNode, Value: value} + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"string"}, + Enum: []*yaml.Node{node}, + }) +} + +func 
dateTimeSchemaWithDescription(description string) *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"string"}, + Format: "date-time", + Description: description, + }) +} + +func numberSchemaWithDescription(description string) *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"number"}, + Format: "double", + Description: description, + }) +} + +func errorResponse() *v3.Response { + content := orderedmap.New[string, *v3.MediaType]() + content.Set("application/json", &v3.MediaType{ + Schema: schemaRef("#/components/schemas/Error"), + }) + return &v3.Response{ + Description: "Error", + Content: content, + } +} + +func noContentResponse() *v3.Response { + return &v3.Response{Description: "No Content"} +} + +func responsesNoContent() *v3.Responses { + codes := orderedmap.New[string, *v3.Response]() + codes.Set("204", noContentResponse()) + codes.Set("default", errorResponse()) + return &v3.Responses{Codes: codes} +} + +func pathParam(name, description string, schema *base.SchemaProxy) *v3.Parameter { + return &v3.Parameter{ + Name: name, + In: "path", + Description: description, + Required: boolPtr(true), + Schema: schema, + } +} + +// createYAMLNode converts Go data to yaml.Node for use in examples. +func createYAMLNode(data any) *yaml.Node { + node := &yaml.Node{} + bytes, _ := yaml.Marshal(data) + _ = yaml.Unmarshal(bytes, node) + return node +} + +// formRequestBodyWithExamples creates a form-encoded request body with examples. 
+func formRequestBodyWithExamples(schemaRef string, examples *orderedmap.Map[string, *base.Example], description string) *v3.RequestBody { + content := orderedmap.New[string, *v3.MediaType]() + mediaType := &v3.MediaType{ + Schema: base.CreateSchemaProxyRef("#/components/schemas/" + schemaRef), + } + if examples != nil { + mediaType.Examples = examples + } + content.Set("application/x-www-form-urlencoded", mediaType) + return &v3.RequestBody{ + Required: boolPtr(true), + Description: description, + Content: content, + } +} + +// jsonResponseWithExamples creates a JSON response with examples. +func jsonResponseWithExamples(schemaRef string, examples *orderedmap.Map[string, *base.Example], description string) *v3.Response { + content := orderedmap.New[string, *v3.MediaType]() + mediaType := &v3.MediaType{ + Schema: base.CreateSchemaProxyRef("#/components/schemas/" + schemaRef), + } + if examples != nil { + mediaType.Examples = examples + } + content.Set("application/json", mediaType) + return &v3.Response{ + Description: description, + Content: content, + } +} + +// responsesWithErrorExamples creates responses with both success and error examples. +func responsesWithErrorExamples(okSchemaRef string, successExamples, errorExamples *orderedmap.Map[string, *base.Example], successDescription, errorDescription string) *v3.Responses { + codes := orderedmap.New[string, *v3.Response]() + codes.Set("200", jsonResponseWithExamples(okSchemaRef, successExamples, successDescription)) + codes.Set("default", jsonResponseWithExamples("Error", errorExamples, errorDescription)) + return &v3.Responses{Codes: codes} +} + +// timestampExamples returns examples for timestamp parameters (RFC3339 and epoch). +func timestampExamples(t time.Time) []example { + return []example{ + {"RFC3339", t.Format(time.RFC3339Nano)}, + {"epoch", t.Unix()}, + } +} + +// queryParamWithExample creates a query parameter with examples. 
+func queryParamWithExample(name, description string, required bool, schema *base.SchemaProxy, examples []example) *v3.Parameter { + param := &v3.Parameter{ + Name: name, + In: "query", + Description: description, + Required: &required, + Explode: boolPtr(false), + Schema: schema, + } + if len(examples) > 0 { + param.Examples = exampleMap(examples) + } + return param +} + +// marshalToYAMLNode marshals a value using jsoniter (production marshaling) and converts to yaml.Node. +// The result is an inline JSON representation that preserves integer types for timestamps. +func marshalToYAMLNode(v any) *yaml.Node { + jsonAPI := jsoniter.ConfigCompatibleWithStandardLibrary + jsonBytes, err := jsonAPI.Marshal(v) + if err != nil { + panic(err) + } + node := &yaml.Node{} + if err := yaml.Unmarshal(jsonBytes, node); err != nil { + panic(err) + } + return node +} + +// vectorExample creates an example for a vector query response using production marshaling. +func vectorExample(v promql.Vector) *yaml.Node { + type response struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result promql.Vector `json:"result"` + } `json:"data"` + } + resp := response{Status: "success"} + resp.Data.ResultType = "vector" + resp.Data.Result = v + return marshalToYAMLNode(resp) +} + +// matrixExample creates an example for a matrix query response using production marshaling. 
+func matrixExample(m promql.Matrix) *yaml.Node { + type response struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result promql.Matrix `json:"result"` + } `json:"data"` + } + resp := response{Status: "success"} + resp.Data.ResultType = "matrix" + resp.Data.Result = m + return marshalToYAMLNode(resp) +} diff --git a/web/api/v1/openapi_paths.go b/web/api/v1/openapi_paths.go new file mode 100644 index 0000000000..2f5ab592f7 --- /dev/null +++ b/web/api/v1/openapi_paths.go @@ -0,0 +1,626 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file defines all API path specifications including parameters, request bodies, +// and response schemas. Each path definition corresponds to an endpoint registered in api.go. +package v1 + +import ( + "time" + + "github.com/pb33f/libopenapi/datamodel/high/base" + v3 "github.com/pb33f/libopenapi/datamodel/high/v3" + "github.com/pb33f/libopenapi/orderedmap" +) + +// Path definition methods for API endpoints. 
+ +func (*OpenAPIBuilder) queryPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("limit", "The maximum number of metrics to return.", false, integerSchema(), []example{{"example", 100}}), + queryParamWithExample("time", "The evaluation timestamp (optional, defaults to current time).", false, timestampSchema(), timestampExamples(exampleTime)), + queryParamWithExample("query", "The PromQL query to execute.", true, stringSchema(), []example{{"example", "up"}}), + queryParamWithExample("timeout", "Evaluation timeout. Optional. Defaults to and is capped by the value of the -query.timeout flag.", false, stringSchema(), []example{{"example", "30s"}}), + queryParamWithExample("lookback_delta", "Override the lookback period for this query. Optional.", false, stringSchema(), []example{{"example", "5m"}}), + queryParamWithExample("stats", "When provided, include query statistics in the response. The special value 'all' enables more comprehensive statistics.", false, stringSchema(), []example{{"example", "all"}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "query", + Summary: "Evaluate an instant query", + Tags: []string{"query"}, + Parameters: params, + Responses: responsesWithErrorExamples("QueryOutputBody", queryResponseExamples(), errorResponseExamples(), "Query executed successfully.", "Error executing query."), + }, + Post: &v3.Operation{ + OperationId: "query-post", + Summary: "Evaluate an instant query", + Tags: []string{"query"}, + RequestBody: formRequestBodyWithExamples("QueryPostInputBody", queryPostExamples(), "Submit an instant query. 
This endpoint accepts the same parameters as the GET version."), + Responses: responsesWithErrorExamples("QueryOutputBody", queryResponseExamples(), errorResponseExamples(), "Instant query executed successfully.", "Error executing instant query."), + }, + } +} + +func (*OpenAPIBuilder) queryRangePath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("limit", "The maximum number of metrics to return.", false, integerSchema(), []example{{"example", 100}}), + queryParamWithExample("start", "The start time of the query.", true, timestampSchema(), timestampExamples(exampleTime.Add(-1*time.Hour))), + queryParamWithExample("end", "The end time of the query.", true, timestampSchema(), timestampExamples(exampleTime)), + queryParamWithExample("step", "The step size of the query.", true, stringSchema(), []example{{"example", "15s"}}), + queryParamWithExample("query", "The query to execute.", true, stringSchema(), []example{{"example", "rate(prometheus_http_requests_total{handler=\"/api/v1/query\"}[5m])"}}), + queryParamWithExample("timeout", "Evaluation timeout. Optional. Defaults to and is capped by the value of the -query.timeout flag.", false, stringSchema(), []example{{"example", "30s"}}), + queryParamWithExample("lookback_delta", "Override the lookback period for this query. Optional.", false, stringSchema(), []example{{"example", "5m"}}), + queryParamWithExample("stats", "When provided, include query statistics in the response. 
The special value 'all' enables more comprehensive statistics.", false, stringSchema(), []example{{"example", "all"}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "query-range", + Summary: "Evaluate a range query", + Tags: []string{"query"}, + Parameters: params, + Responses: responsesWithErrorExamples("QueryRangeOutputBody", queryRangeResponseExamples(), errorResponseExamples(), "Range query executed successfully.", "Error executing range query."), + }, + Post: &v3.Operation{ + OperationId: "query-range-post", + Summary: "Evaluate a range query", + Tags: []string{"query"}, + RequestBody: formRequestBodyWithExamples("QueryRangePostInputBody", queryRangePostExamples(), "Submit a range query. This endpoint accepts the same parameters as the GET version."), + Responses: responsesWithErrorExamples("QueryRangeOutputBody", queryRangeResponseExamples(), errorResponseExamples(), "Range query executed successfully.", "Error executing range query."), + }, + } +} + +func (*OpenAPIBuilder) queryExemplarsPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("start", "Start timestamp for exemplars query.", false, timestampSchema(), timestampExamples(exampleTime.Add(-1*time.Hour))), + queryParamWithExample("end", "End timestamp for exemplars query.", false, timestampSchema(), timestampExamples(exampleTime)), + queryParamWithExample("query", "PromQL query to extract exemplars for.", true, stringSchema(), []example{{"example", "prometheus_http_requests_total"}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "query-exemplars", + Summary: "Query exemplars", + Tags: []string{"query"}, + Parameters: params, + Responses: responsesWithErrorExamples("QueryExemplarsOutputBody", queryExemplarsResponseExamples(), errorResponseExamples(), "Exemplars retrieved successfully.", "Error retrieving exemplars."), + }, + Post: &v3.Operation{ + OperationId: "query-exemplars-post", + Summary: "Query exemplars", + Tags: []string{"query"}, + 
RequestBody: formRequestBodyWithExamples("QueryExemplarsPostInputBody", queryExemplarsPostExamples(), "Submit an exemplars query. This endpoint accepts the same parameters as the GET version."), + Responses: responsesWithErrorExamples("QueryExemplarsOutputBody", queryExemplarsResponseExamples(), errorResponseExamples(), "Exemplars query completed successfully.", "Error processing exemplars query."), + }, + } +} + +func (*OpenAPIBuilder) formatQueryPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("query", "PromQL expression to format.", true, stringSchema(), []example{{"example", "sum(rate(http_requests_total[5m])) by (job)"}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "format-query", + Summary: "Format a PromQL query", + Tags: []string{"query"}, + Parameters: params, + Responses: responsesWithErrorExamples("FormatQueryOutputBody", formatQueryResponseExamples(), errorResponseExamples(), "Query formatted successfully.", "Error formatting query."), + }, + Post: &v3.Operation{ + OperationId: "format-query-post", + Summary: "Format a PromQL query", + Tags: []string{"query"}, + RequestBody: formRequestBodyWithExamples("FormatQueryPostInputBody", formatQueryPostExamples(), "Submit a PromQL query to format. 
This endpoint accepts the same parameters as the GET version."), + Responses: responsesWithErrorExamples("FormatQueryOutputBody", formatQueryResponseExamples(), errorResponseExamples(), "Query formatting completed successfully.", "Error formatting query."), + }, + } +} + +func (*OpenAPIBuilder) parseQueryPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("query", "PromQL expression to parse.", true, stringSchema(), []example{{"example", "up{job=\"prometheus\"}"}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "parse-query", + Summary: "Parse a PromQL query", + Tags: []string{"query"}, + Parameters: params, + Responses: responsesWithErrorExamples("ParseQueryOutputBody", parseQueryResponseExamples(), errorResponseExamples(), "Query parsed successfully.", "Error parsing query."), + }, + Post: &v3.Operation{ + OperationId: "parse-query-post", + Summary: "Parse a PromQL query", + Tags: []string{"query"}, + RequestBody: formRequestBodyWithExamples("ParseQueryPostInputBody", parseQueryPostExamples(), "Submit a PromQL query to parse. 
This endpoint accepts the same parameters as the GET version."), + Responses: responsesWithErrorExamples("ParseQueryOutputBody", parseQueryResponseExamples(), errorResponseExamples(), "Query parsed successfully via POST.", "Error parsing query via POST."), + }, + } +} + +func (*OpenAPIBuilder) labelsPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("start", "Start timestamp for label names query.", false, timestampSchema(), timestampExamples(exampleTime.Add(-1*time.Hour))), + queryParamWithExample("end", "End timestamp for label names query.", false, timestampSchema(), timestampExamples(exampleTime)), + queryParamWithExample("match[]", "Series selector argument.", false, base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + }), []example{{"example", []string{"{job=\"prometheus\"}"}}}), + queryParamWithExample("limit", "Maximum number of label names to return.", false, integerSchema(), []example{{"example", 100}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "labels", + Summary: "Get label names", + Tags: []string{"labels"}, + Parameters: params, + Responses: responsesWithErrorExamples("LabelsOutputBody", labelsResponseExamples(), errorResponseExamples(), "Label names retrieved successfully.", "Error retrieving label names."), + }, + Post: &v3.Operation{ + OperationId: "labels-post", + Summary: "Get label names", + Tags: []string{"labels"}, + RequestBody: formRequestBodyWithExamples("LabelsPostInputBody", labelsPostExamples(), "Submit a label names query. 
This endpoint accepts the same parameters as the GET version."), + Responses: responsesWithErrorExamples("LabelsOutputBody", labelsResponseExamples(), errorResponseExamples(), "Label names retrieved successfully via POST.", "Error retrieving label names via POST."), + }, + } +} + +func (*OpenAPIBuilder) labelValuesPath() *v3.PathItem { + params := []*v3.Parameter{ + pathParam("name", "Label name.", stringSchema()), + queryParamWithExample("start", "Start timestamp for label values query.", false, timestampSchema(), timestampExamples(exampleTime.Add(-1*time.Hour))), + queryParamWithExample("end", "End timestamp for label values query.", false, timestampSchema(), timestampExamples(exampleTime)), + queryParamWithExample("match[]", "Series selector argument.", false, base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + }), []example{{"example", []string{"{job=\"prometheus\"}"}}}), + queryParamWithExample("limit", "Maximum number of label values to return.", false, integerSchema(), []example{{"example", 1000}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "label-values", + Summary: "Get label values", + Tags: []string{"labels"}, + Parameters: params, + Responses: responsesWithErrorExamples("LabelValuesOutputBody", labelValuesResponseExamples(), errorResponseExamples(), "Label values retrieved successfully.", "Error retrieving label values."), + }, + } +} + +func (*OpenAPIBuilder) seriesPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("start", "Start timestamp for series query.", false, timestampSchema(), timestampExamples(exampleTime.Add(-1*time.Hour))), + queryParamWithExample("end", "End timestamp for series query.", false, timestampSchema(), timestampExamples(exampleTime)), + queryParamWithExample("match[]", "Series selector argument.", true, base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: 
&base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + }), []example{{"example", []string{"{job=\"prometheus\"}"}}}), + queryParamWithExample("limit", "Maximum number of series to return.", false, integerSchema(), []example{{"example", 100}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "series", + Summary: "Find series by label matchers", + Tags: []string{"series"}, + Parameters: params, + Responses: responsesWithErrorExamples("SeriesOutputBody", seriesResponseExamples(), errorResponseExamples(), "Series returned matching the provided label matchers.", "Error retrieving series."), + }, + Post: &v3.Operation{ + OperationId: "series-post", + Summary: "Find series by label matchers", + Tags: []string{"series"}, + RequestBody: formRequestBodyWithExamples("SeriesPostInputBody", seriesPostExamples(), "Submit a series query. This endpoint accepts the same parameters as the GET version."), + Responses: responsesWithErrorExamples("SeriesOutputBody", seriesResponseExamples(), errorResponseExamples(), "Series returned matching the provided label matchers via POST.", "Error retrieving series via POST."), + }, + Delete: &v3.Operation{ + OperationId: "delete-series", + Summary: "Delete series", + Description: "Delete series matching selectors. 
Note: This is deprecated, use POST /admin/tsdb/delete_series instead.", + Tags: []string{"series"}, + Responses: responsesWithErrorExamples("SeriesDeleteOutputBody", seriesDeleteResponseExamples(), errorResponseExamples(), "Series marked for deletion.", "Error deleting series."), + }, + } +} + +func (*OpenAPIBuilder) metadataPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("limit", "The maximum number of metrics to return.", false, integerSchema(), []example{{"example", 100}}), + queryParamWithExample("limit_per_metric", "The maximum number of metadata entries per metric.", false, integerSchema(), []example{{"example", 10}}), + queryParamWithExample("metric", "A metric name to filter metadata for.", false, stringSchema(), []example{{"example", "http_requests_total"}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-metadata", + Summary: "Get metadata", + Tags: []string{"metadata"}, + Parameters: params, + Responses: responsesWithErrorExamples("MetadataOutputBody", metadataResponseExamples(), errorResponseExamples(), "Metric metadata retrieved successfully.", "Error retrieving metadata."), + }, + } +} + +func (*OpenAPIBuilder) scrapePoolsPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-scrape-pools", + Summary: "Get scrape pools", + Tags: []string{"targets"}, + Responses: responsesWithErrorExamples("ScrapePoolsOutputBody", scrapePoolsResponseExamples(), errorResponseExamples(), "Scrape pools retrieved successfully.", "Error retrieving scrape pools."), + }, + } +} + +func (*OpenAPIBuilder) targetsPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("scrapePool", "Filter targets by scrape pool name.", false, stringSchema(), []example{{"example", "prometheus"}}), + queryParamWithExample("state", "Filter by state: active, dropped, or any.", false, stringSchema(), []example{{"example", "active"}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: 
"get-targets", + Summary: "Get targets", + Tags: []string{"targets"}, + Parameters: params, + Responses: responsesWithErrorExamples("TargetsOutputBody", targetsResponseExamples(), errorResponseExamples(), "Target discovery information retrieved successfully.", "Error retrieving targets."), + }, + } +} + +func (*OpenAPIBuilder) targetsMetadataPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("match_target", "Label selector to filter targets.", false, stringSchema(), []example{{"example", "{job=\"prometheus\"}"}}), + queryParamWithExample("metric", "Metric name to retrieve metadata for.", false, stringSchema(), []example{{"example", "http_requests_total"}}), + queryParamWithExample("limit", "Maximum number of targets to match.", false, integerSchema(), []example{{"example", 10}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-targets-metadata", + Summary: "Get targets metadata", + Tags: []string{"targets"}, + Parameters: params, + Responses: responsesWithErrorExamples("TargetMetadataOutputBody", targetsMetadataResponseExamples(), errorResponseExamples(), "Target metadata retrieved successfully.", "Error retrieving target metadata."), + }, + } +} + +func (*OpenAPIBuilder) targetsRelabelStepsPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("scrapePool", "Name of the scrape pool.", true, stringSchema(), []example{{"example", "prometheus"}}), + queryParamWithExample("labels", "JSON-encoded labels to apply relabel rules to.", true, stringSchema(), []example{{"example", "{\"__address__\":\"localhost:9090\",\"job\":\"prometheus\"}"}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-targets-relabel-steps", + Summary: "Get targets relabel steps", + Tags: []string{"targets"}, + Parameters: params, + Responses: responsesWithErrorExamples("TargetRelabelStepsOutputBody", targetsRelabelStepsResponseExamples(), errorResponseExamples(), "Relabel steps retrieved successfully.", "Error 
retrieving relabel steps."), + }, + } +} + +func (*OpenAPIBuilder) rulesPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("type", "Filter by rule type: alert or record.", false, stringSchema(), []example{{"example", "alert"}}), + queryParamWithExample("rule_name[]", "Filter by rule name.", false, base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + }), []example{{"example", []string{"HighErrorRate"}}}), + queryParamWithExample("rule_group[]", "Filter by rule group name.", false, base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + }), []example{{"example", []string{"example_alerts"}}}), + queryParamWithExample("file[]", "Filter by file path.", false, base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + }), []example{{"example", []string{"/etc/prometheus/rules.yml"}}}), + queryParamWithExample("match[]", "Label matchers to filter rules.", false, base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + }), []example{{"example", []string{"{severity=\"critical\"}"}}}), + queryParamWithExample("exclude_alerts", "Exclude active alerts from response.", false, stringSchema(), []example{{"example", "false"}}), + queryParamWithExample("group_limit", "Maximum number of rule groups to return.", false, integerSchema(), []example{{"example", 100}}), + queryParamWithExample("group_next_token", "Pagination token for next page.", false, stringSchema(), []example{{"example", "abc123"}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "rules", + Summary: "Get alerting and recording rules", + Tags: []string{"rules"}, + Parameters: params, + Responses: responsesWithErrorExamples("RulesOutputBody", 
rulesResponseExamples(), errorResponseExamples(), "Rules retrieved successfully.", "Error retrieving rules."), + }, + } +} + +func (*OpenAPIBuilder) alertsPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "alerts", + Summary: "Get active alerts", + Tags: []string{"alerts"}, + Responses: responsesWithErrorExamples("AlertsOutputBody", alertsResponseExamples(), errorResponseExamples(), "Active alerts retrieved successfully.", "Error retrieving alerts."), + }, + } +} + +func (*OpenAPIBuilder) alertmanagersPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "alertmanagers", + Summary: "Get Alertmanager discovery", + Tags: []string{"alerts"}, + Responses: responsesWithErrorExamples("AlertmanagersOutputBody", alertmanagersResponseExamples(), errorResponseExamples(), "Alertmanager targets retrieved successfully.", "Error retrieving Alertmanager targets."), + }, + } +} + +func (*OpenAPIBuilder) statusConfigPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-status-config", + Summary: "Get status config", + Tags: []string{"status"}, + Responses: responsesWithErrorExamples("StatusConfigOutputBody", statusConfigResponseExamples(), errorResponseExamples(), "Configuration retrieved successfully.", "Error retrieving configuration."), + }, + } +} + +func (*OpenAPIBuilder) statusRuntimeInfoPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-status-runtimeinfo", + Summary: "Get status runtimeinfo", + Tags: []string{"status"}, + Responses: responsesWithErrorExamples("StatusRuntimeInfoOutputBody", statusRuntimeInfoResponseExamples(), errorResponseExamples(), "Runtime information retrieved successfully.", "Error retrieving runtime information."), + }, + } +} + +func (*OpenAPIBuilder) statusBuildInfoPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-status-buildinfo", + Summary: "Get status buildinfo", + Tags: 
[]string{"status"}, + Responses: responsesWithErrorExamples("StatusBuildInfoOutputBody", statusBuildInfoResponseExamples(), errorResponseExamples(), "Build information retrieved successfully.", "Error retrieving build information."), + }, + } +} + +func (*OpenAPIBuilder) statusFlagsPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-status-flags", + Summary: "Get status flags", + Tags: []string{"status"}, + Responses: responsesWithErrorExamples("StatusFlagsOutputBody", statusFlagsResponseExamples(), errorResponseExamples(), "Command-line flags retrieved successfully.", "Error retrieving flags."), + }, + } +} + +func (*OpenAPIBuilder) statusTSDBPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("limit", "The maximum number of items to return per category.", false, integerSchema(), []example{{"example", 10}}), + } + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "status-tsdb", + Summary: "Get TSDB status", + Tags: []string{"status"}, + Parameters: params, + Responses: responsesWithErrorExamples("StatusTSDBOutputBody", statusTSDBResponseExamples(), errorResponseExamples(), "TSDB status retrieved successfully.", "Error retrieving TSDB status."), + }, + } +} + +func (*OpenAPIBuilder) statusTSDBBlocksPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "status-tsdb-blocks", + Summary: "Get TSDB blocks information", + Tags: []string{"status"}, + Responses: responsesWithErrorExamples("StatusTSDBBlocksOutputBody", statusTSDBBlocksResponseExamples(), errorResponseExamples(), "TSDB blocks information retrieved successfully.", "Error retrieving TSDB blocks."), + }, + } +} + +func (*OpenAPIBuilder) statusWALReplayPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-status-walreplay", + Summary: "Get status walreplay", + Tags: []string{"status"}, + Responses: responsesWithErrorExamples("StatusWALReplayOutputBody", statusWALReplayResponseExamples(), 
errorResponseExamples(), "WAL replay status retrieved successfully.", "Error retrieving WAL replay status."), + }, + } +} + +func (*OpenAPIBuilder) adminDeleteSeriesPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("match[]", "Series selectors to identify series to delete.", true, base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + }), []example{{"example", []string{"{__name__=~\"test.*\"}"}}}), + queryParamWithExample("start", "Start timestamp for deletion.", false, timestampSchema(), timestampExamples(exampleTime.Add(-1*time.Hour))), + queryParamWithExample("end", "End timestamp for deletion.", false, timestampSchema(), timestampExamples(exampleTime)), + } + return &v3.PathItem{ + Post: &v3.Operation{ + OperationId: "deleteSeriesPost", + Summary: "Delete series matching selectors", + Description: "Deletes data for a selection of series in a time range.", + Tags: []string{"admin"}, + Parameters: params, + Responses: responsesWithErrorExamples("DeleteSeriesOutputBody", deleteSeriesResponseExamples(), errorResponseExamples(), "Series deleted successfully.", "Error deleting series."), + }, + Put: &v3.Operation{ + OperationId: "deleteSeriesPut", + Summary: "Delete series matching selectors via PUT", + Description: "Deletes data for a selection of series in a time range using PUT method.", + Tags: []string{"admin"}, + Parameters: params, + Responses: responsesWithErrorExamples("DeleteSeriesOutputBody", deleteSeriesResponseExamples(), errorResponseExamples(), "Series deleted successfully via PUT.", "Error deleting series via PUT."), + }, + } +} + +func (*OpenAPIBuilder) adminCleanTombstonesPath() *v3.PathItem { + return &v3.PathItem{ + Post: &v3.Operation{ + OperationId: "cleanTombstonesPost", + Summary: "Clean tombstones in the TSDB", + Description: "Removes deleted data from disk and cleans up existing tombstones.", + Tags: []string{"admin"}, + Responses: 
responsesWithErrorExamples("CleanTombstonesOutputBody", cleanTombstonesResponseExamples(), errorResponseExamples(), "Tombstones cleaned successfully.", "Error cleaning tombstones."), + }, + Put: &v3.Operation{ + OperationId: "cleanTombstonesPut", + Summary: "Clean tombstones in the TSDB via PUT", + Description: "Removes deleted data from disk and cleans up existing tombstones using PUT method.", + Tags: []string{"admin"}, + Responses: responsesWithErrorExamples("CleanTombstonesOutputBody", cleanTombstonesResponseExamples(), errorResponseExamples(), "Tombstones cleaned successfully via PUT.", "Error cleaning tombstones via PUT."), + }, + } +} + +func (*OpenAPIBuilder) adminSnapshotPath() *v3.PathItem { + params := []*v3.Parameter{ + queryParamWithExample("skip_head", "If true, do not snapshot data in the head block.", false, stringSchema(), []example{{"example", "false"}}), + } + return &v3.PathItem{ + Post: &v3.Operation{ + OperationId: "snapshotPost", + Summary: "Create a snapshot of the TSDB", + Description: "Creates a snapshot of all current data.", + Tags: []string{"admin"}, + Parameters: params, + Responses: responsesWithErrorExamples("SnapshotOutputBody", snapshotResponseExamples(), errorResponseExamples(), "Snapshot created successfully.", "Error creating snapshot."), + }, + Put: &v3.Operation{ + OperationId: "snapshotPut", + Summary: "Create a snapshot of the TSDB via PUT", + Description: "Creates a snapshot of all current data using PUT method.", + Tags: []string{"admin"}, + Parameters: params, + Responses: responsesWithErrorExamples("SnapshotOutputBody", snapshotResponseExamples(), errorResponseExamples(), "Snapshot created successfully via PUT.", "Error creating snapshot via PUT."), + }, + } +} + +func (*OpenAPIBuilder) remoteReadPath() *v3.PathItem { + return &v3.PathItem{ + Post: &v3.Operation{ + OperationId: "remoteRead", + Summary: "Remote read endpoint", + Description: "Prometheus remote read endpoint for federated queries. 
Accepts and returns Protocol Buffer encoded data.", + Tags: []string{"remote"}, + Responses: responsesNoContent(), + }, + } +} + +func (*OpenAPIBuilder) remoteWritePath() *v3.PathItem { + return &v3.PathItem{ + Post: &v3.Operation{ + OperationId: "remoteWrite", + Summary: "Remote write endpoint", + Description: "Prometheus remote write endpoint for sending metrics. Accepts Protocol Buffer encoded write requests.", + Tags: []string{"remote"}, + Responses: responsesNoContent(), + }, + } +} + +func (*OpenAPIBuilder) otlpWritePath() *v3.PathItem { + return &v3.PathItem{ + Post: &v3.Operation{ + OperationId: "otlpWrite", + Summary: "OTLP metrics write endpoint", + Description: "OpenTelemetry Protocol metrics ingestion endpoint. Accepts OTLP/HTTP metrics in Protocol Buffer format.", + Tags: []string{"otlp"}, + Responses: responsesNoContent(), + }, + } +} + +func (*OpenAPIBuilder) notificationsPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-notifications", + Summary: "Get notifications", + Tags: []string{"notifications"}, + Responses: responsesWithErrorExamples("NotificationsOutputBody", notificationsResponseExamples(), errorResponseExamples(), "Notifications retrieved successfully.", "Error retrieving notifications."), + }, + } +} + +// notificationsLivePath defines the /notifications/live endpoint. +// This endpoint uses OpenAPI 3.2's itemSchema feature for documenting SSE streams. +// It is excluded from the OpenAPI 3.1 specification. +func (*OpenAPIBuilder) notificationsLivePath() *v3.PathItem { + codes := orderedmap.New[string, *v3.Response]() + content := orderedmap.New[string, *v3.MediaType]() + + // Create a schema for the SSE message structure. + // Each SSE message has a 'data' field containing JSON. 
+ sseItemProps := orderedmap.New[string, *base.SchemaProxy]() + sseItemProps.Set("data", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"string"}, + Description: "SSE data field containing JSON-encoded notification.", + ContentMediaType: "application/json", + ContentSchema: schemaRef("#/components/schemas/Notification"), + })) + + content.Set("text/event-stream", &v3.MediaType{ + // Use ItemSchema (OpenAPI 3.2) instead of Schema to describe each SSE message. + ItemSchema: base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Title: "Server Sent Event Message", + Description: "A single SSE message. The data field contains a JSON-encoded Notification object.", + Properties: sseItemProps, + Required: []string{"data"}, + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + }), + Examples: notificationLiveExamples(), + }) + + codes.Set("200", &v3.Response{ + Description: "Server-sent events stream established.", + Content: content, + }) + codes.Set("default", errorResponse()) + + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "notifications-live", + Summary: "Stream live notifications via Server-Sent Events", + Description: "Subscribe to real-time server notifications using SSE. 
Each event contains a JSON-encoded Notification object in the data field.", + Tags: []string{"notifications"}, + Responses: &v3.Responses{Codes: codes}, + }, + } +} + +func (*OpenAPIBuilder) featuresPath() *v3.PathItem { + return &v3.PathItem{ + Get: &v3.Operation{ + OperationId: "get-features", + Summary: "Get features", + Tags: []string{"features"}, + Responses: responsesWithErrorExamples("FeaturesOutputBody", featuresResponseExamples(), errorResponseExamples(), "Feature flags retrieved successfully.", "Error retrieving features."), + }, + } +} diff --git a/web/api/v1/openapi_schemas.go b/web/api/v1/openapi_schemas.go new file mode 100644 index 0000000000..3a567983f4 --- /dev/null +++ b/web/api/v1/openapi_schemas.go @@ -0,0 +1,1223 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file defines all OpenAPI schema definitions for API request and response types. +// Schemas are organized by functional area: query, labels, series, metadata, targets, +// rules, alerts, and status endpoints. +package v1 + +import ( + "github.com/pb33f/libopenapi/datamodel/high/base" + v3 "github.com/pb33f/libopenapi/datamodel/high/v3" + "github.com/pb33f/libopenapi/orderedmap" +) + +// Schema definitions and components builder. + +func (b *OpenAPIBuilder) buildComponents() *v3.Components { + schemas := orderedmap.New[string, *base.SchemaProxy]() + + // Core schemas. 
+ schemas.Set("Error", b.errorSchema()) + schemas.Set("Labels", b.labelsSchema()) + + // Query schemas. + schemas.Set("QueryOutputBody", b.responseBodySchema("QueryData", "Response body for instant query.")) + schemas.Set("QueryRangeOutputBody", b.responseBodySchema("QueryData", "Response body for range query.")) + schemas.Set("QueryPostInputBody", b.queryPostInputBodySchema()) + schemas.Set("QueryRangePostInputBody", b.queryRangePostInputBodySchema()) + schemas.Set("QueryExemplarsOutputBody", b.simpleResponseBodySchema()) + schemas.Set("QueryExemplarsPostInputBody", b.queryExemplarsPostInputBodySchema()) + schemas.Set("FormatQueryOutputBody", b.formatQueryOutputBodySchema()) + schemas.Set("FormatQueryPostInputBody", b.formatQueryPostInputBodySchema()) + schemas.Set("ParseQueryOutputBody", b.simpleResponseBodySchema()) + schemas.Set("ParseQueryPostInputBody", b.parseQueryPostInputBodySchema()) + schemas.Set("QueryData", b.queryDataSchema()) + schemas.Set("FloatSample", b.floatSampleSchema()) + schemas.Set("HistogramSample", b.histogramSampleSchema()) + schemas.Set("FloatSeries", b.floatSeriesSchema()) + schemas.Set("HistogramSeries", b.histogramSeriesSchema()) + schemas.Set("HistogramValue", b.histogramValueSchema()) + + // Label schemas. + schemas.Set("LabelsOutputBody", b.stringArrayResponseBodySchema()) + schemas.Set("LabelsPostInputBody", b.labelsPostInputBodySchema()) + schemas.Set("LabelValuesOutputBody", b.stringArrayResponseBodySchema()) + + // Series schemas. + schemas.Set("SeriesOutputBody", b.labelsArrayResponseBodySchema()) + schemas.Set("SeriesPostInputBody", b.seriesPostInputBodySchema()) + schemas.Set("SeriesDeleteOutputBody", b.simpleResponseBodySchema()) + + // Metadata schemas. + schemas.Set("Metadata", b.metadataSchema()) + schemas.Set("MetadataOutputBody", b.metadataOutputBodySchema()) + schemas.Set("MetricMetadata", b.metricMetadataSchema()) + + // Target schemas. 
+ schemas.Set("Target", b.targetSchema()) + schemas.Set("DroppedTarget", b.droppedTargetSchema()) + schemas.Set("TargetDiscovery", b.targetDiscoverySchema()) + schemas.Set("TargetsOutputBody", b.refResponseBodySchema("TargetDiscovery", "Response body for targets endpoint.")) + schemas.Set("TargetMetadataOutputBody", b.metricMetadataArrayResponseBodySchema()) + schemas.Set("ScrapePoolsDiscovery", b.scrapePoolsDiscoverySchema()) + schemas.Set("ScrapePoolsOutputBody", b.refResponseBodySchema("ScrapePoolsDiscovery", "Response body for scrape pools endpoint.")) + + // Relabel schemas. + schemas.Set("Config", b.configSchema()) + schemas.Set("RelabelStep", b.relabelStepSchema()) + schemas.Set("RelabelStepsResponse", b.relabelStepsResponseSchema()) + schemas.Set("TargetRelabelStepsOutputBody", b.refResponseBodySchema("RelabelStepsResponse", "Response body for target relabel steps endpoint.")) + + // Rule schemas. + schemas.Set("RuleGroup", b.ruleGroupSchema()) + schemas.Set("RuleDiscovery", b.ruleDiscoverySchema()) + schemas.Set("RulesOutputBody", b.refResponseBodySchema("RuleDiscovery", "Response body for rules endpoint.")) + + // Alert schemas. + schemas.Set("Alert", b.alertSchema()) + schemas.Set("AlertDiscovery", b.alertDiscoverySchema()) + schemas.Set("AlertsOutputBody", b.refResponseBodySchema("AlertDiscovery", "Response body for alerts endpoint.")) + schemas.Set("AlertmanagerTarget", b.alertmanagerTargetSchema()) + schemas.Set("AlertmanagerDiscovery", b.alertmanagerDiscoverySchema()) + schemas.Set("AlertmanagersOutputBody", b.refResponseBodySchema("AlertmanagerDiscovery", "Response body for alertmanagers endpoint.")) + + // Status schemas. 
+ schemas.Set("StatusConfigData", b.statusConfigDataSchema()) + schemas.Set("StatusConfigOutputBody", b.refResponseBodySchema("StatusConfigData", "Response body for status config endpoint.")) + schemas.Set("RuntimeInfo", b.runtimeInfoSchema()) + schemas.Set("StatusRuntimeInfoOutputBody", b.refResponseBodySchema("RuntimeInfo", "Response body for status runtime info endpoint.")) + schemas.Set("PrometheusVersion", b.prometheusVersionSchema()) + schemas.Set("StatusBuildInfoOutputBody", b.refResponseBodySchema("PrometheusVersion", "Response body for status build info endpoint.")) + schemas.Set("StatusFlagsOutputBody", b.statusFlagsOutputBodySchema()) + schemas.Set("HeadStats", b.headStatsSchema()) + schemas.Set("TSDBStat", b.tsdbStatSchema()) + schemas.Set("TSDBStatus", b.tsdbStatusSchema()) + schemas.Set("StatusTSDBOutputBody", b.refResponseBodySchema("TSDBStatus", "Response body for status TSDB endpoint.")) + schemas.Set("BlockDesc", b.blockDescSchema()) + schemas.Set("BlockStats", b.blockStatsSchema()) + schemas.Set("BlockMetaCompaction", b.blockMetaCompactionSchema()) + schemas.Set("BlockMeta", b.blockMetaSchema()) + schemas.Set("StatusTSDBBlocksData", b.statusTSDBBlocksDataSchema()) + schemas.Set("StatusTSDBBlocksOutputBody", b.refResponseBodySchema("StatusTSDBBlocksData", "Response body for status TSDB blocks endpoint.")) + schemas.Set("StatusWALReplayData", b.statusWALReplayDataSchema()) + schemas.Set("StatusWALReplayOutputBody", b.refResponseBodySchema("StatusWALReplayData", "Response body for status WAL replay endpoint.")) + + // Admin schemas. + schemas.Set("DeleteSeriesOutputBody", b.statusOnlyResponseBodySchema()) + schemas.Set("CleanTombstonesOutputBody", b.statusOnlyResponseBodySchema()) + schemas.Set("DataStruct", b.dataStructSchema()) + schemas.Set("SnapshotOutputBody", b.refResponseBodySchema("DataStruct", "Response body for snapshot endpoint.")) + + // Notification schemas. 
+ schemas.Set("Notification", b.notificationSchema()) + schemas.Set("NotificationsOutputBody", b.notificationArrayResponseBodySchema()) + + // Features schema. + schemas.Set("FeaturesOutputBody", b.simpleResponseBodySchema()) + + return &v3.Components{Schemas: schemas} +} + +// Schema definitions using high-level structs. + +func (*OpenAPIBuilder) errorSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("status", statusSchema()) + props.Set("errorType", stringSchemaWithDescriptionAndExample("Type of error that occurred.", "bad_data")) + props.Set("error", stringSchemaWithDescriptionAndExample("Human-readable error message.", "invalid parameter")) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Error response.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"status", "errorType", "error"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) labelsSchema() *base.SchemaProxy { + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Label set represented as a key-value map.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: true}, + }) +} + +func (*OpenAPIBuilder) responseBodySchema(dataSchemaRef, description string) *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("status", statusSchema()) + props.Set("data", schemaRef("#/components/schemas/"+dataSchemaRef)) + props.Set("warnings", warningsSchema()) + props.Set("infos", infosSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: description, + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"status", "data"}, + Properties: props, + }) +} + +func (b *OpenAPIBuilder) refResponseBodySchema(dataSchemaRef, description string) *base.SchemaProxy { + return 
b.responseBodySchema(dataSchemaRef, description) +} + +func (*OpenAPIBuilder) simpleResponseBodySchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("status", statusSchema()) + props.Set("data", base.CreateSchemaProxy(&base.Schema{ + Description: "Response data (structure varies by endpoint).", + Example: createYAMLNode(map[string]any{"result": "ok"}), + })) + props.Set("warnings", warningsSchema()) + props.Set("infos", infosSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Generic response body.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"status", "data"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) statusOnlyResponseBodySchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("status", statusSchema()) + props.Set("warnings", warningsSchema()) + props.Set("infos", infosSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Response body containing only status.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"status"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) stringArrayResponseBodySchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("status", statusSchema()) + props.Set("data", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + Example: createYAMLNode([]string{"__name__", "job", "instance"}), + })) + props.Set("warnings", warningsSchema()) + props.Set("infos", infosSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Response body with an array of strings.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: 
[]string{"status", "data"},
		Properties:           props,
	})
}

// labelsArrayResponseBodySchema returns a response envelope whose "data"
// property is an array of references to the Labels component schema.
func (*OpenAPIBuilder) labelsArrayResponseBodySchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("status", statusSchema())

	data := &base.Schema{
		Type:    []string{"array"},
		Items:   &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/Labels")},
		Example: createYAMLNode([]map[string]string{{"__name__": "up", "job": "prometheus", "instance": "localhost:9090"}}),
	}
	fields.Set("data", base.CreateSchemaProxy(data))
	fields.Set("warnings", warningsSchema())
	fields.Set("infos", infosSchema())

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "Response body with an array of label sets.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"status", "data"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// metricMetadataArrayResponseBodySchema returns a response envelope whose
// "data" property is an array of references to the MetricMetadata component
// schema.
func (*OpenAPIBuilder) metricMetadataArrayResponseBodySchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("status", statusSchema())

	data := &base.Schema{
		Type:  []string{"array"},
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/MetricMetadata")},
		Example: createYAMLNode([]map[string]any{
			{
				"target": map[string]string{
					"instance": "localhost:9090",
					"job":      "prometheus",
				},
				"metric": "up",
				"type":   "gauge",
				"help":   "The current health status of the target",
				"unit":   "",
			},
		}),
	}
	fields.Set("data", base.CreateSchemaProxy(data))
	fields.Set("warnings", warningsSchema())
	fields.Set("infos", infosSchema())

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "Response body with an array of metric metadata.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"status", "data"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// notificationArrayResponseBodySchema returns a response envelope whose
// "data" property is an array of references to the Notification component
// schema.
func (*OpenAPIBuilder) notificationArrayResponseBodySchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("status", statusSchema())

	data := &base.Schema{
		Type:  []string{"array"},
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/Notification")},
		Example: createYAMLNode([]map[string]any{
			{"text": "Server is running", "date": "2023-07-21T20:00:00.000Z", "active": true},
		}),
	}
	fields.Set("data", base.CreateSchemaProxy(data))
	fields.Set("warnings", warningsSchema())
	fields.Set("infos", infosSchema())

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "Response body with an array of notifications.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"status", "data"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// floatSampleSchema returns the object schema for a single float sample:
// a "metric" label set plus a "value" array of exactly two items, each of
// which may be a number or a string.
func (*OpenAPIBuilder) floatSampleSchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("metric", schemaRef("#/components/schemas/Labels"))

	value := &base.Schema{
		Type:        []string{"array"},
		Description: "Timestamp and float value as [unixTimestamp, stringValue].",
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
			OneOf: []*base.SchemaProxy{
				base.CreateSchemaProxy(&base.Schema{Type: []string{"number"}}),
				stringSchema(),
			},
		})},
		MinItems: int64Ptr(2),
		MaxItems: int64Ptr(2),
		Example:  createYAMLNode([]any{1767436620, "1"}),
	}
	fields.Set("value", base.CreateSchemaProxy(value))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "A sample with a float value.",
		Required:             []string{"metric", "value"},
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

func (*OpenAPIBuilder) histogramValueSchema() *base.SchemaProxy {
	props := orderedmap.New[string, *base.SchemaProxy]()
	props.Set("count", stringSchemaWithDescription("Total count of observations."))
	props.Set("sum",
stringSchemaWithDescription("Sum of all observed values."))
	props.Set("buckets", base.CreateSchemaProxy(&base.Schema{
		Type:        []string{"array"},
		Description: "Histogram buckets as [boundary_rule, lower, upper, count].",
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
			Type: []string{"array"},
			Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
				OneOf: []*base.SchemaProxy{
					base.CreateSchemaProxy(&base.Schema{Type: []string{"number"}}),
					stringSchema(),
				},
			})},
		})},
	}))

	return base.CreateSchemaProxy(&base.Schema{
		Type:                 []string{"object"},
		Description:          "Native histogram value representation.",
		Required:             []string{"count", "sum"},
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Properties:           props,
	})
}

// histogramSampleSchema returns the object schema for a single native
// histogram sample: a "metric" label set plus a "histogram" array of exactly
// two items, each either a number or a HistogramValue reference.
func (*OpenAPIBuilder) histogramSampleSchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("metric", schemaRef("#/components/schemas/Labels"))

	histogram := &base.Schema{
		Type:        []string{"array"},
		Description: "Timestamp and histogram value as [unixTimestamp, histogramObject].",
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
			OneOf: []*base.SchemaProxy{
				base.CreateSchemaProxy(&base.Schema{Type: []string{"number"}}),
				schemaRef("#/components/schemas/HistogramValue"),
			},
		})},
		MinItems: int64Ptr(2),
		MaxItems: int64Ptr(2),
		Example:  createYAMLNode([]any{1767436620, map[string]any{"count": "60", "sum": "120", "buckets": []any{}}}),
	}
	fields.Set("histogram", base.CreateSchemaProxy(histogram))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "A sample with a native histogram value.",
		Required:             []string{"metric", "histogram"},
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// floatSeriesSchema returns the object schema for a float time series:
// a "metric" label set plus a "values" array whose elements are two-item
// arrays of number-or-string entries.
func (*OpenAPIBuilder) floatSeriesSchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("metric", schemaRef("#/components/schemas/Labels"))

	values := &base.Schema{
		Type:        []string{"array"},
		Description: "Array of [timestamp, stringValue] pairs for float values.",
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
			Type: []string{"array"},
			Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
				OneOf: []*base.SchemaProxy{
					base.CreateSchemaProxy(&base.Schema{Type: []string{"number"}}),
					stringSchema(),
				},
			})},
			MinItems: int64Ptr(2),
			MaxItems: int64Ptr(2),
		})},
	}
	fields.Set("values", base.CreateSchemaProxy(values))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "A time series with float values.",
		Required:             []string{"metric", "values"},
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

func (*OpenAPIBuilder) histogramSeriesSchema() *base.SchemaProxy {
	props := orderedmap.New[string, *base.SchemaProxy]()
	props.Set("metric", schemaRef("#/components/schemas/Labels"))
	props.Set("histograms", base.CreateSchemaProxy(&base.Schema{
		Type:        []string{"array"},
		Description: "Array of [timestamp, histogramObject] pairs for histogram values.",
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
			Type: []string{"array"},
			Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
				OneOf: []*base.SchemaProxy{
					base.CreateSchemaProxy(&base.Schema{Type: []string{"number"}}),
					schemaRef("#/components/schemas/HistogramValue"),
				},
			})},
			MinItems: int64Ptr(2),
			MaxItems: int64Ptr(2),
		})},
	}))

	return base.CreateSchemaProxy(&base.Schema{
		Type:                 []string{"object"},
		Description:          "A time series with native histogram values.",
		Required:             []string{"metric", "histograms"},
		AdditionalProperties:
&base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Properties:           props,
	})
}

// queryDataSchema returns the schema for the "data" payload of a query
// response. It is an anyOf over four object variants, one per resultType
// constant ("vector", "matrix", "scalar", "string"); each variant requires
// both "resultType" and "result".
func (*OpenAPIBuilder) queryDataSchema() *base.SchemaProxy {
	// Vector query result.
	vectorProps := orderedmap.New[string, *base.SchemaProxy]()
	vectorProps.Set("resultType", stringSchemaWithConstValue("vector"))
	vectorProps.Set("result", base.CreateSchemaProxy(&base.Schema{
		Type:        []string{"array"},
		Description: "Array of samples (either float or histogram).",
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
			AnyOf: []*base.SchemaProxy{
				schemaRef("#/components/schemas/FloatSample"),
				schemaRef("#/components/schemas/HistogramSample"),
			},
		})},
	}))

	// Matrix query result.
	matrixProps := orderedmap.New[string, *base.SchemaProxy]()
	matrixProps.Set("resultType", stringSchemaWithConstValue("matrix"))
	matrixProps.Set("result", base.CreateSchemaProxy(&base.Schema{
		Type:        []string{"array"},
		Description: "Array of time series (either float or histogram).",
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
			AnyOf: []*base.SchemaProxy{
				schemaRef("#/components/schemas/FloatSeries"),
				schemaRef("#/components/schemas/HistogramSeries"),
			},
		})},
	}))

	// Scalar query result.
	scalarProps := orderedmap.New[string, *base.SchemaProxy]()
	scalarProps.Set("resultType", stringSchemaWithConstValue("scalar"))
	scalarProps.Set("result", base.CreateSchemaProxy(&base.Schema{
		Type:        []string{"array"},
		Description: "Scalar value as [timestamp, stringValue].",
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
			OneOf: []*base.SchemaProxy{
				base.CreateSchemaProxy(&base.Schema{Type: []string{"number"}}),
				stringSchema(),
			},
		})},
		MinItems: int64Ptr(2),
		MaxItems: int64Ptr(2),
	}))

	// String query result.
	// The pair is [timestamp, stringValue]: the first element is a number,
	// so the item schema must accept number or string, exactly like the
	// scalar variant above. A string-only item schema would reject the
	// timestamp.
	stringResultProps := orderedmap.New[string, *base.SchemaProxy]()
	stringResultProps.Set("resultType", stringSchemaWithConstValue("string"))
	stringResultProps.Set("result", base.CreateSchemaProxy(&base.Schema{
		Type:        []string{"array"},
		Description: "String value as [timestamp, stringValue].",
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{
			OneOf: []*base.SchemaProxy{
				base.CreateSchemaProxy(&base.Schema{Type: []string{"number"}}),
				stringSchema(),
			},
		})},
		MinItems: int64Ptr(2),
		MaxItems: int64Ptr(2),
	}))

	return base.CreateSchemaProxy(&base.Schema{
		Description: "Query result data. The structure of 'result' depends on 'resultType'.",
		AnyOf: []*base.SchemaProxy{
			// resultType: vector -> result: array of samples.
			base.CreateSchemaProxy(&base.Schema{
				Type:                 []string{"object"},
				Required:             []string{"resultType", "result"},
				AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
				Properties:           vectorProps,
			}),
			// resultType: matrix -> result: array of series.
			base.CreateSchemaProxy(&base.Schema{
				Type:                 []string{"object"},
				Required:             []string{"resultType", "result"},
				AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
				Properties:           matrixProps,
			}),
			// resultType: scalar -> result: [timestamp, value].
			base.CreateSchemaProxy(&base.Schema{
				Type:                 []string{"object"},
				Required:             []string{"resultType", "result"},
				AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
				Properties:           scalarProps,
			}),
			// resultType: string -> result: [timestamp, stringValue].
			base.CreateSchemaProxy(&base.Schema{
				Type:                 []string{"object"},
				Required:             []string{"resultType", "result"},
				AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
				Properties:           stringResultProps,
			}),
		},
		Example: createYAMLNode(map[string]any{
			"resultType": "vector",
			"result": []map[string]any{
				{
					"metric": map[string]string{"__name__": "up", "job": "prometheus"},
					"value":  []any{1627845600, "1"},
				},
			},
		}),
	})
}

// queryPostInputBodySchema returns the object schema for the form body of
// an instant-query POST request. Only "query" is required.
func (*OpenAPIBuilder) queryPostInputBodySchema() *base.SchemaProxy {
	props := orderedmap.New[string, *base.SchemaProxy]()
	props.Set("query", stringSchemaWithDescriptionAndExample("Form field: The PromQL query to execute.", "up"))
	props.Set("time", stringSchemaWithDescriptionAndExample("Form field: The evaluation timestamp (optional, defaults to current time).", "2023-07-21T20:10:51.781Z"))
	props.Set("limit", integerSchemaWithDescriptionAndExample("Form field: The maximum number of metrics to return.", 100))
	props.Set("timeout", stringSchemaWithDescriptionAndExample("Form field: Evaluation timeout (optional, defaults to and is capped by the value of the -query.timeout flag).", "30s"))
	props.Set("lookback_delta", stringSchemaWithDescriptionAndExample("Form field: Override the lookback period for this query (optional).", "5m"))
	props.Set("stats", stringSchemaWithDescriptionAndExample("Form field: When provided, include query statistics in the response (the special value 'all' enables more comprehensive statistics).", "all"))

	return base.CreateSchemaProxy(&base.Schema{
		Type:                 []string{"object"},
		Description:          "POST request body for instant query.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"query"},
		Properties:           props,
	})
}

func (*OpenAPIBuilder) queryRangePostInputBodySchema() *base.SchemaProxy {
	props := orderedmap.New[string, *base.SchemaProxy]()
	props.Set("query",
stringSchemaWithDescriptionAndExample("Form field: The query to execute.", "rate(http_requests_total[5m])"))
	props.Set("start", stringSchemaWithDescriptionAndExample("Form field: The start time of the query.", "2023-07-21T20:10:30.781Z"))
	props.Set("end", stringSchemaWithDescriptionAndExample("Form field: The end time of the query.", "2023-07-21T20:20:30.781Z"))
	props.Set("step", stringSchemaWithDescriptionAndExample("Form field: The step size of the query.", "15s"))
	props.Set("limit", integerSchemaWithDescriptionAndExample("Form field: The maximum number of metrics to return.", 100))
	props.Set("timeout", stringSchemaWithDescriptionAndExample("Form field: Evaluation timeout (optional, defaults to and is capped by the value of the -query.timeout flag).", "30s"))
	props.Set("lookback_delta", stringSchemaWithDescriptionAndExample("Form field: Override the lookback period for this query (optional).", "5m"))
	props.Set("stats", stringSchemaWithDescriptionAndExample("Form field: When provided, include query statistics in the response (the special value 'all' enables more comprehensive statistics).", "all"))

	return base.CreateSchemaProxy(&base.Schema{
		Type:                 []string{"object"},
		Description:          "POST request body for range query.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"query", "start", "end", "step"},
		Properties:           props,
	})
}

// queryExemplarsPostInputBodySchema returns the object schema for the form
// body of an exemplars-query POST request. Only "query" is required.
func (*OpenAPIBuilder) queryExemplarsPostInputBodySchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("query", stringSchemaWithDescriptionAndExample("Form field: The query to execute.", "http_requests_total"))
	fields.Set("start", stringSchemaWithDescriptionAndExample("Form field: The start time of the query.", "2023-07-21T20:00:00.000Z"))
	fields.Set("end", stringSchemaWithDescriptionAndExample("Form field: The end time of the query.", "2023-07-21T21:00:00.000Z"))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "POST request body for exemplars query.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"query"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// formatQueryOutputBodySchema returns the response envelope for the format
// query endpoint; its "data" property is a string schema.
func (*OpenAPIBuilder) formatQueryOutputBodySchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("status", statusSchema())
	fields.Set("data", stringSchemaWithDescriptionAndExample("Formatted query string.", "sum by(status) (rate(http_requests_total[5m]))"))
	fields.Set("warnings", warningsSchema())
	fields.Set("infos", infosSchema())

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "Response body for format query endpoint.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"status", "data"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// formatQueryPostInputBodySchema returns the object schema for the form body
// of a format-query POST request, with a single required "query" field.
func (*OpenAPIBuilder) formatQueryPostInputBodySchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("query", stringSchemaWithDescriptionAndExample("Form field: The query to format.", "sum(rate(http_requests_total[5m])) by (status)"))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "POST request body for format query.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"query"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// parseQueryPostInputBodySchema returns the object schema for the form body
// of a parse-query POST request, with a single required "query" field.
func (*OpenAPIBuilder) parseQueryPostInputBodySchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("query", stringSchemaWithDescriptionAndExample("Form field: The query to parse.", "sum(rate(http_requests_total[5m]))"))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "POST request body for parse query.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"query"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

+func (*OpenAPIBuilder) labelsPostInputBodySchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("start", stringSchemaWithDescriptionAndExample("Form field: The start time of the query.", "2023-07-21T20:00:00.000Z")) + props.Set("end", stringSchemaWithDescriptionAndExample("Form field: The end time of the query.", "2023-07-21T21:00:00.000Z")) + props.Set("match[]", stringArraySchemaWithDescriptionAndExample("Form field: Series selector argument that selects the series from which to read the label names.", []string{"{job=\"prometheus\"}"})) + props.Set("limit", integerSchemaWithDescriptionAndExample("Form field: The maximum number of label names to return.", 100)) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "POST request body for labels query.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) seriesPostInputBodySchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("start", stringSchemaWithDescriptionAndExample("Form field: The start time of the query.", "2023-07-21T20:00:00.000Z")) + props.Set("end", stringSchemaWithDescriptionAndExample("Form field: The end time of the query.", "2023-07-21T21:00:00.000Z")) + props.Set("match[]", stringArraySchemaWithDescriptionAndExample("Form field: Series selector argument that selects the series to return.", []string{"{job=\"prometheus\"}"})) + props.Set("limit", integerSchemaWithDescriptionAndExample("Form field: The maximum number of series to return.", 100)) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "POST request body for series query.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"match[]"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) metadataSchema() *base.SchemaProxy { + props := 
orderedmap.New[string, *base.SchemaProxy]() + props.Set("type", stringSchemaWithDescription("Metric type (counter, gauge, histogram, summary, or untyped).")) + props.Set("unit", stringSchemaWithDescription("Unit of the metric.")) + props.Set("help", stringSchemaWithDescription("Help text describing the metric.")) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Metric metadata.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"type", "unit", "help"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) metadataOutputBodySchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("status", statusSchema()) + props.Set("data", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{ + A: base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/Metadata")}, + }), + }, + })) + props.Set("warnings", warningsSchema()) + props.Set("infos", infosSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Response body for metadata endpoint.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"status", "data"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) metricMetadataSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("target", schemaRef("#/components/schemas/Labels")) + props.Set("metric", stringSchemaWithDescription("Metric name.")) + props.Set("type", stringSchemaWithDescription("Metric type (counter, gauge, histogram, summary, or untyped).")) + props.Set("help", stringSchemaWithDescription("Help text describing the metric.")) + props.Set("unit", stringSchemaWithDescription("Unit of the metric.")) + + return 
base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Target metric metadata.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"target", "type", "help", "unit"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) targetSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("discoveredLabels", schemaRef("#/components/schemas/Labels")) + props.Set("labels", schemaRef("#/components/schemas/Labels")) + props.Set("scrapePool", stringSchemaWithDescription("Name of the scrape pool.")) + props.Set("scrapeUrl", stringSchemaWithDescription("URL of the target.")) + props.Set("globalUrl", stringSchemaWithDescription("Global URL of the target.")) + props.Set("lastError", stringSchemaWithDescription("Last error message from scraping.")) + props.Set("lastScrape", dateTimeSchemaWithDescription("Timestamp of the last scrape.")) + props.Set("lastScrapeDuration", numberSchemaWithDescription("Duration of the last scrape in seconds.")) + props.Set("health", stringSchemaWithDescription("Health status of the target (up, down, or unknown).")) + props.Set("scrapeInterval", stringSchemaWithDescription("Scrape interval for this target.")) + props.Set("scrapeTimeout", stringSchemaWithDescription("Scrape timeout for this target.")) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Scrape target information.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"discoveredLabels", "labels", "scrapePool", "scrapeUrl", "globalUrl", "lastError", "lastScrape", "lastScrapeDuration", "health", "scrapeInterval", "scrapeTimeout"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) droppedTargetSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("discoveredLabels", schemaRef("#/components/schemas/Labels")) + props.Set("scrapePool", 
stringSchemaWithDescription("Name of the scrape pool."))

	return base.CreateSchemaProxy(&base.Schema{
		Type:                 []string{"object"},
		Description:          "Dropped target information.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"discoveredLabels", "scrapePool"},
		Properties:           props,
	})
}

// targetDiscoverySchema returns the object schema for target discovery:
// arrays of Target and DroppedTarget references plus a "droppedTargetCounts"
// object whose additional properties use the integer schema.
func (*OpenAPIBuilder) targetDiscoverySchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()

	active := &base.Schema{
		Type:  []string{"array"},
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/Target")},
	}
	fields.Set("activeTargets", base.CreateSchemaProxy(active))

	dropped := &base.Schema{
		Type:  []string{"array"},
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/DroppedTarget")},
	}
	fields.Set("droppedTargets", base.CreateSchemaProxy(dropped))

	counts := &base.Schema{
		Type:                 []string{"object"},
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{A: integerSchema()},
	}
	fields.Set("droppedTargetCounts", base.CreateSchemaProxy(counts))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "Target discovery information including active and dropped targets.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"activeTargets", "droppedTargets", "droppedTargetCounts"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// scrapePoolsDiscoverySchema returns the object schema holding the required
// "scrapePools" array of strings.
func (*OpenAPIBuilder) scrapePoolsDiscoverySchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()

	pools := &base.Schema{
		Type:  []string{"array"},
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()},
	}
	fields.Set("scrapePools", base.CreateSchemaProxy(pools))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "List of all configured scrape pools.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"scrapePools"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// configSchema returns the object schema for a relabel configuration.
// No property is marked as required.
func (*OpenAPIBuilder) configSchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("source_labels", stringArraySchemaWithDescription("Source labels for relabeling."))
	fields.Set("separator", stringSchemaWithDescription("Separator for source label values."))
	fields.Set("regex", stringSchemaWithDescription("Regular expression for matching."))
	fields.Set("modulus", integerSchemaWithDescription("Modulus for hash-based relabeling."))
	fields.Set("target_label", stringSchemaWithDescription("Target label name."))
	fields.Set("replacement", stringSchemaWithDescription("Replacement value."))
	fields.Set("action", stringSchemaWithDescription("Relabel action."))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "Relabel configuration.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// relabelStepSchema returns the object schema for one relabel step: a Config
// reference ("rule"), a Labels reference ("output") and a boolean "keep".
func (*OpenAPIBuilder) relabelStepSchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("rule", schemaRef("#/components/schemas/Config"))
	fields.Set("output", schemaRef("#/components/schemas/Labels"))

	keep := &base.Schema{Type: []string{"boolean"}}
	fields.Set("keep", base.CreateSchemaProxy(keep))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "Relabel step showing the rule, output, and whether the target was kept.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"rule", "output", "keep"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// relabelStepsResponseSchema returns the object schema holding the required
// "steps" array of RelabelStep references.
func (*OpenAPIBuilder) relabelStepsResponseSchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()

	steps := &base.Schema{
		Type:  []string{"array"},
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/RelabelStep")},
	}
	fields.Set("steps", base.CreateSchemaProxy(steps))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "Relabeling steps response.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"steps"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

// ruleGroupSchema returns the object schema for a rule group. Its "rules"
// property is an array of otherwise unconstrained objects, and every
// declared property is listed as required.
func (*OpenAPIBuilder) ruleGroupSchema() *base.SchemaProxy {
	fields := orderedmap.New[string, *base.SchemaProxy]()
	fields.Set("name", stringSchemaWithDescription("Name of the rule group."))
	fields.Set("file", stringSchemaWithDescription("File containing the rule group."))

	rules := &base.Schema{
		Type:        []string{"array"},
		Description: "Rules in this group.",
		Items:       &base.DynamicValue[*base.SchemaProxy, bool]{A: base.CreateSchemaProxy(&base.Schema{Type: []string{"object"}, Description: "Rule definition."})},
	}
	fields.Set("rules", base.CreateSchemaProxy(rules))
	fields.Set("interval", numberSchemaWithDescription("Evaluation interval in seconds."))
	fields.Set("limit", integerSchemaWithDescription("Maximum number of alerts for this group."))
	fields.Set("evaluationTime", numberSchemaWithDescription("Time taken to evaluate the group in seconds."))
	fields.Set("lastEvaluation", dateTimeSchemaWithDescription("Timestamp of the last evaluation."))

	schema := &base.Schema{
		Type:                 []string{"object"},
		Description:          "Rule group information.",
		AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false},
		Required:             []string{"name", "file", "rules", "interval", "limit", "evaluationTime", "lastEvaluation"},
		Properties:           fields,
	}
	return base.CreateSchemaProxy(schema)
}

func (*OpenAPIBuilder) ruleDiscoverySchema() *base.SchemaProxy {
	props := orderedmap.New[string, *base.SchemaProxy]()
	props.Set("groups", base.CreateSchemaProxy(&base.Schema{
		Type:  []string{"array"},
		Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/RuleGroup")},
	}))
	props.Set("groupNextToken", stringSchemaWithDescription("Pagination token for the next page of groups."))

	return base.CreateSchemaProxy(&base.Schema{
		Type:                 []string{"object"},
Description: "Rule discovery information containing all rule groups.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"groups"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) alertSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("labels", schemaRef("#/components/schemas/Labels")) + props.Set("annotations", schemaRef("#/components/schemas/Labels")) + props.Set("state", stringSchemaWithDescription("State of the alert (pending, firing, or inactive).")) + props.Set("value", stringSchemaWithDescription("Value of the alert expression.")) + props.Set("activeAt", dateTimeSchemaWithDescription("Timestamp when the alert became active.")) + props.Set("keepFiringSince", dateTimeSchemaWithDescription("Timestamp since the alert has been kept firing.")) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Alert information.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"labels", "annotations", "state", "value"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) alertDiscoverySchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("alerts", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/Alert")}, + })) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Alert discovery information containing all active alerts.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"alerts"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) alertmanagerTargetSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("url", stringSchemaWithDescription("URL of the Alertmanager instance.")) + + return 
base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Alertmanager target information.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"url"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) alertmanagerDiscoverySchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("activeAlertmanagers", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/AlertmanagerTarget")}, + })) + props.Set("droppedAlertmanagers", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/AlertmanagerTarget")}, + })) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Alertmanager discovery information including active and dropped instances.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"activeAlertmanagers", "droppedAlertmanagers"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) statusConfigDataSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("yaml", stringSchemaWithDescription("Prometheus configuration in YAML format.")) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Prometheus configuration.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"yaml"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) runtimeInfoSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("startTime", base.CreateSchemaProxy(&base.Schema{Type: []string{"string"}, Format: "date-time"})) + props.Set("CWD", stringSchema()) + props.Set("hostname", stringSchema()) + props.Set("serverTime", 
base.CreateSchemaProxy(&base.Schema{Type: []string{"string"}, Format: "date-time"})) + props.Set("reloadConfigSuccess", base.CreateSchemaProxy(&base.Schema{Type: []string{"boolean"}})) + props.Set("lastConfigTime", base.CreateSchemaProxy(&base.Schema{Type: []string{"string"}, Format: "date-time"})) + props.Set("corruptionCount", integerSchema()) + props.Set("goroutineCount", integerSchema()) + props.Set("GOMAXPROCS", integerSchema()) + props.Set("GOMEMLIMIT", integerSchema()) + props.Set("GOGC", stringSchema()) + props.Set("GODEBUG", stringSchema()) + props.Set("storageRetention", stringSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Prometheus runtime information.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"startTime", "CWD", "hostname", "serverTime", "reloadConfigSuccess", "lastConfigTime", "corruptionCount", "goroutineCount", "GOMAXPROCS", "GOMEMLIMIT", "GOGC", "GODEBUG", "storageRetention"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) prometheusVersionSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("version", stringSchema()) + props.Set("revision", stringSchema()) + props.Set("branch", stringSchema()) + props.Set("buildUser", stringSchema()) + props.Set("buildDate", stringSchema()) + props.Set("goVersion", stringSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Prometheus version information.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"version", "revision", "branch", "buildUser", "buildDate", "goVersion"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) statusFlagsOutputBodySchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("status", statusSchema()) + props.Set("data", base.CreateSchemaProxy(&base.Schema{ + Type: 
[]string{"object"}, + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + })) + props.Set("warnings", warningsSchema()) + props.Set("infos", infosSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Response body for status flags endpoint.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"status", "data"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) headStatsSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("numSeries", integerSchema()) + props.Set("numLabelPairs", integerSchema()) + props.Set("chunkCount", integerSchema()) + props.Set("minTime", integerSchema()) + props.Set("maxTime", integerSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "TSDB head statistics.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"numSeries", "numLabelPairs", "chunkCount", "minTime", "maxTime"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) tsdbStatSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("name", stringSchema()) + props.Set("value", integerSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "TSDB statistic.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"name", "value"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) tsdbStatusSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("headStats", schemaRef("#/components/schemas/HeadStats")) + props.Set("seriesCountByMetricName", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/TSDBStat")}, + })) + 
props.Set("labelValueCountByLabelName", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/TSDBStat")}, + })) + props.Set("memoryInBytesByLabelName", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/TSDBStat")}, + })) + props.Set("seriesCountByLabelValuePair", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/TSDBStat")}, + })) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "TSDB status information.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"headStats", "seriesCountByMetricName", "labelValueCountByLabelName", "memoryInBytesByLabelName", "seriesCountByLabelValuePair"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) blockDescSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("ulid", stringSchema()) + props.Set("minTime", integerSchema()) + props.Set("maxTime", integerSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Block descriptor.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"ulid", "minTime", "maxTime"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) blockStatsSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("numSamples", integerSchema()) + props.Set("numSeries", integerSchema()) + props.Set("numChunks", integerSchema()) + props.Set("numTombstones", integerSchema()) + props.Set("numFloatSamples", integerSchema()) + props.Set("numHistogramSamples", integerSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: 
[]string{"object"}, + Description: "Block statistics.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) blockMetaCompactionSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("level", integerSchema()) + props.Set("sources", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + })) + props.Set("parents", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/BlockDesc")}, + })) + props.Set("failed", base.CreateSchemaProxy(&base.Schema{Type: []string{"boolean"}})) + props.Set("deletable", base.CreateSchemaProxy(&base.Schema{Type: []string{"boolean"}})) + props.Set("hints", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: stringSchema()}, + })) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Block compaction metadata.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"level"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) blockMetaSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("ulid", stringSchema()) + props.Set("minTime", integerSchema()) + props.Set("maxTime", integerSchema()) + props.Set("stats", schemaRef("#/components/schemas/BlockStats")) + props.Set("compaction", schemaRef("#/components/schemas/BlockMetaCompaction")) + props.Set("version", integerSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Block metadata.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"ulid", "minTime", "maxTime", "compaction", "version"}, 
+ Properties: props, + }) +} + +func (*OpenAPIBuilder) statusTSDBBlocksDataSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("blocks", base.CreateSchemaProxy(&base.Schema{ + Type: []string{"array"}, + Items: &base.DynamicValue[*base.SchemaProxy, bool]{A: schemaRef("#/components/schemas/BlockMeta")}, + })) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "TSDB blocks information.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"blocks"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) statusWALReplayDataSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("min", integerSchema()) + props.Set("max", integerSchema()) + props.Set("current", integerSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "WAL replay status.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"min", "max", "current"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) dataStructSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("name", stringSchema()) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + Description: "Generic data structure with a name field.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"name"}, + Properties: props, + }) +} + +func (*OpenAPIBuilder) notificationSchema() *base.SchemaProxy { + props := orderedmap.New[string, *base.SchemaProxy]() + props.Set("text", stringSchema()) + props.Set("date", base.CreateSchemaProxy(&base.Schema{Type: []string{"string"}, Format: "date-time"})) + props.Set("active", base.CreateSchemaProxy(&base.Schema{Type: []string{"boolean"}})) + + return base.CreateSchemaProxy(&base.Schema{ + Type: []string{"object"}, + 
Description: "Server notification.", + AdditionalProperties: &base.DynamicValue[*base.SchemaProxy, bool]{N: 1, B: false}, + Required: []string{"text", "date", "active"}, + Properties: props, + }) +} diff --git a/web/api/v1/openapi_test.go b/web/api/v1/openapi_test.go new file mode 100644 index 0000000000..0d2f5cc83e --- /dev/null +++ b/web/api/v1/openapi_test.go @@ -0,0 +1,289 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/common/promslog" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v2" +) + +// TestOpenAPIHTTPHandler verifies that the OpenAPI endpoint serves a valid specification +// with correct headers, structure conforming to OpenAPI 3.1 standards, and consistent responses. +func TestOpenAPIHTTPHandler(t *testing.T) { + builder := NewOpenAPIBuilder(OpenAPIOptions{}, promslog.NewNopLogger()) + + // First request. + req1 := httptest.NewRequest(http.MethodGet, "/api/v1/openapi.yaml", nil) + rec1 := httptest.NewRecorder() + builder.ServeOpenAPI(rec1, req1) + + // Verify status code and headers. 
+ require.Equal(t, http.StatusOK, rec1.Code) + require.True(t, strings.HasPrefix(rec1.Header().Get("Content-Type"), "application/yaml"), "Content-Type should start with application/yaml") + require.Equal(t, "no-cache, no-store, must-revalidate", rec1.Header().Get("Cache-Control")) + + // Verify it is valid YAML. + var spec map[string]any + err := yaml.Unmarshal(rec1.Body.Bytes(), &spec) + require.NoError(t, err) + + // Verify structure. + require.Contains(t, spec, "openapi") + require.Contains(t, spec, "info") + require.Contains(t, spec, "paths") + require.Contains(t, spec, "components") + + // Verify OpenAPI version (default is 3.1.0). + require.Equal(t, "3.1.0", spec["openapi"]) + + // Verify info section. + info, ok := spec["info"].(map[any]any) + require.True(t, ok, "info should be a map") + require.Equal(t, "Prometheus API", info["title"]) + + // Verify paths exist. + paths, ok := spec["paths"].(map[any]any) + require.True(t, ok, "paths should be a map") + require.NotEmpty(t, paths, "paths should not be empty") + + // Second request to verify response consistency. + req2 := httptest.NewRequest(http.MethodGet, "/api/v1/openapi.yaml", nil) + rec2 := httptest.NewRecorder() + builder.ServeOpenAPI(rec2, req2) + + // Both responses should be identical. + require.Equal(t, rec1.Body.String(), rec2.Body.String()) +} + +// TestOpenAPIPathFiltering verifies that the IncludePaths option correctly filters +// which API paths are included in the generated specification. 
+func TestOpenAPIPathFiltering(t *testing.T) { + tests := []struct { + name string + includePaths []string + wantPaths []string + excludePaths []string + }{ + { + name: "no filter includes all", + includePaths: nil, + wantPaths: []string{"/query", "/labels", "/alerts", "/targets"}, + }, + { + name: "filter query paths", + includePaths: []string{"/query"}, + wantPaths: []string{"/query", "/query_range", "/query_exemplars"}, + excludePaths: []string{"/labels", "/alerts", "/targets"}, + }, + { + name: "filter status paths", + includePaths: []string{"/status"}, + wantPaths: []string{"/status/config", "/status/flags", "/status/runtimeinfo"}, + excludePaths: []string{"/query", "/alerts", "/targets"}, + }, + { + name: "filter multiple prefixes", + includePaths: []string{"/label", "/series"}, + wantPaths: []string{"/labels", "/label/{name}/values", "/series"}, + excludePaths: []string{"/query", "/alerts", "/targets"}, + }, + { + name: "exact path match", + includePaths: []string{"/alerts"}, + wantPaths: []string{"/alerts"}, + excludePaths: []string{"/alertmanagers", "/query"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + builder := NewOpenAPIBuilder(OpenAPIOptions{ + IncludePaths: tc.includePaths, + }, promslog.NewNopLogger()) + + req := httptest.NewRequest(http.MethodGet, "/api/v1/openapi.yaml", nil) + rec := httptest.NewRecorder() + builder.ServeOpenAPI(rec, req) + + require.Equal(t, http.StatusOK, rec.Code) + + var spec map[string]any + err := yaml.Unmarshal(rec.Body.Bytes(), &spec) + require.NoError(t, err) + + paths, ok := spec["paths"].(map[any]any) + require.True(t, ok, "paths should be a map") + + for _, want := range tc.wantPaths { + require.Contains(t, paths, want) + } + + for _, exclude := range tc.excludePaths { + require.NotContains(t, paths, exclude) + } + }) + } +} + +// TestOpenAPISchemaCompleteness verifies that all referenced schemas in paths +// are defined in the components/schemas section of the specification. 
+func TestOpenAPISchemaCompleteness(t *testing.T) { + builder := NewOpenAPIBuilder(OpenAPIOptions{}, promslog.NewNopLogger()) + + req := httptest.NewRequest(http.MethodGet, "/api/v1/openapi.yaml", nil) + rec := httptest.NewRecorder() + builder.ServeOpenAPI(rec, req) + + var spec map[string]any + err := yaml.Unmarshal(rec.Body.Bytes(), &spec) + require.NoError(t, err) + + components, ok := spec["components"].(map[any]any) + require.True(t, ok, "components should be a map") + + schemas, ok := components["schemas"].(map[any]any) + require.True(t, ok, "schemas should be a map") + + // Verify essential schemas are present. + essentialSchemas := []string{ + "Error", + "Labels", + "QueryOutputBody", + "LabelsOutputBody", + "SeriesOutputBody", + "TargetsOutputBody", + "AlertsOutputBody", + "RulesOutputBody", + "StatusConfigOutputBody", + "StatusFlagsOutputBody", + "PrometheusVersion", + } + + for _, schema := range essentialSchemas { + require.Contains(t, schemas, schema) + } +} + +// TODO: Add test to verify all routes from api.go Register() are covered in OpenAPI spec. +// Consider wrapping Router to track registered paths and cross-check with OpenAPI paths. + +// TestOpenAPIShouldIncludePath verifies the shouldIncludePath method correctly +// matches paths against the IncludePaths filter configuration. 
+func TestOpenAPIShouldIncludePath(t *testing.T) { + tests := []struct { + name string + includePaths []string + path string + expected bool + }{ + { + name: "empty filter includes all", + includePaths: nil, + path: "/query", + expected: true, + }, + { + name: "exact match", + includePaths: []string{"/query"}, + path: "/query", + expected: true, + }, + { + name: "prefix match", + includePaths: []string{"/query"}, + path: "/query_range", + expected: true, + }, + { + name: "no match", + includePaths: []string{"/query"}, + path: "/labels", + expected: false, + }, + { + name: "multiple filters with match", + includePaths: []string{"/labels", "/series"}, + path: "/series", + expected: true, + }, + { + name: "multiple filters without match", + includePaths: []string{"/labels", "/series"}, + path: "/query", + expected: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + builder := &OpenAPIBuilder{ + options: OpenAPIOptions{ + IncludePaths: tc.includePaths, + }, + } + + result := builder.shouldIncludePath(tc.path) + require.Equal(t, tc.expected, result) + }) + } +} + +// TestOpenAPIVersionConsistency verifies that both OpenAPI versions are properly generated +// and that 3.2 has exactly one more path than 3.1 (/notifications/live). +func TestOpenAPIVersionConsistency(t *testing.T) { + builder := NewOpenAPIBuilder(OpenAPIOptions{}, promslog.NewNopLogger()) + + // Fetch OpenAPI 3.1 spec (default). + req31 := httptest.NewRequest(http.MethodGet, "/api/v1/openapi.yaml", nil) + rec31 := httptest.NewRecorder() + builder.ServeOpenAPI(rec31, req31) + + require.Equal(t, http.StatusOK, rec31.Code) + + // Fetch OpenAPI 3.2 spec. + req32 := httptest.NewRequest(http.MethodGet, "/api/v1/openapi.yaml?openapi_version=3.2", nil) + rec32 := httptest.NewRecorder() + builder.ServeOpenAPI(rec32, req32) + + require.Equal(t, http.StatusOK, rec32.Code) + + // Parse both specs. 
+ var spec31, spec32 map[string]any + err := yaml.Unmarshal(rec31.Body.Bytes(), &spec31) + require.NoError(t, err) + err = yaml.Unmarshal(rec32.Body.Bytes(), &spec32) + require.NoError(t, err) + + // Verify versions are different. + require.Equal(t, "3.1.0", spec31["openapi"]) + require.Equal(t, "3.2.0", spec32["openapi"]) + + // Verify /notifications/live is only in 3.2. + paths31 := spec31["paths"].(map[any]any) + paths32 := spec32["paths"].(map[any]any) + + require.NotContains(t, paths31, "/notifications/live") + + require.Contains(t, paths32, "/notifications/live") + + // Verify 3.2 has exactly one more path than 3.1. + require.Len(t, paths32, len(paths31)+1, + "OpenAPI 3.2 should have exactly one more path than 3.1") +} diff --git a/web/api/v1/test_helpers.go b/web/api/v1/test_helpers.go new file mode 100644 index 0000000000..2662b0c84b --- /dev/null +++ b/web/api/v1/test_helpers.go @@ -0,0 +1,157 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + "context" + "testing" + "time" + + "github.com/prometheus/common/route" + + "github.com/prometheus/prometheus/web/api/testhelpers" +) + +// newTestAPI creates a new API instance for testing using testhelpers. +func newTestAPI(t *testing.T, cfg testhelpers.APIConfig) *testhelpers.APIWrapper { + t.Helper() + + params := testhelpers.PrepareAPI(t, cfg) + + // Adapt the testhelpers interfaces to v1 interfaces. 
+ api := NewAPI( + params.QueryEngine, + params.Queryable, + nil, // appendable + params.ExemplarQueryable, + func(ctx context.Context) ScrapePoolsRetriever { + return adaptScrapePoolsRetriever(params.ScrapePoolsRetriever(ctx)) + }, + func(ctx context.Context) TargetRetriever { + return adaptTargetRetriever(params.TargetRetriever(ctx)) + }, + func(ctx context.Context) AlertmanagerRetriever { + return adaptAlertmanagerRetriever(params.AlertmanagerRetriever(ctx)) + }, + params.ConfigFunc, + params.FlagsMap, + GlobalURLOptions{}, + params.ReadyFunc, + adaptTSDBAdminStats(params.TSDBAdmin), + params.DBDir, + false, // enableAdmin + params.Logger, + func(ctx context.Context) RulesRetriever { + return adaptRulesRetriever(params.RulesRetriever(ctx)) + }, + 0, // remoteReadSampleLimit + 0, // remoteReadConcurrencyLimit + 0, // remoteReadMaxBytesInFrame + false, // isAgent + nil, // corsOrigin + func() (RuntimeInfo, error) { + info, err := params.RuntimeInfoFunc() + return RuntimeInfo{ + StartTime: info.StartTime, + CWD: info.CWD, + Hostname: info.Hostname, + ServerTime: info.ServerTime, + ReloadConfigSuccess: info.ReloadConfigSuccess, + LastConfigTime: info.LastConfigTime, + CorruptionCount: info.CorruptionCount, + GoroutineCount: info.GoroutineCount, + GOMAXPROCS: info.GOMAXPROCS, + GOMEMLIMIT: info.GOMEMLIMIT, + GOGC: info.GOGC, + GODEBUG: info.GODEBUG, + StorageRetention: info.StorageRetention, + }, err + }, + &PrometheusVersion{ + Version: params.BuildInfo.Version, + Revision: params.BuildInfo.Revision, + Branch: params.BuildInfo.Branch, + BuildUser: params.BuildInfo.BuildUser, + BuildDate: params.BuildInfo.BuildDate, + GoVersion: params.BuildInfo.GoVersion, + }, + params.NotificationsGetter, + params.NotificationsSub, + params.Gatherer, + params.Registerer, + nil, // statsRenderer + false, // rwEnabled + nil, // acceptRemoteWriteProtoMsgs + false, // otlpEnabled + false, // otlpDeltaToCumulative + false, // otlpNativeDeltaIngestion + false, // stZeroIngestionEnabled + 
5*time.Minute, // lookbackDelta + false, // enableTypeAndUnitLabels + false, // appendMetadata + nil, // overrideErrorCode + nil, // featureRegistry + OpenAPIOptions{}, // openAPIOptions + ) + + // Register routes. + router := route.New() + api.Register(router.WithPrefix("/api/v1")) + + return &testhelpers.APIWrapper{ + Handler: router, + } +} + +// Adapter functions to convert testhelpers interfaces to v1 interfaces. + +type rulesRetrieverAdapter struct { + testhelpers.RulesRetriever +} + +func adaptRulesRetriever(r testhelpers.RulesRetriever) RulesRetriever { + return &rulesRetrieverAdapter{r} +} + +type targetRetrieverAdapter struct { + testhelpers.TargetRetriever +} + +func adaptTargetRetriever(t testhelpers.TargetRetriever) TargetRetriever { + return &targetRetrieverAdapter{t} +} + +type scrapePoolsRetrieverAdapter struct { + testhelpers.ScrapePoolsRetriever +} + +func adaptScrapePoolsRetriever(s testhelpers.ScrapePoolsRetriever) ScrapePoolsRetriever { + return &scrapePoolsRetrieverAdapter{s} +} + +type alertmanagerRetrieverAdapter struct { + testhelpers.AlertmanagerRetriever +} + +func adaptAlertmanagerRetriever(a testhelpers.AlertmanagerRetriever) AlertmanagerRetriever { + return &alertmanagerRetrieverAdapter{a} +} + +type tsdbAdminStatsAdapter struct { + testhelpers.TSDBAdminStats +} + +func adaptTSDBAdminStats(t testhelpers.TSDBAdminStats) TSDBAdminStats { + return &tsdbAdminStatsAdapter{t} +} diff --git a/web/api/v1/testdata/openapi_3.1_golden.yaml b/web/api/v1/testdata/openapi_3.1_golden.yaml new file mode 100644 index 0000000000..c69694b530 --- /dev/null +++ b/web/api/v1/testdata/openapi_3.1_golden.yaml @@ -0,0 +1,4401 @@ +openapi: 3.1.0 +info: + title: Prometheus API + description: Prometheus is an Open-Source monitoring system with a dimensional data model, flexible query language, efficient time series database and modern alerting approach. 
+ contact: + name: Prometheus Community + url: https://prometheus.io/community/ + version: 0.0.1-undefined +servers: + - url: /api/v1 +paths: + /query: + get: + tags: + - query + summary: Evaluate an instant query + operationId: query + parameters: + - name: limit + in: query + description: The maximum number of metrics to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + - name: time + in: query + description: The evaluation timestamp (optional, defaults to current time). + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: query + in: query + description: The PromQL query to execute. + required: true + explode: false + schema: + type: string + examples: + example: + value: up + - name: timeout + in: query + description: Evaluation timeout. Optional. Defaults to and is capped by the value of the -query.timeout flag. + required: false + explode: false + schema: + type: string + examples: + example: + value: 30s + - name: lookback_delta + in: query + description: Override the lookback period for this query. Optional. + required: false + explode: false + schema: + type: string + examples: + example: + value: 5m + - name: stats + in: query + description: When provided, include query statistics in the response. The special value 'all' enables more comprehensive statistics. + required: false + explode: false + schema: + type: string + examples: + example: + value: all + responses: + "200": + description: Query executed successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/QueryOutputBody' + examples: + vectorResult: + summary: 'Instant vector query: up' + value: {"status": "success", "data": {"resultType": "vector", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "value": [1767436620, "1"]}, {"metric": {"__name__": "up", "env": "demo", "instance": "demo.prometheus.io:9093", "job": "alertmanager"}, "value": [1767436620, "1"]}]}} + scalarResult: + summary: 'Scalar query: scalar(42)' + value: + data: + result: + - 1767436620 + - "42" + resultType: scalar + status: success + matrixResult: + summary: 'Range vector query: up[5m]' + value: {"status": "success", "data": {"resultType": "matrix", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "values": [[1767436320, "1"], [1767436620, "1"]]}]}} + default: + description: Error executing query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Evaluate an instant query + operationId: query-post + requestBody: + description: Submit an instant query. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/QueryPostInputBody' + examples: + simpleQuery: + summary: Simple instant query + value: + query: up + queryWithTime: + summary: Query with specific timestamp + value: + query: up{job="prometheus"} + time: "2026-01-02T13:37:00.000Z" + queryWithLimit: + summary: Query with limit and statistics + value: + limit: 100 + query: rate(prometheus_http_requests_total{handler="/api/v1/query"}[5m]) + stats: all + required: true + responses: + "200": + description: Instant query executed successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/QueryOutputBody' + examples: + vectorResult: + summary: 'Instant vector query: up' + value: {"status": "success", "data": {"resultType": "vector", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "value": [1767436620, "1"]}, {"metric": {"__name__": "up", "env": "demo", "instance": "demo.prometheus.io:9093", "job": "alertmanager"}, "value": [1767436620, "1"]}]}} + scalarResult: + summary: 'Scalar query: scalar(42)' + value: + data: + result: + - 1767436620 + - "42" + resultType: scalar + status: success + matrixResult: + summary: 'Range vector query: up[5m]' + value: {"status": "success", "data": {"resultType": "matrix", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "values": [[1767436320, "1"], [1767436620, "1"]]}]}} + default: + description: Error executing instant query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /query_range: + get: + tags: + - query + summary: Evaluate a range query + operationId: query-range + parameters: + - name: limit + in: query + description: The maximum number of metrics to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + - name: start + in: query + description: The start time of the query. + required: true + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: The end time of the query. 
+ required: true + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: step + in: query + description: The step size of the query. + required: true + explode: false + schema: + type: string + examples: + example: + value: 15s + - name: query + in: query + description: The query to execute. + required: true + explode: false + schema: + type: string + examples: + example: + value: rate(prometheus_http_requests_total{handler="/api/v1/query"}[5m]) + - name: timeout + in: query + description: Evaluation timeout. Optional. Defaults to and is capped by the value of the -query.timeout flag. + required: false + explode: false + schema: + type: string + examples: + example: + value: 30s + - name: lookback_delta + in: query + description: Override the lookback period for this query. Optional. + required: false + explode: false + schema: + type: string + examples: + example: + value: 5m + - name: stats + in: query + description: When provided, include query statistics in the response. The special value 'all' enables more comprehensive statistics. + required: false + explode: false + schema: + type: string + examples: + example: + value: all + responses: + "200": + description: Range query executed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryRangeOutputBody' + examples: + matrixResult: + summary: 'Range query: rate(prometheus_http_requests_total[5m])' + value: {"status": "success", "data": {"resultType": "matrix", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "values": [[1767433020, "1"], [1767434820, "1"], [1767436620, "1"]]}]}} + default: + description: Error executing range query. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Evaluate a range query + operationId: query-range-post + requestBody: + description: Submit a range query. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/QueryRangePostInputBody' + examples: + basicRange: + summary: Basic range query + value: + end: "2026-01-02T13:37:00.000Z" + query: up + start: "2026-01-02T12:37:00.000Z" + step: 15s + rateQuery: + summary: Rate calculation over time range + value: + end: "2026-01-02T13:37:00.000Z" + query: rate(prometheus_http_requests_total{handler="/api/v1/query"}[5m]) + start: "2026-01-02T12:37:00.000Z" + step: 30s + timeout: 30s + required: true + responses: + "200": + description: Range query executed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryRangeOutputBody' + examples: + matrixResult: + summary: 'Range query: rate(prometheus_http_requests_total[5m])' + value: {"status": "success", "data": {"resultType": "matrix", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "values": [[1767433020, "1"], [1767434820, "1"], [1767436620, "1"]]}]}} + default: + description: Error executing range query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /query_exemplars: + get: + tags: + - query + summary: Query exemplars + operationId: query-exemplars + parameters: + - name: start + in: query + description: Start timestamp for exemplars query. 
+ required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for exemplars query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: query + in: query + description: PromQL query to extract exemplars for. + required: true + explode: false + schema: + type: string + examples: + example: + value: prometheus_http_requests_total + responses: + "200": + description: Exemplars retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryExemplarsOutputBody' + examples: + exemplarsResult: + summary: Exemplars for a metric with trace IDs + value: + data: + - exemplars: + - labels: + traceID: abc123def456 + timestamp: 1.689956451781e+09 + value: "1.5" + seriesLabels: + __name__: http_requests_total + job: api-server + method: GET + status: success + default: + description: Error retrieving exemplars. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Query exemplars + operationId: query-exemplars-post + requestBody: + description: Submit an exemplars query. This endpoint accepts the same parameters as the GET version. 
+ content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/QueryExemplarsPostInputBody' + examples: + basicExemplar: + summary: Query exemplars for a metric + value: + query: prometheus_http_requests_total + exemplarWithTimeRange: + summary: Exemplars within specific time range + value: + end: "2026-01-02T13:37:00.000Z" + query: prometheus_http_requests_total{job="prometheus"} + start: "2026-01-02T12:37:00.000Z" + required: true + responses: + "200": + description: Exemplars query completed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryExemplarsOutputBody' + examples: + exemplarsResult: + summary: Exemplars for a metric with trace IDs + value: + data: + - exemplars: + - labels: + traceID: abc123def456 + timestamp: 1.689956451781e+09 + value: "1.5" + seriesLabels: + __name__: http_requests_total + job: api-server + method: GET + status: success + default: + description: Error processing exemplars query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /format_query: + get: + tags: + - query + summary: Format a PromQL query + operationId: format-query + parameters: + - name: query + in: query + description: PromQL expression to format. + required: true + explode: false + schema: + type: string + examples: + example: + value: sum(rate(http_requests_total[5m])) by (job) + responses: + "200": + description: Query formatted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/FormatQueryOutputBody' + examples: + formattedQuery: + summary: Formatted PromQL query + value: + data: sum by(job, status) (rate(http_requests_total[5m])) + status: success + default: + description: Error formatting query. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Format a PromQL query + operationId: format-query-post + requestBody: + description: Submit a PromQL query to format. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/FormatQueryPostInputBody' + examples: + simpleFormat: + summary: Format a simple query + value: + query: up{job="prometheus"} + complexFormat: + summary: Format a complex query + value: + query: sum(rate(http_requests_total[5m])) by (job, status) + required: true + responses: + "200": + description: Query formatting completed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/FormatQueryOutputBody' + examples: + formattedQuery: + summary: Formatted PromQL query + value: + data: sum by(job, status) (rate(http_requests_total[5m])) + status: success + default: + description: Error formatting query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /parse_query: + get: + tags: + - query + summary: Parse a PromQL query + operationId: parse-query + parameters: + - name: query + in: query + description: PromQL expression to parse. + required: true + explode: false + schema: + type: string + examples: + example: + value: up{job="prometheus"} + responses: + "200": + description: Query parsed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ParseQueryOutputBody' + examples: + parsedQuery: + summary: Parsed PromQL expression tree + value: + data: + resultType: vector + status: success + default: + description: Error parsing query. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Parse a PromQL query + operationId: parse-query-post + requestBody: + description: Submit a PromQL query to parse. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/ParseQueryPostInputBody' + examples: + simpleParse: + summary: Parse a simple query + value: + query: up + complexParse: + summary: Parse a complex query + value: + query: rate(http_requests_total{job="api"}[5m]) + required: true + responses: + "200": + description: Query parsed successfully via POST. + content: + application/json: + schema: + $ref: '#/components/schemas/ParseQueryOutputBody' + examples: + parsedQuery: + summary: Parsed PromQL expression tree + value: + data: + resultType: vector + status: success + default: + description: Error parsing query via POST. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /labels: + get: + tags: + - labels + summary: Get label names + operationId: labels + parameters: + - name: start + in: query + description: Start timestamp for label names query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for label names query. 
+ required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: match[] + in: query + description: Series selector argument. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{job="prometheus"}' + - name: limit + in: query + description: Maximum number of label names to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + responses: + "200": + description: Label names retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/LabelsOutputBody' + examples: + labelNames: + summary: List of label names + value: + data: + - __name__ + - active + - address + - alertmanager + - alertname + - alertstate + - backend + - branch + - code + - collector + - component + - device + - env + - endpoint + - fstype + - handler + - instance + - job + - le + - method + - mode + - name + status: success + default: + description: Error retrieving label names. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - labels + summary: Get label names + operationId: labels-post + requestBody: + description: Submit a label names query. This endpoint accepts the same parameters as the GET version. 
+ content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/LabelsPostInputBody' + examples: + allLabels: + summary: Get all label names + value: {} + labelsWithTimeRange: + summary: Get label names within time range + value: + end: "2026-01-02T13:37:00.000Z" + start: "2026-01-02T12:37:00.000Z" + labelsWithMatch: + summary: Get label names matching series selector + value: + match[]: + - up + - process_start_time_seconds{job="prometheus"} + required: true + responses: + "200": + description: Label names retrieved successfully via POST. + content: + application/json: + schema: + $ref: '#/components/schemas/LabelsOutputBody' + examples: + labelNames: + summary: List of label names + value: + data: + - __name__ + - active + - address + - alertmanager + - alertname + - alertstate + - backend + - branch + - code + - collector + - component + - device + - env + - endpoint + - fstype + - handler + - instance + - job + - le + - method + - mode + - name + status: success + default: + description: Error retrieving label names via POST. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /label/{name}/values: + get: + tags: + - labels + summary: Get label values + operationId: label-values + parameters: + - name: name + in: path + description: Label name. + required: true + schema: + type: string + - name: start + in: query + description: Start timestamp for label values query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. 
+ examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for label values query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: match[] + in: query + description: Series selector argument. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{job="prometheus"}' + - name: limit + in: query + description: Maximum number of label values to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 1000 + responses: + "200": + description: Label values retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/LabelValuesOutputBody' + examples: + labelValues: + summary: List of values for a label + value: + data: + - alertmanager + - blackbox + - caddy + - cadvisor + - grafana + - node + - prometheus + - random + status: success + default: + description: Error retrieving label values. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /series: + get: + tags: + - series + summary: Find series by label matchers + operationId: series + parameters: + - name: start + in: query + description: Start timestamp for series query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. 
+ description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for series query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: match[] + in: query + description: Series selector argument. + required: true + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{job="prometheus"}' + - name: limit + in: query + description: Maximum number of series to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + responses: + "200": + description: Series returned matching the provided label matchers. + content: + application/json: + schema: + $ref: '#/components/schemas/SeriesOutputBody' + examples: + seriesList: + summary: List of series matching the selector + value: + data: + - __name__: up + env: demo + instance: demo.prometheus.io:8080 + job: cadvisor + - __name__: up + env: demo + instance: demo.prometheus.io:9093 + job: alertmanager + - __name__: up + env: demo + instance: demo.prometheus.io:9100 + job: node + - __name__: up + instance: demo.prometheus.io:3000 + job: grafana + - __name__: up + instance: demo.prometheus.io:8996 + job: random + status: success + default: + description: Error retrieving series. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - series + summary: Find series by label matchers + operationId: series-post + requestBody: + description: Submit a series query. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/SeriesPostInputBody' + examples: + seriesMatch: + summary: Find series by label matchers + value: + match[]: + - up + seriesWithTimeRange: + summary: Find series with time range + value: + end: "2026-01-02T13:37:00.000Z" + match[]: + - up + - process_cpu_seconds_total{job="prometheus"} + start: "2026-01-02T12:37:00.000Z" + required: true + responses: + "200": + description: Series returned matching the provided label matchers via POST. + content: + application/json: + schema: + $ref: '#/components/schemas/SeriesOutputBody' + examples: + seriesList: + summary: List of series matching the selector + value: + data: + - __name__: up + env: demo + instance: demo.prometheus.io:8080 + job: cadvisor + - __name__: up + env: demo + instance: demo.prometheus.io:9093 + job: alertmanager + - __name__: up + env: demo + instance: demo.prometheus.io:9100 + job: node + - __name__: up + instance: demo.prometheus.io:3000 + job: grafana + - __name__: up + instance: demo.prometheus.io:8996 + job: random + status: success + default: + description: Error retrieving series via POST. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + delete: + tags: + - series + summary: Delete series + description: 'Delete series matching selectors. Note: This is deprecated, use POST /admin/tsdb/delete_series instead.' 
+ operationId: delete-series + responses: + "200": + description: Series marked for deletion. + content: + application/json: + schema: + $ref: '#/components/schemas/SeriesDeleteOutputBody' + examples: + seriesDeleted: + summary: Series marked for deletion + value: + status: success + default: + description: Error deleting series. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /metadata: + get: + tags: + - metadata + summary: Get metadata + operationId: get-metadata + parameters: + - name: limit + in: query + description: The maximum number of metrics to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + - name: limit_per_metric + in: query + description: The maximum number of metadata entries per metric. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 10 + - name: metric + in: query + description: A metric name to filter metadata for. + required: false + explode: false + schema: + type: string + examples: + example: + value: http_requests_total + responses: + "200": + description: Metric metadata retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/MetadataOutputBody' + examples: + metricMetadata: + summary: Metadata for metrics + value: + data: + go_gc_stack_starting_size_bytes: + - help: The stack size of new goroutines. Sourced from /gc/stack/starting-size:bytes. + type: gauge + unit: "" + prometheus_rule_group_iterations_missed_total: + - help: The total number of rule group evaluations missed due to slow rule group evaluation. + type: counter + unit: "" + prometheus_sd_updates_total: + - help: Total number of update events sent to the SD consumers. 
+ type: counter + unit: "" + status: success + default: + description: Error retrieving metadata. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /scrape_pools: + get: + tags: + - targets + summary: Get scrape pools + operationId: get-scrape-pools + responses: + "200": + description: Scrape pools retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ScrapePoolsOutputBody' + examples: + scrapePoolsList: + summary: List of scrape pool names + value: + data: + scrapePools: + - alertmanager + - blackbox + - caddy + - cadvisor + - grafana + - node + - prometheus + - random + status: success + default: + description: Error retrieving scrape pools. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /targets: + get: + tags: + - targets + summary: Get targets + operationId: get-targets + parameters: + - name: scrapePool + in: query + description: Filter targets by scrape pool name. + required: false + explode: false + schema: + type: string + examples: + example: + value: prometheus + - name: state + in: query + description: 'Filter by state: active, dropped, or any.' + required: false + explode: false + schema: + type: string + examples: + example: + value: active + responses: + "200": + description: Target discovery information retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/TargetsOutputBody' + examples: + targetsList: + summary: Active and dropped targets + value: + data: + activeTargets: + - discoveredLabels: + __address__: demo.prometheus.io:9093 + __meta_filepath: /etc/prometheus/file_sd/alertmanager.yml + __metrics_path__: /metrics + __scheme__: http + env: demo + job: alertmanager + globalUrl: http://demo.prometheus.io:9093/metrics + health: up + labels: + env: demo + instance: demo.prometheus.io:9093 + job: alertmanager + lastError: "" + lastScrape: "2026-01-02T13:36:40.200Z" + lastScrapeDuration: 0.006576866 + scrapeInterval: 15s + scrapePool: alertmanager + scrapeTimeout: 10s + scrapeUrl: http://demo.prometheus.io:9093/metrics + droppedTargetCounts: + alertmanager: 0 + blackbox: 0 + caddy: 0 + cadvisor: 0 + grafana: 0 + node: 0 + prometheus: 0 + random: 0 + droppedTargets: [] + status: success + default: + description: Error retrieving targets. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /targets/metadata: + get: + tags: + - targets + summary: Get targets metadata + operationId: get-targets-metadata + parameters: + - name: match_target + in: query + description: Label selector to filter targets. + required: false + explode: false + schema: + type: string + examples: + example: + value: '{job="prometheus"}' + - name: metric + in: query + description: Metric name to retrieve metadata for. + required: false + explode: false + schema: + type: string + examples: + example: + value: http_requests_total + - name: limit + in: query + description: Maximum number of targets to match. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 10 + responses: + "200": + description: Target metadata retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/TargetMetadataOutputBody' + examples: + targetMetadata: + summary: Metadata for targets + value: + data: + - help: The current health status of the target + metric: up + target: + instance: localhost:9090 + job: prometheus + type: gauge + unit: "" + status: success + default: + description: Error retrieving target metadata. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /targets/relabel_steps: + get: + tags: + - targets + summary: Get targets relabel steps + operationId: get-targets-relabel-steps + parameters: + - name: scrapePool + in: query + description: Name of the scrape pool. + required: true + explode: false + schema: + type: string + examples: + example: + value: prometheus + - name: labels + in: query + description: JSON-encoded labels to apply relabel rules to. + required: true + explode: false + schema: + type: string + examples: + example: + value: '{"__address__":"localhost:9090","job":"prometheus"}' + responses: + "200": + description: Relabel steps retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/TargetRelabelStepsOutputBody' + examples: + relabelSteps: + summary: Relabel steps for a target + value: + data: + steps: + - keep: true + output: + __address__: localhost:9090 + instance: localhost:9090 + job: prometheus + rule: + action: replace + regex: (.*) + replacement: $1 + source_labels: + - __address__ + target_label: instance + status: success + default: + description: Error retrieving relabel steps. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /rules: + get: + tags: + - rules + summary: Get alerting and recording rules + operationId: rules + parameters: + - name: type + in: query + description: 'Filter by rule type: alert or record.' + required: false + explode: false + schema: + type: string + examples: + example: + value: alert + - name: rule_name[] + in: query + description: Filter by rule name. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - HighErrorRate + - name: rule_group[] + in: query + description: Filter by rule group name. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - example_alerts + - name: file[] + in: query + description: Filter by file path. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - /etc/prometheus/rules.yml + - name: match[] + in: query + description: Label matchers to filter rules. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{severity="critical"}' + - name: exclude_alerts + in: query + description: Exclude active alerts from response. + required: false + explode: false + schema: + type: string + examples: + example: + value: "false" + - name: group_limit + in: query + description: Maximum number of rule groups to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + - name: group_next_token + in: query + description: Pagination token for next page. + required: false + explode: false + schema: + type: string + examples: + example: + value: abc123 + responses: + "200": + description: Rules retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/RulesOutputBody' + examples: + ruleGroups: + summary: Alerting and recording rules + value: + data: + groups: + - evaluationTime: 0.000561635 + file: /etc/prometheus/rules/ansible_managed.yml + interval: 15 + lastEvaluation: "2026-01-02T13:36:56.874Z" + limit: 0 + name: ansible managed alert rules + rules: + - annotations: + description: This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty. + summary: Ensure entire alerting pipeline is functional + duration: 600 + evaluationTime: 0.000356688 + health: ok + keepFiringFor: 0 + labels: + severity: warning + lastEvaluation: "2026-01-02T13:36:56.874Z" + name: Watchdog + query: vector(1) + state: firing + type: alerting + status: success + default: + description: Error retrieving rules. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /alerts: + get: + tags: + - alerts + summary: Get active alerts + operationId: alerts + responses: + "200": + description: Active alerts retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/AlertsOutputBody' + examples: + activeAlerts: + summary: Currently active alerts + value: + data: + alerts: + - activeAt: "2026-01-02T13:30:00.000Z" + annotations: + description: This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. 
There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty. + summary: Ensure entire alerting pipeline is functional + labels: + alertname: Watchdog + severity: warning + state: firing + value: "1e+00" + status: success + default: + description: Error retrieving alerts. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /alertmanagers: + get: + tags: + - alerts + summary: Get Alertmanager discovery + operationId: alertmanagers + responses: + "200": + description: Alertmanager targets retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/AlertmanagersOutputBody' + examples: + alertmanagerDiscovery: + summary: Alertmanager discovery results + value: + data: + activeAlertmanagers: + - url: http://demo.prometheus.io:9093/api/v2/alerts + droppedAlertmanagers: [] + status: success + default: + description: Error retrieving Alertmanager targets. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/config: + get: + tags: + - status + summary: Get status config + operationId: get-status-config + responses: + "200": + description: Configuration retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/StatusConfigOutputBody' + examples: + configYAML: + summary: Prometheus configuration + value: + data: + yaml: | + global: + scrape_interval: 15s + scrape_timeout: 10s + evaluation_interval: 15s + external_labels: + environment: demo-prometheus-io + alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - demo.prometheus.io:9093 + rule_files: + - /etc/prometheus/rules/*.yml + status: success + default: + description: Error retrieving configuration. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/runtimeinfo: + get: + tags: + - status + summary: Get status runtimeinfo + operationId: get-status-runtimeinfo + responses: + "200": + description: Runtime information retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/StatusRuntimeInfoOutputBody' + examples: + runtimeInfo: + summary: Runtime information + value: + data: + CWD: / + GODEBUG: "" + GOGC: "75" + GOMAXPROCS: 2 + GOMEMLIMIT: 3703818240 + corruptionCount: 0 + goroutineCount: 88 + hostname: demo-prometheus-io + lastConfigTime: "2026-01-01T13:37:00.000Z" + reloadConfigSuccess: true + serverTime: "2026-01-02T13:37:00.000Z" + startTime: "2026-01-01T13:37:00.000Z" + storageRetention: 31d + status: success + default: + description: Error retrieving runtime information. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/buildinfo: + get: + tags: + - status + summary: Get status buildinfo + operationId: get-status-buildinfo + responses: + "200": + description: Build information retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/StatusBuildInfoOutputBody' + examples: + buildInfo: + summary: Build information + value: + data: + branch: HEAD + buildDate: 20251030-07:26:10 + buildUser: root@08c890a84441 + goVersion: go1.25.3 + revision: 0a41f0000705c69ab8e0f9a723fc73e39ed62b07 + version: 3.7.3 + status: success + default: + description: Error retrieving build information. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/flags: + get: + tags: + - status + summary: Get status flags + operationId: get-status-flags + responses: + "200": + description: Command-line flags retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/StatusFlagsOutputBody' + examples: + flags: + summary: Command-line flags + value: + data: + agent: "false" + alertmanager.notification-queue-capacity: "10000" + config.file: /etc/prometheus/prometheus.yml + enable-feature: exemplar-storage,native-histograms + query.max-concurrency: "20" + query.timeout: 2m + storage.tsdb.path: /prometheus + storage.tsdb.retention.time: 15d + web.console.libraries: /usr/share/prometheus/console_libraries + web.console.templates: /usr/share/prometheus/consoles + web.enable-admin-api: "true" + web.enable-lifecycle: "true" + web.listen-address: 0.0.0.0:9090 + web.page-title: Prometheus Time Series Collection and Processing Server + status: success + default: + description: Error retrieving flags. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/tsdb: + get: + tags: + - status + summary: Get TSDB status + operationId: status-tsdb + parameters: + - name: limit + in: query + description: The maximum number of items to return per category. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 10 + responses: + "200": + description: TSDB status retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/StatusTSDBOutputBody' + examples: + tsdbStats: + summary: TSDB statistics + value: + data: + headStats: + chunkCount: 37525 + maxTime: 1767436620000 + minTime: 1767362400712 + numLabelPairs: 2512 + numSeries: 9925 + labelValueCountByLabelName: + - name: __name__ + value: 5 + - name: job + value: 3 + memoryInBytesByLabelName: + - name: __name__ + value: 1024 + - name: job + value: 512 + seriesCountByLabelValuePair: + - name: job=prometheus + value: 100 + - name: instance=localhost:9090 + value: 100 + seriesCountByMetricName: + - name: up + value: 100 + - name: http_requests_total + value: 500 + status: success + default: + description: Error retrieving TSDB status. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/tsdb/blocks: + get: + tags: + - status + summary: Get TSDB blocks information + operationId: status-tsdb-blocks + responses: + "200": + description: TSDB blocks information retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/StatusTSDBBlocksOutputBody' + examples: + tsdbBlocks: + summary: TSDB block information + value: + data: + blocks: + - compaction: + level: 4 + sources: + - 01KBCJ7TR8A4QAJ3AA1J651P5S + - 01KBCS3J0E34567YPB8Y5W0E24 + - 01KBCZZ9KRTYGG3E7HVQFGC3S3 + maxTime: 1764763200000 + minTime: 1764568801099 + stats: + numChunks: 1073962 + numSamples: 129505582 + numSeries: 10661 + ulid: 01KC4D6GXQA4CRHYKV78NEBVAE + version: 1 + status: success + default: + description: Error retrieving TSDB blocks. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/walreplay: + get: + tags: + - status + summary: Get status walreplay + operationId: get-status-walreplay + responses: + "200": + description: WAL replay status retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/StatusWALReplayOutputBody' + examples: + walReplay: + summary: WAL replay status + value: + data: + current: 3214 + max: 3214 + min: 3209 + status: success + default: + description: Error retrieving WAL replay status. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /admin/tsdb/delete_series: + put: + tags: + - admin + summary: Delete series matching selectors via PUT + description: Deletes data for a selection of series in a time range using PUT method. + operationId: deleteSeriesPut + parameters: + - name: match[] + in: query + description: Series selectors to identify series to delete. + required: true + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{__name__=~"test.*"}' + - name: start + in: query + description: Start timestamp for deletion. 
+ required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for deletion. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + responses: + "200": + description: Series deleted successfully via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteSeriesOutputBody' + examples: + deletionSuccess: + summary: Successful series deletion + value: + status: success + default: + description: Error deleting series via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - admin + summary: Delete series matching selectors + description: Deletes data for a selection of series in a time range. + operationId: deleteSeriesPost + parameters: + - name: match[] + in: query + description: Series selectors to identify series to delete. + required: true + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{__name__=~"test.*"}' + - name: start + in: query + description: Start timestamp for deletion. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. 
+ - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for deletion. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + responses: + "200": + description: Series deleted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteSeriesOutputBody' + examples: + deletionSuccess: + summary: Successful series deletion + value: + status: success + default: + description: Error deleting series. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /admin/tsdb/clean_tombstones: + put: + tags: + - admin + summary: Clean tombstones in the TSDB via PUT + description: Removes deleted data from disk and cleans up existing tombstones using PUT method. + operationId: cleanTombstonesPut + responses: + "200": + description: Tombstones cleaned successfully via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/CleanTombstonesOutputBody' + examples: + tombstonesCleaned: + summary: Tombstones cleaned successfully + value: + status: success + default: + description: Error cleaning tombstones via PUT. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - admin + summary: Clean tombstones in the TSDB + description: Removes deleted data from disk and cleans up existing tombstones. + operationId: cleanTombstonesPost + responses: + "200": + description: Tombstones cleaned successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/CleanTombstonesOutputBody' + examples: + tombstonesCleaned: + summary: Tombstones cleaned successfully + value: + status: success + default: + description: Error cleaning tombstones. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /admin/tsdb/snapshot: + put: + tags: + - admin + summary: Create a snapshot of the TSDB via PUT + description: Creates a snapshot of all current data using PUT method. + operationId: snapshotPut + parameters: + - name: skip_head + in: query + description: If true, do not snapshot data in the head block. + required: false + explode: false + schema: + type: string + examples: + example: + value: "false" + responses: + "200": + description: Snapshot created successfully via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/SnapshotOutputBody' + examples: + snapshotCreated: + summary: Snapshot created successfully + value: + data: + name: 20260102T133700Z-a1b2c3d4e5f67890 + status: success + default: + description: Error creating snapshot via PUT. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - admin + summary: Create a snapshot of the TSDB + description: Creates a snapshot of all current data. + operationId: snapshotPost + parameters: + - name: skip_head + in: query + description: If true, do not snapshot data in the head block. + required: false + explode: false + schema: + type: string + examples: + example: + value: "false" + responses: + "200": + description: Snapshot created successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/SnapshotOutputBody' + examples: + snapshotCreated: + summary: Snapshot created successfully + value: + data: + name: 20260102T133700Z-a1b2c3d4e5f67890 + status: success + default: + description: Error creating snapshot. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /read: + post: + tags: + - remote + summary: Remote read endpoint + description: Prometheus remote read endpoint for federated queries. Accepts and returns Protocol Buffer encoded data. + operationId: remoteRead + responses: + "204": + description: No Content + default: + description: Error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /write: + post: + tags: + - remote + summary: Remote write endpoint + description: Prometheus remote write endpoint for sending metrics. Accepts Protocol Buffer encoded write requests. 
+ operationId: remoteWrite + responses: + "204": + description: No Content + default: + description: Error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /otlp/v1/metrics: + post: + tags: + - otlp + summary: OTLP metrics write endpoint + description: OpenTelemetry Protocol metrics ingestion endpoint. Accepts OTLP/HTTP metrics in Protocol Buffer format. + operationId: otlpWrite + responses: + "204": + description: No Content + default: + description: Error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /notifications: + get: + tags: + - notifications + summary: Get notifications + operationId: get-notifications + responses: + "200": + description: Notifications retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/NotificationsOutputBody' + examples: + notifications: + summary: Server notifications + value: + data: + - active: true + date: "2026-01-02T16:14:50.046Z" + text: Configuration reload has failed. + status: success + default: + description: Error retrieving notifications. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /features: + get: + tags: + - features + summary: Get features + operationId: get-features + responses: + "200": + description: Feature flags retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/FeaturesOutputBody' + examples: + enabledFeatures: + summary: Enabled feature flags + value: + data: + - exemplar-storage + - remote-write-receiver + status: success + default: + description: Error retrieving features. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error +components: + schemas: + Error: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + errorType: + type: string + description: Type of error that occurred. + example: bad_data + error: + type: string + description: Human-readable error message. + example: invalid parameter + required: + - status + - errorType + - error + additionalProperties: false + description: Error response. + Labels: + type: object + additionalProperties: true + description: Label set represented as a key-value map. + QueryOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/QueryData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for instant query. + QueryRangeOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/QueryData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. 
+ required: + - status + - data + additionalProperties: false + description: Response body for range query. + QueryPostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The PromQL query to execute.' + example: up + time: + type: string + description: 'Form field: The evaluation timestamp (optional, defaults to current time).' + example: "2023-07-21T20:10:51.781Z" + limit: + type: integer + format: int64 + description: 'Form field: The maximum number of metrics to return.' + example: 100 + timeout: + type: string + description: 'Form field: Evaluation timeout (optional, defaults to and is capped by the value of the -query.timeout flag).' + example: 30s + lookback_delta: + type: string + description: 'Form field: Override the lookback period for this query (optional).' + example: 5m + stats: + type: string + description: 'Form field: When provided, include query statistics in the response (the special value ''all'' enables more comprehensive statistics).' + example: all + required: + - query + additionalProperties: false + description: POST request body for instant query. + QueryRangePostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The query to execute.' + example: rate(http_requests_total[5m]) + start: + type: string + description: 'Form field: The start time of the query.' + example: "2023-07-21T20:10:30.781Z" + end: + type: string + description: 'Form field: The end time of the query.' + example: "2023-07-21T20:20:30.781Z" + step: + type: string + description: 'Form field: The step size of the query.' + example: 15s + limit: + type: integer + format: int64 + description: 'Form field: The maximum number of metrics to return.' + example: 100 + timeout: + type: string + description: 'Form field: Evaluation timeout (optional, defaults to and is capped by the value of the -query.timeout flag).' 
+ example: 30s + lookback_delta: + type: string + description: 'Form field: Override the lookback period for this query (optional).' + example: 5m + stats: + type: string + description: 'Form field: When provided, include query statistics in the response (the special value ''all'' enables more comprehensive statistics).' + example: all + required: + - query + - start + - end + - step + additionalProperties: false + description: POST request body for range query. + QueryExemplarsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + description: Response data (structure varies by endpoint). + example: + result: ok + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Generic response body. + QueryExemplarsPostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The query to execute.' + example: http_requests_total + start: + type: string + description: 'Form field: The start time of the query.' + example: "2023-07-21T20:00:00.000Z" + end: + type: string + description: 'Form field: The end time of the query.' + example: "2023-07-21T21:00:00.000Z" + required: + - query + additionalProperties: false + description: POST request body for exemplars query. + FormatQueryOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: string + description: Formatted query string. 
+ example: sum by(status) (rate(http_requests_total[5m])) + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for format query endpoint. + FormatQueryPostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The query to format.' + example: sum(rate(http_requests_total[5m])) by (status) + required: + - query + additionalProperties: false + description: POST request body for format query. + ParseQueryOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + description: Response data (structure varies by endpoint). + example: + result: ok + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Generic response body. + ParseQueryPostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The query to parse.' + example: sum(rate(http_requests_total[5m])) + required: + - query + additionalProperties: false + description: POST request body for parse query. 
+ QueryData: + anyOf: + - type: object + properties: + resultType: + type: string + enum: + - vector + result: + type: array + items: + anyOf: + - $ref: '#/components/schemas/FloatSample' + - $ref: '#/components/schemas/HistogramSample' + description: Array of samples (either float or histogram). + required: + - resultType + - result + additionalProperties: false + - type: object + properties: + resultType: + type: string + enum: + - matrix + result: + type: array + items: + anyOf: + - $ref: '#/components/schemas/FloatSeries' + - $ref: '#/components/schemas/HistogramSeries' + description: Array of time series (either float or histogram). + required: + - resultType + - result + additionalProperties: false + - type: object + properties: + resultType: + type: string + enum: + - scalar + result: + type: array + items: + oneOf: + - type: number + - type: string + maxItems: 2 + minItems: 2 + description: Scalar value as [timestamp, stringValue]. + required: + - resultType + - result + additionalProperties: false + - type: object + properties: + resultType: + type: string + enum: + - string + result: + type: array + items: + type: string + maxItems: 2 + minItems: 2 + description: String value as [timestamp, stringValue]. + required: + - resultType + - result + additionalProperties: false + description: Query result data. The structure of 'result' depends on 'resultType'. + example: + result: + - metric: + __name__: up + job: prometheus + value: + - 1627845600 + - "1" + resultType: vector + FloatSample: + type: object + properties: + metric: + $ref: '#/components/schemas/Labels' + value: + type: array + items: + oneOf: + - type: number + - type: string + maxItems: 2 + minItems: 2 + description: Timestamp and float value as [unixTimestamp, stringValue]. + example: + - 1767436620 + - "1" + required: + - metric + - value + additionalProperties: false + description: A sample with a float value. 
+ HistogramSample: + type: object + properties: + metric: + $ref: '#/components/schemas/Labels' + histogram: + type: array + items: + oneOf: + - type: number + - $ref: '#/components/schemas/HistogramValue' + maxItems: 2 + minItems: 2 + description: Timestamp and histogram value as [unixTimestamp, histogramObject]. + example: + - 1767436620 + - buckets: [] + count: "60" + sum: "120" + required: + - metric + - histogram + additionalProperties: false + description: A sample with a native histogram value. + FloatSeries: + type: object + properties: + metric: + $ref: '#/components/schemas/Labels' + values: + type: array + items: + type: array + items: + oneOf: + - type: number + - type: string + maxItems: 2 + minItems: 2 + description: Array of [timestamp, stringValue] pairs for float values. + required: + - metric + - values + additionalProperties: false + description: A time series with float values. + HistogramSeries: + type: object + properties: + metric: + $ref: '#/components/schemas/Labels' + histograms: + type: array + items: + type: array + items: + oneOf: + - type: number + - $ref: '#/components/schemas/HistogramValue' + maxItems: 2 + minItems: 2 + description: Array of [timestamp, histogramObject] pairs for histogram values. + required: + - metric + - histograms + additionalProperties: false + description: A time series with native histogram values. + HistogramValue: + type: object + properties: + count: + type: string + description: Total count of observations. + sum: + type: string + description: Sum of all observed values. + buckets: + type: array + items: + type: array + items: + oneOf: + - type: number + - type: string + description: Histogram buckets as [boundary_rule, lower, upper, count]. + required: + - count + - sum + additionalProperties: false + description: Native histogram value representation. + LabelsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. 
+ example: success + data: + type: array + items: + type: string + example: + - __name__ + - job + - instance + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of strings. + LabelsPostInputBody: + type: object + properties: + start: + type: string + description: 'Form field: The start time of the query.' + example: "2023-07-21T20:00:00.000Z" + end: + type: string + description: 'Form field: The end time of the query.' + example: "2023-07-21T21:00:00.000Z" + match[]: + type: array + items: + type: string + description: 'Form field: Series selector argument that selects the series from which to read the label names.' + example: + - '{job="prometheus"}' + limit: + type: integer + format: int64 + description: 'Form field: The maximum number of label names to return.' + example: 100 + additionalProperties: false + description: POST request body for labels query. + LabelValuesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: array + items: + type: string + example: + - __name__ + - job + - instance + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of strings. 
+ SeriesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: array + items: + $ref: '#/components/schemas/Labels' + example: + - __name__: up + instance: localhost:9090 + job: prometheus + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of label sets. + SeriesPostInputBody: + type: object + properties: + start: + type: string + description: 'Form field: The start time of the query.' + example: "2023-07-21T20:00:00.000Z" + end: + type: string + description: 'Form field: The end time of the query.' + example: "2023-07-21T21:00:00.000Z" + match[]: + type: array + items: + type: string + description: 'Form field: Series selector argument that selects the series to return.' + example: + - '{job="prometheus"}' + limit: + type: integer + format: int64 + description: 'Form field: The maximum number of series to return.' + example: 100 + required: + - match[] + additionalProperties: false + description: POST request body for series query. + SeriesDeleteOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + description: Response data (structure varies by endpoint). + example: + result: ok + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. 
+ required: + - status + - data + additionalProperties: false + description: Generic response body. + Metadata: + type: object + properties: + type: + type: string + description: Metric type (counter, gauge, histogram, summary, or untyped). + unit: + type: string + description: Unit of the metric. + help: + type: string + description: Help text describing the metric. + required: + - type + - unit + - help + additionalProperties: false + description: Metric metadata. + MetadataOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: object + additionalProperties: + type: array + items: + $ref: '#/components/schemas/Metadata' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for metadata endpoint. + MetricMetadata: + type: object + properties: + target: + $ref: '#/components/schemas/Labels' + metric: + type: string + description: Metric name. + type: + type: string + description: Metric type (counter, gauge, histogram, summary, or untyped). + help: + type: string + description: Help text describing the metric. + unit: + type: string + description: Unit of the metric. + required: + - target + - type + - help + - unit + additionalProperties: false + description: Target metric metadata. + Target: + type: object + properties: + discoveredLabels: + $ref: '#/components/schemas/Labels' + labels: + $ref: '#/components/schemas/Labels' + scrapePool: + type: string + description: Name of the scrape pool. + scrapeUrl: + type: string + description: URL of the target. + globalUrl: + type: string + description: Global URL of the target. 
+ lastError: + type: string + description: Last error message from scraping. + lastScrape: + type: string + format: date-time + description: Timestamp of the last scrape. + lastScrapeDuration: + type: number + format: double + description: Duration of the last scrape in seconds. + health: + type: string + description: Health status of the target (up, down, or unknown). + scrapeInterval: + type: string + description: Scrape interval for this target. + scrapeTimeout: + type: string + description: Scrape timeout for this target. + required: + - discoveredLabels + - labels + - scrapePool + - scrapeUrl + - globalUrl + - lastError + - lastScrape + - lastScrapeDuration + - health + - scrapeInterval + - scrapeTimeout + additionalProperties: false + description: Scrape target information. + DroppedTarget: + type: object + properties: + discoveredLabels: + $ref: '#/components/schemas/Labels' + scrapePool: + type: string + description: Name of the scrape pool. + required: + - discoveredLabels + - scrapePool + additionalProperties: false + description: Dropped target information. + TargetDiscovery: + type: object + properties: + activeTargets: + type: array + items: + $ref: '#/components/schemas/Target' + droppedTargets: + type: array + items: + $ref: '#/components/schemas/DroppedTarget' + droppedTargetCounts: + type: object + additionalProperties: + type: integer + format: int64 + required: + - activeTargets + - droppedTargets + - droppedTargetCounts + additionalProperties: false + description: Target discovery information including active and dropped targets. + TargetsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/TargetDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. 
+ infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for targets endpoint. + TargetMetadataOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: array + items: + $ref: '#/components/schemas/MetricMetadata' + example: + - help: The current health status of the target + metric: up + target: + instance: localhost:9090 + job: prometheus + type: gauge + unit: "" + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of metric metadata. + ScrapePoolsDiscovery: + type: object + properties: + scrapePools: + type: array + items: + type: string + required: + - scrapePools + additionalProperties: false + description: List of all configured scrape pools. + ScrapePoolsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/ScrapePoolsDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for scrape pools endpoint. 
+ Config: + type: object + properties: + source_labels: + type: array + items: + type: string + description: Source labels for relabeling. + separator: + type: string + description: Separator for source label values. + regex: + type: string + description: Regular expression for matching. + modulus: + type: integer + format: int64 + description: Modulus for hash-based relabeling. + target_label: + type: string + description: Target label name. + replacement: + type: string + description: Replacement value. + action: + type: string + description: Relabel action. + additionalProperties: false + description: Relabel configuration. + RelabelStep: + type: object + properties: + rule: + $ref: '#/components/schemas/Config' + output: + $ref: '#/components/schemas/Labels' + keep: + type: boolean + required: + - rule + - output + - keep + additionalProperties: false + description: Relabel step showing the rule, output, and whether the target was kept. + RelabelStepsResponse: + type: object + properties: + steps: + type: array + items: + $ref: '#/components/schemas/RelabelStep' + required: + - steps + additionalProperties: false + description: Relabeling steps response. + TargetRelabelStepsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/RelabelStepsResponse' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for target relabel steps endpoint. + RuleGroup: + type: object + properties: + name: + type: string + description: Name of the rule group. 
+ file: + type: string + description: File containing the rule group. + rules: + type: array + items: + type: object + description: Rule definition. + description: Rules in this group. + interval: + type: number + format: double + description: Evaluation interval in seconds. + limit: + type: integer + format: int64 + description: Maximum number of alerts for this group. + evaluationTime: + type: number + format: double + description: Time taken to evaluate the group in seconds. + lastEvaluation: + type: string + format: date-time + description: Timestamp of the last evaluation. + required: + - name + - file + - rules + - interval + - limit + - evaluationTime + - lastEvaluation + additionalProperties: false + description: Rule group information. + RuleDiscovery: + type: object + properties: + groups: + type: array + items: + $ref: '#/components/schemas/RuleGroup' + groupNextToken: + type: string + description: Pagination token for the next page of groups. + required: + - groups + additionalProperties: false + description: Rule discovery information containing all rule groups. + RulesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/RuleDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for rules endpoint. + Alert: + type: object + properties: + labels: + $ref: '#/components/schemas/Labels' + annotations: + $ref: '#/components/schemas/Labels' + state: + type: string + description: State of the alert (pending, firing, or inactive). 
+ value: + type: string + description: Value of the alert expression. + activeAt: + type: string + format: date-time + description: Timestamp when the alert became active. + keepFiringSince: + type: string + format: date-time + description: Timestamp since the alert has been kept firing. + required: + - labels + - annotations + - state + - value + additionalProperties: false + description: Alert information. + AlertDiscovery: + type: object + properties: + alerts: + type: array + items: + $ref: '#/components/schemas/Alert' + required: + - alerts + additionalProperties: false + description: Alert discovery information containing all active alerts. + AlertsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/AlertDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for alerts endpoint. + AlertmanagerTarget: + type: object + properties: + url: + type: string + description: URL of the Alertmanager instance. + required: + - url + additionalProperties: false + description: Alertmanager target information. + AlertmanagerDiscovery: + type: object + properties: + activeAlertmanagers: + type: array + items: + $ref: '#/components/schemas/AlertmanagerTarget' + droppedAlertmanagers: + type: array + items: + $ref: '#/components/schemas/AlertmanagerTarget' + required: + - activeAlertmanagers + - droppedAlertmanagers + additionalProperties: false + description: Alertmanager discovery information including active and dropped instances. 
+ AlertmanagersOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/AlertmanagerDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for alertmanagers endpoint. + StatusConfigData: + type: object + properties: + yaml: + type: string + description: Prometheus configuration in YAML format. + required: + - yaml + additionalProperties: false + description: Prometheus configuration. + StatusConfigOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/StatusConfigData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status config endpoint. 
+ RuntimeInfo: + type: object + properties: + startTime: + type: string + format: date-time + CWD: + type: string + hostname: + type: string + serverTime: + type: string + format: date-time + reloadConfigSuccess: + type: boolean + lastConfigTime: + type: string + format: date-time + corruptionCount: + type: integer + format: int64 + goroutineCount: + type: integer + format: int64 + GOMAXPROCS: + type: integer + format: int64 + GOMEMLIMIT: + type: integer + format: int64 + GOGC: + type: string + GODEBUG: + type: string + storageRetention: + type: string + required: + - startTime + - CWD + - hostname + - serverTime + - reloadConfigSuccess + - lastConfigTime + - corruptionCount + - goroutineCount + - GOMAXPROCS + - GOMEMLIMIT + - GOGC + - GODEBUG + - storageRetention + additionalProperties: false + description: Prometheus runtime information. + StatusRuntimeInfoOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/RuntimeInfo' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status runtime info endpoint. + PrometheusVersion: + type: object + properties: + version: + type: string + revision: + type: string + branch: + type: string + buildUser: + type: string + buildDate: + type: string + goVersion: + type: string + required: + - version + - revision + - branch + - buildUser + - buildDate + - goVersion + additionalProperties: false + description: Prometheus version information. 
+ StatusBuildInfoOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/PrometheusVersion' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status build info endpoint. + StatusFlagsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: object + additionalProperties: + type: string + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status flags endpoint. + HeadStats: + type: object + properties: + numSeries: + type: integer + format: int64 + numLabelPairs: + type: integer + format: int64 + chunkCount: + type: integer + format: int64 + minTime: + type: integer + format: int64 + maxTime: + type: integer + format: int64 + required: + - numSeries + - numLabelPairs + - chunkCount + - minTime + - maxTime + additionalProperties: false + description: TSDB head statistics. + TSDBStat: + type: object + properties: + name: + type: string + value: + type: integer + format: int64 + required: + - name + - value + additionalProperties: false + description: TSDB statistic. 
+ TSDBStatus: + type: object + properties: + headStats: + $ref: '#/components/schemas/HeadStats' + seriesCountByMetricName: + type: array + items: + $ref: '#/components/schemas/TSDBStat' + labelValueCountByLabelName: + type: array + items: + $ref: '#/components/schemas/TSDBStat' + memoryInBytesByLabelName: + type: array + items: + $ref: '#/components/schemas/TSDBStat' + seriesCountByLabelValuePair: + type: array + items: + $ref: '#/components/schemas/TSDBStat' + required: + - headStats + - seriesCountByMetricName + - labelValueCountByLabelName + - memoryInBytesByLabelName + - seriesCountByLabelValuePair + additionalProperties: false + description: TSDB status information. + StatusTSDBOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/TSDBStatus' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status TSDB endpoint. + BlockDesc: + type: object + properties: + ulid: + type: string + minTime: + type: integer + format: int64 + maxTime: + type: integer + format: int64 + required: + - ulid + - minTime + - maxTime + additionalProperties: false + description: Block descriptor. + BlockStats: + type: object + properties: + numSamples: + type: integer + format: int64 + numSeries: + type: integer + format: int64 + numChunks: + type: integer + format: int64 + numTombstones: + type: integer + format: int64 + numFloatSamples: + type: integer + format: int64 + numHistogramSamples: + type: integer + format: int64 + additionalProperties: false + description: Block statistics. 
+ BlockMetaCompaction: + type: object + properties: + level: + type: integer + format: int64 + sources: + type: array + items: + type: string + parents: + type: array + items: + $ref: '#/components/schemas/BlockDesc' + failed: + type: boolean + deletable: + type: boolean + hints: + type: array + items: + type: string + required: + - level + additionalProperties: false + description: Block compaction metadata. + BlockMeta: + type: object + properties: + ulid: + type: string + minTime: + type: integer + format: int64 + maxTime: + type: integer + format: int64 + stats: + $ref: '#/components/schemas/BlockStats' + compaction: + $ref: '#/components/schemas/BlockMetaCompaction' + version: + type: integer + format: int64 + required: + - ulid + - minTime + - maxTime + - compaction + - version + additionalProperties: false + description: Block metadata. + StatusTSDBBlocksData: + type: object + properties: + blocks: + type: array + items: + $ref: '#/components/schemas/BlockMeta' + required: + - blocks + additionalProperties: false + description: TSDB blocks information. + StatusTSDBBlocksOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/StatusTSDBBlocksData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status TSDB blocks endpoint. 
+ StatusWALReplayData: + type: object + properties: + min: + type: integer + format: int64 + max: + type: integer + format: int64 + current: + type: integer + format: int64 + required: + - min + - max + - current + additionalProperties: false + description: WAL replay status. + StatusWALReplayOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/StatusWALReplayData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status WAL replay endpoint. + DeleteSeriesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + additionalProperties: false + description: Response body containing only status. + CleanTombstonesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. 
+ required: + - status + additionalProperties: false + description: Response body containing only status. + DataStruct: + type: object + properties: + name: + type: string + required: + - name + additionalProperties: false + description: Generic data structure with a name field. + SnapshotOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/DataStruct' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for snapshot endpoint. + Notification: + type: object + properties: + text: + type: string + date: + type: string + format: date-time + active: + type: boolean + required: + - text + - date + - active + additionalProperties: false + description: Server notification. + NotificationsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: array + items: + $ref: '#/components/schemas/Notification' + example: + - active: true + date: "2023-07-21T20:00:00.000Z" + text: Server is running + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of notifications. 
+ FeaturesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + description: Response data (structure varies by endpoint). + example: + result: ok + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Generic response body. +tags: + - name: query + description: Query and evaluate PromQL expressions. + - name: metadata + description: Retrieve metric metadata such as type and unit. + - name: labels + description: Query label names and values. + - name: series + description: Query and manage time series. + - name: targets + description: Retrieve target and scrape pool information. + - name: rules + description: Query recording and alerting rules. + - name: alerts + description: Query active alerts and alertmanager discovery. + - name: status + description: Retrieve server status and configuration. + - name: admin + description: Administrative operations for TSDB management. + - name: features + description: Query enabled features. + - name: remote + description: Remote read and write endpoints. + - name: otlp + description: OpenTelemetry Protocol metrics ingestion. + - name: notifications + description: Server notifications and events. 
diff --git a/web/api/v1/testdata/openapi_3.2_golden.yaml b/web/api/v1/testdata/openapi_3.2_golden.yaml new file mode 100644 index 0000000000..f122408013 --- /dev/null +++ b/web/api/v1/testdata/openapi_3.2_golden.yaml @@ -0,0 +1,4452 @@ +openapi: 3.2.0 +info: + title: Prometheus API + description: Prometheus is an Open-Source monitoring system with a dimensional data model, flexible query language, efficient time series database and modern alerting approach. + contact: + name: Prometheus Community + url: https://prometheus.io/community/ + version: 0.0.1-undefined +servers: + - url: /api/v1 +paths: + /query: + get: + tags: + - query + summary: Evaluate an instant query + operationId: query + parameters: + - name: limit + in: query + description: The maximum number of metrics to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + - name: time + in: query + description: The evaluation timestamp (optional, defaults to current time). + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: query + in: query + description: The PromQL query to execute. + required: true + explode: false + schema: + type: string + examples: + example: + value: up + - name: timeout + in: query + description: Evaluation timeout. Optional. Defaults to and is capped by the value of the -query.timeout flag. + required: false + explode: false + schema: + type: string + examples: + example: + value: 30s + - name: lookback_delta + in: query + description: Override the lookback period for this query. Optional. 
+ required: false + explode: false + schema: + type: string + examples: + example: + value: 5m + - name: stats + in: query + description: When provided, include query statistics in the response. The special value 'all' enables more comprehensive statistics. + required: false + explode: false + schema: + type: string + examples: + example: + value: all + responses: + "200": + description: Query executed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryOutputBody' + examples: + vectorResult: + summary: 'Instant vector query: up' + value: {"status": "success", "data": {"resultType": "vector", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "value": [1767436620, "1"]}, {"metric": {"__name__": "up", "env": "demo", "instance": "demo.prometheus.io:9093", "job": "alertmanager"}, "value": [1767436620, "1"]}]}} + scalarResult: + summary: 'Scalar query: scalar(42)' + value: + data: + result: + - 1767436620 + - "42" + resultType: scalar + status: success + matrixResult: + summary: 'Range vector query: up[5m]' + value: {"status": "success", "data": {"resultType": "matrix", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "values": [[1767436320, "1"], [1767436620, "1"]]}]}} + default: + description: Error executing query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Evaluate an instant query + operationId: query-post + requestBody: + description: Submit an instant query. This endpoint accepts the same parameters as the GET version. 
+ content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/QueryPostInputBody' + examples: + simpleQuery: + summary: Simple instant query + value: + query: up + queryWithTime: + summary: Query with specific timestamp + value: + query: up{job="prometheus"} + time: "2026-01-02T13:37:00.000Z" + queryWithLimit: + summary: Query with limit and statistics + value: + limit: 100 + query: rate(prometheus_http_requests_total{handler="/api/v1/query"}[5m]) + stats: all + required: true + responses: + "200": + description: Instant query executed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryOutputBody' + examples: + vectorResult: + summary: 'Instant vector query: up' + value: {"status": "success", "data": {"resultType": "vector", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "value": [1767436620, "1"]}, {"metric": {"__name__": "up", "env": "demo", "instance": "demo.prometheus.io:9093", "job": "alertmanager"}, "value": [1767436620, "1"]}]}} + scalarResult: + summary: 'Scalar query: scalar(42)' + value: + data: + result: + - 1767436620 + - "42" + resultType: scalar + status: success + matrixResult: + summary: 'Range vector query: up[5m]' + value: {"status": "success", "data": {"resultType": "matrix", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "values": [[1767436320, "1"], [1767436620, "1"]]}]}} + default: + description: Error executing instant query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /query_range: + get: + tags: + - query + summary: Evaluate a range query + operationId: query-range + parameters: + - name: limit + in: query + description: The maximum number of metrics to return. 
+ required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + - name: start + in: query + description: The start time of the query. + required: true + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: The end time of the query. + required: true + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: step + in: query + description: The step size of the query. + required: true + explode: false + schema: + type: string + examples: + example: + value: 15s + - name: query + in: query + description: The query to execute. + required: true + explode: false + schema: + type: string + examples: + example: + value: rate(prometheus_http_requests_total{handler="/api/v1/query"}[5m]) + - name: timeout + in: query + description: Evaluation timeout. Optional. Defaults to and is capped by the value of the -query.timeout flag. + required: false + explode: false + schema: + type: string + examples: + example: + value: 30s + - name: lookback_delta + in: query + description: Override the lookback period for this query. Optional. + required: false + explode: false + schema: + type: string + examples: + example: + value: 5m + - name: stats + in: query + description: When provided, include query statistics in the response. The special value 'all' enables more comprehensive statistics. 
+ required: false + explode: false + schema: + type: string + examples: + example: + value: all + responses: + "200": + description: Range query executed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryRangeOutputBody' + examples: + matrixResult: + summary: 'Range query: rate(prometheus_http_requests_total[5m])' + value: {"status": "success", "data": {"resultType": "matrix", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "values": [[1767433020, "1"], [1767434820, "1"], [1767436620, "1"]]}]}} + default: + description: Error executing range query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Evaluate a range query + operationId: query-range-post + requestBody: + description: Submit a range query. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/QueryRangePostInputBody' + examples: + basicRange: + summary: Basic range query + value: + end: "2026-01-02T13:37:00.000Z" + query: up + start: "2026-01-02T12:37:00.000Z" + step: 15s + rateQuery: + summary: Rate calculation over time range + value: + end: "2026-01-02T13:37:00.000Z" + query: rate(prometheus_http_requests_total{handler="/api/v1/query"}[5m]) + start: "2026-01-02T12:37:00.000Z" + step: 30s + timeout: 30s + required: true + responses: + "200": + description: Range query executed successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/QueryRangeOutputBody' + examples: + matrixResult: + summary: 'Range query: rate(prometheus_http_requests_total[5m])' + value: {"status": "success", "data": {"resultType": "matrix", "result": [{"metric": {"__name__": "up", "instance": "demo.prometheus.io:9090", "job": "prometheus"}, "values": [[1767433020, "1"], [1767434820, "1"], [1767436620, "1"]]}]}} + default: + description: Error executing range query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /query_exemplars: + get: + tags: + - query + summary: Query exemplars + operationId: query-exemplars + parameters: + - name: start + in: query + description: Start timestamp for exemplars query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for exemplars query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: query + in: query + description: PromQL query to extract exemplars for. + required: true + explode: false + schema: + type: string + examples: + example: + value: prometheus_http_requests_total + responses: + "200": + description: Exemplars retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/QueryExemplarsOutputBody' + examples: + exemplarsResult: + summary: Exemplars for a metric with trace IDs + value: + data: + - exemplars: + - labels: + traceID: abc123def456 + timestamp: 1.689956451781e+09 + value: "1.5" + seriesLabels: + __name__: http_requests_total + job: api-server + method: GET + status: success + default: + description: Error retrieving exemplars. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Query exemplars + operationId: query-exemplars-post + requestBody: + description: Submit an exemplars query. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/QueryExemplarsPostInputBody' + examples: + basicExemplar: + summary: Query exemplars for a metric + value: + query: prometheus_http_requests_total + exemplarWithTimeRange: + summary: Exemplars within specific time range + value: + end: "2026-01-02T13:37:00.000Z" + query: prometheus_http_requests_total{job="prometheus"} + start: "2026-01-02T12:37:00.000Z" + required: true + responses: + "200": + description: Exemplars query completed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/QueryExemplarsOutputBody' + examples: + exemplarsResult: + summary: Exemplars for a metric with trace IDs + value: + data: + - exemplars: + - labels: + traceID: abc123def456 + timestamp: 1.689956451781e+09 + value: "1.5" + seriesLabels: + __name__: http_requests_total + job: api-server + method: GET + status: success + default: + description: Error processing exemplars query. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /format_query: + get: + tags: + - query + summary: Format a PromQL query + operationId: format-query + parameters: + - name: query + in: query + description: PromQL expression to format. + required: true + explode: false + schema: + type: string + examples: + example: + value: sum(rate(http_requests_total[5m])) by (job) + responses: + "200": + description: Query formatted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/FormatQueryOutputBody' + examples: + formattedQuery: + summary: Formatted PromQL query + value: + data: sum by(job, status) (rate(http_requests_total[5m])) + status: success + default: + description: Error formatting query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Format a PromQL query + operationId: format-query-post + requestBody: + description: Submit a PromQL query to format. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/FormatQueryPostInputBody' + examples: + simpleFormat: + summary: Format a simple query + value: + query: up{job="prometheus"} + complexFormat: + summary: Format a complex query + value: + query: sum(rate(http_requests_total[5m])) by (job, status) + required: true + responses: + "200": + description: Query formatting completed successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/FormatQueryOutputBody' + examples: + formattedQuery: + summary: Formatted PromQL query + value: + data: sum by(job, status) (rate(http_requests_total[5m])) + status: success + default: + description: Error formatting query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /parse_query: + get: + tags: + - query + summary: Parse a PromQL query + operationId: parse-query + parameters: + - name: query + in: query + description: PromQL expression to parse. + required: true + explode: false + schema: + type: string + examples: + example: + value: up{job="prometheus"} + responses: + "200": + description: Query parsed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ParseQueryOutputBody' + examples: + parsedQuery: + summary: Parsed PromQL expression tree + value: + data: + resultType: vector + status: success + default: + description: Error parsing query. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - query + summary: Parse a PromQL query + operationId: parse-query-post + requestBody: + description: Submit a PromQL query to parse. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/ParseQueryPostInputBody' + examples: + simpleParse: + summary: Parse a simple query + value: + query: up + complexParse: + summary: Parse a complex query + value: + query: rate(http_requests_total{job="api"}[5m]) + required: true + responses: + "200": + description: Query parsed successfully via POST. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ParseQueryOutputBody' + examples: + parsedQuery: + summary: Parsed PromQL expression tree + value: + data: + resultType: vector + status: success + default: + description: Error parsing query via POST. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /labels: + get: + tags: + - labels + summary: Get label names + operationId: labels + parameters: + - name: start + in: query + description: Start timestamp for label names query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for label names query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: match[] + in: query + description: Series selector argument. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{job="prometheus"}' + - name: limit + in: query + description: Maximum number of label names to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + responses: + "200": + description: Label names retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/LabelsOutputBody' + examples: + labelNames: + summary: List of label names + value: + data: + - __name__ + - active + - address + - alertmanager + - alertname + - alertstate + - backend + - branch + - code + - collector + - component + - device + - env + - endpoint + - fstype + - handler + - instance + - job + - le + - method + - mode + - name + status: success + default: + description: Error retrieving label names. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - labels + summary: Get label names + operationId: labels-post + requestBody: + description: Submit a label names query. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/LabelsPostInputBody' + examples: + allLabels: + summary: Get all label names + value: {} + labelsWithTimeRange: + summary: Get label names within time range + value: + end: "2026-01-02T13:37:00.000Z" + start: "2026-01-02T12:37:00.000Z" + labelsWithMatch: + summary: Get label names matching series selector + value: + match[]: + - up + - process_start_time_seconds{job="prometheus"} + required: true + responses: + "200": + description: Label names retrieved successfully via POST. + content: + application/json: + schema: + $ref: '#/components/schemas/LabelsOutputBody' + examples: + labelNames: + summary: List of label names + value: + data: + - __name__ + - active + - address + - alertmanager + - alertname + - alertstate + - backend + - branch + - code + - collector + - component + - device + - env + - endpoint + - fstype + - handler + - instance + - job + - le + - method + - mode + - name + status: success + default: + description: Error retrieving label names via POST. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /label/{name}/values: + get: + tags: + - labels + summary: Get label values + operationId: label-values + parameters: + - name: name + in: path + description: Label name. + required: true + schema: + type: string + - name: start + in: query + description: Start timestamp for label values query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for label values query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: match[] + in: query + description: Series selector argument. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{job="prometheus"}' + - name: limit + in: query + description: Maximum number of label values to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 1000 + responses: + "200": + description: Label values retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/LabelValuesOutputBody' + examples: + labelValues: + summary: List of values for a label + value: + data: + - alertmanager + - blackbox + - caddy + - cadvisor + - grafana + - node + - prometheus + - random + status: success + default: + description: Error retrieving label values. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /series: + get: + tags: + - series + summary: Find series by label matchers + operationId: series + parameters: + - name: start + in: query + description: Start timestamp for series query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for series query. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + - name: match[] + in: query + description: Series selector argument. + required: true + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{job="prometheus"}' + - name: limit + in: query + description: Maximum number of series to return. 
+ required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + responses: + "200": + description: Series returned matching the provided label matchers. + content: + application/json: + schema: + $ref: '#/components/schemas/SeriesOutputBody' + examples: + seriesList: + summary: List of series matching the selector + value: + data: + - __name__: up + env: demo + instance: demo.prometheus.io:8080 + job: cadvisor + - __name__: up + env: demo + instance: demo.prometheus.io:9093 + job: alertmanager + - __name__: up + env: demo + instance: demo.prometheus.io:9100 + job: node + - __name__: up + instance: demo.prometheus.io:3000 + job: grafana + - __name__: up + instance: demo.prometheus.io:8996 + job: random + status: success + default: + description: Error retrieving series. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - series + summary: Find series by label matchers + operationId: series-post + requestBody: + description: Submit a series query. This endpoint accepts the same parameters as the GET version. + content: + application/x-www-form-urlencoded: + schema: + $ref: '#/components/schemas/SeriesPostInputBody' + examples: + seriesMatch: + summary: Find series by label matchers + value: + match[]: + - up + seriesWithTimeRange: + summary: Find series with time range + value: + end: "2026-01-02T13:37:00.000Z" + match[]: + - up + - process_cpu_seconds_total{job="prometheus"} + start: "2026-01-02T12:37:00.000Z" + required: true + responses: + "200": + description: Series returned matching the provided label matchers via POST. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/SeriesOutputBody' + examples: + seriesList: + summary: List of series matching the selector + value: + data: + - __name__: up + env: demo + instance: demo.prometheus.io:8080 + job: cadvisor + - __name__: up + env: demo + instance: demo.prometheus.io:9093 + job: alertmanager + - __name__: up + env: demo + instance: demo.prometheus.io:9100 + job: node + - __name__: up + instance: demo.prometheus.io:3000 + job: grafana + - __name__: up + instance: demo.prometheus.io:8996 + job: random + status: success + default: + description: Error retrieving series via POST. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + delete: + tags: + - series + summary: Delete series + description: 'Delete series matching selectors. Note: This is deprecated, use POST /admin/tsdb/delete_series instead.' + operationId: delete-series + responses: + "200": + description: Series marked for deletion. + content: + application/json: + schema: + $ref: '#/components/schemas/SeriesDeleteOutputBody' + examples: + seriesDeleted: + summary: Series marked for deletion + value: + status: success + default: + description: Error deleting series. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /metadata: + get: + tags: + - metadata + summary: Get metadata + operationId: get-metadata + parameters: + - name: limit + in: query + description: The maximum number of metrics to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + - name: limit_per_metric + in: query + description: The maximum number of metadata entries per metric. 
+ required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 10 + - name: metric + in: query + description: A metric name to filter metadata for. + required: false + explode: false + schema: + type: string + examples: + example: + value: http_requests_total + responses: + "200": + description: Metric metadata retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/MetadataOutputBody' + examples: + metricMetadata: + summary: Metadata for metrics + value: + data: + go_gc_stack_starting_size_bytes: + - help: The stack size of new goroutines. Sourced from /gc/stack/starting-size:bytes. + type: gauge + unit: "" + prometheus_rule_group_iterations_missed_total: + - help: The total number of rule group evaluations missed due to slow rule group evaluation. + type: counter + unit: "" + prometheus_sd_updates_total: + - help: Total number of update events sent to the SD consumers. + type: counter + unit: "" + status: success + default: + description: Error retrieving metadata. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /scrape_pools: + get: + tags: + - targets + summary: Get scrape pools + operationId: get-scrape-pools + responses: + "200": + description: Scrape pools retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ScrapePoolsOutputBody' + examples: + scrapePoolsList: + summary: List of scrape pool names + value: + data: + scrapePools: + - alertmanager + - blackbox + - caddy + - cadvisor + - grafana + - node + - prometheus + - random + status: success + default: + description: Error retrieving scrape pools. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /targets: + get: + tags: + - targets + summary: Get targets + operationId: get-targets + parameters: + - name: scrapePool + in: query + description: Filter targets by scrape pool name. + required: false + explode: false + schema: + type: string + examples: + example: + value: prometheus + - name: state + in: query + description: 'Filter by state: active, dropped, or any.' + required: false + explode: false + schema: + type: string + examples: + example: + value: active + responses: + "200": + description: Target discovery information retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/TargetsOutputBody' + examples: + targetsList: + summary: Active and dropped targets + value: + data: + activeTargets: + - discoveredLabels: + __address__: demo.prometheus.io:9093 + __meta_filepath: /etc/prometheus/file_sd/alertmanager.yml + __metrics_path__: /metrics + __scheme__: http + env: demo + job: alertmanager + globalUrl: http://demo.prometheus.io:9093/metrics + health: up + labels: + env: demo + instance: demo.prometheus.io:9093 + job: alertmanager + lastError: "" + lastScrape: "2026-01-02T13:36:40.200Z" + lastScrapeDuration: 0.006576866 + scrapeInterval: 15s + scrapePool: alertmanager + scrapeTimeout: 10s + scrapeUrl: http://demo.prometheus.io:9093/metrics + droppedTargetCounts: + alertmanager: 0 + blackbox: 0 + caddy: 0 + cadvisor: 0 + grafana: 0 + node: 0 + prometheus: 0 + random: 0 + droppedTargets: [] + status: success + default: + description: Error retrieving targets. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /targets/metadata: + get: + tags: + - targets + summary: Get targets metadata + operationId: get-targets-metadata + parameters: + - name: match_target + in: query + description: Label selector to filter targets. + required: false + explode: false + schema: + type: string + examples: + example: + value: '{job="prometheus"}' + - name: metric + in: query + description: Metric name to retrieve metadata for. + required: false + explode: false + schema: + type: string + examples: + example: + value: http_requests_total + - name: limit + in: query + description: Maximum number of targets to match. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 10 + responses: + "200": + description: Target metadata retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/TargetMetadataOutputBody' + examples: + targetMetadata: + summary: Metadata for targets + value: + data: + - help: The current health status of the target + metric: up + target: + instance: localhost:9090 + job: prometheus + type: gauge + unit: "" + status: success + default: + description: Error retrieving target metadata. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /targets/relabel_steps: + get: + tags: + - targets + summary: Get targets relabel steps + operationId: get-targets-relabel-steps + parameters: + - name: scrapePool + in: query + description: Name of the scrape pool. 
+ required: true + explode: false + schema: + type: string + examples: + example: + value: prometheus + - name: labels + in: query + description: JSON-encoded labels to apply relabel rules to. + required: true + explode: false + schema: + type: string + examples: + example: + value: '{"__address__":"localhost:9090","job":"prometheus"}' + responses: + "200": + description: Relabel steps retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/TargetRelabelStepsOutputBody' + examples: + relabelSteps: + summary: Relabel steps for a target + value: + data: + steps: + - keep: true + output: + __address__: localhost:9090 + instance: localhost:9090 + job: prometheus + rule: + action: replace + regex: (.*) + replacement: $1 + source_labels: + - __address__ + target_label: instance + status: success + default: + description: Error retrieving relabel steps. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /rules: + get: + tags: + - rules + summary: Get alerting and recording rules + operationId: rules + parameters: + - name: type + in: query + description: 'Filter by rule type: alert or record.' + required: false + explode: false + schema: + type: string + examples: + example: + value: alert + - name: rule_name[] + in: query + description: Filter by rule name. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - HighErrorRate + - name: rule_group[] + in: query + description: Filter by rule group name. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - example_alerts + - name: file[] + in: query + description: Filter by file path. 
+ required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - /etc/prometheus/rules.yml + - name: match[] + in: query + description: Label matchers to filter rules. + required: false + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{severity="critical"}' + - name: exclude_alerts + in: query + description: Exclude active alerts from response. + required: false + explode: false + schema: + type: string + examples: + example: + value: "false" + - name: group_limit + in: query + description: Maximum number of rule groups to return. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 100 + - name: group_next_token + in: query + description: Pagination token for next page. + required: false + explode: false + schema: + type: string + examples: + example: + value: abc123 + responses: + "200": + description: Rules retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/RulesOutputBody' + examples: + ruleGroups: + summary: Alerting and recording rules + value: + data: + groups: + - evaluationTime: 0.000561635 + file: /etc/prometheus/rules/ansible_managed.yml + interval: 15 + lastEvaluation: "2026-01-02T13:36:56.874Z" + limit: 0 + name: ansible managed alert rules + rules: + - annotations: + description: This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty. 
+ summary: Ensure entire alerting pipeline is functional + duration: 600 + evaluationTime: 0.000356688 + health: ok + keepFiringFor: 0 + labels: + severity: warning + lastEvaluation: "2026-01-02T13:36:56.874Z" + name: Watchdog + query: vector(1) + state: firing + type: alerting + status: success + default: + description: Error retrieving rules. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /alerts: + get: + tags: + - alerts + summary: Get active alerts + operationId: alerts + responses: + "200": + description: Active alerts retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/AlertsOutputBody' + examples: + activeAlerts: + summary: Currently active alerts + value: + data: + alerts: + - activeAt: "2026-01-02T13:30:00.000Z" + annotations: + description: This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty. + summary: Ensure entire alerting pipeline is functional + labels: + alertname: Watchdog + severity: warning + state: firing + value: "1e+00" + status: success + default: + description: Error retrieving alerts. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /alertmanagers: + get: + tags: + - alerts + summary: Get Alertmanager discovery + operationId: alertmanagers + responses: + "200": + description: Alertmanager targets retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/AlertmanagersOutputBody' + examples: + alertmanagerDiscovery: + summary: Alertmanager discovery results + value: + data: + activeAlertmanagers: + - url: http://demo.prometheus.io:9093/api/v2/alerts + droppedAlertmanagers: [] + status: success + default: + description: Error retrieving Alertmanager targets. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/config: + get: + tags: + - status + summary: Get status config + operationId: get-status-config + responses: + "200": + description: Configuration retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/StatusConfigOutputBody' + examples: + configYAML: + summary: Prometheus configuration + value: + data: + yaml: | + global: + scrape_interval: 15s + scrape_timeout: 10s + evaluation_interval: 15s + external_labels: + environment: demo-prometheus-io + alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - demo.prometheus.io:9093 + rule_files: + - /etc/prometheus/rules/*.yml + status: success + default: + description: Error retrieving configuration. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/runtimeinfo: + get: + tags: + - status + summary: Get status runtimeinfo + operationId: get-status-runtimeinfo + responses: + "200": + description: Runtime information retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/StatusRuntimeInfoOutputBody' + examples: + runtimeInfo: + summary: Runtime information + value: + data: + CWD: / + GODEBUG: "" + GOGC: "75" + GOMAXPROCS: 2 + GOMEMLIMIT: 3703818240 + corruptionCount: 0 + goroutineCount: 88 + hostname: demo-prometheus-io + lastConfigTime: "2026-01-01T13:37:00.000Z" + reloadConfigSuccess: true + serverTime: "2026-01-02T13:37:00.000Z" + startTime: "2026-01-01T13:37:00.000Z" + storageRetention: 31d + status: success + default: + description: Error retrieving runtime information. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/buildinfo: + get: + tags: + - status + summary: Get status buildinfo + operationId: get-status-buildinfo + responses: + "200": + description: Build information retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/StatusBuildInfoOutputBody' + examples: + buildInfo: + summary: Build information + value: + data: + branch: HEAD + buildDate: 20251030-07:26:10 + buildUser: root@08c890a84441 + goVersion: go1.25.3 + revision: 0a41f0000705c69ab8e0f9a723fc73e39ed62b07 + version: 3.7.3 + status: success + default: + description: Error retrieving build information. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/flags: + get: + tags: + - status + summary: Get status flags + operationId: get-status-flags + responses: + "200": + description: Command-line flags retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/StatusFlagsOutputBody' + examples: + flags: + summary: Command-line flags + value: + data: + agent: "false" + alertmanager.notification-queue-capacity: "10000" + config.file: /etc/prometheus/prometheus.yml + enable-feature: exemplar-storage,native-histograms + query.max-concurrency: "20" + query.timeout: 2m + storage.tsdb.path: /prometheus + storage.tsdb.retention.time: 15d + web.console.libraries: /usr/share/prometheus/console_libraries + web.console.templates: /usr/share/prometheus/consoles + web.enable-admin-api: "true" + web.enable-lifecycle: "true" + web.listen-address: 0.0.0.0:9090 + web.page-title: Prometheus Time Series Collection and Processing Server + status: success + default: + description: Error retrieving flags. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/tsdb: + get: + tags: + - status + summary: Get TSDB status + operationId: status-tsdb + parameters: + - name: limit + in: query + description: The maximum number of items to return per category. + required: false + explode: false + schema: + type: integer + format: int64 + examples: + example: + value: 10 + responses: + "200": + description: TSDB status retrieved successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/StatusTSDBOutputBody' + examples: + tsdbStats: + summary: TSDB statistics + value: + data: + headStats: + chunkCount: 37525 + maxTime: 1767436620000 + minTime: 1767362400712 + numLabelPairs: 2512 + numSeries: 9925 + labelValueCountByLabelName: + - name: __name__ + value: 5 + - name: job + value: 3 + memoryInBytesByLabelName: + - name: __name__ + value: 1024 + - name: job + value: 512 + seriesCountByLabelValuePair: + - name: job=prometheus + value: 100 + - name: instance=localhost:9090 + value: 100 + seriesCountByMetricName: + - name: up + value: 100 + - name: http_requests_total + value: 500 + status: success + default: + description: Error retrieving TSDB status. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/tsdb/blocks: + get: + tags: + - status + summary: Get TSDB blocks information + operationId: status-tsdb-blocks + responses: + "200": + description: TSDB blocks information retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/StatusTSDBBlocksOutputBody' + examples: + tsdbBlocks: + summary: TSDB block information + value: + data: + blocks: + - compaction: + level: 4 + sources: + - 01KBCJ7TR8A4QAJ3AA1J651P5S + - 01KBCS3J0E34567YPB8Y5W0E24 + - 01KBCZZ9KRTYGG3E7HVQFGC3S3 + maxTime: 1764763200000 + minTime: 1764568801099 + stats: + numChunks: 1073962 + numSamples: 129505582 + numSeries: 10661 + ulid: 01KC4D6GXQA4CRHYKV78NEBVAE + version: 1 + status: success + default: + description: Error retrieving TSDB blocks. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /status/walreplay: + get: + tags: + - status + summary: Get status walreplay + operationId: get-status-walreplay + responses: + "200": + description: WAL replay status retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/StatusWALReplayOutputBody' + examples: + walReplay: + summary: WAL replay status + value: + data: + current: 3214 + max: 3214 + min: 3209 + status: success + default: + description: Error retrieving WAL replay status. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /admin/tsdb/delete_series: + put: + tags: + - admin + summary: Delete series matching selectors via PUT + description: Deletes data for a selection of series in a time range using PUT method. + operationId: deleteSeriesPut + parameters: + - name: match[] + in: query + description: Series selectors to identify series to delete. + required: true + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{__name__=~"test.*"}' + - name: start + in: query + description: Start timestamp for deletion. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for deletion. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. 
+ - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + responses: + "200": + description: Series deleted successfully via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteSeriesOutputBody' + examples: + deletionSuccess: + summary: Successful series deletion + value: + status: success + default: + description: Error deleting series via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - admin + summary: Delete series matching selectors + description: Deletes data for a selection of series in a time range. + operationId: deleteSeriesPost + parameters: + - name: match[] + in: query + description: Series selectors to identify series to delete. + required: true + explode: false + schema: + type: array + items: + type: string + examples: + example: + value: + - '{__name__=~"test.*"}' + - name: start + in: query + description: Start timestamp for deletion. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. + examples: + RFC3339: + value: "2026-01-02T12:37:00Z" + epoch: + value: 1767357420 + - name: end + in: query + description: End timestamp for deletion. + required: false + explode: false + schema: + oneOf: + - type: string + format: date-time + description: RFC3339 timestamp. + - type: number + format: unixtime + description: Unix timestamp in seconds. + description: Timestamp in RFC3339 format or Unix timestamp in seconds. 
+ examples: + RFC3339: + value: "2026-01-02T13:37:00Z" + epoch: + value: 1767361020 + responses: + "200": + description: Series deleted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteSeriesOutputBody' + examples: + deletionSuccess: + summary: Successful series deletion + value: + status: success + default: + description: Error deleting series. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /admin/tsdb/clean_tombstones: + put: + tags: + - admin + summary: Clean tombstones in the TSDB via PUT + description: Removes deleted data from disk and cleans up existing tombstones using PUT method. + operationId: cleanTombstonesPut + responses: + "200": + description: Tombstones cleaned successfully via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/CleanTombstonesOutputBody' + examples: + tombstonesCleaned: + summary: Tombstones cleaned successfully + value: + status: success + default: + description: Error cleaning tombstones via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - admin + summary: Clean tombstones in the TSDB + description: Removes deleted data from disk and cleans up existing tombstones. + operationId: cleanTombstonesPost + responses: + "200": + description: Tombstones cleaned successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/CleanTombstonesOutputBody' + examples: + tombstonesCleaned: + summary: Tombstones cleaned successfully + value: + status: success + default: + description: Error cleaning tombstones. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /admin/tsdb/snapshot: + put: + tags: + - admin + summary: Create a snapshot of the TSDB via PUT + description: Creates a snapshot of all current data using PUT method. + operationId: snapshotPut + parameters: + - name: skip_head + in: query + description: If true, do not snapshot data in the head block. + required: false + explode: false + schema: + type: string + examples: + example: + value: "false" + responses: + "200": + description: Snapshot created successfully via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/SnapshotOutputBody' + examples: + snapshotCreated: + summary: Snapshot created successfully + value: + data: + name: 20260102T133700Z-a1b2c3d4e5f67890 + status: success + default: + description: Error creating snapshot via PUT. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + post: + tags: + - admin + summary: Create a snapshot of the TSDB + description: Creates a snapshot of all current data. + operationId: snapshotPost + parameters: + - name: skip_head + in: query + description: If true, do not snapshot data in the head block. + required: false + explode: false + schema: + type: string + examples: + example: + value: "false" + responses: + "200": + description: Snapshot created successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/SnapshotOutputBody' + examples: + snapshotCreated: + summary: Snapshot created successfully + value: + data: + name: 20260102T133700Z-a1b2c3d4e5f67890 + status: success + default: + description: Error creating snapshot. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /read: + post: + tags: + - remote + summary: Remote read endpoint + description: Prometheus remote read endpoint for federated queries. Accepts and returns Protocol Buffer encoded data. + operationId: remoteRead + responses: + "204": + description: No Content + default: + description: Error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /write: + post: + tags: + - remote + summary: Remote write endpoint + description: Prometheus remote write endpoint for sending metrics. Accepts Protocol Buffer encoded write requests. + operationId: remoteWrite + responses: + "204": + description: No Content + default: + description: Error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /otlp/v1/metrics: + post: + tags: + - otlp + summary: OTLP metrics write endpoint + description: OpenTelemetry Protocol metrics ingestion endpoint. Accepts OTLP/HTTP metrics in Protocol Buffer format. + operationId: otlpWrite + responses: + "204": + description: No Content + default: + description: Error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /notifications: + get: + tags: + - notifications + summary: Get notifications + operationId: get-notifications + responses: + "200": + description: Notifications retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/NotificationsOutputBody' + examples: + notifications: + summary: Server notifications + value: + data: + - active: true + date: "2026-01-02T16:14:50.046Z" + text: Configuration reload has failed. + status: success + default: + description: Error retrieving notifications. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error + /notifications/live: + get: + tags: + - notifications + summary: Stream live notifications via Server-Sent Events + description: Subscribe to real-time server notifications using SSE. Each event contains a JSON-encoded Notification object in the data field. + operationId: notifications-live + responses: + "200": + description: Server-sent events stream established. + content: + text/event-stream: + itemSchema: + type: object + properties: + data: + type: string + contentSchema: + $ref: '#/components/schemas/Notification' + description: SSE data field containing JSON-encoded notification. + contentMediaType: application/json + title: Server Sent Event Message + required: + - data + additionalProperties: false + description: A single SSE message. The data field contains a JSON-encoded Notification object. + examples: + activeNotification: + summary: Active notification SSE message + description: An SSE message containing an active server notification. + value: + data: '{"text":"Configuration reload has failed.","date":"2026-01-02T16:14:50.046Z","active":true}' + default: + description: Error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /features: + get: + tags: + - features + summary: Get features + operationId: get-features + responses: + "200": + description: Feature flags retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/FeaturesOutputBody' + examples: + enabledFeatures: + summary: Enabled feature flags + value: + data: + - exemplar-storage + - remote-write-receiver + status: success + default: + description: Error retrieving features. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Error' + examples: + tsdbNotReady: + summary: TSDB not ready + value: + error: TSDB not ready + errorType: internal + status: error +components: + schemas: + Error: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + errorType: + type: string + description: Type of error that occurred. + example: bad_data + error: + type: string + description: Human-readable error message. + example: invalid parameter + required: + - status + - errorType + - error + additionalProperties: false + description: Error response. + Labels: + type: object + additionalProperties: true + description: Label set represented as a key-value map. + QueryOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/QueryData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for instant query. + QueryRangeOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/QueryData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. 
+ required: + - status + - data + additionalProperties: false + description: Response body for range query. + QueryPostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The PromQL query to execute.' + example: up + time: + type: string + description: 'Form field: The evaluation timestamp (optional, defaults to current time).' + example: "2023-07-21T20:10:51.781Z" + limit: + type: integer + format: int64 + description: 'Form field: The maximum number of metrics to return.' + example: 100 + timeout: + type: string + description: 'Form field: Evaluation timeout (optional, defaults to and is capped by the value of the -query.timeout flag).' + example: 30s + lookback_delta: + type: string + description: 'Form field: Override the lookback period for this query (optional).' + example: 5m + stats: + type: string + description: 'Form field: When provided, include query statistics in the response (the special value ''all'' enables more comprehensive statistics).' + example: all + required: + - query + additionalProperties: false + description: POST request body for instant query. + QueryRangePostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The query to execute.' + example: rate(http_requests_total[5m]) + start: + type: string + description: 'Form field: The start time of the query.' + example: "2023-07-21T20:10:30.781Z" + end: + type: string + description: 'Form field: The end time of the query.' + example: "2023-07-21T20:20:30.781Z" + step: + type: string + description: 'Form field: The step size of the query.' + example: 15s + limit: + type: integer + format: int64 + description: 'Form field: The maximum number of metrics to return.' + example: 100 + timeout: + type: string + description: 'Form field: Evaluation timeout (optional, defaults to and is capped by the value of the -query.timeout flag).' 
+ example: 30s + lookback_delta: + type: string + description: 'Form field: Override the lookback period for this query (optional).' + example: 5m + stats: + type: string + description: 'Form field: When provided, include query statistics in the response (the special value ''all'' enables more comprehensive statistics).' + example: all + required: + - query + - start + - end + - step + additionalProperties: false + description: POST request body for range query. + QueryExemplarsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + description: Response data (structure varies by endpoint). + example: + result: ok + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Generic response body. + QueryExemplarsPostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The query to execute.' + example: http_requests_total + start: + type: string + description: 'Form field: The start time of the query.' + example: "2023-07-21T20:00:00.000Z" + end: + type: string + description: 'Form field: The end time of the query.' + example: "2023-07-21T21:00:00.000Z" + required: + - query + additionalProperties: false + description: POST request body for exemplars query. + FormatQueryOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: string + description: Formatted query string. 
+ example: sum by(status) (rate(http_requests_total[5m])) + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for format query endpoint. + FormatQueryPostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The query to format.' + example: sum(rate(http_requests_total[5m])) by (status) + required: + - query + additionalProperties: false + description: POST request body for format query. + ParseQueryOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + description: Response data (structure varies by endpoint). + example: + result: ok + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Generic response body. + ParseQueryPostInputBody: + type: object + properties: + query: + type: string + description: 'Form field: The query to parse.' + example: sum(rate(http_requests_total[5m])) + required: + - query + additionalProperties: false + description: POST request body for parse query. 
+ QueryData: + anyOf: + - type: object + properties: + resultType: + type: string + enum: + - vector + result: + type: array + items: + anyOf: + - $ref: '#/components/schemas/FloatSample' + - $ref: '#/components/schemas/HistogramSample' + description: Array of samples (either float or histogram). + required: + - resultType + - result + additionalProperties: false + - type: object + properties: + resultType: + type: string + enum: + - matrix + result: + type: array + items: + anyOf: + - $ref: '#/components/schemas/FloatSeries' + - $ref: '#/components/schemas/HistogramSeries' + description: Array of time series (either float or histogram). + required: + - resultType + - result + additionalProperties: false + - type: object + properties: + resultType: + type: string + enum: + - scalar + result: + type: array + items: + oneOf: + - type: number + - type: string + maxItems: 2 + minItems: 2 + description: Scalar value as [timestamp, stringValue]. + required: + - resultType + - result + additionalProperties: false + - type: object + properties: + resultType: + type: string + enum: + - string + result: + type: array + items: + type: string + maxItems: 2 + minItems: 2 + description: String value as [timestamp, stringValue]. + required: + - resultType + - result + additionalProperties: false + description: Query result data. The structure of 'result' depends on 'resultType'. + example: + result: + - metric: + __name__: up + job: prometheus + value: + - 1627845600 + - "1" + resultType: vector + FloatSample: + type: object + properties: + metric: + $ref: '#/components/schemas/Labels' + value: + type: array + items: + oneOf: + - type: number + - type: string + maxItems: 2 + minItems: 2 + description: Timestamp and float value as [unixTimestamp, stringValue]. + example: + - 1767436620 + - "1" + required: + - metric + - value + additionalProperties: false + description: A sample with a float value. 
+ HistogramSample: + type: object + properties: + metric: + $ref: '#/components/schemas/Labels' + histogram: + type: array + items: + oneOf: + - type: number + - $ref: '#/components/schemas/HistogramValue' + maxItems: 2 + minItems: 2 + description: Timestamp and histogram value as [unixTimestamp, histogramObject]. + example: + - 1767436620 + - buckets: [] + count: "60" + sum: "120" + required: + - metric + - histogram + additionalProperties: false + description: A sample with a native histogram value. + FloatSeries: + type: object + properties: + metric: + $ref: '#/components/schemas/Labels' + values: + type: array + items: + type: array + items: + oneOf: + - type: number + - type: string + maxItems: 2 + minItems: 2 + description: Array of [timestamp, stringValue] pairs for float values. + required: + - metric + - values + additionalProperties: false + description: A time series with float values. + HistogramSeries: + type: object + properties: + metric: + $ref: '#/components/schemas/Labels' + histograms: + type: array + items: + type: array + items: + oneOf: + - type: number + - $ref: '#/components/schemas/HistogramValue' + maxItems: 2 + minItems: 2 + description: Array of [timestamp, histogramObject] pairs for histogram values. + required: + - metric + - histograms + additionalProperties: false + description: A time series with native histogram values. + HistogramValue: + type: object + properties: + count: + type: string + description: Total count of observations. + sum: + type: string + description: Sum of all observed values. + buckets: + type: array + items: + type: array + items: + oneOf: + - type: number + - type: string + description: Histogram buckets as [boundary_rule, lower, upper, count]. + required: + - count + - sum + additionalProperties: false + description: Native histogram value representation. + LabelsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. 
+ example: success + data: + type: array + items: + type: string + example: + - __name__ + - job + - instance + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of strings. + LabelsPostInputBody: + type: object + properties: + start: + type: string + description: 'Form field: The start time of the query.' + example: "2023-07-21T20:00:00.000Z" + end: + type: string + description: 'Form field: The end time of the query.' + example: "2023-07-21T21:00:00.000Z" + match[]: + type: array + items: + type: string + description: 'Form field: Series selector argument that selects the series from which to read the label names.' + example: + - '{job="prometheus"}' + limit: + type: integer + format: int64 + description: 'Form field: The maximum number of label names to return.' + example: 100 + additionalProperties: false + description: POST request body for labels query. + LabelValuesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: array + items: + type: string + example: + - __name__ + - job + - instance + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of strings. 
+ SeriesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: array + items: + $ref: '#/components/schemas/Labels' + example: + - __name__: up + instance: localhost:9090 + job: prometheus + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of label sets. + SeriesPostInputBody: + type: object + properties: + start: + type: string + description: 'Form field: The start time of the query.' + example: "2023-07-21T20:00:00.000Z" + end: + type: string + description: 'Form field: The end time of the query.' + example: "2023-07-21T21:00:00.000Z" + match[]: + type: array + items: + type: string + description: 'Form field: Series selector argument that selects the series to return.' + example: + - '{job="prometheus"}' + limit: + type: integer + format: int64 + description: 'Form field: The maximum number of series to return.' + example: 100 + required: + - match[] + additionalProperties: false + description: POST request body for series query. + SeriesDeleteOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + description: Response data (structure varies by endpoint). + example: + result: ok + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. 
+ required: + - status + - data + additionalProperties: false + description: Generic response body. + Metadata: + type: object + properties: + type: + type: string + description: Metric type (counter, gauge, histogram, summary, or untyped). + unit: + type: string + description: Unit of the metric. + help: + type: string + description: Help text describing the metric. + required: + - type + - unit + - help + additionalProperties: false + description: Metric metadata. + MetadataOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: object + additionalProperties: + type: array + items: + $ref: '#/components/schemas/Metadata' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for metadata endpoint. + MetricMetadata: + type: object + properties: + target: + $ref: '#/components/schemas/Labels' + metric: + type: string + description: Metric name. + type: + type: string + description: Metric type (counter, gauge, histogram, summary, or untyped). + help: + type: string + description: Help text describing the metric. + unit: + type: string + description: Unit of the metric. + required: + - target + - type + - help + - unit + additionalProperties: false + description: Target metric metadata. + Target: + type: object + properties: + discoveredLabels: + $ref: '#/components/schemas/Labels' + labels: + $ref: '#/components/schemas/Labels' + scrapePool: + type: string + description: Name of the scrape pool. + scrapeUrl: + type: string + description: URL of the target. + globalUrl: + type: string + description: Global URL of the target. 
+ lastError: + type: string + description: Last error message from scraping. + lastScrape: + type: string + format: date-time + description: Timestamp of the last scrape. + lastScrapeDuration: + type: number + format: double + description: Duration of the last scrape in seconds. + health: + type: string + description: Health status of the target (up, down, or unknown). + scrapeInterval: + type: string + description: Scrape interval for this target. + scrapeTimeout: + type: string + description: Scrape timeout for this target. + required: + - discoveredLabels + - labels + - scrapePool + - scrapeUrl + - globalUrl + - lastError + - lastScrape + - lastScrapeDuration + - health + - scrapeInterval + - scrapeTimeout + additionalProperties: false + description: Scrape target information. + DroppedTarget: + type: object + properties: + discoveredLabels: + $ref: '#/components/schemas/Labels' + scrapePool: + type: string + description: Name of the scrape pool. + required: + - discoveredLabels + - scrapePool + additionalProperties: false + description: Dropped target information. + TargetDiscovery: + type: object + properties: + activeTargets: + type: array + items: + $ref: '#/components/schemas/Target' + droppedTargets: + type: array + items: + $ref: '#/components/schemas/DroppedTarget' + droppedTargetCounts: + type: object + additionalProperties: + type: integer + format: int64 + required: + - activeTargets + - droppedTargets + - droppedTargetCounts + additionalProperties: false + description: Target discovery information including active and dropped targets. + TargetsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/TargetDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. 
+ infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for targets endpoint. + TargetMetadataOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: array + items: + $ref: '#/components/schemas/MetricMetadata' + example: + - help: The current health status of the target + metric: up + target: + instance: localhost:9090 + job: prometheus + type: gauge + unit: "" + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of metric metadata. + ScrapePoolsDiscovery: + type: object + properties: + scrapePools: + type: array + items: + type: string + required: + - scrapePools + additionalProperties: false + description: List of all configured scrape pools. + ScrapePoolsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/ScrapePoolsDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for scrape pools endpoint. 
+ Config: + type: object + properties: + source_labels: + type: array + items: + type: string + description: Source labels for relabeling. + separator: + type: string + description: Separator for source label values. + regex: + type: string + description: Regular expression for matching. + modulus: + type: integer + format: int64 + description: Modulus for hash-based relabeling. + target_label: + type: string + description: Target label name. + replacement: + type: string + description: Replacement value. + action: + type: string + description: Relabel action. + additionalProperties: false + description: Relabel configuration. + RelabelStep: + type: object + properties: + rule: + $ref: '#/components/schemas/Config' + output: + $ref: '#/components/schemas/Labels' + keep: + type: boolean + required: + - rule + - output + - keep + additionalProperties: false + description: Relabel step showing the rule, output, and whether the target was kept. + RelabelStepsResponse: + type: object + properties: + steps: + type: array + items: + $ref: '#/components/schemas/RelabelStep' + required: + - steps + additionalProperties: false + description: Relabeling steps response. + TargetRelabelStepsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/RelabelStepsResponse' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for target relabel steps endpoint. + RuleGroup: + type: object + properties: + name: + type: string + description: Name of the rule group. 
+ file: + type: string + description: File containing the rule group. + rules: + type: array + items: + type: object + description: Rule definition. + description: Rules in this group. + interval: + type: number + format: double + description: Evaluation interval in seconds. + limit: + type: integer + format: int64 + description: Maximum number of alerts for this group. + evaluationTime: + type: number + format: double + description: Time taken to evaluate the group in seconds. + lastEvaluation: + type: string + format: date-time + description: Timestamp of the last evaluation. + required: + - name + - file + - rules + - interval + - limit + - evaluationTime + - lastEvaluation + additionalProperties: false + description: Rule group information. + RuleDiscovery: + type: object + properties: + groups: + type: array + items: + $ref: '#/components/schemas/RuleGroup' + groupNextToken: + type: string + description: Pagination token for the next page of groups. + required: + - groups + additionalProperties: false + description: Rule discovery information containing all rule groups. + RulesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/RuleDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for rules endpoint. + Alert: + type: object + properties: + labels: + $ref: '#/components/schemas/Labels' + annotations: + $ref: '#/components/schemas/Labels' + state: + type: string + description: State of the alert (pending, firing, or inactive). 
+ value: + type: string + description: Value of the alert expression. + activeAt: + type: string + format: date-time + description: Timestamp when the alert became active. + keepFiringSince: + type: string + format: date-time + description: Timestamp since the alert has been kept firing. + required: + - labels + - annotations + - state + - value + additionalProperties: false + description: Alert information. + AlertDiscovery: + type: object + properties: + alerts: + type: array + items: + $ref: '#/components/schemas/Alert' + required: + - alerts + additionalProperties: false + description: Alert discovery information containing all active alerts. + AlertsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/AlertDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for alerts endpoint. + AlertmanagerTarget: + type: object + properties: + url: + type: string + description: URL of the Alertmanager instance. + required: + - url + additionalProperties: false + description: Alertmanager target information. + AlertmanagerDiscovery: + type: object + properties: + activeAlertmanagers: + type: array + items: + $ref: '#/components/schemas/AlertmanagerTarget' + droppedAlertmanagers: + type: array + items: + $ref: '#/components/schemas/AlertmanagerTarget' + required: + - activeAlertmanagers + - droppedAlertmanagers + additionalProperties: false + description: Alertmanager discovery information including active and dropped instances. 
+ AlertmanagersOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/AlertmanagerDiscovery' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for alertmanagers endpoint. + StatusConfigData: + type: object + properties: + yaml: + type: string + description: Prometheus configuration in YAML format. + required: + - yaml + additionalProperties: false + description: Prometheus configuration. + StatusConfigOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/StatusConfigData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status config endpoint. 
+ RuntimeInfo: + type: object + properties: + startTime: + type: string + format: date-time + CWD: + type: string + hostname: + type: string + serverTime: + type: string + format: date-time + reloadConfigSuccess: + type: boolean + lastConfigTime: + type: string + format: date-time + corruptionCount: + type: integer + format: int64 + goroutineCount: + type: integer + format: int64 + GOMAXPROCS: + type: integer + format: int64 + GOMEMLIMIT: + type: integer + format: int64 + GOGC: + type: string + GODEBUG: + type: string + storageRetention: + type: string + required: + - startTime + - CWD + - hostname + - serverTime + - reloadConfigSuccess + - lastConfigTime + - corruptionCount + - goroutineCount + - GOMAXPROCS + - GOMEMLIMIT + - GOGC + - GODEBUG + - storageRetention + additionalProperties: false + description: Prometheus runtime information. + StatusRuntimeInfoOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/RuntimeInfo' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status runtime info endpoint. + PrometheusVersion: + type: object + properties: + version: + type: string + revision: + type: string + branch: + type: string + buildUser: + type: string + buildDate: + type: string + goVersion: + type: string + required: + - version + - revision + - branch + - buildUser + - buildDate + - goVersion + additionalProperties: false + description: Prometheus version information. 
+ StatusBuildInfoOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/PrometheusVersion' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status build info endpoint. + StatusFlagsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: object + additionalProperties: + type: string + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status flags endpoint. + HeadStats: + type: object + properties: + numSeries: + type: integer + format: int64 + numLabelPairs: + type: integer + format: int64 + chunkCount: + type: integer + format: int64 + minTime: + type: integer + format: int64 + maxTime: + type: integer + format: int64 + required: + - numSeries + - numLabelPairs + - chunkCount + - minTime + - maxTime + additionalProperties: false + description: TSDB head statistics. + TSDBStat: + type: object + properties: + name: + type: string + value: + type: integer + format: int64 + required: + - name + - value + additionalProperties: false + description: TSDB statistic. 
+ TSDBStatus: + type: object + properties: + headStats: + $ref: '#/components/schemas/HeadStats' + seriesCountByMetricName: + type: array + items: + $ref: '#/components/schemas/TSDBStat' + labelValueCountByLabelName: + type: array + items: + $ref: '#/components/schemas/TSDBStat' + memoryInBytesByLabelName: + type: array + items: + $ref: '#/components/schemas/TSDBStat' + seriesCountByLabelValuePair: + type: array + items: + $ref: '#/components/schemas/TSDBStat' + required: + - headStats + - seriesCountByMetricName + - labelValueCountByLabelName + - memoryInBytesByLabelName + - seriesCountByLabelValuePair + additionalProperties: false + description: TSDB status information. + StatusTSDBOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/TSDBStatus' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status TSDB endpoint. + BlockDesc: + type: object + properties: + ulid: + type: string + minTime: + type: integer + format: int64 + maxTime: + type: integer + format: int64 + required: + - ulid + - minTime + - maxTime + additionalProperties: false + description: Block descriptor. + BlockStats: + type: object + properties: + numSamples: + type: integer + format: int64 + numSeries: + type: integer + format: int64 + numChunks: + type: integer + format: int64 + numTombstones: + type: integer + format: int64 + numFloatSamples: + type: integer + format: int64 + numHistogramSamples: + type: integer + format: int64 + additionalProperties: false + description: Block statistics. 
+ BlockMetaCompaction: + type: object + properties: + level: + type: integer + format: int64 + sources: + type: array + items: + type: string + parents: + type: array + items: + $ref: '#/components/schemas/BlockDesc' + failed: + type: boolean + deletable: + type: boolean + hints: + type: array + items: + type: string + required: + - level + additionalProperties: false + description: Block compaction metadata. + BlockMeta: + type: object + properties: + ulid: + type: string + minTime: + type: integer + format: int64 + maxTime: + type: integer + format: int64 + stats: + $ref: '#/components/schemas/BlockStats' + compaction: + $ref: '#/components/schemas/BlockMetaCompaction' + version: + type: integer + format: int64 + required: + - ulid + - minTime + - maxTime + - compaction + - version + additionalProperties: false + description: Block metadata. + StatusTSDBBlocksData: + type: object + properties: + blocks: + type: array + items: + $ref: '#/components/schemas/BlockMeta' + required: + - blocks + additionalProperties: false + description: TSDB blocks information. + StatusTSDBBlocksOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/StatusTSDBBlocksData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status TSDB blocks endpoint. 
+ StatusWALReplayData: + type: object + properties: + min: + type: integer + format: int64 + max: + type: integer + format: int64 + current: + type: integer + format: int64 + required: + - min + - max + - current + additionalProperties: false + description: WAL replay status. + StatusWALReplayOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/StatusWALReplayData' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for status WAL replay endpoint. + DeleteSeriesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + additionalProperties: false + description: Response body containing only status. + CleanTombstonesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. 
+ required: + - status + additionalProperties: false + description: Response body containing only status. + DataStruct: + type: object + properties: + name: + type: string + required: + - name + additionalProperties: false + description: Generic data structure with a name field. + SnapshotOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + $ref: '#/components/schemas/DataStruct' + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body for snapshot endpoint. + Notification: + type: object + properties: + text: + type: string + date: + type: string + format: date-time + active: + type: boolean + required: + - text + - date + - active + additionalProperties: false + description: Server notification. + NotificationsOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + type: array + items: + $ref: '#/components/schemas/Notification' + example: + - active: true + date: "2023-07-21T20:00:00.000Z" + text: Server is running + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Response body with an array of notifications. 
+ FeaturesOutputBody: + type: object + properties: + status: + type: string + enum: + - success + - error + description: Response status. + example: success + data: + description: Response data (structure varies by endpoint). + example: + result: ok + warnings: + type: array + items: + type: string + description: Only set if there were warnings while executing the request. There will still be data in the data field. + infos: + type: array + items: + type: string + description: Only set if there were info-level annotations while executing the request. + required: + - status + - data + additionalProperties: false + description: Generic response body. +tags: + - name: query + summary: Query + description: Query and evaluate PromQL expressions. + - name: metadata + summary: Metadata + description: Retrieve metric metadata such as type and unit. + - name: labels + summary: Labels + description: Query label names and values. + - name: series + summary: Series + description: Query and manage time series. + - name: targets + summary: Targets + description: Retrieve target and scrape pool information. + - name: rules + summary: Rules + description: Query recording and alerting rules. + - name: alerts + summary: Alerts + description: Query active alerts and alertmanager discovery. + - name: status + summary: Status + description: Retrieve server status and configuration. + - name: admin + summary: Admin + description: Administrative operations for TSDB management. + - name: features + summary: Features + description: Query enabled features. + - name: remote + summary: Remote Storage + description: Remote read and write endpoints. + - name: otlp + summary: OTLP + description: OpenTelemetry Protocol metrics ingestion. + - name: notifications + summary: Notifications + description: Server notifications and events. 
diff --git a/web/ui/mantine-ui/src/promql/tools/go.mod b/web/ui/mantine-ui/src/promql/tools/go.mod index a3abc881e2..d3f69a698b 100644 --- a/web/ui/mantine-ui/src/promql/tools/go.mod +++ b/web/ui/mantine-ui/src/promql/tools/go.mod @@ -1,6 +1,6 @@ module github.com/prometheus/prometheus/web/ui/mantine-ui/src/promql/tools -go 1.24.0 +go 1.25.0 require ( github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853 diff --git a/web/web.go b/web/web.go index 4df447be64..aec2f2d4ee 100644 --- a/web/web.go +++ b/web/web.go @@ -361,6 +361,11 @@ func New(logger *slog.Logger, o *Options) *Handler { app = h.storage } + version := "" + if o.Version != nil { + version = o.Version.Version + } + h.apiV1 = api_v1.NewAPI(h.queryEngine, h.storage, app, h.exemplarStorage, factorySPr, factoryTr, factoryAr, func() config.Config { h.mtx.RLock() @@ -402,6 +407,10 @@ func New(logger *slog.Logger, o *Options) *Handler { o.AppendMetadata, nil, o.FeatureRegistry, + api_v1.OpenAPIOptions{ + ExternalURL: o.ExternalURL.String(), + Version: version, + }, ) if r := o.FeatureRegistry; r != nil { diff --git a/web/web_test.go b/web/web_test.go index ce682912a9..cbcf15ffdc 100644 --- a/web/web_test.go +++ b/web/web_test.go @@ -328,6 +328,7 @@ func TestDebugHandler(t *testing.T) { Host: "localhost.localdomain:9090", Scheme: "http", }, + Version: &PrometheusVersion{}, } handler := New(nil, opts) handler.SetReady(Ready) @@ -353,6 +354,7 @@ func TestHTTPMetrics(t *testing.T) { Host: "localhost.localdomain:9090", Scheme: "http", }, + Version: &PrometheusVersion{}, }) getReady := func() int { t.Helper() From 3bc688e5cbbc7c7aba0c6df5bb9293ee1a012d03 Mon Sep 17 00:00:00 2001 From: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:26:03 +0100 Subject: [PATCH 44/46] Features API: Add OpenAPI 3.1 and 3.2 Now that #17825 is merged. 
Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com> --- cmd/prometheus/testdata/features.json | 2 ++ web/web.go | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cmd/prometheus/testdata/features.json b/cmd/prometheus/testdata/features.json index 4c893daae2..4f74b7e810 100644 --- a/cmd/prometheus/testdata/features.json +++ b/cmd/prometheus/testdata/features.json @@ -4,6 +4,8 @@ "exclude_alerts": true, "label_values_match": true, "lifecycle": false, + "openapi_3.1": true, + "openapi_3.2": true, "otlp_write_receiver": false, "query_stats": true, "query_warnings": true, diff --git a/web/web.go b/web/web.go index aec2f2d4ee..cb9258d87f 100644 --- a/web/web.go +++ b/web/web.go @@ -427,6 +427,8 @@ func New(logger *slog.Logger, o *Options) *Handler { r.Enable(features.API, "time_range_series") // start/end parameters for /series endpoint. r.Enable(features.API, "time_range_labels") // start/end parameters for /labels endpoints. r.Enable(features.API, "exclude_alerts") // exclude_alerts parameter for /rules endpoint. + r.Enable(features.API, "openapi_3.1") // OpenAPI 3.1 specification support. + r.Enable(features.API, "openapi_3.2") // OpenAPI 3.2 specification support. 
r.Set(features.UI, "ui_v3", !o.UseOldUI) r.Set(features.UI, "ui_v2", o.UseOldUI) } From 0fc70f3a6251776480e78c93859439b735fa06c3 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Thu, 29 Jan 2026 14:26:40 +0000 Subject: [PATCH 45/46] tsdb: kill unused mint,maxt tracking (#17967) Signed-off-by: bwplotka --- tsdb/head_append.go | 24 ------------------------ tsdb/head_append_v2.go | 14 -------------- 2 files changed, 38 deletions(-) diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 539884e74b..c171079509 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -168,8 +168,6 @@ func (h *Head) appender() *headAppender { headAppenderBase: headAppenderBase{ head: h, minValidTime: minValidTime, - mint: math.MaxInt64, - maxt: math.MinInt64, headMaxt: h.MaxTime(), oooTimeWindow: h.opts.OutOfOrderTimeWindow.Load(), seriesRefs: h.getRefSeriesBuffer(), @@ -393,7 +391,6 @@ func (b *appendBatch) close(h *Head) { type headAppenderBase struct { head *Head minValidTime int64 // No samples below this timestamp are allowed. - mint, maxt int64 headMaxt int64 // We track it here to not take the lock for every sample appended. oooTimeWindow int64 // Use the same for the entire append, and don't load the atomic for each sample. 
@@ -477,13 +474,6 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 return 0, err } - if t < a.mint { - a.mint = t - } - if t > a.maxt { - a.maxt = t - } - b := a.getCurrentBatch(stFloat, s.ref) b.floats = append(b.floats, record.RefSample{ Ref: s.ref, @@ -527,9 +517,6 @@ func (a *headAppender) AppendSTZeroSample(ref storage.SeriesRef, lset labels.Lab return storage.SeriesRef(s.ref), storage.ErrOutOfOrderST } - if st > a.maxt { - a.maxt = st - } b := a.getCurrentBatch(stFloat, s.ref) b.floats = append(b.floats, record.RefSample{Ref: s.ref, T: st, V: 0.0}) b.floatSeries = append(b.floatSeries, s) @@ -903,13 +890,6 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels b.floatHistogramSeries = append(b.floatHistogramSeries, s) } - if t < a.mint { - a.mint = t - } - if t > a.maxt { - a.maxt = t - } - return storage.SeriesRef(s.ref), nil } @@ -1013,10 +993,6 @@ func (a *headAppender) AppendHistogramSTZeroSample(ref storage.SeriesRef, lset l b.floatHistogramSeries = append(b.floatHistogramSeries, s) } - if st > a.maxt { - a.maxt = st - } - return storage.SeriesRef(s.ref), nil } diff --git a/tsdb/head_append_v2.go b/tsdb/head_append_v2.go index 4a62d56741..2c09c4cbd5 100644 --- a/tsdb/head_append_v2.go +++ b/tsdb/head_append_v2.go @@ -17,7 +17,6 @@ import ( "context" "errors" "fmt" - "math" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" @@ -89,8 +88,6 @@ func (h *Head) appenderV2() *headAppenderV2 { headAppenderBase: headAppenderBase{ head: h, minValidTime: minValidTime, - mint: math.MaxInt64, - maxt: math.MinInt64, headMaxt: h.MaxTime(), oooTimeWindow: h.opts.OutOfOrderTimeWindow.Load(), seriesRefs: h.getRefSeriesBuffer(), @@ -193,13 +190,6 @@ func (a *headAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t i return 0, appErr } - if t < a.mint { - a.mint = t - } - if t > a.maxt { - a.maxt = t - } - if isStale { // For stale values we never 
attempt to process metadata/exemplars, claim the success. return storage.SeriesRef(s.ref), nil @@ -390,10 +380,6 @@ func (a *headAppenderV2) bestEffortAppendSTZeroSample(s *memSeries, ls labels.La a.head.logger.Debug("Error when appending ST", "series", s.lset.String(), "st", st, "t", t, "err", err) return } - - if st > a.maxt { - a.maxt = st - } } var _ storage.GetRef = &headAppenderV2{} From 88f6ee4c8e324d783fb1b262f4df9e1ea8427a97 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Fri, 30 Jan 2026 11:44:07 +0000 Subject: [PATCH 46/46] tests(scrape): add TestScrapeLoopAppend_WithStorage (#17937) Signed-off-by: bwplotka --- scrape/scrape_test.go | 188 +++++++++++++++++++++++++++++++++++++++-- tsdb/head_append_v2.go | 3 - 2 files changed, 183 insertions(+), 8 deletions(-) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index f9a0834bd1..b29b445d01 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -1436,7 +1436,9 @@ func readTextParseTestMetrics(t testing.TB) []byte { if err != nil { t.Fatal(err) } - return b + + // Replace all Carriage Return chars that appear when testing on windows. + return bytes.ReplaceAll(b, []byte{'\r'}, nil) } func makeTestGauges(n int) []byte { @@ -1543,6 +1545,184 @@ func TestPromTextToProto(t *testing.T) { require.Equal(t, "promhttp_metric_handler_requests_total", got[236]) } +// TestScrapeLoopAppend_WithStorage tests appends and storage integration for the +// large input files that are also used in benchmarks. 
+func TestScrapeLoopAppend_WithStorage(t *testing.T) { + ts := time.Now() + + for _, appV2 := range []bool{false, true} { + for _, tc := range []struct { + name string + parsableText []byte + + expectedSamplesLen int + testAppendedSamples func(t *testing.T, committed []sample) + testExemplars func(t *testing.T, er []exemplar.QueryResult) + }{ + { + name: "1Fam2000Gauges", + parsableText: makeTestGauges(2000), + + expectedSamplesLen: 2000, + testAppendedSamples: func(t *testing.T, committed []sample) { + var expectedMF string + if appV2 { + expectedMF = "metric_a" // Only AppenderV2 supports metric family passing. + } + // Verify a few samples. + testutil.RequireEqual(t, sample{ + MF: expectedMF, + M: metadata.Metadata{Type: model.MetricTypeGauge, Help: "help text"}, + L: labels.FromStrings(model.MetricNameLabel, "metric_a", "foo", "0", "bar", "0"), V: 1, T: timestamp.FromTime(ts), + }, committed[0]) + testutil.RequireEqual(t, sample{ + MF: expectedMF, + M: metadata.Metadata{Type: model.MetricTypeGauge, Help: "help text"}, + L: labels.FromStrings(model.MetricNameLabel, "metric_a", "foo", "1245", "bar", "124500"), V: 1, T: timestamp.FromTime(ts), + }, committed[1245]) + testutil.RequireEqual(t, sample{ + MF: expectedMF, + M: metadata.Metadata{Type: model.MetricTypeGauge, Help: "help text"}, + L: labels.FromStrings(model.MetricNameLabel, "metric_a", "foo", "1999", "bar", "199900"), V: 1, T: timestamp.FromTime(ts), + }, committed[len(committed)-1]) + }, + }, + { + name: "237FamsAllTypes", + parsableText: readTextParseTestMetrics(t), + + expectedSamplesLen: 1857, + testAppendedSamples: func(t *testing.T, committed []sample) { + // Verify a few samples. + testutil.RequireEqual(t, sample{ + MF: func() string { + if !appV2 { + return "" + } + return "go_gc_gomemlimit_bytes" + }(), + M: metadata.Metadata{Type: model.MetricTypeGauge, Help: "Go runtime memory limit configured by the user, otherwise math.MaxInt64. 
This value is set by the GOMEMLIMIT environment variable, and the runtime/debug.SetMemoryLimit function. Sourced from /gc/gomemlimit:bytes"}, + L: labels.FromStrings(model.MetricNameLabel, "go_gc_gomemlimit_bytes"), V: 9.03676723e+08, T: timestamp.FromTime(ts), + }, committed[11]) + testutil.RequireEqual(t, sample{ + MF: func() string { + if !appV2 { + return "" // Only AppenderV2 supports metric family passing. + } + return "prometheus_http_request_duration_seconds" + }(), + M: metadata.Metadata{Type: model.MetricTypeHistogram, Help: "Histogram of latencies for HTTP requests."}, + L: labels.FromStrings(model.MetricNameLabel, "prometheus_http_request_duration_seconds_bucket", "handler", "/api/v1/query_range", "le", "120.0"), V: 118157, T: timestamp.FromTime(ts), + }, committed[448]) + testutil.RequireEqual(t, sample{ + MF: func() string { + if !appV2 { + return "" // Only AppenderV2 supports metric family passing. + } + return "promhttp_metric_handler_requests_total" + }(), + M: metadata.Metadata{Type: model.MetricTypeCounter, Help: "Total number of scrapes by HTTP status code."}, + L: labels.FromStrings(model.MetricNameLabel, "promhttp_metric_handler_requests_total", "code", "503"), V: 0, T: timestamp.FromTime(ts), + }, committed[len(committed)-1]) + }, + }, + { + name: "100HistsWithExemplars", + parsableText: makeTestHistogramsWithExemplars(100), + + expectedSamplesLen: 24 * 100, + testAppendedSamples: func(t *testing.T, committed []sample) { + // Verify a few samples. + m := metadata.Metadata{Type: model.MetricTypeHistogram, Help: "RPC latency distributions."} + testutil.RequireEqual(t, sample{ + MF: func() string { + if !appV2 { + return "" // Only AppenderV2 supports metric family passing. 
+ } + return "rpc_durations_histogram0_seconds" + }(), + M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram0_seconds_bucket", "le", "0.0003100000000000002"), V: 15, T: timestamp.FromTime(ts), + ES: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "9818"), Value: 0.0002791130914009552, Ts: 1726839814982, HasTs: true}, + }, + }, committed[13]) + testutil.RequireEqual(t, sample{ + MF: func() string { + if !appV2 { + return "" // Only AppenderV2 supports metric family passing. + } + return "rpc_durations_histogram49_seconds" + }(), + M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram49_seconds_sum"), V: -8.452185437166741e-05, T: timestamp.FromTime(ts), + }, committed[24*50-3]) + + // This series does not have metadata, nor metric family, because of isSeriesPartOfFamily bug and OpenMetric 1.0 limitations around _created series. + // TODO(bwplotka): Fix with https://github.com/prometheus/prometheus/issues/17900 + testutil.RequireEqual(t, sample{ + L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram99_seconds_created"), V: 1.726839813016302e+09, T: timestamp.FromTime(ts), + }, committed[len(committed)-1]) + }, + testExemplars: func(t *testing.T, er []exemplar.QueryResult) { + // 12 out of 24 histogram series have exemplars. 
+ require.Len(t, er, 12*100) + testutil.RequireEqual(t, exemplar.QueryResult{ + SeriesLabels: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram0_seconds_bucket", "le", "0.0003100000000000002"), + Exemplars: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "9818"), Value: 0.0002791130914009552, Ts: 1726839814982, HasTs: true}, + }, + }, er[10]) + testutil.RequireEqual(t, exemplar.QueryResult{ + SeriesLabels: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram9_seconds_bucket", "le", "1.0000000000000216e-05"), + Exemplars: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "19206"), Value: -4.6156147425468016e-05, Ts: 1726839815133, HasTs: true}, + }, + }, er[len(er)-1]) + }, + }, + } { + t.Run(fmt.Sprintf("appV2=%v/data=%v", appV2, tc.name), func(t *testing.T) { + s := teststorage.New(t, func(opt *tsdb.Options) { + opt.EnableMetadataWALRecords = true + }) + + appTest := teststorage.NewAppendable().Then(s) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) + app := sl.appender() + + _, _, _, err := app.append(tc.parsableText, "application/openmetrics-text", ts) + require.NoError(t, err) + require.NoError(t, app.Commit()) + + // Check the recorded samples on the Appender layer. + require.Nil(t, appTest.PendingSamples()) + require.Nil(t, appTest.RolledbackSamples()) + + got := appTest.ResultSamples() + require.Len(t, got, tc.expectedSamplesLen) + tc.testAppendedSamples(t, got) + + // Check basic storage stats. + stats := s.Head().Stats(model.MetricNameLabel, 2000) + require.Equal(t, tc.expectedSamplesLen, int(stats.NumSeries)) + + // Check exemplars. + eq, err := s.ExemplarQuerier(t.Context()) + require.NoError(t, err) + + er, err := eq.Select(math.MinInt64, math.MaxInt64, nil) + require.NoError(t, err) + + if tc.testExemplars != nil { + tc.testExemplars(t, er) + } else { + // Expect no exemplars. 
+ require.Empty(t, er, "%v is not empty", er) + } + }) + } + } +} + // BenchmarkScrapeLoopAppend benchmarks scrape appends for typical cases. // // Benchmark compares append function run across 4 dimensions: @@ -1567,7 +1747,7 @@ func BenchmarkScrapeLoopAppend(b *testing.B) { name string parsableText []byte }{ - {name: "1Fam1000Gauges", parsableText: makeTestGauges(2000)}, // ~68.1 KB, ~77.9 KB in proto. + {name: "1Fam2000Gauges", parsableText: makeTestGauges(2000)}, // ~68.1 KB, ~77.9 KB in proto. {name: "237FamsAllTypes", parsableText: readTextParseTestMetrics(b)}, // ~185.7 KB, ~70.6 KB in proto. } { b.Run(fmt.Sprintf("appV2=%v/appendMetadataToWAL=%v/data=%v", appV2, appendMetadataToWAL, data.name), func(b *testing.B) { @@ -3218,9 +3398,7 @@ metric: < } sl.alwaysScrapeClassicHist = test.alwaysScrapeClassicHist // This test does not care about metadata. - // Having this true would mean we need to add metadata to sample - // expectations. - // TODO(bwplotka): Add cases for append metadata to WAL and pass metadata + // TODO(bwplotka): Add metadata expectations and turn it on. sl.appendMetadataToWAL = false }) app := sl.appender() diff --git a/tsdb/head_append_v2.go b/tsdb/head_append_v2.go index 2c09c4cbd5..87b62df536 100644 --- a/tsdb/head_append_v2.go +++ b/tsdb/head_append_v2.go @@ -200,9 +200,6 @@ func (a *headAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t i // Currently only exemplars can return partial errors. partialErr = a.appendExemplars(s, opts.Exemplars) } - - // TODO(bwplotka): Move/reuse metadata tests from scrape, once scrape adopts AppenderV2. - // Currently tsdb package does not test metadata. if a.head.opts.EnableMetadataWALRecords && !opts.Metadata.IsEmpty() { s.Lock() metaChanged := s.meta == nil || !s.meta.Equals(opts.Metadata)