From badf9da96a81c548b71537e32162d97c3287a3a2 Mon Sep 17 00:00:00 2001 From: Miguel Bernabeu Diaz Date: Thu, 28 May 2026 21:40:12 +0200 Subject: [PATCH 1/3] tsdb/record,tsdb: add native histogram WAL decode benchmarks Add two benchmark components to measure the native histogram decode hot path, which is shared by WAL replay, WAL watcher (remote write), and checkpoint creation. tsdb/record: BenchmarkDecodeHistogramSamples isolates the V1 and V2 histogram decoder paths across bucket counts (0, 4, 16), giving a precise per-sample allocation signal for decoder changes. tsdb: BenchmarkLoadWLs gains two new shapes: - all-histogram (histogramSeriesPct=1.0, bucketsPerHistogram=8): mirrors the existing "In between" float shape for direct comparison. - mixed (histogramSeriesPct=0.5, bucketsPerHistogram=8): models a deployment partway through migrating to native histograms. Both shapes are parameterised over stStorage (V1 vs V2 encoding) via the existing enableSTStorage loop, so benchstat can show the V1/V2 delta without additional test infrastructure. The subtest names include histogramSeriesPct and bucketsPerHistogram only when non-zero, leaving existing float-only subtest names unchanged. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Miguel Bernabeu Diaz --- tsdb/head_test.go | 96 +++++++++++++++++++++++++++++++++++--- tsdb/record/record_test.go | 61 ++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 7 deletions(-) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index afa5163e75..c1af92e2a2 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -205,6 +205,16 @@ func BenchmarkLoadWLs(b *testing.B) { // The first oooSamplesPct*samplesPerSeries samples in an OOO series are written as OOO samples. oooSamplesPct float64 oooCapMax int64 + // histogramSeriesPct is the fraction of series that emit native + // histogram samples instead of float samples. 0 means all float + // (the default for existing cases), 1 means all histograms. 
+ // Histogram series use the last histogramSeriesPct*seriesPerBatch + // refs in each batch so existing float-only shapes are unaffected. + histogramSeriesPct float64 + // bucketsPerHistogram is the number of positive buckets written + // per native histogram sample. Each bucket adds one span entry + // and one bucket delta to the encoded histogram. + bucketsPerHistogram int }{ { // Less series and more samples. 2 hour WAL with 1 second scrape interval. batches: 10, @@ -252,6 +262,27 @@ func BenchmarkLoadWLs(b *testing.B) { oooSamplesPct: 0.3, oooCapMax: DefaultOutOfOrderCapMax, }, + { // All-histogram WAL, matching the "In between" float shape. + // Exercises the native-histogram decode hot path (DecodeHistogram + // + histogramSamplesV1/V2) which is shared by WAL replay, + // WAL watcher (remote write), and checkpoint creation. + // bucketsPerHistogram=8 is representative of a moderately + // complex exponential histogram seen in practice. + batches: 10, + seriesPerBatch: 1000, + samplesPerSeries: 480, + histogramSeriesPct: 1.0, + bucketsPerHistogram: 8, + }, + { // Mixed WAL: 50% float series, 50% native histogram series. + // Models a deployment that is partway through migrating metrics + // to native histograms. 
+ batches: 10, + seriesPerBatch: 1000, + samplesPerSeries: 480, + histogramSeriesPct: 0.5, + bucketsPerHistogram: 8, + }, } labelsPerSeries := 5 @@ -270,7 +301,11 @@ func BenchmarkLoadWLs(b *testing.B) { continue } lastExemplarsPerSeries = exemplarsPerSeries - b.Run(fmt.Sprintf("batches=%d,seriesPerBatch=%d,samplesPerSeries=%d,exemplarsPerSeries=%d,mmappedChunkT=%d,oooSeriesPct=%.3f,oooSamplesPct=%.3f,oooCapMax=%d,missingSeriesPct=%.3f,stStorage=%v", c.batches, c.seriesPerBatch, c.samplesPerSeries, exemplarsPerSeries, c.mmappedChunkT, c.oooSeriesPct, c.oooSamplesPct, c.oooCapMax, missingSeriesPct, enableSTStorage), + name := fmt.Sprintf("batches=%d,seriesPerBatch=%d,samplesPerSeries=%d,exemplarsPerSeries=%d,mmappedChunkT=%d,oooSeriesPct=%.3f,oooSamplesPct=%.3f,oooCapMax=%d,missingSeriesPct=%.3f,stStorage=%v", c.batches, c.seriesPerBatch, c.samplesPerSeries, exemplarsPerSeries, c.mmappedChunkT, c.oooSeriesPct, c.oooSamplesPct, c.oooCapMax, missingSeriesPct, enableSTStorage) + if c.histogramSeriesPct > 0 { + name += fmt.Sprintf(",histogramSeriesPct=%.3f,bucketsPerHistogram=%d", c.histogramSeriesPct, c.bucketsPerHistogram) + } + b.Run(name, func(b *testing.B) { dir := b.TempDir() @@ -312,30 +347,77 @@ func BenchmarkLoadWLs(b *testing.B) { buf = populateTestWL(b, wal, []any{writeSeries}, buf, enableSTStorage) } - // Write samples. - refSamples := make([]record.RefSample, 0, c.seriesPerBatch) + // Write samples. Series are split into float and + // histogram series: the last histogramSeriesPerBatch + // refs in each batch emit RefHistogramSample records; + // the rest emit RefSample records. This mirrors how + // real Prometheus deployments work — a given series is + // committed to one type. 
+ histogramSeriesPerBatch := int(float64(c.seriesPerBatch) * c.histogramSeriesPct) + floatSeriesPerBatch := c.seriesPerBatch - histogramSeriesPerBatch + + refSamples := make([]record.RefSample, 0, floatSeriesPerBatch) + refHistSamples := make([]record.RefHistogramSample, 0, histogramSeriesPerBatch) oooSeriesPerBatch := int(float64(c.seriesPerBatch) * c.oooSeriesPct) oooSamplesPerSeries := int(float64(c.samplesPerSeries) * c.oooSamplesPct) + // Build a reusable histogram template with the configured + // bucket count. All histogram series share the same shape; + // only the value (Sum/Count) changes per sample. + var histTemplate *histogram.Histogram + if histogramSeriesPerBatch > 0 { + spans := make([]histogram.Span, c.bucketsPerHistogram) + for idx := range spans { + spans[idx] = histogram.Span{Offset: int32(idx), Length: 1} + } + buckets := make([]int64, c.bucketsPerHistogram) + for idx := range buckets { + buckets[idx] = int64(idx + 1) + } + histTemplate = &histogram.Histogram{ + Schema: 1, + PositiveSpans: spans, + PositiveBuckets: buckets, + } + } + for i := 0; i < c.samplesPerSeries; i++ { for j := 0; j < c.batches; j++ { refSamples = refSamples[:0] + refHistSamples = refHistSamples[:0] + // Float series occupy refs [j*seriesPerBatch, j*seriesPerBatch+floatSeriesPerBatch). k := j * c.seriesPerBatch - // Skip appending the first oooSamplesPerSeries samples for the series in the batch that - // should have OOO samples. OOO samples are appended after all the in-order samples. 
if i < oooSamplesPerSeries { k += oooSeriesPerBatch } - for ; k < (j+1)*c.seriesPerBatch; k++ { + floatEnd := j*c.seriesPerBatch + floatSeriesPerBatch + for ; k < floatEnd; k++ { refSamples = append(refSamples, record.RefSample{ Ref: chunks.HeadSeriesRef(k) * 101, T: int64(i) * 10, V: float64(i) * 100, }) } - buf = populateTestWL(b, wal, []any{refSamples}, buf, enableSTStorage) + if len(refSamples) > 0 { + buf = populateTestWL(b, wal, []any{refSamples}, buf, enableSTStorage) + } + + // Histogram series occupy refs [j*seriesPerBatch+floatSeriesPerBatch, (j+1)*seriesPerBatch). + for k = floatEnd; k < (j+1)*c.seriesPerBatch; k++ { + h := *histTemplate + h.Count = uint64(i + 1) + h.Sum = float64(i) * 100 + refHistSamples = append(refHistSamples, record.RefHistogramSample{ + Ref: chunks.HeadSeriesRef(k) * 101, + T: int64(i) * 10, + H: &h, + }) + } + if len(refHistSamples) > 0 { + buf = populateTestWL(b, wal, []any{refHistSamples}, buf, enableSTStorage) + } } } diff --git a/tsdb/record/record_test.go b/tsdb/record/record_test.go index cd7f557512..e2b7bed3bd 100644 --- a/tsdb/record/record_test.go +++ b/tsdb/record/record_test.go @@ -1323,3 +1323,64 @@ func BenchmarkWAL_HistogramEncoding(b *testing.B) { } } } + +// BenchmarkDecodeHistogramSamples measures per-sample allocation cost for +// histogram WAL decoding — both V1 and V2 paths. Every decoded sample needs +// one *histogram.Histogram allocation (unavoidable with the current design); +// any further per-sample allocation, such as a loop-local sample struct +// escaping to the heap in the V2 decoder, is overhead this benchmark exposes. 
+func BenchmarkDecodeHistogramSamples(b *testing.B) { + const numSamples = 1000 + + makeHistogram := func(buckets int) *histogram.Histogram { + spans := make([]histogram.Span, buckets) + for i := range spans { + spans[i] = histogram.Span{Offset: int32(i), Length: 1} + } + bkts := make([]int64, buckets) + for i := range bkts { + bkts[i] = int64(i + 1) + } + return &histogram.Histogram{ + Schema: 1, + Count: uint64(buckets * 10), + Sum: float64(buckets), + PositiveSpans: spans, + PositiveBuckets: bkts, + } + } + + for _, buckets := range []int{0, 4, 16} { + for _, version := range []string{"v1", "v2"} { + b.Run(fmt.Sprintf("buckets=%d/%s", buckets, version), func(b *testing.B) { + samples := make([]RefHistogramSample, numSamples) + for i := range samples { + samples[i] = RefHistogramSample{ + Ref: chunks.HeadSeriesRef(i), + T: int64(i) * 1000, + H: makeHistogram(buckets), + } + } + + var raw []byte + if version == "v1" { + enc := Encoder{} + raw, _ = enc.HistogramSamples(samples, raw) + } else { + enc := Encoder{EnableSTStorage: true} + raw, _ = enc.HistogramSamples(samples, raw) + } + + dec := NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) + buf := make([]RefHistogramSample, 0, numSamples) + + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + buf, _ = dec.HistogramSamples(raw, buf[:0]) + } + _ = buf + }) + } + } +} From b4db611d52212de40438c1166256e9f2c39b9c03 Mon Sep 17 00:00:00 2001 From: Miguel Bernabeu Diaz Date: Thu, 28 May 2026 21:40:52 +0200 Subject: [PATCH 2/3] tsdb/record: eliminate prev pointer escapes in V2 histogram decoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit histogramSamplesV2 and floatHistogramSamplesV2 tracked the previous sample's Ref and ST via a *RefHistogramSample pointer (prev). Taking the address of a loop-local variable (prev = &rh) forced the compiler to heap-allocate rh on every iteration; the first iteration also allocated a separate sentinel struct. 
The pointed-to fields were only ever read as two int64 scalars, so the pointer added zero semantic value. Replace prev with two scalar variables (prevRef, prevST) and a boolean sentinel. rh no longer has its address taken and stays on the stack. This affects every caller of dec.HistogramSamples that produces V2 records (EnableSTStorage=true): WAL replay, the WAL watcher (remote write tail), and checkpoint creation. Benchmarks (go test -count=6 -benchmem, benchstat): BenchmarkDecodeHistogramSamples (tsdb/record) │ before │ after │ │ allocs/op │ allocs/op vs base │ buckets=0/v2 │ 2.001k ± 0%│ 1.000k ± 0% -50.02% (p=0.002)│ buckets=4/v2 │ 4.001k ± 0%│ 3.000k ± 0% -25.02% (p=0.002)│ buckets=16/v2 │ 4.001k ± 0%│ 3.000k ± 0% -25.02% (p=0.002)│ │ before │ after │ │ B/op │ B/op vs base │ buckets=0/v2 │ 187.5Ki ± 0%│ 156.2Ki ± 0% -16.68% (p=0.002)│ buckets=4/v2 │ 250.0Ki ± 0%│ 218.8Ki ± 0% -12.51% (p=0.002)│ buckets=16/v2 │ 437.5Ki ± 0%│ 406.2Ki ± 0% -7.15% (p=0.002)│ BenchmarkLoadWLs end-to-end WAL replay (tsdb), stStorage=true only │ before │ after │ │ allocs/op │ allocs/op vs base │ histogramSeriesPct=1.000 │ 19.70M ± 0% │ 14.90M ± 0% -24.39% (p=0.002)│ histogramSeriesPct=0.500 │ 10.47M ± 0% │ 8.06M ± 0% -23.00% (p=0.002)│ │ before │ after │ │ B/op │ B/op vs base │ histogramSeriesPct=1.000 │ 1.539Gi ± 0%│ 1.394Gi ± 0% -9.42% (p=0.002)│ histogramSeriesPct=0.500 │ 1051.3Mi ± 0%│ 975.1Mi ± 0% -7.25% (p=0.002)│ │ before │ after │ │ sec/op │ sec/op vs base │ histogramSeriesPct=1.000 │ 824.9m ± 0% │ 762.6m ± 1% -7.55% (p=0.002)│ histogramSeriesPct=0.500 │ 488.6m ± 1% │ 451.4m ± 1% -7.61% (p=0.002)│ V1 paths and float-only shapes are unchanged (p >> 0.05 throughout). 
Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Miguel Bernabeu Diaz --- tsdb/record/record.go | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/tsdb/record/record.go b/tsdb/record/record.go index b3e7e8370e..063f3c3619 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -593,22 +593,19 @@ func (d *Decoder) histogramSamplesV2(dec *encoding.Decbuf, histograms []RefHisto firstRef := chunks.HeadSeriesRef(dec.Varint64()) firstT := dec.Varint64() firstST := dec.Varint64() - var prev *RefHistogramSample + var prevRef chunks.HeadSeriesRef + var prevST int64 + hasPrev := false for len(dec.B) > 0 && dec.Err() == nil { var ref, t, st int64 - if prev == nil { - prev = &RefHistogramSample{ - Ref: firstRef, - ST: firstST, - } - ref = int64(firstRef) - t = firstT - st = firstST + if !hasPrev { + ref, t, st = int64(firstRef), firstT, firstST + hasPrev = true } else { - ref = int64(prev.Ref) + dec.Varint64() + ref = int64(prevRef) + dec.Varint64() t = firstT + dec.Varint64() - st = readSTMarker(dec, prev.ST, firstST) + st = readSTMarker(dec, prevST, firstST) } rh := RefHistogramSample{ @@ -617,7 +614,7 @@ func (d *Decoder) histogramSamplesV2(dec *encoding.Decbuf, histograms []RefHisto T: t, H: &histogram.Histogram{}, } - prev = &rh + prevRef, prevST = rh.Ref, rh.ST DecodeHistogram(dec, rh.H) if !histogram.IsKnownSchema(rh.H.Schema) { @@ -768,22 +765,19 @@ func (d *Decoder) floatHistogramSamplesV2(dec *encoding.Decbuf, histograms []Ref firstRef := chunks.HeadSeriesRef(dec.Varint64()) firstT := dec.Varint64() firstST := dec.Varint64() - var prev *RefFloatHistogramSample + var prevRef chunks.HeadSeriesRef + var prevST int64 + hasPrev := false for len(dec.B) > 0 && dec.Err() == nil { var ref, t, st int64 - if prev == nil { - prev = &RefFloatHistogramSample{ - Ref: firstRef, - ST: firstST, - } - ref = int64(firstRef) - t = firstT - st = firstST + if !hasPrev { + ref, t, st = int64(firstRef), firstT, firstST + 
hasPrev = true } else { - ref = int64(prev.Ref) + dec.Varint64() + ref = int64(prevRef) + dec.Varint64() t = firstT + dec.Varint64() - st = readSTMarker(dec, prev.ST, firstST) + st = readSTMarker(dec, prevST, firstST) } rfh := RefFloatHistogramSample{ @@ -792,7 +786,7 @@ func (d *Decoder) floatHistogramSamplesV2(dec *encoding.Decbuf, histograms []Ref T: t, FH: &histogram.FloatHistogram{}, } - prev = &rfh + prevRef, prevST = rfh.Ref, rfh.ST DecodeFloatHistogram(dec, rfh.FH) if !histogram.IsKnownSchema(rfh.FH.Schema) { From 423c7878de616b4e39ab4182d98901d488bb17dd Mon Sep 17 00:00:00 2001 From: Miguel Bernabeu Diaz Date: Mon, 1 Jun 2026 11:07:48 +0200 Subject: [PATCH 3/3] Update tsdb/record/record.go Co-authored-by: Bartlomiej Plotka Signed-off-by: Miguel Bernabeu Diaz --- tsdb/record/record.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tsdb/record/record.go b/tsdb/record/record.go index 063f3c3619..046abaa0d4 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -593,8 +593,10 @@ func (d *Decoder) histogramSamplesV2(dec *encoding.Decbuf, histograms []RefHisto firstRef := chunks.HeadSeriesRef(dec.Varint64()) firstT := dec.Varint64() firstST := dec.Varint64() - var prevRef chunks.HeadSeriesRef - var prevST int64 + var ( + prevRef chunks.HeadSeriesRef + prevST int64 + ) hasPrev := false for len(dec.B) > 0 && dec.Err() == nil {