diff --git a/tsdb/head_test.go b/tsdb/head_test.go index afa5163e75..c1af92e2a2 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -205,6 +205,16 @@ func BenchmarkLoadWLs(b *testing.B) { // The first oooSamplesPct*samplesPerSeries samples in an OOO series are written as OOO samples. oooSamplesPct float64 oooCapMax int64 + // histogramSeriesPct is the fraction of series that emit native + // histogram samples instead of float samples. 0 means all float + // (the default for existing cases), 1 means all histograms. + // Histogram series use the last histogramSeriesPct*seriesPerBatch + // refs in each batch so existing float-only shapes are unaffected. + histogramSeriesPct float64 + // bucketsPerHistogram is the number of positive buckets written + // per native histogram sample. Each bucket adds one span entry + // and one bucket delta to the encoded histogram. + bucketsPerHistogram int }{ { // Less series and more samples. 2 hour WAL with 1 second scrape interval. batches: 10, @@ -252,6 +262,27 @@ func BenchmarkLoadWLs(b *testing.B) { oooSamplesPct: 0.3, oooCapMax: DefaultOutOfOrderCapMax, }, + { // All-histogram WAL, matching the "In between" float shape. + // Exercises the native-histogram decode hot path (DecodeHistogram + // + histogramSamplesV1/V2) which is shared by WAL replay, + // WAL watcher (remote write), and checkpoint creation. + // bucketsPerHistogram=8 is representative of a moderately + // complex exponential histogram seen in practice. + batches: 10, + seriesPerBatch: 1000, + samplesPerSeries: 480, + histogramSeriesPct: 1.0, + bucketsPerHistogram: 8, + }, + { // Mixed WAL: 50% float series, 50% native histogram series. + // Models a deployment that is partway through migrating metrics + // to native histograms. + batches: 10, + seriesPerBatch: 1000, + samplesPerSeries: 480, + histogramSeriesPct: 0.5, + bucketsPerHistogram: 8, + }, } labelsPerSeries := 5 @@ -270,7 +301,11 @@ func BenchmarkLoadWLs(b *testing.B) { continue } lastExemplarsPerSeries = exemplarsPerSeries - b.Run(fmt.Sprintf("batches=%d,seriesPerBatch=%d,samplesPerSeries=%d,exemplarsPerSeries=%d,mmappedChunkT=%d,oooSeriesPct=%.3f,oooSamplesPct=%.3f,oooCapMax=%d,missingSeriesPct=%.3f,stStorage=%v", c.batches, c.seriesPerBatch, c.samplesPerSeries, exemplarsPerSeries, c.mmappedChunkT, c.oooSeriesPct, c.oooSamplesPct, c.oooCapMax, missingSeriesPct, enableSTStorage), + name := fmt.Sprintf("batches=%d,seriesPerBatch=%d,samplesPerSeries=%d,exemplarsPerSeries=%d,mmappedChunkT=%d,oooSeriesPct=%.3f,oooSamplesPct=%.3f,oooCapMax=%d,missingSeriesPct=%.3f,stStorage=%v", c.batches, c.seriesPerBatch, c.samplesPerSeries, exemplarsPerSeries, c.mmappedChunkT, c.oooSeriesPct, c.oooSamplesPct, c.oooCapMax, missingSeriesPct, enableSTStorage) + if c.histogramSeriesPct > 0 { + name += fmt.Sprintf(",histogramSeriesPct=%.3f,bucketsPerHistogram=%d", c.histogramSeriesPct, c.bucketsPerHistogram) + } + b.Run(name, func(b *testing.B) { dir := b.TempDir() @@ -312,30 +347,77 @@ func BenchmarkLoadWLs(b *testing.B) { buf = populateTestWL(b, wal, []any{writeSeries}, buf, enableSTStorage) } - // Write samples. - refSamples := make([]record.RefSample, 0, c.seriesPerBatch) + // Write samples. Series are split into float and + // histogram series: the last histogramSeriesPerBatch + // refs in each batch emit RefHistogramSample records; + // the rest emit RefSample records. This mirrors how + // real Prometheus deployments work — a given series is + // committed to one type. + histogramSeriesPerBatch := int(float64(c.seriesPerBatch) * c.histogramSeriesPct) + floatSeriesPerBatch := c.seriesPerBatch - histogramSeriesPerBatch + + refSamples := make([]record.RefSample, 0, floatSeriesPerBatch) + refHistSamples := make([]record.RefHistogramSample, 0, histogramSeriesPerBatch) oooSeriesPerBatch := int(float64(c.seriesPerBatch) * c.oooSeriesPct) oooSamplesPerSeries := int(float64(c.samplesPerSeries) * c.oooSamplesPct) + // Build a reusable histogram template with the configured + // bucket count. All histogram series share the same shape; + // only the value (Sum/Count) changes per sample. + var histTemplate *histogram.Histogram + if histogramSeriesPerBatch > 0 { + spans := make([]histogram.Span, c.bucketsPerHistogram) + for idx := range spans { + spans[idx] = histogram.Span{Offset: int32(idx), Length: 1} + } + buckets := make([]int64, c.bucketsPerHistogram) + for idx := range buckets { + buckets[idx] = int64(idx + 1) + } + histTemplate = &histogram.Histogram{ + Schema: 1, + PositiveSpans: spans, + PositiveBuckets: buckets, + } + } + for i := 0; i < c.samplesPerSeries; i++ { for j := 0; j < c.batches; j++ { refSamples = refSamples[:0] + refHistSamples = refHistSamples[:0] + // Float series occupy refs [j*seriesPerBatch, j*seriesPerBatch+floatSeriesPerBatch). k := j * c.seriesPerBatch - // Skip appending the first oooSamplesPerSeries samples for the series in the batch that - // should have OOO samples. OOO samples are appended after all the in-order samples. if i < oooSamplesPerSeries { k += oooSeriesPerBatch } - for ; k < (j+1)*c.seriesPerBatch; k++ { + floatEnd := j*c.seriesPerBatch + floatSeriesPerBatch + for ; k < floatEnd; k++ { refSamples = append(refSamples, record.RefSample{ Ref: chunks.HeadSeriesRef(k) * 101, T: int64(i) * 10, V: float64(i) * 100, }) } - buf = populateTestWL(b, wal, []any{refSamples}, buf, enableSTStorage) + if len(refSamples) > 0 { + buf = populateTestWL(b, wal, []any{refSamples}, buf, enableSTStorage) + } + + // Histogram series occupy refs [j*seriesPerBatch+floatSeriesPerBatch, (j+1)*seriesPerBatch). + for k = floatEnd; k < (j+1)*c.seriesPerBatch; k++ { + h := *histTemplate + h.Count = uint64(i + 1) + h.Sum = float64(i) * 100 + refHistSamples = append(refHistSamples, record.RefHistogramSample{ + Ref: chunks.HeadSeriesRef(k) * 101, + T: int64(i) * 10, + H: &h, + }) + } + if len(refHistSamples) > 0 { + buf = populateTestWL(b, wal, []any{refHistSamples}, buf, enableSTStorage) + } } } diff --git a/tsdb/record/record.go b/tsdb/record/record.go index b3e7e8370e..046abaa0d4 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -593,22 +593,21 @@ func (d *Decoder) histogramSamplesV2(dec *encoding.Decbuf, histograms []RefHisto firstRef := chunks.HeadSeriesRef(dec.Varint64()) firstT := dec.Varint64() firstST := dec.Varint64() - var prev *RefHistogramSample + var ( + prevRef chunks.HeadSeriesRef + prevST int64 + ) + hasPrev := false for len(dec.B) > 0 && dec.Err() == nil { var ref, t, st int64 - if prev == nil { - prev = &RefHistogramSample{ - Ref: firstRef, - ST: firstST, - } - ref = int64(firstRef) - t = firstT - st = firstST + if !hasPrev { + ref, t, st = int64(firstRef), firstT, firstST + hasPrev = true } else { - ref = int64(prev.Ref) + dec.Varint64() + ref = int64(prevRef) + dec.Varint64() t = firstT + dec.Varint64() - st = readSTMarker(dec, prev.ST, firstST) + st = readSTMarker(dec, prevST, firstST) } rh := RefHistogramSample{ @@ -617,7 +616,7 @@ func (d *Decoder) histogramSamplesV2(dec *encoding.Decbuf, histograms []RefHisto T: t, H: &histogram.Histogram{}, } - prev = &rh + prevRef, prevST = rh.Ref, rh.ST DecodeHistogram(dec, rh.H) if !histogram.IsKnownSchema(rh.H.Schema) { @@ -768,22 +767,19 @@ func (d *Decoder) floatHistogramSamplesV2(dec *encoding.Decbuf, histograms []Ref firstRef := chunks.HeadSeriesRef(dec.Varint64()) firstT := dec.Varint64() firstST := dec.Varint64() - var prev *RefFloatHistogramSample + var prevRef chunks.HeadSeriesRef + var prevST int64 + hasPrev := false for len(dec.B) > 0 && dec.Err() == nil { var ref, t, st int64 - if prev == nil { - prev = &RefFloatHistogramSample{ - Ref: firstRef, - ST: firstST, - } - ref = int64(firstRef) - t = firstT - st = firstST + if !hasPrev { + ref, t, st = int64(firstRef), firstT, firstST + hasPrev = true } else { - ref = int64(prev.Ref) + dec.Varint64() + ref = int64(prevRef) + dec.Varint64() t = firstT + dec.Varint64() - st = readSTMarker(dec, prev.ST, firstST) + st = readSTMarker(dec, prevST, firstST) } rfh := RefFloatHistogramSample{ @@ -792,7 +788,7 @@ func (d *Decoder) floatHistogramSamplesV2(dec *encoding.Decbuf, histograms []Ref T: t, FH: &histogram.FloatHistogram{}, } - prev = &rfh + prevRef, prevST = rfh.Ref, rfh.ST DecodeFloatHistogram(dec, rfh.FH) if !histogram.IsKnownSchema(rfh.FH.Schema) { diff --git a/tsdb/record/record_test.go b/tsdb/record/record_test.go index cd7f557512..e2b7bed3bd 100644 --- a/tsdb/record/record_test.go +++ b/tsdb/record/record_test.go @@ -1323,3 +1323,64 @@ func BenchmarkWAL_HistogramEncoding(b *testing.B) { } } } + +// BenchmarkDecodeHistogramSamples measures per-sample allocation cost for +// histogram WAL decoding — both V1 and V2 paths. This exercises the hot +// path identified in finding B/C of the allocation analysis: one +// *histogram.Histogram allocation per sample (unavoidable with current +// design) plus one escaped *RefHistogramSample per V2 iteration (fixable). +func BenchmarkDecodeHistogramSamples(b *testing.B) { + const numSamples = 1000 + + makeHistogram := func(buckets int) *histogram.Histogram { + spans := make([]histogram.Span, buckets) + for i := range spans { + spans[i] = histogram.Span{Offset: int32(i), Length: 1} + } + bkts := make([]int64, buckets) + for i := range bkts { + bkts[i] = int64(i + 1) + } + return &histogram.Histogram{ + Schema: 1, + Count: uint64(buckets * 10), + Sum: float64(buckets), + PositiveSpans: spans, + PositiveBuckets: bkts, + } + } + + for _, buckets := range []int{0, 4, 16} { + for _, version := range []string{"v1", "v2"} { + b.Run(fmt.Sprintf("buckets=%d/%s", buckets, version), func(b *testing.B) { + samples := make([]RefHistogramSample, numSamples) + for i := range samples { + samples[i] = RefHistogramSample{ + Ref: chunks.HeadSeriesRef(i), + T: int64(i) * 1000, + H: makeHistogram(buckets), + } + } + + var raw []byte + if version == "v1" { + enc := Encoder{} + raw, _ = enc.HistogramSamples(samples, raw) + } else { + enc := Encoder{EnableSTStorage: true} + raw, _ = enc.HistogramSamples(samples, raw) + } + + dec := NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) + buf := make([]RefHistogramSample, 0, numSamples) + + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + buf, _ = dec.HistogramSamples(raw, buf[:0]) + } + _ = buf + }) + } + } +}