Merge pull request #18813 from miguelbernadi/improve-hostogram-allocations

tsdb/record: eliminate prev pointer escapes in V2 histogram WAL decoder
This commit is contained in:
Bartlomiej Plotka 2026-06-01 10:54:02 +01:00 committed by GitHub
commit 178c53d83c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 170 additions and 31 deletions

View file

@ -205,6 +205,16 @@ func BenchmarkLoadWLs(b *testing.B) {
// The first oooSamplesPct*samplesPerSeries samples in an OOO series are written as OOO samples.
oooSamplesPct float64
oooCapMax int64
// histogramSeriesPct is the fraction of series that emit native
// histogram samples instead of float samples. 0 means all float
// (the default for existing cases), 1 means all histograms.
// Histogram series use the last histogramSeriesPct*seriesPerBatch
// refs in each batch so existing float-only shapes are unaffected.
histogramSeriesPct float64
// bucketsPerHistogram is the number of positive buckets written
// per native histogram sample. Each bucket adds one span entry
// and one bucket delta to the encoded histogram.
bucketsPerHistogram int
}{
{ // Less series and more samples. 2 hour WAL with 1 second scrape interval.
batches: 10,
@ -252,6 +262,27 @@ func BenchmarkLoadWLs(b *testing.B) {
oooSamplesPct: 0.3,
oooCapMax: DefaultOutOfOrderCapMax,
},
{ // All-histogram WAL, matching the "In between" float shape.
// Exercises the native-histogram decode hot path (DecodeHistogram
// + histogramSamplesV1/V2) which is shared by WAL replay,
// WAL watcher (remote write), and checkpoint creation.
// bucketsPerHistogram=8 is representative of a moderately
// complex exponential histogram seen in practice.
batches: 10,
seriesPerBatch: 1000,
samplesPerSeries: 480,
histogramSeriesPct: 1.0,
bucketsPerHistogram: 8,
},
{ // Mixed WAL: 50% float series, 50% native histogram series.
// Models a deployment that is partway through migrating metrics
// to native histograms.
batches: 10,
seriesPerBatch: 1000,
samplesPerSeries: 480,
histogramSeriesPct: 0.5,
bucketsPerHistogram: 8,
},
}
labelsPerSeries := 5
@ -270,7 +301,11 @@ func BenchmarkLoadWLs(b *testing.B) {
continue
}
lastExemplarsPerSeries = exemplarsPerSeries
b.Run(fmt.Sprintf("batches=%d,seriesPerBatch=%d,samplesPerSeries=%d,exemplarsPerSeries=%d,mmappedChunkT=%d,oooSeriesPct=%.3f,oooSamplesPct=%.3f,oooCapMax=%d,missingSeriesPct=%.3f,stStorage=%v", c.batches, c.seriesPerBatch, c.samplesPerSeries, exemplarsPerSeries, c.mmappedChunkT, c.oooSeriesPct, c.oooSamplesPct, c.oooCapMax, missingSeriesPct, enableSTStorage),
name := fmt.Sprintf("batches=%d,seriesPerBatch=%d,samplesPerSeries=%d,exemplarsPerSeries=%d,mmappedChunkT=%d,oooSeriesPct=%.3f,oooSamplesPct=%.3f,oooCapMax=%d,missingSeriesPct=%.3f,stStorage=%v", c.batches, c.seriesPerBatch, c.samplesPerSeries, exemplarsPerSeries, c.mmappedChunkT, c.oooSeriesPct, c.oooSamplesPct, c.oooCapMax, missingSeriesPct, enableSTStorage)
if c.histogramSeriesPct > 0 {
name += fmt.Sprintf(",histogramSeriesPct=%.3f,bucketsPerHistogram=%d", c.histogramSeriesPct, c.bucketsPerHistogram)
}
b.Run(name,
func(b *testing.B) {
dir := b.TempDir()
@ -312,30 +347,77 @@ func BenchmarkLoadWLs(b *testing.B) {
buf = populateTestWL(b, wal, []any{writeSeries}, buf, enableSTStorage)
}
// Write samples.
refSamples := make([]record.RefSample, 0, c.seriesPerBatch)
// Write samples. Series are split into float and
// histogram series: the last histogramSeriesPerBatch
// refs in each batch emit RefHistogramSample records;
// the rest emit RefSample records. This mirrors how
// real Prometheus deployments work — a given series is
// committed to one type.
histogramSeriesPerBatch := int(float64(c.seriesPerBatch) * c.histogramSeriesPct)
floatSeriesPerBatch := c.seriesPerBatch - histogramSeriesPerBatch
refSamples := make([]record.RefSample, 0, floatSeriesPerBatch)
refHistSamples := make([]record.RefHistogramSample, 0, histogramSeriesPerBatch)
oooSeriesPerBatch := int(float64(c.seriesPerBatch) * c.oooSeriesPct)
oooSamplesPerSeries := int(float64(c.samplesPerSeries) * c.oooSamplesPct)
// Build a reusable histogram template with the configured
// bucket count. All histogram series share the same shape;
// only the value (Sum/Count) changes per sample.
var histTemplate *histogram.Histogram
if histogramSeriesPerBatch > 0 {
spans := make([]histogram.Span, c.bucketsPerHistogram)
for idx := range spans {
spans[idx] = histogram.Span{Offset: int32(idx), Length: 1}
}
buckets := make([]int64, c.bucketsPerHistogram)
for idx := range buckets {
buckets[idx] = int64(idx + 1)
}
histTemplate = &histogram.Histogram{
Schema: 1,
PositiveSpans: spans,
PositiveBuckets: buckets,
}
}
for i := 0; i < c.samplesPerSeries; i++ {
for j := 0; j < c.batches; j++ {
refSamples = refSamples[:0]
refHistSamples = refHistSamples[:0]
// Float series occupy refs [j*seriesPerBatch, j*seriesPerBatch+floatSeriesPerBatch).
k := j * c.seriesPerBatch
// Skip appending the first oooSamplesPerSeries samples for the series in the batch that
// should have OOO samples. OOO samples are appended after all the in-order samples.
if i < oooSamplesPerSeries {
k += oooSeriesPerBatch
}
for ; k < (j+1)*c.seriesPerBatch; k++ {
floatEnd := j*c.seriesPerBatch + floatSeriesPerBatch
for ; k < floatEnd; k++ {
refSamples = append(refSamples, record.RefSample{
Ref: chunks.HeadSeriesRef(k) * 101,
T: int64(i) * 10,
V: float64(i) * 100,
})
}
buf = populateTestWL(b, wal, []any{refSamples}, buf, enableSTStorage)
if len(refSamples) > 0 {
buf = populateTestWL(b, wal, []any{refSamples}, buf, enableSTStorage)
}
// Histogram series occupy refs [j*seriesPerBatch+floatSeriesPerBatch, (j+1)*seriesPerBatch).
for k = floatEnd; k < (j+1)*c.seriesPerBatch; k++ {
h := *histTemplate
h.Count = uint64(i + 1)
h.Sum = float64(i) * 100
refHistSamples = append(refHistSamples, record.RefHistogramSample{
Ref: chunks.HeadSeriesRef(k) * 101,
T: int64(i) * 10,
H: &h,
})
}
if len(refHistSamples) > 0 {
buf = populateTestWL(b, wal, []any{refHistSamples}, buf, enableSTStorage)
}
}
}

View file

@ -593,22 +593,21 @@ func (d *Decoder) histogramSamplesV2(dec *encoding.Decbuf, histograms []RefHisto
firstRef := chunks.HeadSeriesRef(dec.Varint64())
firstT := dec.Varint64()
firstST := dec.Varint64()
var prev *RefHistogramSample
var (
prevRef chunks.HeadSeriesRef
prevST int64
)
hasPrev := false
for len(dec.B) > 0 && dec.Err() == nil {
var ref, t, st int64
if prev == nil {
prev = &RefHistogramSample{
Ref: firstRef,
ST: firstST,
}
ref = int64(firstRef)
t = firstT
st = firstST
if !hasPrev {
ref, t, st = int64(firstRef), firstT, firstST
hasPrev = true
} else {
ref = int64(prev.Ref) + dec.Varint64()
ref = int64(prevRef) + dec.Varint64()
t = firstT + dec.Varint64()
st = readSTMarker(dec, prev.ST, firstST)
st = readSTMarker(dec, prevST, firstST)
}
rh := RefHistogramSample{
@ -617,7 +616,7 @@ func (d *Decoder) histogramSamplesV2(dec *encoding.Decbuf, histograms []RefHisto
T: t,
H: &histogram.Histogram{},
}
prev = &rh
prevRef, prevST = rh.Ref, rh.ST
DecodeHistogram(dec, rh.H)
if !histogram.IsKnownSchema(rh.H.Schema) {
@ -768,22 +767,19 @@ func (d *Decoder) floatHistogramSamplesV2(dec *encoding.Decbuf, histograms []Ref
firstRef := chunks.HeadSeriesRef(dec.Varint64())
firstT := dec.Varint64()
firstST := dec.Varint64()
var prev *RefFloatHistogramSample
var prevRef chunks.HeadSeriesRef
var prevST int64
hasPrev := false
for len(dec.B) > 0 && dec.Err() == nil {
var ref, t, st int64
if prev == nil {
prev = &RefFloatHistogramSample{
Ref: firstRef,
ST: firstST,
}
ref = int64(firstRef)
t = firstT
st = firstST
if !hasPrev {
ref, t, st = int64(firstRef), firstT, firstST
hasPrev = true
} else {
ref = int64(prev.Ref) + dec.Varint64()
ref = int64(prevRef) + dec.Varint64()
t = firstT + dec.Varint64()
st = readSTMarker(dec, prev.ST, firstST)
st = readSTMarker(dec, prevST, firstST)
}
rfh := RefFloatHistogramSample{
@ -792,7 +788,7 @@ func (d *Decoder) floatHistogramSamplesV2(dec *encoding.Decbuf, histograms []Ref
T: t,
FH: &histogram.FloatHistogram{},
}
prev = &rfh
prevRef, prevST = rfh.Ref, rfh.ST
DecodeFloatHistogram(dec, rfh.FH)
if !histogram.IsKnownSchema(rfh.FH.Schema) {

View file

@ -1323,3 +1323,64 @@ func BenchmarkWAL_HistogramEncoding(b *testing.B) {
}
}
}
// BenchmarkDecodeHistogramSamples measures per-sample allocation cost for
// histogram WAL decoding — both V1 and V2 paths. This exercises the hot
// path identified in finding B/C of the allocation analysis: one
// *histogram.Histogram allocation per sample (unavoidable with current
// design) plus one escaped *RefHistogramSample per V2 iteration (fixable).
func BenchmarkDecodeHistogramSamples(b *testing.B) {
const numSamples = 1000
makeHistogram := func(buckets int) *histogram.Histogram {
spans := make([]histogram.Span, buckets)
for i := range spans {
spans[i] = histogram.Span{Offset: int32(i), Length: 1}
}
bkts := make([]int64, buckets)
for i := range bkts {
bkts[i] = int64(i + 1)
}
return &histogram.Histogram{
Schema: 1,
Count: uint64(buckets * 10),
Sum: float64(buckets),
PositiveSpans: spans,
PositiveBuckets: bkts,
}
}
for _, buckets := range []int{0, 4, 16} {
for _, version := range []string{"v1", "v2"} {
b.Run(fmt.Sprintf("buckets=%d/%s", buckets, version), func(b *testing.B) {
samples := make([]RefHistogramSample, numSamples)
for i := range samples {
samples[i] = RefHistogramSample{
Ref: chunks.HeadSeriesRef(i),
T: int64(i) * 1000,
H: makeHistogram(buckets),
}
}
var raw []byte
if version == "v1" {
enc := Encoder{}
raw, _ = enc.HistogramSamples(samples, raw)
} else {
enc := Encoder{EnableSTStorage: true}
raw, _ = enc.HistogramSamples(samples, raw)
}
dec := NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
buf := make([]RefHistogramSample, 0, numSamples)
b.ResetTimer()
b.ReportAllocs()
for b.Loop() {
buf, _ = dec.HistogramSamples(raw, buf[:0])
}
_ = buf
})
}
}
}