From 1e60d7fd3b807c5d4624d393b36bcd37338898ea Mon Sep 17 00:00:00 2001 From: Kyle Eckhart Date: Mon, 23 Feb 2026 15:34:44 -0500 Subject: [PATCH] More comments and only reset deleted if the new segment is larger Signed-off-by: Kyle Eckhart --- tsdb/agent/db.go | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 46a47a2105..93a9d1e712 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -556,9 +556,16 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri series, created := db.series.GetOrSet(series.lset.Hash(), series) if !created { + // We don't need to check whether entry.Ref already exists or whether its value differs from series.ref, because GetOrSet + // enforces that the same labels will always get the same Ref. If we did not create a new ref, + // the only ref it can ever map to in the WAL is series.ref. duplicateRefToValidRef[entry.Ref] = series.ref - // Make sure we keep the duplicate SeriesRef while it exists in the WAL - db.deleted[entry.Ref] = currentSegmentOrCheckpoint + + // We want to track the largest segment where we encountered the duplicate ref, so we can ensure + // it remains in the checkpoint until we get past that segment. + if db.deleted[entry.Ref] <= currentSegmentOrCheckpoint { + db.deleted[entry.Ref] = currentSegmentOrCheckpoint + } } else { db.metrics.numActiveSeries.Inc() } @@ -567,8 +574,11 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri case []record.RefSample: for _, entry := range v { if ref, ok := duplicateRefToValidRef[entry.Ref]; ok { - // Make sure we keep the duplicate SeriesRef in checkpoints until we get past the current segment. - db.deleted[entry.Ref] = currentSegmentOrCheckpoint + // We want to track the largest segment where we encountered the duplicate ref, so we can ensure + // it remains in the checkpoint until we get past that segment. 
+ if db.deleted[entry.Ref] <= currentSegmentOrCheckpoint { + db.deleted[entry.Ref] = currentSegmentOrCheckpoint + } entry.Ref = ref } @@ -587,8 +597,11 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri case []record.RefHistogramSample: for _, entry := range v { if ref, ok := duplicateRefToValidRef[entry.Ref]; ok { - // Make sure we keep the duplicate SeriesRef in checkpoints until we get past the current segment. - db.deleted[entry.Ref] = currentSegmentOrCheckpoint + // We want to track the largest segment where we encountered the duplicate ref, so we can ensure + // it remains in the checkpoint until we get past that segment. + if db.deleted[entry.Ref] <= currentSegmentOrCheckpoint { + db.deleted[entry.Ref] = currentSegmentOrCheckpoint + } entry.Ref = ref } series := db.series.GetByID(entry.Ref) @@ -606,8 +619,11 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri case []record.RefFloatHistogramSample: for _, entry := range v { if ref, ok := duplicateRefToValidRef[entry.Ref]; ok { - // Make sure we keep the duplicate SeriesRef in checkpoints until we get past the current segment. - db.deleted[entry.Ref] = currentSegmentOrCheckpoint + // We want to track the largest segment where we encountered the duplicate ref, so we can ensure + // it remains in the checkpoint until we get past that segment. + if db.deleted[entry.Ref] <= currentSegmentOrCheckpoint { + db.deleted[entry.Ref] = currentSegmentOrCheckpoint + } entry.Ref = ref } series := db.series.GetByID(entry.Ref)