From 1e60d7fd3b807c5d4624d393b36bcd37338898ea Mon Sep 17 00:00:00 2001
From: Kyle Eckhart <kgeckhart@users.noreply.github.com>
Date: Mon, 23 Feb 2026 15:34:44 -0500
Subject: [PATCH] Add more comments and only update deleted if the new segment
 is not smaller

Signed-off-by: Kyle Eckhart <kgeckhart@users.noreply.github.com>
---
 tsdb/agent/db.go | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go
index 46a47a2105..93a9d1e712 100644
--- a/tsdb/agent/db.go
+++ b/tsdb/agent/db.go
@@ -556,9 +556,16 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri
 				series, created := db.series.GetOrSet(series.lset.Hash(), series)
 
 				if !created {
+					// We don't need to check whether entry.Ref already exists in duplicateRefToValidRef, or whether
+					// its value differs from series.ref, because GetOrSet enforces that the same labels always get
+					// the same ref. If we did not create a new series, series.ref is the only ref it should map to.
 					duplicateRefToValidRef[entry.Ref] = series.ref
-					// Make sure we keep the duplicate SeriesRef while it exists in the WAL
-					db.deleted[entry.Ref] = currentSegmentOrCheckpoint
+
+					// We want to track the largest segment where we encountered the duplicate ref, so we can ensure
+					// it remains in the checkpoint until we get past that segment.
+					if db.deleted[entry.Ref] <= currentSegmentOrCheckpoint {
+						db.deleted[entry.Ref] = currentSegmentOrCheckpoint
+					}
 				} else {
 					db.metrics.numActiveSeries.Inc()
 				}
@@ -567,8 +574,11 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri
 		case []record.RefSample:
 			for _, entry := range v {
 				if ref, ok := duplicateRefToValidRef[entry.Ref]; ok {
-					// Make sure we keep the duplicate SeriesRef in checkpoints until we get past the current segment.
-					db.deleted[entry.Ref] = currentSegmentOrCheckpoint
+					// We want to track the largest segment where we encountered the duplicate ref, so we can ensure
+					// it remains in the checkpoint until we get past that segment.
+					if db.deleted[entry.Ref] <= currentSegmentOrCheckpoint {
+						db.deleted[entry.Ref] = currentSegmentOrCheckpoint
+					}
 					entry.Ref = ref
 				}
 
@@ -587,8 +597,11 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri
 		case []record.RefHistogramSample:
 			for _, entry := range v {
 				if ref, ok := duplicateRefToValidRef[entry.Ref]; ok {
-					// Make sure we keep the duplicate SeriesRef in checkpoints until we get past the current segment.
-					db.deleted[entry.Ref] = currentSegmentOrCheckpoint
+					// We want to track the largest segment where we encountered the duplicate ref, so we can ensure
+					// it remains in the checkpoint until we get past that segment.
+					if db.deleted[entry.Ref] <= currentSegmentOrCheckpoint {
+						db.deleted[entry.Ref] = currentSegmentOrCheckpoint
+					}
 					entry.Ref = ref
 				}
 				series := db.series.GetByID(entry.Ref)
@@ -606,8 +619,11 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri
 		case []record.RefFloatHistogramSample:
 			for _, entry := range v {
 				if ref, ok := duplicateRefToValidRef[entry.Ref]; ok {
-					// Make sure we keep the duplicate SeriesRef in checkpoints until we get past the current segment.
-					db.deleted[entry.Ref] = currentSegmentOrCheckpoint
+					// We want to track the largest segment where we encountered the duplicate ref, so we can ensure
+					// it remains in the checkpoint until we get past that segment.
+					if db.deleted[entry.Ref] <= currentSegmentOrCheckpoint {
+						db.deleted[entry.Ref] = currentSegmentOrCheckpoint
+					}
 					entry.Ref = ref
 				}
 				series := db.series.GetByID(entry.Ref)