tsdb: fix flaky TestWaitForPendingReadersInTimeRange tests (#17985)
Some checks are pending
buf.build / lint and publish (push) Waiting to run
CI / Go tests (push) Waiting to run
CI / More Go tests (push) Waiting to run
CI / Go tests with previous Go version (push) Waiting to run
CI / UI tests (push) Waiting to run
CI / Go tests on Windows (push) Waiting to run
CI / Mixins tests (push) Waiting to run
CI / Build Prometheus for common architectures (push) Waiting to run
CI / Build Prometheus for all architectures (push) Waiting to run
CI / Report status of build Prometheus for all architectures (push) Blocked by required conditions
CI / Check generated parser (push) Waiting to run
CI / golangci-lint (push) Waiting to run
CI / fuzzing (push) Waiting to run
CI / codeql (push) Waiting to run
CI / Publish main branch artifacts (push) Blocked by required conditions
CI / Publish release artefacts (push) Blocked by required conditions
CI / Publish UI on npm Registry (push) Blocked by required conditions
Scorecards supply-chain security / Scorecards analysis (push) Waiting to run

The tests were flaky because they used hard-coded time.After(550ms)
waits, which had only 50ms margin over WaitForPendingReadersInTimeRange's
500ms poll interval. On slow CI runners, this margin wasn't reliable.

Use synctest for deterministic time control:
- Wrap test logic in synctest.Test() to use fake time
- Use synctest.Wait() to let goroutines reach dormant state
- Use time.Sleep() to advance fake time past the poll interval
- No more timing-dependent assertions

This makes the tests both reliable and ~60x faster (0.05s vs 3s).

Fixes both TestWaitForPendingReadersInTimeRange and
TestWaitForPendingReadersInTimeRange_AppenderV2.

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
This commit is contained in:
Arve Knudsen 2026-02-01 16:52:26 +01:00 committed by GitHub
parent 6c18bba5c6
commit d9db76631d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 58 additions and 20 deletions

View file

@ -53,6 +53,7 @@ import (
"github.com/prometheus/prometheus/tsdb/wlog"
"github.com/prometheus/prometheus/util/compression"
"github.com/prometheus/prometheus/util/testutil"
"github.com/prometheus/prometheus/util/testutil/synctest"
)
// TODO(bwplotka): Ensure non-ported tests are not deleted from db_test.go when removing AppenderV1 flow (#17632),
@ -1625,17 +1626,35 @@ func TestWaitForPendingReadersInTimeRange_AppenderV2(t *testing.T) {
}
for _, c := range cases {
t.Run(fmt.Sprintf("mint=%d,maxt=%d,shouldWait=%t", c.mint, c.maxt, c.shouldWait), func(t *testing.T) {
// checkWaiting verifies WaitForPendingReadersInTimeRange behavior using synctest
// for deterministic time control. The function should block while an overlapping
// querier is open and return immediately when there's no overlap.
checkWaiting := func(cl io.Closer) {
var waitOver atomic.Bool
go func() {
db.head.WaitForPendingReadersInTimeRange(truncMint, truncMaxt)
waitOver.Store(true)
}()
<-time.After(550 * time.Millisecond)
require.Equal(t, !c.shouldWait, waitOver.Load())
require.NoError(t, cl.Close())
<-time.After(550 * time.Millisecond)
require.True(t, waitOver.Load())
synctest.Test(t, func(t *testing.T) {
var waitOver atomic.Bool
go func() {
db.head.WaitForPendingReadersInTimeRange(truncMint, truncMaxt)
waitOver.Store(true)
}()
// Wait for goroutine to either complete (no overlap) or block on Sleep (overlap).
synctest.Wait()
if c.shouldWait {
require.False(t, waitOver.Load(),
"WaitForPendingReadersInTimeRange should block while overlapping querier is open")
require.NoError(t, cl.Close())
// Advance fake time past the 500ms poll interval, then let goroutine process.
time.Sleep(time.Second)
synctest.Wait()
require.True(t, waitOver.Load(),
"WaitForPendingReadersInTimeRange should complete after querier is closed")
} else {
require.True(t, waitOver.Load(),
"WaitForPendingReadersInTimeRange should return immediately when no overlap")
require.NoError(t, cl.Close())
}
})
}
q, err := db.Querier(c.mint, c.maxt)

View file

@ -56,6 +56,7 @@ import (
"github.com/prometheus/prometheus/tsdb/wlog"
"github.com/prometheus/prometheus/util/compression"
"github.com/prometheus/prometheus/util/testutil"
"github.com/prometheus/prometheus/util/testutil/synctest"
)
// newTestHeadDefaultOptions returns the HeadOptions that should be used by default in unit tests.
@ -3861,17 +3862,35 @@ func TestWaitForPendingReadersInTimeRange(t *testing.T) {
}
for _, c := range cases {
t.Run(fmt.Sprintf("mint=%d,maxt=%d,shouldWait=%t", c.mint, c.maxt, c.shouldWait), func(t *testing.T) {
// checkWaiting verifies WaitForPendingReadersInTimeRange behavior using synctest
// for deterministic time control. The function should block while an overlapping
// querier is open and return immediately when there's no overlap.
checkWaiting := func(cl io.Closer) {
var waitOver atomic.Bool
go func() {
db.head.WaitForPendingReadersInTimeRange(truncMint, truncMaxt)
waitOver.Store(true)
}()
<-time.After(550 * time.Millisecond)
require.Equal(t, !c.shouldWait, waitOver.Load())
require.NoError(t, cl.Close())
<-time.After(550 * time.Millisecond)
require.True(t, waitOver.Load())
synctest.Test(t, func(t *testing.T) {
var waitOver atomic.Bool
go func() {
db.head.WaitForPendingReadersInTimeRange(truncMint, truncMaxt)
waitOver.Store(true)
}()
// Wait for goroutine to either complete (no overlap) or block on Sleep (overlap).
synctest.Wait()
if c.shouldWait {
require.False(t, waitOver.Load(),
"WaitForPendingReadersInTimeRange should block while overlapping querier is open")
require.NoError(t, cl.Close())
// Advance fake time past the 500ms poll interval, then let goroutine process.
time.Sleep(time.Second)
synctest.Wait()
require.True(t, waitOver.Load(),
"WaitForPendingReadersInTimeRange should complete after querier is closed")
} else {
require.True(t, waitOver.Load(),
"WaitForPendingReadersInTimeRange should return immediately when no overlap")
require.NoError(t, cl.Close())
}
})
}
q, err := db.Querier(c.mint, c.maxt)