diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml
index fe7a977cc9..8b2dd2e910 100644
--- a/.github/workflows/fuzzing.yml
+++ b/.github/workflows/fuzzing.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        fuzz_test: [FuzzParseMetricText, FuzzParseOpenMetric, FuzzParseMetricSelector, FuzzParseExpr]
+        fuzz_test: [FuzzParseMetricText, FuzzParseOpenMetric, FuzzParseMetricSelector, FuzzParseExpr, FuzzXORChunk, FuzzXOR2Chunk]
     steps:
       - name: Checkout repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
diff --git a/util/fuzzing/corpus.go b/util/fuzzing/corpus.go
index 025e4dfd7a..180cb395ea 100644
--- a/util/fuzzing/corpus.go
+++ b/util/fuzzing/corpus.go
@@ -17,6 +17,30 @@ import (
 	"github.com/prometheus/prometheus/promql/promqltest"
 )
 
+// ChunkFuzzSeed is a seed corpus entry for FuzzXORChunk.
+type ChunkFuzzSeed struct {
+	// Seed is the RNG seed used to generate sample timestamps and values.
+	Seed int64
+	// N drives the sample count: count = int(N)%120 + 1.
+	N uint8
+	// NaNMask forces StaleNaN on specific samples: bit i set means sample i uses
+	// StaleNaN instead of a random value. Samples at index 64 and above are never forced.
+	NaNMask uint64
+}
+
+// XOR2ChunkFuzzSeed is a seed corpus entry for FuzzXOR2Chunk.
+type XOR2ChunkFuzzSeed struct {
+	// Seed is the RNG seed used to generate sample timestamps and values.
+	Seed int64
+	// N drives the sample count: count = int(N)%120 + 1.
+	N uint8
+	// NaNMask forces StaleNaN on specific samples: bit i set means sample i uses
+	// StaleNaN instead of a random value. Samples at index 64 and above are never forced.
+	NaNMask uint64
+	// STMode selects the start-timestamp pattern used by the fuzzer (applied as STMode % 5).
+	STMode uint8
+}
+
 // GetCorpusForFuzzParseMetricText returns the seed corpus for FuzzParseMetricText.
 func GetCorpusForFuzzParseMetricText() [][]byte {
 	return [][]byte{
@@ -109,3 +133,38 @@ func GetCorpusForFuzzParseExpr() ([]string, error) {
 
 	return append(builtInExprs, additionalExprs...), nil
 }
+
+// GetCorpusForFuzzXORChunk returns the seed corpus for FuzzXORChunk.
+func GetCorpusForFuzzXORChunk() []ChunkFuzzSeed {
+	return []ChunkFuzzSeed{
+		// Basic cases: no StaleNaN (1, 3 and 120 samples).
+		{Seed: 0, N: 0, NaNMask: 0},
+		{Seed: 42, N: 2, NaNMask: 0},
+		{Seed: 1234567890, N: 119, NaNMask: 0},
+		// Single sample (N: 0) that is StaleNaN.
+		{Seed: 0, N: 0, NaNMask: 0b1},
+		// StaleNaN in the middle of a run (bit 2 set, five samples).
+		{Seed: 42, N: 4, NaNMask: 0b00100},
+		// Alternating StaleNaN on even-indexed samples (bits 0, 2, 4, 6, 8).
+		{Seed: 1, N: 9, NaNMask: 0b0101010101},
+		// All StaleNaN: every mask bit is set.
+		{Seed: 7, N: 9, NaNMask: ^uint64(0)},
+	}
+}
+
+// GetCorpusForFuzzXOR2Chunk returns the seed corpus for FuzzXOR2Chunk.
+func GetCorpusForFuzzXOR2Chunk() []XOR2ChunkFuzzSeed {
+	return []XOR2ChunkFuzzSeed{
+		// No ST at all.
+		{Seed: 0, N: 0, NaNMask: 0, STMode: 0},
+		{Seed: 1234567890, N: 119, NaNMask: 0, STMode: 0},
+		// ST known from sample 0 and then constant.
+		{Seed: 42, N: 2, NaNMask: 0, STMode: 1},
+		// ST is absent at first, then appears at an RNG-chosen later sample and stays constant.
+		{Seed: 42, N: 4, NaNMask: 0b00100, STMode: 2},
+		// Active ST with small deltas to hit compact encodings.
+		{Seed: 1, N: 9, NaNMask: 0b0101010101, STMode: 3},
+		// Active ST with large deltas to hit varbit fallback.
+		{Seed: 7, N: 9, NaNMask: ^uint64(0), STMode: 4},
+	}
+}
diff --git a/util/fuzzing/corpus_gen/main.go b/util/fuzzing/corpus_gen/main.go
index aa38a79a48..ac1cd42f08 100644
--- a/util/fuzzing/corpus_gen/main.go
+++ b/util/fuzzing/corpus_gen/main.go
@@ -67,6 +67,20 @@ func run() error {
 	}
 	fmt.Printf("Generated fuzzParseOpenMetric_seed_corpus.zip with %d entries.\n", len(openMetrics))
 
+	// Generate FuzzXORChunk seed corpus.
+	xorSeeds := fuzzing.GetCorpusForFuzzXORChunk()
+	if err := generateZipFromXORChunkSeeds("fuzzXORChunk", xorSeeds); err != nil {
+		return fmt.Errorf("failed to generate fuzzXORChunk_seed_corpus.zip: %w", err)
+	}
+	fmt.Printf("Generated fuzzXORChunk_seed_corpus.zip with %d entries.\n", len(xorSeeds))
+
+	// Generate FuzzXOR2Chunk seed corpus (its entries carry the extra STMode argument).
+	xor2Seeds := fuzzing.GetCorpusForFuzzXOR2Chunk()
+	if err := generateZipFromXOR2ChunkSeeds("fuzzXOR2Chunk", xor2Seeds); err != nil {
+		return fmt.Errorf("failed to generate fuzzXOR2Chunk_seed_corpus.zip: %w", err)
+	}
+	fmt.Printf("Generated fuzzXOR2Chunk_seed_corpus.zip with %d entries.\n", len(xor2Seeds))
+
 	return nil
 }
 
@@ -107,10 +121,60 @@ func generateZipFromBytes(fuzzName string, corpus [][]byte) error {
 
 // generateZipFromStrings creates a seed corpus ZIP file from a slice of strings.
 func generateZipFromStrings(fuzzName string, corpus []string) error {
-	// Convert []string to [][]byte and delegate to generateZipFromBytes
+	// Convert []string to [][]byte and delegate to generateZipFromBytes.
 	byteCorpus := make([][]byte, len(corpus))
 	for i, s := range corpus {
 		byteCorpus[i] = []byte(s)
 	}
 	return generateZipFromBytes(fuzzName, byteCorpus)
 }
+
+// generateZipFromSeedEntries creates a seed corpus ZIP file from pre-serialised
+// Go fuzz corpus entries. The entries slice is sorted in place (bytewise) before writing.
+func generateZipFromSeedEntries(fuzzName string, entries [][]byte) error {
+	sort.Slice(entries, func(i, j int) bool { // Sorts the caller's slice in place.
+		return string(entries[i]) < string(entries[j])
+	})
+
+	zipPath := filepath.Join("..", fuzzName+"_seed_corpus.zip") // Written to the parent directory.
+	zipFile, err := os.Create(zipPath)
+	if err != nil {
+		return fmt.Errorf("failed to create zip file: %w", err)
+	}
+	defer zipFile.Close()
+
+	zipWriter := zip.NewWriter(zipFile)
+	// No deferred Close: it is called on the success path so its error is not lost.
+
+	for i, entry := range entries {
+		fileName := fmt.Sprintf("seed%d", i) // Names follow the sorted order: seed0, seed1, ...
+		writer, err := zipWriter.Create(fileName)
+		if err != nil {
+			return fmt.Errorf("failed to create zip entry %s: %w", fileName, err)
+		}
+		if _, err := writer.Write(entry); err != nil {
+			return fmt.Errorf("failed to write zip entry %s: %w", fileName, err)
+		}
+	}
+
+	// Close writes the central directory; if it fails the archive is incomplete.
+	return zipWriter.Close()
+}
+
+// generateZipFromXORChunkSeeds creates a seed corpus ZIP file for fuzz functions
+// with signature (int64, uint8, uint64), using the Go fuzz corpus file format.
+func generateZipFromXORChunkSeeds(fuzzName string, seeds []fuzzing.ChunkFuzzSeed) error {
+	entries := make([][]byte, len(seeds))
+	for i, s := range seeds { // One corpus file per seed: version header, then one typed line per argument.
+		entries[i] = []byte(fmt.Sprintf("go test fuzz v1\nint64(%d)\nuint8(%d)\nuint64(%d)\n", s.Seed, s.N, s.NaNMask))
+	}
+	return generateZipFromSeedEntries(fuzzName, entries)
+}
+
+// generateZipFromXOR2ChunkSeeds creates a seed corpus ZIP file for FuzzXOR2Chunk.
+func generateZipFromXOR2ChunkSeeds(fuzzName string, seeds []fuzzing.XOR2ChunkFuzzSeed) error {
+	entries := make([][]byte, len(seeds))
+	for i, s := range seeds { // One corpus file per seed; typed lines follow the f.Fuzz argument order.
+		entries[i] = []byte(fmt.Sprintf("go test fuzz v1\nint64(%d)\nuint8(%d)\nuint64(%d)\nuint8(%d)\n", s.Seed, s.N, s.NaNMask, s.STMode))
+	}
+	return generateZipFromSeedEntries(fuzzName, entries)
+}
diff --git a/util/fuzzing/fuzz_test.go b/util/fuzzing/fuzz_test.go
index ec6d7c4e72..8f2d2c9316 100644
--- a/util/fuzzing/fuzz_test.go
+++ b/util/fuzzing/fuzz_test.go
@@ -16,11 +16,15 @@ package fuzzing
 import (
 	"errors"
 	"io"
+	"math"
+	"math/rand"
 	"testing"
 
 	"github.com/prometheus/prometheus/model/labels"
 	"github.com/prometheus/prometheus/model/textparse"
+	"github.com/prometheus/prometheus/model/value"
 	"github.com/prometheus/prometheus/promql/parser"
+	"github.com/prometheus/prometheus/tsdb/chunkenc"
 )
 
 const (
@@ -117,6 +121,169 @@ func FuzzParseMetricSelector(f *testing.F) {
 	})
 }
 
+// FuzzXORChunk fuzzes the XOR chunk round-trip. seed initialises a deterministic
+// RNG that generates timestamps and values, and n selects the sample count; nanMask
+// forces StaleNaN on specific samples (bit i set → sample i is StaleNaN), ensuring
+// the stale-NaN path is exercised without relying on random chance.
+func FuzzXORChunk(f *testing.F) {
+	for _, s := range GetCorpusForFuzzXORChunk() {
+		f.Add(s.Seed, s.N, s.NaNMask)
+	}
+
+	f.Fuzz(func(t *testing.T, seed int64, n uint8, nanMask uint64) {
+		count := int(n)%120 + 1 // 1..120 samples, matching the contract documented on ChunkFuzzSeed.N.
+		r := rand.New(rand.NewSource(seed))
+
+		type sample struct {
+			t int64
+			v float64
+		}
+		samples := make([]sample, count)
+		var ts int64
+		for i := range count {
+			ts += r.Int63n(10000) + 1 // Strictly increasing timestamps, step 1..10000.
+			v := math.Float64frombits(r.Uint64())
+			if i < 64 && nanMask>>uint(i)&1 == 1 { // The mask only has 64 bits; later samples stay random.
+				v = math.Float64frombits(value.StaleNaN)
+			}
+			samples[i] = sample{t: ts, v: v}
+		}
+
+		c := chunkenc.NewXORChunk()
+		app, err := c.Appender()
+		if err != nil {
+			t.Fatal(err)
+		}
+		for _, s := range samples {
+			// XOR chunk does not store ST, therefore use 0 as ST.
+			app.Append(0, s.t, s.v)
+		}
+
+		it := c.Iterator(nil)
+		for _, want := range samples {
+			if it.Next() == chunkenc.ValNone {
+				t.Fatal("iterator ended early")
+			}
+			gotT, gotV := it.At()
+			if gotT != want.t {
+				t.Fatalf("timestamp mismatch: got %d, want %d", gotT, want.t)
+			}
+			if math.Float64bits(gotV) != math.Float64bits(want.v) { // Compare bit patterns: NaN != NaN as floats.
+				t.Fatalf("value mismatch: got %x, want %x", math.Float64bits(gotV), math.Float64bits(want.v))
+			}
+		}
+		if it.Next() != chunkenc.ValNone {
+			t.Fatal("iterator has extra values")
+		}
+		if err := it.Err(); err != nil {
+			t.Fatal(err)
+		}
+	})
+}
+
+// FuzzXOR2Chunk fuzzes the XOR2 chunk round-trip. seed initialises a deterministic
+// RNG that generates start timestamps (ST), timestamps, and values; n selects the
+// sample count; nanMask forces StaleNaN on specific samples (bit i set → sample i
+// is StaleNaN); stMode selects whether ST stays absent, constant, appears later,
+// or changes with small or large deltas. This ensures the stale-NaN and ST
+// encoding paths are exercised without relying on random chance.
+func FuzzXOR2Chunk(f *testing.F) {
+	for _, s := range GetCorpusForFuzzXOR2Chunk() {
+		f.Add(s.Seed, s.N, s.NaNMask, s.STMode)
+	}
+
+	f.Fuzz(func(t *testing.T, seed int64, n uint8, nanMask uint64, stMode uint8) {
+		count := int(n)%120 + 1 // 1..120 samples, matching the contract documented on XOR2ChunkFuzzSeed.N.
+		r := rand.New(rand.NewSource(seed))
+
+		type sample struct {
+			st, t int64
+			v     float64
+		}
+		samples := make([]sample, count)
+		var ts int64
+		activeST := int64(0)
+		constantST := int64(0)
+		lateSTIndex := 1 // Index at which mode 2 first sets ST; never reached when count == 1.
+		if count > 1 {
+			lateSTIndex = int(r.Int31n(int32(count-1))) + 1 // RNG-chosen index in [1, count-1].
+		}
+		for i := range count {
+			ts += r.Int63n(10000) + 1 // Strictly increasing timestamps, step 1..10000.
+			v := math.Float64frombits(r.Uint64())
+			if i < 64 && nanMask>>uint(i)&1 == 1 { // The mask only has 64 bits; later samples stay random.
+				v = math.Float64frombits(value.StaleNaN)
+			}
+
+			var st int64
+			switch stMode % 5 {
+			case 0: // ST is 0 for every sample.
+				st = 0
+			case 1: // ST is fixed at sample 0, 1..10000 before its timestamp, and never changes.
+				if i == 0 {
+					constantST = ts - (r.Int63n(10000) + 1)
+				}
+				st = constantST
+			case 2: // ST stays 0 until lateSTIndex, then is fixed from that sample on.
+				if i >= lateSTIndex {
+					if i == lateSTIndex {
+						constantST = ts - (r.Int63n(10000) + 1)
+					}
+					st = constantST
+				}
+			case 3: // ST starts shortly before the first timestamp, then moves by a step in [-4, 3].
+				if i == 0 {
+					activeST = ts - (r.Int63n(10000) + 1)
+				} else {
+					activeST -= r.Int63n(8) - 3
+				}
+				st = activeST
+			default: // ST is redrawn for every sample, an arbitrary non-negative distance before ts.
+				activeST = ts - r.Int63()
+				st = activeST
+			}
+
+			samples[i] = sample{
+				st: st,
+				t:  ts,
+				v:  v,
+			}
+		}
+
+		c := chunkenc.NewXOR2Chunk()
+		app, err := c.Appender()
+		if err != nil {
+			t.Fatal(err)
+		}
+		for _, s := range samples {
+			app.Append(s.st, s.t, s.v)
+		}
+
+		it := c.Iterator(nil)
+		for _, want := range samples {
+			if it.Next() == chunkenc.ValNone {
+				t.Fatal("iterator ended early")
+			}
+			gotT, gotV := it.At()
+			if gotT != want.t {
+				t.Fatalf("timestamp mismatch: got %d, want %d", gotT, want.t)
+			}
+			if math.Float64bits(gotV) != math.Float64bits(want.v) { // Compare bit patterns: NaN != NaN as floats.
+				t.Fatalf("value mismatch: got %x, want %x", math.Float64bits(gotV), math.Float64bits(want.v))
+			}
+			if gotST := it.AtST(); gotST != want.st {
+				t.Fatalf("ST mismatch: got %d, want %d", gotST, want.st)
+			}
+		}
+		if it.Next() != chunkenc.ValNone {
+			t.Fatal("iterator has extra values")
+		}
+		if err := it.Err(); err != nil {
+			t.Fatal(err)
+		}
+	})
+}
+
 // FuzzParseExpr fuzzes the expression parser.
 func FuzzParseExpr(f *testing.F) {
 	// Add seed corpus from built-in test expressions