From 51b2b45c5ebb37602863229ff3340bbf8c168a5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 16 Feb 2026 15:48:27 +0100 Subject: [PATCH] tsdb/chunkenc: add alternative ST encodings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits Partially coded with Claude Opus 4.5 and 4.6. --- storage/remote/codec_test.go | 2 +- tsdb/chunkenc/benchmark_test.go | 395 ++++------- tsdb/chunkenc/bstream.go | 89 +++ tsdb/chunkenc/chunk.go | 204 +++++- tsdb/chunkenc/chunk_test.go | 19 +- tsdb/chunkenc/xor.go | 721 ++++++++++++++++++++ tsdb/chunkenc/xor18111.go | 688 +++++++++++++++++++ tsdb/chunkenc/xor18111st.go | 931 ++++++++++++++++++++++++++ tsdb/chunkenc/xor18111st2.go | 943 +++++++++++++++++++++++++++ tsdb/chunkenc/xor18111st2_test.go | 22 + tsdb/chunkenc/xor18111st_test.go | 22 + tsdb/chunkenc/xor18238.go | 675 +++++++++++++++++++ tsdb/chunkenc/xor18238optst.go | 761 +++++++++++++++++++++ tsdb/chunkenc/xor18238optst2.go | 799 +++++++++++++++++++++++ tsdb/chunkenc/xor18238optst2_test.go | 81 +++ tsdb/chunkenc/xor18238optst3.go | 761 +++++++++++++++++++++ tsdb/chunkenc/xor18238optst3_test.go | 81 +++ tsdb/chunkenc/xor18238optst_test.go | 81 +++ tsdb/chunkenc/xor_test.go | 14 + tsdb/chunkenc/xoroptst.go | 556 ++++++++-------- tsdb/chunkenc/xoroptst_otel.go | 759 +++++++++++++++++++++ tsdb/chunkenc/xoroptst_otel_test.go | 24 + tsdb/db_append_v2_test.go | 2 +- tsdb/head_test.go | 2 +- tsdb/ooo_head.go | 2 +- tsdb/ooo_head_test.go | 2 +- 26 files changed, 8093 insertions(+), 543 deletions(-) create mode 100644 tsdb/chunkenc/xor18111.go create mode 100644 tsdb/chunkenc/xor18111st.go create mode 100644 tsdb/chunkenc/xor18111st2.go create mode 100644 tsdb/chunkenc/xor18111st2_test.go create mode 100644 tsdb/chunkenc/xor18111st_test.go create mode 100644 tsdb/chunkenc/xor18238.go create mode 100644 tsdb/chunkenc/xor18238optst.go create mode 100644 tsdb/chunkenc/xor18238optst2.go 
create mode 100644 tsdb/chunkenc/xor18238optst2_test.go create mode 100644 tsdb/chunkenc/xor18238optst3.go create mode 100644 tsdb/chunkenc/xor18238optst3_test.go create mode 100644 tsdb/chunkenc/xor18238optst_test.go create mode 100644 tsdb/chunkenc/xoroptst_otel.go create mode 100644 tsdb/chunkenc/xoroptst_otel_test.go diff --git a/storage/remote/codec_test.go b/storage/remote/codec_test.go index 5da8c8176c..a7345624bd 100644 --- a/storage/remote/codec_test.go +++ b/storage/remote/codec_test.go @@ -940,7 +940,7 @@ func TestChunkedSeriesIterator(t *testing.T) { chks := buildTestChunks(t) // Set chunk type to an invalid value. - chks[0].Type = 8 + chks[0].Type = 15 it := newChunkedSeriesIterator(chks, 0, 14000) diff --git a/tsdb/chunkenc/benchmark_test.go b/tsdb/chunkenc/benchmark_test.go index 702e3a95e2..830690085e 100644 --- a/tsdb/chunkenc/benchmark_test.go +++ b/tsdb/chunkenc/benchmark_test.go @@ -62,284 +62,161 @@ func foreachFmtSampleCase(b *testing.B, fn func(b *testing.B, f fmtCase, s sampl rFloats[i] = float64(r.Intn(100)) } - sampleCases := []sampleCase{ - { - name: "vt=constant/st=0", - samples: func() (ret []triple) { - t, v := initT, initV - for range nSamples { - t += 15000 - ret = append(ret, triple{st: 0, t: t, v: v}) - } - return ret - }(), - }, + // tPatterns control how the regular timestamp advances. + type tPattern struct { + name string + next func(t int64, i int) int64 + } + // vPatterns control how the value advances. + type vPattern struct { + name string + next func(v float64, i int) float64 + } + // stPatterns compute the start timestamp from the previous t (before the + // step), the new t (after the step), and the sample index. + type stPattern struct { + name string + compute func(prevT, newT int64, i int) int64 + } + tPatterns := []tPattern{ { - // Cumulative with a constant ST through the whole chunk, typical case (e.g. long counting counter). 
- name: "vt=constant/st=cumulative", - samples: func() (ret []triple) { - t, v := initT, initV - for range nSamples { - t += 15000 - ret = append(ret, triple{st: initST, t: t, v: v}) - } - return ret - }(), + name: "t=constant", + next: func(t int64, _ int) int64 { return t + 15000 }, }, { - // Delta simulates delta type or worst case for cumulatives, where ST - // is changing on every sample. - name: "vt=constant/st=delta-exclusive", - samples: func() (ret []triple) { - t, v := initT, initV - for range nSamples { - st := t + 1 // ST is a tight interval after the last t+1ms. - t += 15000 - ret = append(ret, triple{st: st, t: t, v: v}) - } - return ret - }(), + // 15 seconds ± up to 100ms of jitter. + name: "t=jitter", + next: func(t int64, i int) int64 { return t + rInts[i] - 50 + 15000 }, }, { - // Delta simulates delta type or worst case for cumulatives, where ST - // is changing on every sample. - name: "vt=constant/st=delta-inclusive", - samples: func() (ret []triple) { - t, v := initT, initV - for range nSamples { - st := t // ST is the same as the previous t. - t += 15000 - ret = append(ret, triple{st: st, t: t, v: v}) + // First 10 samples at constant 60s, then one 10-interval gap (600s), + // then 60s ± 30ms jitter. The gap triggers XOR18111 full mode via + // multiplier encoding (dod=540000 = 9×60000). Subsequent small-jitter + // delta-of-deltas (≤30ms) use XOR18111's 7-bit full-mode code (9 bits + // total) vs XOR compact's minimum 14-bit code (16 bits total). + name: "t=gap-jitter", + next: func(t int64, i int) int64 { + if i < 10 { + return t + 60000 } - return ret - }(), + if i == 10 { + return t + 10*60000 // 10-interval gap; triggers XOR18111 full mode. + } + return t + 60000 + rInts[i]%61 - 30 // 60s ± 30ms jitter. + }, + }, + } + vPatterns := []vPattern{ + { + name: "v=constant", + next: func(v float64, _ int) float64 { return v }, + }, + // We are not interested in float compression we're not changing it. 
+ // { + // // Varying from -50 to +50 in 100 discrete steps. + // name: "v=rand-steps", + // next: func(v float64, i int) float64 { return v + rFloats[i] - 50 }, + // }, + // { + // // Random increment between 0 and 1.0. + // name: "v=rand0-1", + // next: func(v float64, i int) float64 { return v + rFloats[i]/100.0 }, + // }, + // { + // // Random decrement between 0 and -1.0. Tests negative varint encoding; + // // see https://victoriametrics.com/blog/go-protobuf/. + // name: "v=nrand0-1", + // next: func(v float64, i int) float64 { return v - rFloats[i]/100.0 }, + // }, + } + stPatterns := []stPattern{ + { + name: "st=0", + compute: func(_, _ int64, _ int) int64 { return 0 }, }, { - name: "vt=constant/st=t", - samples: func() (ret []triple) { - t, v := initT, initV - for range nSamples { - t += 15000 - ret = append(ret, triple{st: t, t: t, v: v}) - } - return ret - }(), + // Constant ST throughout the chunk, typical for long-running counters. + name: "st=cumulative", + compute: func(_, _ int64, _ int) int64 { return initST }, }, { - // Delta simulates delta type or worst case for cumulatives, where ST - // is changing on every sample. - name: "vt=constant/st=delta-jitter", - samples: func() (ret []triple) { + // ST is just after the previous sample's t: tight delta interval. + name: "st=delta-excl", + compute: func(prevT, _ int64, _ int) int64 { return prevT + 1 }, + }, + { + // ST equals the previous sample's t: inclusive delta interval. + name: "st=delta-incl", + compute: func(prevT, _ int64, _ int) int64 { return prevT }, + }, + { + // ST equals the current sample's t. + name: "st=t", + compute: func(_, newT int64, _ int) int64 { return newT }, + }, + { + // ST is equal to the previous t plus up to 100ms of jitter. + name: "st=delta-jitter", + compute: func(prevT, _ int64, i int) int64 { return prevT + rInts[nSamples+i] }, + }, + { + // Cumulative ST with periodic resets 10s before the current t. 
+ name: "st=cum-resets", + compute: func(_, newT int64, i int) int64 { + if i%6 == 5 { + return newT - 10000 + } + return initST + }, + }, + { + // Cumulative ST with periodic zero resets. + name: "st=cum-zeros", + compute: func(_, _ int64, i int) int64 { + if i%6 == 5 { + return 0 + } + return initST + }, + }, + } + + var sampleCases []sampleCase + for _, tp := range tPatterns { + for _, vp := range vPatterns { + for _, sp := range stPatterns { + samples := make([]triple, 0, nSamples) t, v := initT, initV for i := range nSamples { - st := t + rInts[nSamples+i] // ST is the same as the previous t + jitter of up to 100ms. - t += 15000 - ret = append(ret, triple{st: st, t: t, v: v}) + prevT := t + t = tp.next(t, i) + v = vp.next(v, i) + st := sp.compute(prevT, t, i) + samples = append(samples, triple{st: st, t: t, v: v}) } - return ret - }(), - }, - { - name: "vt=random steps/st=0", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] - 50 // Varying from -50 to +50 in 100 discrete steps. - ret = append(ret, triple{st: 0, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random steps/st=cumulative", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] - 50 // Varying from -50 to +50 in 100 discrete steps. - ret = append(ret, triple{st: initST, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random steps/st=delta-exclusive", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - st := t + 1 // ST is a tight interval after the last t+1ms. - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] - 50 // Varying from -50 to +50 in 100 discrete steps. 
- ret = append(ret, triple{st: st, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random steps/st=delta-inclusive", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - st := t // ST is equal to the previous t. - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] - 50 // Varying from -50 to +50 in 100 discrete steps. - ret = append(ret, triple{st: st, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random steps/st=t", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] - 50 // Varying from -50 to +50 in 100 discrete steps. - ret = append(ret, triple{st: t, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random steps/st=delta-jittery", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - st := t + rInts[nSamples+i] // ST is equal to the previous t + jitter of up to 100ms. - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] - 50 // Varying from -50 to +50 in 100 discrete steps. - ret = append(ret, triple{st: st, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random 0-1/st=0", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] / 100.0 // Random between 0 and 1.0. - ret = append(ret, triple{st: 0, t: t, v: v}) - } - return ret - }(), - }, - { - // Are we impacted by https://victoriametrics.com/blog/go-protobuf/ negative varint issue? (zig-zag needed?) - name: "vt=negrandom 0-1/st=0", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v -= rFloats[i] / 100.0 // Random between 0 and 1.0. 
- ret = append(ret, triple{st: 0, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random 0-1/st=cumulative", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] / 100.0 // Random between 0 and 1.0. - ret = append(ret, triple{st: initST, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random 0-1/st=cumulative-periodic-resets", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] / 100.0 // Random between 0 and 1.0. - st := initST - if i%6 == 5 { - st = t - 10000 // Reset of 10s before current t. - } - ret = append(ret, triple{st: st, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random 0-1/st=cumulative-periodic-zeros", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] / 100.0 // Random between 0 and 1.0. - st := initST - if i%6 == 5 { - st = 0 - } - ret = append(ret, triple{st: st, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random 0-1/st=delta-exclusive", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - st := t + 1 // ST is a tight interval after the last t+1ms. - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] / 100.0 // Random between 0 and 1.0. - ret = append(ret, triple{st: st, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random 0-1/st=delta-inclusive", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - st := t // ST is the same as the previous t. - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] / 100.0 // Random between 0 and 1.0. 
- ret = append(ret, triple{st: st, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random 0-1/st=t", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] / 100.0 // Random between 0 and 1.0. - ret = append(ret, triple{st: t, t: t, v: v}) - } - return ret - }(), - }, - { - name: "vt=random 0-1/st=delta-jittery", - samples: func() (ret []triple) { - t, v := initT, initV - for i := range nSamples { - st := t + rInts[nSamples+i] // ST is equal to the previous t + jitter of up to 100ms. - t += rInts[i] - 50 + 15000 // 15 seconds +- up to 100ms of jitter. - v += rFloats[i] / 100.0 // Random between 0 and 1.0. - ret = append(ret, triple{st: st, t: t, v: v}) - } - return ret - }(), - }, + sampleCases = append(sampleCases, sampleCase{ + name: tp.name + "/" + vp.name + "/" + sp.name, + samples: samples, + }) + } + } } for _, f := range []fmtCase{ {name: "XOR", newChunkFn: func() Chunk { return NewXORChunk() }, stUnsupported: true}, + {name: "XOR2", newChunkFn: func() Chunk { return NewXOR2Chunk() }, stUnsupported: true}, + {name: "XOR2ST", newChunkFn: func() Chunk { return NewXOR2STChunk() }}, + {name: "XOR2ST_OTEL", newChunkFn: func() Chunk { return NewXOR2STotelChunk() }}, {name: "XOR_OPT_ST", newChunkFn: func() Chunk { return NewXOROptSTChunk() }}, + {name: "XOR_OPT_ST_OTEL", newChunkFn: func() Chunk { return NewXOROptSTotelChunk() }}, + {name: "XOR18111", newChunkFn: func() Chunk { return NewXOR18111Chunk() }, stUnsupported: true}, + {name: "XOR18111ST", newChunkFn: func() Chunk { return NewXOR18111STChunk() }}, + {name: "XOR18111ST2", newChunkFn: func() Chunk { return NewXOR18111ST2Chunk() }}, + {name: "XOR18238", newChunkFn: func() Chunk { return NewXOR18238Chunk() }, stUnsupported: true}, + {name: "XOR18238OPTST", newChunkFn: func() Chunk { return NewXOR18238OPTSTChunk() }}, + {name: "XOR18238OPTST2", newChunkFn: func() Chunk { return 
NewXOR18238OPTST2Chunk() }}, + {name: "XOR18238OPTST3", newChunkFn: func() Chunk { return NewXOR18238OPTST3Chunk() }}, } { for _, s := range sampleCases { b.Run(fmt.Sprintf("fmt=%s/%s", f.name, s.name), func(b *testing.B) { diff --git a/tsdb/chunkenc/bstream.go b/tsdb/chunkenc/bstream.go index abf6e4dbef..1b95769ae4 100644 --- a/tsdb/chunkenc/bstream.go +++ b/tsdb/chunkenc/bstream.go @@ -215,6 +215,95 @@ func (b *bstreamReader) ReadByte() (byte, error) { return byte(v), nil } +// readXOR18238Control reads the XOR18238 variable-length joint control prefix +// and returns 0-5 mapping to the six encoding cases: +// +// 0 → '0' dod=0, val=0 (1 bit consumed) +// 1 → '10' dod=0, val≠0 (2 bits consumed) +// 2 → '110' dod≠0, 13-bit signed dod (3 bits consumed) +// 3 → '1110' dod≠0, 20-bit signed dod (4 bits consumed) +// 4 → '11110' dod≠0, 64-bit escape (5 bits consumed) +// 5 → '11111' dod=0, stale NaN (5 bits consumed) +// +// The fast path peeks at 4 bits from the internal buffer; for the '1111' +// prefix a fifth bit is read to distinguish cases 4 and 5. +func (b *bstreamReader) readXOR18238Control() (uint8, error) { + if b.valid >= 4 { + top4 := uint8((b.buffer >> (b.valid - 4)) & 0xf) + if top4 < 8 { // '0xxx' → case 0. + b.valid-- + return 0, nil + } + if top4 < 12 { // '10xx' → case 1. + b.valid -= 2 + return 1, nil + } + if top4 < 14 { // '110x' → case 2. + b.valid -= 3 + return 2, nil + } + if top4 == 14 { // '1110' → case 3. + b.valid -= 4 + return 3, nil + } + // '1111': need fifth bit to distinguish cases 4 and 5. + if b.valid >= 5 { + bit4 := uint8((b.buffer >> (b.valid - 5)) & 1) + b.valid -= 5 + return 4 + bit4, nil + } + // Fifth bit spans a buffer boundary; consume the four known bits + // and read the fifth from the stream. + b.valid -= 4 + bit4, err := b.readBit() + if err != nil { + return 0, err + } + if bit4 == zero { + return 4, nil + } + return 5, nil + } + + // Slow path: bits may span buffer boundaries, read one at a time. 
+ bit0, err := b.readBit() + if err != nil { + return 0, err + } + if bit0 == zero { + return 0, nil + } + bit1, err := b.readBit() + if err != nil { + return 0, err + } + if bit1 == zero { + return 1, nil + } + bit2, err := b.readBit() + if err != nil { + return 0, err + } + if bit2 == zero { + return 2, nil + } + bit3, err := b.readBit() + if err != nil { + return 0, err + } + if bit3 == zero { + return 3, nil + } + bit4, err := b.readBit() + if err != nil { + return 0, err + } + if bit4 == zero { + return 4, nil + } + return 5, nil +} + // loadNextBuffer loads the next bytes from the stream into the internal buffer. // The input nbits is the minimum number of bits that must be read, but the implementation // can read more (if possible) to improve performances. diff --git a/tsdb/chunkenc/chunk.go b/tsdb/chunkenc/chunk.go index de5fa0c2de..2897c72466 100644 --- a/tsdb/chunkenc/chunk.go +++ b/tsdb/chunkenc/chunk.go @@ -31,6 +31,17 @@ const ( EncHistogram EncFloatHistogram EncXOROptST + EncXOROptOtelST + EncXOR2 + EncXOR2ST + EncXOR2STotel + EncXOR18111 + EncXOR18111ST + EncXOR18111ST2 + EncXOR18238 + EncXOR18238OPTST + EncXOR18238OPTST2 + EncXOR18238OPTST3 ) func (e Encoding) String() string { @@ -45,13 +56,37 @@ func (e Encoding) String() string { return "floathistogram" case EncXOROptST: return "XOR-start-timestamp" + case EncXOROptOtelST: + return "XOR-opt-otel-start-timestamp" + case EncXOR2: + return "XOR2" + case EncXOR2ST: + return "XOR2-start-timestamp" + case EncXOR2STotel: + return "XOR2-otel-start-timestamp" + case EncXOR18111: + return "XOR18111" + case EncXOR18111ST: + return "XOR18111-start-timestamp" + case EncXOR18111ST2: + return "XOR18111-start-timestamp-2" + case EncXOR18238: + return "XOR18238" + case EncXOR18238OPTST: + return "XOR18238-start-timestamp" + case EncXOR18238OPTST2: + return "XOR18238-start-timestamp-2" + case EncXOR18238OPTST3: + return "XOR18238-start-timestamp-3" } return "" } +const EncodingForFloatST = EncXOR2ST + // 
IsValidEncoding returns true for supported encodings. func IsValidEncoding(e Encoding) bool { - return e == EncXOR || e == EncHistogram || e == EncFloatHistogram || e == EncXOROptST + return e == EncXOR || e == EncHistogram || e == EncFloatHistogram || e == EncodingForFloatST } const ( @@ -195,7 +230,7 @@ func (v ValueType) ChunkEncoding(storeST bool) Encoding { switch v { case ValFloat: if storeST { - return EncXOROptST + return EncodingForFloatST } return EncXOR case ValHistogram: @@ -299,6 +334,17 @@ type pool struct { histogram sync.Pool floatHistogram sync.Pool xoroptst sync.Pool + xoroptOtelst sync.Pool + xor2 sync.Pool + xor2st sync.Pool + xor2stOtel sync.Pool + xor18111 sync.Pool + xor18111st sync.Pool + xor18111st2 sync.Pool + xor18238 sync.Pool + xor18238optst sync.Pool + xor18238optst2 sync.Pool + xor18238optst3 sync.Pool } // NewPool returns a new pool. @@ -324,6 +370,61 @@ func NewPool() Pool { return &XorOptSTChunk{b: bstream{}} }, }, + xoroptOtelst: sync.Pool{ + New: func() any { + return &XorOptSTotelChunk{b: bstream{}} + }, + }, + xor2: sync.Pool{ + New: func() any { + return &XOR2Chunk{XORChunk: XORChunk{b: bstream{}}} + }, + }, + xor2st: sync.Pool{ + New: func() any { + return &XOR2STChunk{XORChunk: XORChunk{b: bstream{}}} + }, + }, + xor2stOtel: sync.Pool{ + New: func() any { + return &XOR2STotelChunk{XORChunk: XORChunk{b: bstream{}}} + }, + }, + xor18111: sync.Pool{ + New: func() any { + return &XOR18111Chunk{b: bstream{}} + }, + }, + xor18111st: sync.Pool{ + New: func() any { + return &XOR18111STChunk{b: bstream{}} + }, + }, + xor18111st2: sync.Pool{ + New: func() any { + return &XOR18111ST2Chunk{b: bstream{}} + }, + }, + xor18238: sync.Pool{ + New: func() any { + return &XOR18238Chunk{b: bstream{}} + }, + }, + xor18238optst: sync.Pool{ + New: func() any { + return &XOR18238OPTSTChunk{b: bstream{}} + }, + }, + xor18238optst2: sync.Pool{ + New: func() any { + return &XOR18238OPTST2Chunk{b: bstream{}} + }, + }, + xor18238optst3: sync.Pool{ + 
New: func() any { + return &XOR18238OPTST3Chunk{b: bstream{}} + }, + }, } } @@ -338,6 +439,28 @@ func (p *pool) Get(e Encoding, b []byte) (Chunk, error) { c = p.floatHistogram.Get().(*FloatHistogramChunk) case EncXOROptST: c = p.xoroptst.Get().(*XorOptSTChunk) + case EncXOROptOtelST: + c = p.xoroptOtelst.Get().(*XorOptSTotelChunk) + case EncXOR2: + c = p.xor2.Get().(*XOR2Chunk) + case EncXOR2ST: + c = p.xor2st.Get().(*XOR2STChunk) + case EncXOR2STotel: + c = p.xor2stOtel.Get().(*XOR2STotelChunk) + case EncXOR18111: + c = p.xor18111.Get().(*XOR18111Chunk) + case EncXOR18111ST: + c = p.xor18111st.Get().(*XOR18111STChunk) + case EncXOR18111ST2: + c = p.xor18111st2.Get().(*XOR18111ST2Chunk) + case EncXOR18238: + c = p.xor18238.Get().(*XOR18238Chunk) + case EncXOR18238OPTST: + c = p.xor18238optst.Get().(*XOR18238OPTSTChunk) + case EncXOR18238OPTST2: + c = p.xor18238optst2.Get().(*XOR18238OPTST2Chunk) + case EncXOR18238OPTST3: + c = p.xor18238optst3.Get().(*XOR18238OPTST3Chunk) default: return nil, fmt.Errorf("invalid chunk encoding %q", e) } @@ -362,6 +485,39 @@ func (p *pool) Put(c Chunk) error { case EncXOROptST: _, ok = c.(*XorOptSTChunk) sp = &p.xoroptst + case EncXOROptOtelST: + _, ok = c.(*XorOptSTotelChunk) + sp = &p.xoroptOtelst + case EncXOR2: + _, ok = c.(*XOR2Chunk) + sp = &p.xor2 + case EncXOR2ST: + _, ok = c.(*XOR2STChunk) + sp = &p.xor2st + case EncXOR2STotel: + _, ok = c.(*XOR2STotelChunk) + sp = &p.xor2stOtel + case EncXOR18111: + _, ok = c.(*XOR18111Chunk) + sp = &p.xor18111 + case EncXOR18111ST: + _, ok = c.(*XOR18111STChunk) + sp = &p.xor18111st + case EncXOR18111ST2: + _, ok = c.(*XOR18111ST2Chunk) + sp = &p.xor18111st2 + case EncXOR18238: + _, ok = c.(*XOR18238Chunk) + sp = &p.xor18238 + case EncXOR18238OPTST: + _, ok = c.(*XOR18238OPTSTChunk) + sp = &p.xor18238optst + case EncXOR18238OPTST2: + _, ok = c.(*XOR18238OPTST2Chunk) + sp = &p.xor18238optst2 + case EncXOR18238OPTST3: + _, ok = c.(*XOR18238OPTST3Chunk) + sp = &p.xor18238optst3 default: 
return fmt.Errorf("invalid chunk encoding %q", c.Encoding()) } @@ -390,6 +546,28 @@ func FromData(e Encoding, d []byte) (Chunk, error) { return &FloatHistogramChunk{b: bstream{count: 0, stream: d}}, nil case EncXOROptST: return &XorOptSTChunk{b: bstream{count: 0, stream: d}}, nil + case EncXOROptOtelST: + return &XorOptSTotelChunk{b: bstream{count: 0, stream: d}}, nil + case EncXOR2: + return &XOR2Chunk{XORChunk: XORChunk{b: bstream{count: 0, stream: d}}}, nil + case EncXOR2ST: + return &XOR2STChunk{XORChunk: XORChunk{b: bstream{count: 0, stream: d}}}, nil + case EncXOR2STotel: + return &XOR2STotelChunk{XORChunk: XORChunk{b: bstream{count: 0, stream: d}}}, nil + case EncXOR18111: + return &XOR18111Chunk{b: bstream{count: 0, stream: d}}, nil + case EncXOR18111ST: + return &XOR18111STChunk{b: bstream{count: 0, stream: d}}, nil + case EncXOR18111ST2: + return &XOR18111ST2Chunk{b: bstream{count: 0, stream: d}}, nil + case EncXOR18238: + return &XOR18238Chunk{b: bstream{count: 0, stream: d}}, nil + case EncXOR18238OPTST: + return &XOR18238OPTSTChunk{b: bstream{count: 0, stream: d}}, nil + case EncXOR18238OPTST2: + return &XOR18238OPTST2Chunk{b: bstream{count: 0, stream: d}}, nil + case EncXOR18238OPTST3: + return &XOR18238OPTST3Chunk{b: bstream{count: 0, stream: d}}, nil } return nil, fmt.Errorf("invalid chunk encoding %q", e) } @@ -405,6 +583,28 @@ func NewEmptyChunk(e Encoding) (Chunk, error) { return NewFloatHistogramChunk(), nil case EncXOROptST: return NewXOROptSTChunk(), nil + case EncXOROptOtelST: + return NewXOROptSTotelChunk(), nil + case EncXOR2: + return NewXOR2Chunk(), nil + case EncXOR2ST: + return NewXOR2STChunk(), nil + case EncXOR2STotel: + return NewXOR2STotelChunk(), nil + case EncXOR18111: + return NewXOR18111Chunk(), nil + case EncXOR18111ST: + return NewXOR18111STChunk(), nil + case EncXOR18111ST2: + return NewXOR18111ST2Chunk(), nil + case EncXOR18238: + return NewXOR18238Chunk(), nil + case EncXOR18238OPTST: + return NewXOR18238OPTSTChunk(), nil + 
case EncXOR18238OPTST2: + return NewXOR18238OPTST2Chunk(), nil + case EncXOR18238OPTST3: + return NewXOR18238OPTST3Chunk(), nil } return nil, fmt.Errorf("invalid chunk encoding %q", e) } diff --git a/tsdb/chunkenc/chunk_test.go b/tsdb/chunkenc/chunk_test.go index 1717300288..130e146f35 100644 --- a/tsdb/chunkenc/chunk_test.go +++ b/tsdb/chunkenc/chunk_test.go @@ -31,15 +31,26 @@ func TestChunk(t *testing.T) { testcases := []struct { encoding Encoding supportsST bool - factory func() Chunk }{ - {encoding: EncXOR, supportsST: false, factory: func() Chunk { return NewXORChunk() }}, - {encoding: EncXOROptST, supportsST: true, factory: func() Chunk { return NewXOROptSTChunk() }}, + {encoding: EncXOR, supportsST: false}, + {encoding: EncXOR2, supportsST: false}, + {encoding: EncXOR18111, supportsST: false}, + {encoding: EncXOR18111ST, supportsST: true}, + {encoding: EncXOR18111ST2, supportsST: true}, + {encoding: EncXOR2ST, supportsST: true}, + {encoding: EncXOR2STotel, supportsST: true}, + {encoding: EncXOROptST, supportsST: true}, + {encoding: EncXOROptOtelST, supportsST: true}, + {encoding: EncXOR18238, supportsST: false}, + {encoding: EncXOR18238OPTST, supportsST: true}, + {encoding: EncXOR18238OPTST2, supportsST: true}, + {encoding: EncXOR18238OPTST3, supportsST: true}, } for _, tc := range testcases { t.Run(fmt.Sprintf("%v", tc.encoding), func(t *testing.T) { for range make([]struct{}, 1) { - c := tc.factory() + c, err := NewEmptyChunk(tc.encoding) + require.NoError(t, err) testChunk(t, c, tc.supportsST) } }) diff --git a/tsdb/chunkenc/xor.go b/tsdb/chunkenc/xor.go index 5a9a59dc22..ee1fb98206 100644 --- a/tsdb/chunkenc/xor.go +++ b/tsdb/chunkenc/xor.go @@ -407,6 +407,727 @@ func (it *xorIterator) readValue() ValueType { return ValFloat } +// XOR2Chunk holds XOR2 encoded sample data. It uses varbit_int encoding for +// timestamp delta-of-delta instead of the coarse 4-bucket encoding used by XORChunk. 
+type XOR2Chunk struct { + XORChunk +} + +// NewXOR2Chunk returns a new chunk with XOR2 encoding. +func NewXOR2Chunk() *XOR2Chunk { + b := make([]byte, chunkHeaderSize, chunkAllocationSize) + return &XOR2Chunk{XORChunk: XORChunk{b: bstream{stream: b, count: 0}}} +} + +// Encoding returns the encoding type. +func (*XOR2Chunk) Encoding() Encoding { + return EncXOR2 +} + +// Appender implements the Chunk interface. +func (c *XOR2Chunk) Appender() (Appender, error) { + if len(c.b.stream) == chunkHeaderSize { + return &xor2Appender{xorAppender: xorAppender{b: &c.b, t: math.MinInt64, leading: 0xff}}, nil + } + it := c.iterator(nil) + + // To get an appender we must know the state it would have if we had + // appended all existing data from scratch. + // We iterate through the end and populate via the iterator's state. + for it.Next() != ValNone { + } + if err := it.Err(); err != nil { + return nil, err + } + + a := &xor2Appender{ + xorAppender: xorAppender{ + b: &c.b, + t: it.t, + v: it.val, + tDelta: it.tDelta, + leading: it.leading, + trailing: it.trailing, + }, + } + return a, nil +} + +func (c *XOR2Chunk) iterator(it Iterator) *xor2Iterator { + if xor2Iter, ok := it.(*xor2Iterator); ok { + xor2Iter.Reset(c.b.bytes()) + return xor2Iter + } + return &xor2Iterator{ + xorIterator: xorIterator{ + br: newBReader(c.b.bytes()[chunkHeaderSize:]), + numTotal: binary.BigEndian.Uint16(c.b.bytes()), + t: math.MinInt64, + }, + } +} + +// Iterator implements the Chunk interface. +func (c *XOR2Chunk) Iterator(it Iterator) Iterator { + return c.iterator(it) +} + +// xor2Appender uses varbit_int encoding for timestamp delta-of-delta. 
type xor2Appender struct {
	xorAppender
}

// Append writes the sample (t, v) to the chunk. The first argument is ignored
// by this encoding; xor2stAppender is the variant that stores it as a start
// timestamp.
//
// The layout depends on how many samples the two-byte chunk header already
// counts:
//   - sample 0: t as a varint followed by the 64 raw bits of v;
//   - sample 1: the timestamp delta as a uvarint followed by the value delta;
//   - later samples: the timestamp delta-of-delta via putVarbitInt followed
//     by the value delta.
func (a *xor2Appender) Append(_, t int64, v float64) {
	var tDelta uint64
	// The number of samples written so far is kept in the chunk header.
	num := binary.BigEndian.Uint16(a.b.bytes())
	switch num {
	case 0:
		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutVarint(buf, t)] {
			a.b.writeByte(b)
		}
		a.b.writeBits(math.Float64bits(v), 64)
	case 1:
		tDelta = uint64(t - a.t)

		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutUvarint(buf, tDelta)] {
			a.b.writeByte(b)
		}

		a.writeVDelta(v)
	default:
		tDelta = uint64(t - a.t)
		dod := int64(tDelta - a.tDelta)

		putVarbitInt(a.b, dod)

		a.writeVDelta(v)
	}

	// Remember the sample for the next delta computation and bump the
	// sample count in the header.
	a.t = t
	a.v = v
	binary.BigEndian.PutUint16(a.b.bytes(), num+1)
	a.tDelta = tDelta
}

// xor2Iterator uses varbit_int decoding for timestamp delta-of-delta.
type xor2Iterator struct {
	xorIterator
}

// Next advances the iterator by one sample, mirroring the three layouts
// written by xor2Appender.Append. It returns ValNone once all samples counted
// in the header have been read or after a read error, which is kept in it.err.
func (it *xor2Iterator) Next() ValueType {
	if it.err != nil || it.numRead == it.numTotal {
		return ValNone
	}

	if it.numRead == 0 {
		// First sample: varint timestamp and raw 64-bit value.
		t, err := binary.ReadVarint(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		v, err := it.br.readBits(64)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.t = t
		it.val = math.Float64frombits(v)

		it.numRead++
		return ValFloat
	}
	if it.numRead == 1 {
		// Second sample: uvarint timestamp delta.
		tDelta, err := binary.ReadUvarint(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.tDelta = tDelta
		it.t += int64(it.tDelta)

		// NOTE(review): numRead is not advanced here; presumably the embedded
		// xorIterator.readValue does it - confirm.
		return it.readValue()
	}

	// Read delta-of-delta using varbit_int encoding.
	dod, err := readVarbitInt(&it.br)
	if err != nil {
		it.err = err
		return ValNone
	}

	it.tDelta = uint64(int64(it.tDelta) + dod)
	it.t += int64(it.tDelta)

	return it.readValue()
}

// Seek advances the iterator until the current timestamp is at least t. It
// always reads at least one sample if none has been read yet, and it never
// moves backwards.
func (it *xor2Iterator) Seek(t int64) ValueType {
	if it.err != nil {
		return ValNone
	}

	for t > it.t || it.numRead == 0 {
		if it.Next() == ValNone {
			return ValNone
		}
	}
	return ValFloat
}

// XOR2STChunk holds XOR2 encoded sample data with start timestamp.
// It uses varbit_int encoding for timestamp and start timestamp delta-of-delta
// instead of the coarse 4-bucket encoding used by XORChunk.
type XOR2STChunk struct {
	XORChunk
}

// NewXOR2STChunk returns a new, empty chunk with XOR2ST encoding. The stream
// starts out holding only the chunk header.
func NewXOR2STChunk() *XOR2STChunk {
	b := make([]byte, chunkHeaderSize, chunkAllocationSize)
	return &XOR2STChunk{XORChunk: XORChunk{b: bstream{stream: b, count: 0}}}
}

// Encoding returns the encoding type.
func (*XOR2STChunk) Encoding() Encoding {
	return EncXOR2ST
}

// Appender implements the Chunk interface.
func (c *XOR2STChunk) Appender() (Appender, error) {
	// A stream that holds only the header has no samples, so there is no
	// state to recover.
	// NOTE(review): leading 0xff is presumably the "no leading/trailing window
	// yet" sentinel, as checked in xor18111Appender.writeVDelta - confirm
	// against xorAppender.
	if len(c.b.stream) == chunkHeaderSize {
		return &xor2stAppender{xorAppender: xorAppender{b: &c.b, t: math.MinInt64, leading: 0xff}}, nil
	}
	it := c.iterator(nil)

	// To get an appender we must know the state it would have if we had
	// appended all existing data from scratch.
	// We iterate through the end and populate via the iterator's state.
	for it.Next() != ValNone {
	}
	if err := it.Err(); err != nil {
		return nil, err
	}

	// Set the bit position for continuing writes.
	// The iterator's reader tracks how many bits remain unread in the last byte.
	c.b.count = it.br.valid

	a := &xor2stAppender{
		xorAppender: xorAppender{
			b:        &c.b,
			t:        it.t,
			v:        it.val,
			tDelta:   it.tDelta,
			leading:  it.leading,
			trailing: it.trailing,
		},
		st:      it.st,
		stDelta: it.stDelta,
	}
	return a, nil
}

// iterator returns an iterator positioned before the first sample of the
// chunk. If it is already an *xor2stIterator, it is reset and reused instead
// of allocating a new one.
func (c *XOR2STChunk) iterator(it Iterator) *xor2stIterator {
	if xor2Iter, ok := it.(*xor2stIterator); ok {
		xor2Iter.Reset(c.b.bytes())
		return xor2Iter
	}
	return &xor2stIterator{
		xorIterator: xorIterator{
			// The sample data starts right after the chunk header, which
			// holds the number of samples.
			br:       newBReader(c.b.bytes()[chunkHeaderSize:]),
			numTotal: binary.BigEndian.Uint16(c.b.bytes()),
			t:        math.MinInt64,
		},
	}
}

// Iterator implements the Chunk interface.
func (c *XOR2STChunk) Iterator(it Iterator) Iterator {
	return c.iterator(it)
}

// xor2stAppender appends samples together with their start timestamp, using
// varbit_int encoding for both the timestamp and the start timestamp
// delta-of-delta.
type xor2stAppender struct {
	xorAppender
	st      int64 // Start timestamp of the last appended sample.
	stDelta int64 // Difference between the last two start timestamps.
}

// Append writes the sample (st, t, v) to the chunk. The start timestamp is
// always written directly after the timestamp, using the same three-stage
// scheme:
//   - sample 0: t and st as varints followed by the 64 raw bits of v;
//   - sample 1: the t delta as a uvarint, the st delta as a varint, then the
//     value delta;
//   - later samples: the delta-of-delta of t and of st via putVarbitInt, then
//     the value delta.
func (a *xor2stAppender) Append(st, t int64, v float64) {
	var (
		tDelta  uint64
		stDelta int64
	)
	// The number of samples written so far is kept in the chunk header.
	num := binary.BigEndian.Uint16(a.b.bytes())
	switch num {
	case 0:
		// buf is reused for both varints.
		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutVarint(buf, t)] {
			a.b.writeByte(b)
		}

		for _, b := range buf[:binary.PutVarint(buf, st)] {
			a.b.writeByte(b)
		}

		a.b.writeBits(math.Float64bits(v), 64)
	case 1:
		tDelta = uint64(t - a.t)

		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutUvarint(buf, tDelta)] {
			a.b.writeByte(b)
		}

		// The start timestamp delta is signed, unlike the timestamp delta.
		stDelta = st - a.st
		for _, b := range buf[:binary.PutVarint(buf, stDelta)] {
			a.b.writeByte(b)
		}

		a.writeVDelta(v)
	default:
		tDelta = uint64(t - a.t)
		dod := int64(tDelta - a.tDelta)

		putVarbitInt(a.b, dod)

		stDelta = st - a.st
		stDod := stDelta - a.stDelta

		putVarbitInt(a.b, stDod)

		a.writeVDelta(v)
	}

	// Remember the sample for the next delta computation and bump the
	// sample count in the header.
	a.t = t
	a.v = v
	binary.BigEndian.PutUint16(a.b.bytes(), num+1)
	a.tDelta = tDelta
	a.st = st
	a.stDelta = stDelta
}

// xor2stIterator uses varbit_int decoding for the timestamp and start
// timestamp delta-of-delta.
+type xor2stIterator struct { + xorIterator + st int64 + stDelta int64 +} + +func (it *xor2stIterator) Next() ValueType { + if it.err != nil || it.numRead == it.numTotal { + return ValNone + } + + if it.numRead == 0 { + t, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.t = t + + st, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.st = st + + v, err := it.br.readBits(64) + if err != nil { + it.err = err + return ValNone + } + it.val = math.Float64frombits(v) + + it.numRead++ + return ValFloat + } + if it.numRead == 1 { + tDelta, err := binary.ReadUvarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.tDelta = tDelta + it.t += int64(it.tDelta) + + stDelta, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.stDelta = stDelta + it.st += it.stDelta + + return it.readValue() + } + + // Read delta-of-delta using varbit_int encoding. + dod, err := readVarbitInt(&it.br) + if err != nil { + it.err = err + return ValNone + } + + it.tDelta = uint64(int64(it.tDelta) + dod) + it.t += int64(it.tDelta) + + stDod, err := readVarbitInt(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.stDelta += stDod + it.st += it.stDelta + + return it.readValue() +} + +func (it *xor2stIterator) Reset(b []byte) { + it.xorIterator.Reset(b) + + it.st = 0 + it.stDelta = 0 +} + +func (it *xor2stIterator) Seek(t int64) ValueType { + if it.err != nil { + return ValNone + } + + for t > it.t || it.numRead == 0 { + if it.Next() == ValNone { + return ValNone + } + } + return ValFloat +} + +func (it *xor2stIterator) AtST() int64 { + return it.st +} + +// XOR2STotelChunk holds XOR2 encoded sample data with start timestamp. +// It uses varbit_int encoding for timestamp and start timestamp delta-of-delta +// instead of the coarse 4-bucket encoding used by XORChunk. 
+type XOR2STotelChunk struct { + XORChunk +} + +// NewXOR2STotelChunk returns a new chunk with XOR2 encoding. +func NewXOR2STotelChunk() *XOR2STotelChunk { + b := make([]byte, chunkHeaderSize, chunkAllocationSize) + return &XOR2STotelChunk{XORChunk: XORChunk{b: bstream{stream: b, count: 0}}} +} + +// Encoding returns the encoding type. +func (*XOR2STotelChunk) Encoding() Encoding { + return EncXOR2STotel +} + +// Appender implements the Chunk interface. +func (c *XOR2STotelChunk) Appender() (Appender, error) { + if len(c.b.stream) == chunkHeaderSize { + return &xor2stOtelAppender{b: &c.b, t: math.MinInt64, leading: 0xff}, nil + } + it := c.iterator(nil) + + // To get an appender we must know the state it would have if we had + // appended all existing data from scratch. + // We iterate through the end and populate via the iterator's state. + for it.Next() != ValNone { + } + if err := it.Err(); err != nil { + return nil, err + } + + // Set the bit position for continuing writes. + // The iterator's reader tracks how many bits remain unread in the last byte. + c.b.count = it.br.valid + + a := &xor2stOtelAppender{ + b: &c.b, + t: it.t, + st: it.st, + v: it.val, + tDelta: it.tDelta, + stDelta: it.stDelta, + leading: it.leading, + trailing: it.trailing, + state: it.state, + } + return a, nil +} + +func (c *XOR2STotelChunk) iterator(it Iterator) *xor2stOtelIterator { + if xor2Iter, ok := it.(*xor2stOtelIterator); ok { + xor2Iter.Reset(c.b.bytes()) + return xor2Iter + } + return &xor2stOtelIterator{ + xorIterator: xorIterator{ + br: newBReader(c.b.bytes()[chunkHeaderSize:]), + numTotal: binary.BigEndian.Uint16(c.b.bytes()), + t: math.MinInt64, + }, + } +} + +// Iterator implements the Chunk interface. +func (c *XOR2STotelChunk) Iterator(it Iterator) Iterator { + return c.iterator(it) +} + +const ( + otelSTrandom = iota // Samples have no particular relationship between timestamp and start timestamp (including ST==0 and constant case). 
+ otelSTequalT // Leading samples have start timestamp equal to their timestamp. + otelSTdiffConst // Leading samples (except first) have the same delta between timestamp and start timestamp. +) + +// xor2Appender uses varbit_int encoding for timestamp delta-of-delta. +type xor2stOtelAppender struct { + b *bstream + + t int64 + st int64 + v float64 + tDelta uint64 + stDelta int64 + + leading uint8 + trailing uint8 + state uint8 +} + +func (a *xor2stOtelAppender) writeVDelta(v float64) { + xorWrite(a.b, v, a.v, &a.leading, &a.trailing) +} + +func (*xor2stOtelAppender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) { + panic("appended a histogram sample to a float chunk") +} + +func (*xor2stOtelAppender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) { + panic("appended a float histogram sample to a float chunk") +} + +func (a *xor2stOtelAppender) Append(st, t int64, v float64) { + var ( + tDelta uint64 + stDelta int64 + ) + num := binary.BigEndian.Uint16(a.b.bytes()) + switch num { + case 0: + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutVarint(buf, t)] { + a.b.writeByte(b) + } + + for _, b := range buf[:binary.PutVarint(buf, st)] { + a.b.writeByte(b) + } + + a.b.writeBits(math.Float64bits(v), 64) + case 1: + tDelta = uint64(t - a.t) + + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutUvarint(buf, tDelta)] { + a.b.writeByte(b) + } + + stDelta = st - a.st + for _, b := range buf[:binary.PutVarint(buf, stDelta)] { + a.b.writeByte(b) + } + if st == t { + a.state = otelSTequalT + } else if st != a.st { + a.state = otelSTdiffConst + } + + a.writeVDelta(v) + default: + tDelta = uint64(t - a.t) + dod := int64(tDelta - a.tDelta) + + putVarbitInt(a.b, dod) + + stDelta = st - a.st + + state := a.state + switch { + case state == otelSTrandom: + stDod := stDelta - a.stDelta + + 
putVarbitInt(a.b, stDod) + case state == otelSTequalT && st == t || state == otelSTdiffConst && t-st == a.t-a.st: + a.b.writeBit(zero) // Indicate that there is no state change. + default: + a.b.writeBit(one) // Indicate that there is a state change. + a.state = otelSTrandom + stDod := stDelta - a.stDelta + + putVarbitInt(a.b, stDod) + } + + a.writeVDelta(v) + } + + a.t = t + a.v = v + binary.BigEndian.PutUint16(a.b.bytes(), num+1) + a.tDelta = tDelta + a.st = st + a.stDelta = stDelta +} + +// xor2Iterator uses varbit_int decoding for timestamp delta-of-delta. +type xor2stOtelIterator struct { + xorIterator + st int64 + stDelta int64 + state uint8 +} + +func (it *xor2stOtelIterator) Next() ValueType { + if it.err != nil || it.numRead == it.numTotal { + return ValNone + } + + if it.numRead == 0 { + t, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.t = t + + st, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.st = st + + v, err := it.br.readBits(64) + if err != nil { + it.err = err + return ValNone + } + it.val = math.Float64frombits(v) + + it.numRead++ + return ValFloat + } + if it.numRead == 1 { + tDelta, err := binary.ReadUvarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.tDelta = tDelta + it.t += int64(it.tDelta) + + stDelta, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.stDelta = stDelta + prevSt := it.st + it.st += it.stDelta + + if it.st == it.t { + it.state = otelSTequalT + } else if it.st != prevSt { + it.state = otelSTdiffConst + } + + return it.readValue() + } + + // Note the previous timestamp. + t := it.t + + // Read delta-of-delta using varbit_int encoding. 
+ dod, err := readVarbitInt(&it.br) + if err != nil { + it.err = err + return ValNone + } + + it.tDelta = uint64(int64(it.tDelta) + dod) + it.t += int64(it.tDelta) + + if it.state == otelSTrandom { + stDod, err := readVarbitInt(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.stDelta += stDod + it.st += it.stDelta + } else { + stateChange, err := it.br.readBit() + if err != nil { + it.err = err + return ValNone + } + if stateChange == zero { + if it.state == otelSTequalT { + it.stDelta = it.t - it.st + it.st = it.t + } else { + st := it.t + it.st - t + it.stDelta = st - it.st + it.st = st + } + } else { + it.state = otelSTrandom + stDod, err := readVarbitInt(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.stDelta += stDod + it.st += it.stDelta + } + } + + return it.readValue() +} + +func (it *xor2stOtelIterator) Reset(b []byte) { + it.xorIterator.Reset(b) + + it.st = 0 + it.stDelta = 0 + it.state = otelSTrandom +} + +func (it *xor2stOtelIterator) Seek(t int64) ValueType { + if it.err != nil { + return ValNone + } + + for t > it.t || it.numRead == 0 { + if it.Next() == ValNone { + return ValNone + } + } + return ValFloat +} + +func (it *xor2stOtelIterator) AtST() int64 { + return it.st +} + func xorWrite(b *bstream, newValue, currentValue float64, leading, trailing *uint8) { delta := math.Float64bits(newValue) ^ math.Float64bits(currentValue) diff --git a/tsdb/chunkenc/xor18111.go b/tsdb/chunkenc/xor18111.go new file mode 100644 index 0000000000..2658705348 --- /dev/null +++ b/tsdb/chunkenc/xor18111.go @@ -0,0 +1,688 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file implements the XOR18111 chunk encoding, which corresponds to the +// encoding proposed in https://github.com/prometheus/prometheus/pull/18111. + +package chunkenc + +import ( + "encoding/binary" + "math" + "math/bits" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/value" +) + +// XOR18111Chunk implements XOR encoding with adaptive control bits and staleness +// optimization, as proposed in https://github.com/prometheus/prometheus/pull/18111. +// It starts with 4-bit control codes (like original XOR) for perfectly regular +// data, and switches to 5-bit control codes when irregular patterns are detected. +// This eliminates overhead on perfectly regular data while maintaining benefits +// for irregular data. +// +// This is a standalone implementation (not embedding XORChunk) for better +// inlining performance. +type XOR18111Chunk struct { + b bstream +} + +// NewXOR18111Chunk returns a new chunk with XOR18111 encoding. +func NewXOR18111Chunk() *XOR18111Chunk { + b := make([]byte, chunkHeaderSize, chunkAllocationSize) + return &XOR18111Chunk{b: bstream{stream: b, count: 0}} +} + +func (c *XOR18111Chunk) Reset(stream []byte) { + c.b.Reset(stream) +} + +// Encoding returns the encoding type. +func (*XOR18111Chunk) Encoding() Encoding { + return EncXOR18111 +} + +// Bytes returns the underlying byte slice of the chunk. +func (c *XOR18111Chunk) Bytes() []byte { + return c.b.bytes() +} + +// NumSamples returns the number of samples in the chunk. 
func (c *XOR18111Chunk) NumSamples() int {
	return int(binary.BigEndian.Uint16(c.Bytes()))
}

// Compact implements the Chunk interface. It copies the stream into a right-
// sized buffer when the spare capacity exceeds chunkCompactCapacityThreshold.
func (c *XOR18111Chunk) Compact() {
	if l := len(c.b.stream); cap(c.b.stream) > l+chunkCompactCapacityThreshold {
		buf := make([]byte, l)
		copy(buf, c.b.stream)
		c.b.stream = buf
	}
}

// Appender implements the Chunk interface.
func (c *XOR18111Chunk) Appender() (Appender, error) {
	// A stream that holds only the header has no samples, so there is no
	// state to recover. leading 0xff marks that no leading/trailing window
	// has been established yet (see writeVDelta).
	if len(c.b.stream) == chunkHeaderSize {
		return &xor18111Appender{
			b:       &c.b,
			t:       math.MinInt64,
			leading: 0xff,
			mode:    xor18111ModeCompact,
		}, nil
	}
	it := c.iterator(nil)

	// To get an appender we must know the state it would have if we had
	// appended all existing data from scratch.
	// We iterate through the end and populate via the iterator's state.
	for it.Next() != ValNone {
	}
	if err := it.Err(); err != nil {
		return nil, err
	}

	// Set the bit position for continuing writes.
	// The iterator's reader tracks how many bits remain unread in the last byte.
	c.b.count = it.br.valid

	a := &xor18111Appender{
		b: &c.b,
		t: it.t,
		// Value deltas are computed against the last non-stale value, not
		// against the last value returned by the iterator.
		v:        it.baselineV,
		tDelta:   it.tDelta,
		leading:  it.leading,
		trailing: it.trailing,
		mode:     it.mode,
	}
	return a, nil
}

// iterator returns an iterator positioned before the first sample of the
// chunk. If it is already an *xor18111Iterator, it is reset and reused
// instead of allocating a new one.
func (c *XOR18111Chunk) iterator(it Iterator) *xor18111Iterator {
	if xor18111Iter, ok := it.(*xor18111Iterator); ok {
		xor18111Iter.Reset(c.b.bytes())
		return xor18111Iter
	}
	return &xor18111Iterator{
		// The sample data starts right after the chunk header, which holds
		// the number of samples.
		br:        newBReader(c.b.bytes()[chunkHeaderSize:]),
		numTotal:  binary.BigEndian.Uint16(c.b.bytes()),
		t:         math.MinInt64,
		baselineV: 0,
		mode:      xor18111ModeCompact,
	}
}

// Iterator implements the Chunk interface.
func (c *XOR18111Chunk) Iterator(it Iterator) Iterator {
	return c.iterator(it)
}

const (
	// xor18111ModeCompact uses 4-bit control codes (0, 10, 110, 1110, 1111).
	xor18111ModeCompact = 0
	// xor18111ModeFull uses 5-bit control codes (0, 10, 110, 1110, 11110, 11111).
	xor18111ModeFull = 1
)

// xor18111Appender uses adaptive control bit encoding with staleness optimization.
type xor18111Appender struct {
	b *bstream

	t      int64   // Timestamp of the last appended sample.
	v      float64 // Last appended non-stale value; baseline for value deltas.
	tDelta uint64  // Difference between the last two timestamps.

	// Leading/trailing zero window of the last explicitly encoded value
	// delta. leading == 0xff means that no window has been written yet.
	leading  uint8
	trailing uint8

	// Timestamp control code mode: xor18111ModeCompact or xor18111ModeFull.
	mode uint8
}

// Append writes the sample (t, v) to the chunk. The first argument is ignored
// by this encoding.
//
// The layout depends on how many samples the two-byte chunk header already
// counts:
//   - sample 0: t as a varint followed by the 64 raw bits of v;
//   - sample 1: the timestamp delta as a uvarint followed by the value delta;
//   - later samples: the timestamp delta-of-delta with a mode-dependent
//     control code, followed by the value delta.
func (a *xor18111Appender) Append(_, t int64, v float64) {
	var tDelta uint64
	// The number of samples written so far is kept in the chunk header.
	num := binary.BigEndian.Uint16(a.b.bytes())
	switch num {
	case 0:
		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutVarint(buf, t)] {
			a.b.writeByte(b)
		}
		a.b.writeBits(math.Float64bits(v), 64)
	case 1:
		tDelta = uint64(t - a.t)

		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutUvarint(buf, tDelta)] {
			a.b.writeByte(b)
		}

		a.writeVDelta(v)
	default:
		tDelta = uint64(t - a.t)
		dod := int64(tDelta - a.tDelta)

		if a.mode == xor18111ModeCompact {
			switch {
			case dod == 0:
				a.b.writeBit(zero)
			case bitRange(dod, 14):
				// Control code 10 and the low 14 bits of dod, packed into
				// two whole bytes.
				a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1)))
				a.b.writeByte(uint8(dod))
			case bitRange(dod, 17):
				a.b.writeBits(0b110, 3)
				a.b.writeBits(uint64(dod), 17)
			case bitRange(dod, 20):
				a.b.writeBits(0b1110, 4)
				a.b.writeBits(uint64(dod), 20)
			default:
				// dod does not fit any compact bucket: write the mode switch
				// marker 1111 and encode this and all following dods in full
				// mode. The appender never switches back.
				a.b.writeBits(0b1111, 4)
				a.mode = xor18111ModeFull
				a.writeTimestampDeltaFull(dod)
			}
		} else {
			a.writeTimestampDeltaFull(dod)
		}
		a.writeVDelta(v)
	}

	a.t = t
	// Only update baseline for non-stale values.
	if !value.IsStaleNaN(v) {
		a.v = v
	}
	binary.BigEndian.PutUint16(a.b.bytes(), num+1)
	a.tDelta = tDelta
}

// writeTimestampDeltaFull encodes the timestamp delta-of-delta dod with the
// 5-bit control codes of full mode:
//
//	0      dod == 0
//	10     7 bit dod
//	110    14 bit dod
//	1110   20 bit dod
//	11110  multiplier encoding: dod = multiplier*tDelta + residual
//	11111  64 bit dod
//
// It must be called before a.tDelta is updated for the current sample, since
// the multiplier encoding is relative to the previous timestamp delta.
func (a *xor18111Appender) writeTimestampDeltaFull(dod int64) {
	switch {
	case dod == 0:
		a.b.writeBit(zero)
	case bitRange(dod, 7):
		a.b.writeBits(0b10, 2)
		a.b.writeBits(uint64(dod), 7)
	case bitRange(dod, 14):
		a.b.writeBits(0b110, 3)
		a.b.writeBits(uint64(dod), 14)
	case bitRange(dod, 20):
		a.b.writeBits(0b1110, 4)
		a.b.writeBits(uint64(dod), 20)
	default:
		// Try multiplier encoding.
		encoded := false
		if a.tDelta > 0 && dod != 0 {
			// Round dod/tDelta to the nearest integer, with halves rounded
			// away from zero.
			multiplierF := float64(dod) / float64(a.tDelta)
			multiplier := int64(multiplierF)
			if multiplierF > 0 && multiplierF-float64(multiplier) >= 0.5 {
				multiplier++
			} else if multiplierF < 0 && float64(multiplier)-multiplierF >= 0.5 {
				multiplier--
			}

			// The magnitude is stored minus one in 4 bits, which limits the
			// multiplier to 1..15 in either direction.
			if multiplier >= -15 && multiplier <= 15 && multiplier != 0 {
				reconstructed := multiplier * int64(a.tDelta)
				residual := dod - reconstructed

				// Only use multiplier encoding if residual fits in 8 bits signed.
				if residual >= -128 && residual <= 127 {
					// Encode: 11110 [sign] [magnitude] [residual] (18 bits total).
					a.b.writeBits(0b11110, 5)
					if multiplier > 0 {
						a.b.writeBit(zero)
						a.b.writeBits(uint64(multiplier-1), 4)
					} else {
						a.b.writeBit(one)
						a.b.writeBits(uint64(-multiplier-1), 4)
					}
					a.b.writeBits(uint64(int8(residual)), 8)
					encoded = true
				}
			}
		}

		// Fall back to the raw 64 bit dod.
		if !encoded {
			a.b.writeBits(0b11111, 5)
			a.b.writeBits(uint64(dod), 64)
		}
	}
}

// writeVDelta encodes the value delta with optimized staleness handling.
//
// Value control codes:
//
//	0   value equals the baseline (last non-stale value)
//	10  XOR delta reusing the previous leading/trailing window
//	11  5 bit leading zero count, 6 bit significant bit count, XOR delta;
//	    the combination leading=31, sigbits=63 marks a stale sample
func (a *xor18111Appender) writeVDelta(v float64) {
	if value.IsStaleNaN(v) {
		// Write the impossible pattern: 11 + leading=31 + sigbits=63.
		// Normal NaN encoding would use ~110 bits; this uses only 13 bits.
		// The leading/trailing window is left untouched.
		a.b.writeBit(one)
		a.b.writeBit(one)
		a.b.writeBits(31, 5)
		a.b.writeBits(63, 6)
		return
	}

	// Normal XOR encoding against the baseline (last non-stale) value.
	delta := math.Float64bits(v) ^ math.Float64bits(a.v)

	if delta == 0 {
		a.b.writeBit(zero)
		return
	}
	a.b.writeBit(one)

	newLeading := uint8(bits.LeadingZeros64(delta))
	newTrailing := uint8(bits.TrailingZeros64(delta))

	// Clamp number of leading zeros to avoid overflow when encoding.
	if newLeading >= 32 {
		newLeading = 31
	}

	if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing {
		// Stick with the current leading/trailing.
		a.b.writeBit(zero)
		a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing))
		return
	}

	// Update leading/trailing.
	a.leading, a.trailing = newLeading, newTrailing

	a.b.writeBit(one)
	a.b.writeBits(uint64(newLeading), 5)

	// Note that if newLeading == newTrailing == 0, then sigbits == 64. But
	// that value doesn't actually fit into the 6 bits we have. Luckily, we
	// never need to encode 0 significant bits, since that would put us in
	// the other case (delta == 0). So instead we write out a 0 and adjust
	// it back to 64 on unpacking.
	sigbits := 64 - newLeading - newTrailing
	a.b.writeBits(uint64(sigbits), 6)
	a.b.writeBits(delta>>newTrailing, int(sigbits))
}

// AppendHistogram panics, since this appender only supports float samples.
func (*xor18111Appender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a histogram sample to a float chunk")
}

// AppendFloatHistogram panics, since this appender only supports float samples.
func (*xor18111Appender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a float histogram sample to a float chunk")
}

// xor18111Iterator decodes XOR18111 chunks with adaptive control bits and staleness.
type xor18111Iterator struct {
	br       bstreamReader
	numTotal uint16 // Number of samples in the chunk, taken from the header.
	numRead  uint16 // Number of samples decoded so far.

	t   int64   // Timestamp of the current sample.
	val float64 // Value of the current sample; may be a stale NaN.

	// Leading/trailing zero window of the last explicitly encoded value delta.
	leading  uint8
	trailing uint8

	tDelta uint64 // Difference between the last two timestamps.
	err    error  // First read error; once set, iteration stops.

	baselineV float64 // Last non-stale value; value deltas are relative to it.
	mode      uint8   // xor18111ModeCompact or xor18111ModeFull.
}

// Seek advances the iterator until the current timestamp is at least t. It
// always reads at least one sample if none has been read yet, and it never
// moves backwards.
func (it *xor18111Iterator) Seek(t int64) ValueType {
	if it.err != nil {
		return ValNone
	}

	for t > it.t || it.numRead == 0 {
		if it.Next() == ValNone {
			return ValNone
		}
	}
	return ValFloat
}

// At returns the timestamp and value of the current sample.
func (it *xor18111Iterator) At() (int64, float64) {
	return it.t, it.val
}

// AtHistogram panics, since the chunk only holds float samples.
func (*xor18111Iterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) {
	panic("cannot call xor18111Iterator.AtHistogram")
}

// AtFloatHistogram panics, since the chunk only holds float samples.
func (*xor18111Iterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
	panic("cannot call xor18111Iterator.AtFloatHistogram")
}

// AtT returns the timestamp of the current sample.
func (it *xor18111Iterator) AtT() int64 {
	return it.t
}

// AtST always returns 0, since this encoding stores no start timestamp.
func (*xor18111Iterator) AtST() int64 {
	return 0
}

// Err returns the error that stopped the iteration, if any.
func (it *xor18111Iterator) Err() error {
	return it.err
}

// Reset points the iterator at the chunk bytes b (header included) and clears
// all decoding state, so that the iterator can be reused for another chunk.
func (it *xor18111Iterator) Reset(b []byte) {
	// The first 2 bytes contain chunk headers.
	// We skip that for actual samples.
	it.br = newBReader(b[chunkHeaderSize:])
	it.numTotal = binary.BigEndian.Uint16(b)

	it.numRead = 0
	it.t = 0
	it.val = 0
	it.leading = 0
	it.trailing = 0
	it.tDelta = 0
	it.err = nil
	it.baselineV = 0
	it.mode = xor18111ModeCompact
}

// Next advances the iterator by one sample and returns ValFloat, or ValNone
// when the chunk is exhausted or a read failed (the error is kept in it.err).
//
// The first sample is a varint timestamp followed by the 64 raw value bits,
// the second one a uvarint timestamp delta followed by a value delta. All
// later samples carry a timestamp delta-of-delta whose control code depends
// on the current mode, followed by a value delta.
func (it *xor18111Iterator) Next() ValueType {
	if it.err != nil || it.numRead == it.numTotal {
		return ValNone
	}

	if it.numRead == 0 {
		t, err := binary.ReadVarint(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		v, err := it.br.readBits(64)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.t = t
		it.val = math.Float64frombits(v)
		// A stale first sample leaves the baseline at zero, matching the
		// appender, which does not store stale values either.
		if !value.IsStaleNaN(it.val) {
			it.baselineV = it.val
		}
		it.numRead++
		return ValFloat
	}

	if it.numRead == 1 {
		tDelta, err := binary.ReadUvarint(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.tDelta = tDelta
		it.t += int64(it.tDelta)

		return it.readValue()
	}

	// Read timestamp delta-of-delta.
	if it.mode == xor18111ModeCompact {
		// Read the control code: up to four bits, terminated by the first
		// zero bit.
		var d byte
		for range 4 {
			d <<= 1
			// Try the fast read first and fall back to the regular read if
			// it fails.
			bit, err := it.br.readBitFast()
			if err != nil {
				bit, err = it.br.readBit()
			}
			if err != nil {
				it.err = err
				return ValNone
			}
			if bit == zero {
				break
			}
			d |= 1
		}

		// Check for mode switch marker (1111).
		if d == 0b1111 {
			// The dod of this sample follows in full mode encoding, as do
			// the dods of all later samples.
			it.mode = xor18111ModeFull
			if err := it.readTimestampDeltaFull(); err != nil {
				it.err = err
				return ValNone
			}
			return it.readValue()
		}

		var sz uint8
		var dod int64
		switch d {
		case 0b0:
			// dod == 0.
		case 0b10:
			sz = 14
		case 0b110:
			sz = 17
		case 0b1110:
			sz = 20
		}

		if sz != 0 {
			b, err := it.br.readBitsFast(sz)
			if err != nil {
				b, err = it.br.readBits(sz)
			}
			if err != nil {
				it.err = err
				return ValNone
			}
			// Values above 2^(sz-1) are negative numbers truncated to sz
			// bits. b is unsigned, so the subtraction wraps around and the
			// conversion to int64 yields the negative value.
			if b > (1 << (sz - 1)) {
				b -= 1 << sz
			}
			dod = int64(b)
		}

		it.tDelta = uint64(int64(it.tDelta) + dod)
		it.t += int64(it.tDelta)
	} else {
		if err := it.readTimestampDeltaFull(); err != nil {
			it.err = err
			return ValNone
		}
	}

	return it.readValue()
}

// readTimestampDeltaFull decodes a timestamp delta-of-delta written with the
// 5-bit control codes of full mode and applies it to it.tDelta and it.t:
//
//	0      dod == 0
//	10     7 bit dod
//	110    14 bit dod
//	1110   20 bit dod
//	11110  multiplier encoding: dod = multiplier*tDelta + residual
//	11111  64 bit dod
//
// Read errors are returned to the caller; it.err is not set here.
func (it *xor18111Iterator) readTimestampDeltaFull() error {
	// Read the control code: up to five bits, terminated by the first zero
	// bit.
	var d byte
	for range 5 {
		d <<= 1
		bit, err := it.br.readBitFast()
		if err != nil {
			bit, err = it.br.readBit()
		}
		if err != nil {
			return err
		}
		if bit == zero {
			break
		}
		d |= 1
	}

	var dod int64
	switch d {
	case 0b0:
		// dod == 0.
	case 0b10:
		b, err := it.br.readBitsFast(7)
		if err != nil {
			b, err = it.br.readBits(7)
		}
		if err != nil {
			return err
		}
		// Undo the truncation of negative numbers, as in Next.
		if b > (1 << 6) {
			b -= 1 << 7
		}
		dod = int64(b)
	case 0b110:
		b, err := it.br.readBitsFast(14)
		if err != nil {
			b, err = it.br.readBits(14)
		}
		if err != nil {
			return err
		}
		if b > (1 << 13) {
			b -= 1 << 14
		}
		dod = int64(b)
	case 0b1110:
		b, err := it.br.readBitsFast(20)
		if err != nil {
			b, err = it.br.readBits(20)
		}
		if err != nil {
			return err
		}
		if b > (1 << 19) {
			b -= 1 << 20
		}
		dod = int64(b)
	case 0b11110:
		// Multiplier encoding: sign bit, 4 bit magnitude minus one, and an
		// 8 bit signed residual.
		sign, err := it.br.readBit()
		if err != nil {
			return err
		}
		b, err := it.br.readBits(4)
		if err != nil {
			return err
		}
		multiplier := int64(b) + 1
		if sign == one {
			multiplier = -multiplier
		}

		residualBits, err := it.br.readBits(8)
		if err != nil {
			return err
		}
		residual := int64(int8(residualBits))

		// it.tDelta still holds the previous timestamp delta here, which is
		// what the appender used as the multiplier base.
		dod = multiplier*int64(it.tDelta) + residual
	case 0b11111:
		b, err := it.br.readBits(64)
		if err != nil {
			return err
		}
		dod = int64(b)
	}

	it.tDelta = uint64(int64(it.tDelta) + dod)
	it.t += int64(it.tDelta)
	return nil
}
+// readValue reads a value with optimized staleness detection. +func (it *xor18111Iterator) readValue() ValueType { + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + it.err = err + return ValNone + } + + if bit == zero { + // Value unchanged: return the baseline (last non-stale) value. + it.val = it.baselineV + it.numRead++ + return ValFloat + } + + bit, err = it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + it.err = err + return ValNone + } + + if bit == zero { + // Reuse leading/trailing zeros. + sz := 64 - int(it.leading) - int(it.trailing) + b, err := it.br.readBitsFast(uint8(sz)) + if err != nil { + b, err = it.br.readBits(uint8(sz)) + } + if err != nil { + it.err = err + return ValNone + } + + vbits := math.Float64bits(it.baselineV) + vbits ^= b << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + it.numRead++ + return ValFloat + } + + // Read new leading and sigbits. + newLeading, err := it.br.readBitsFast(5) + if err != nil { + newLeading, err = it.br.readBits(5) + } + if err != nil { + it.err = err + return ValNone + } + + sigbits, err := it.br.readBitsFast(6) + if err != nil { + sigbits, err = it.br.readBits(6) + } + if err != nil { + it.err = err + return ValNone + } + + // The pattern leading=31, sigbits=63 is impossible in normal XOR encoding + // (it would require trailing = 64 - 31 - 63 = -30) and is used as the + // staleness marker. 
+ if newLeading == 31 && sigbits == 63 { + it.val = math.Float64frombits(value.StaleNaN) + it.numRead++ + return ValFloat + } + + it.leading = uint8(newLeading) + + if sigbits == 0 { + sigbits = 64 + } + it.trailing = 64 - it.leading - uint8(sigbits) + + b, err := it.br.readBitsFast(uint8(sigbits)) + if err != nil { + b, err = it.br.readBits(uint8(sigbits)) + } + if err != nil { + it.err = err + return ValNone + } + + vbits := math.Float64bits(it.baselineV) + vbits ^= b << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + it.numRead++ + return ValFloat +} diff --git a/tsdb/chunkenc/xor18111st.go b/tsdb/chunkenc/xor18111st.go new file mode 100644 index 0000000000..c76836c31c --- /dev/null +++ b/tsdb/chunkenc/xor18111st.go @@ -0,0 +1,931 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file implements the XOR18111ST chunk encoding: XOR18111 with an +// additional start timestamp stored after each regular timestamp. + +package chunkenc + +import ( + "encoding/binary" + "math" + "math/bits" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/value" +) + +// XOR18111STChunk holds XOR18111ST encoded sample data: XOR18111 encoding +// with start timestamp stored alongside each sample's regular timestamp. +// The start timestamp delta-of-delta uses the same encoding mode as the +// regular timestamp but never triggers a mode switch itself. 
+type XOR18111STChunk struct { + b bstream +} + +// NewXOR18111STChunk returns a new chunk with XOR18111ST encoding. +func NewXOR18111STChunk() *XOR18111STChunk { + b := make([]byte, chunkHeaderSize, chunkAllocationSize) + return &XOR18111STChunk{b: bstream{stream: b, count: 0}} +} + +func (c *XOR18111STChunk) Reset(stream []byte) { + c.b.Reset(stream) +} + +// Encoding returns the encoding type. +func (*XOR18111STChunk) Encoding() Encoding { + return EncXOR18111ST +} + +// Bytes returns the underlying byte slice of the chunk. +func (c *XOR18111STChunk) Bytes() []byte { + return c.b.bytes() +} + +// NumSamples returns the number of samples in the chunk. +func (c *XOR18111STChunk) NumSamples() int { + return int(binary.BigEndian.Uint16(c.Bytes())) +} + +// Compact implements the Chunk interface. +func (c *XOR18111STChunk) Compact() { + if l := len(c.b.stream); cap(c.b.stream) > l+chunkCompactCapacityThreshold { + buf := make([]byte, l) + copy(buf, c.b.stream) + c.b.stream = buf + } +} + +// Appender implements the Chunk interface. +func (c *XOR18111STChunk) Appender() (Appender, error) { + if len(c.b.stream) == chunkHeaderSize { + return &xor18111stAppender{ + b: &c.b, + t: math.MinInt64, + leading: 0xff, + mode: xor18111ModeCompact, + }, nil + } + it := c.iterator(nil) + + // To get an appender we must know the state it would have if we had + // appended all existing data from scratch. + // We iterate through the end and populate via the iterator's state. + for it.Next() != ValNone { + } + if err := it.Err(); err != nil { + return nil, err + } + + // Set the bit position for continuing writes. + // The iterator's reader tracks how many bits remain unread in the last byte. 
+ c.b.count = it.br.valid + + a := &xor18111stAppender{ + b: &c.b, + t: it.t, + st: it.st, + v: it.baselineV, + tDelta: it.tDelta, + stDelta: it.stDelta, + leading: it.leading, + trailing: it.trailing, + mode: it.mode, + } + return a, nil +} + +func (c *XOR18111STChunk) iterator(it Iterator) *xor18111stIterator { + if xor18111stIter, ok := it.(*xor18111stIterator); ok { + xor18111stIter.Reset(c.b.bytes()) + return xor18111stIter + } + return &xor18111stIterator{ + br: newBReader(c.b.bytes()[chunkHeaderSize:]), + numTotal: binary.BigEndian.Uint16(c.b.bytes()), + t: math.MinInt64, + mode: xor18111ModeCompact, + } +} + +// Iterator implements the Chunk interface. +func (c *XOR18111STChunk) Iterator(it Iterator) Iterator { + return c.iterator(it) +} + +// xor18111stAppender appends samples with start timestamps using the XOR18111ST +// encoding. +type xor18111stAppender struct { + b *bstream + + t int64 + st int64 + v float64 + tDelta uint64 + stDelta int64 + + leading uint8 + trailing uint8 + + mode uint8 +} + +func (a *xor18111stAppender) Append(st, t int64, v float64) { + var ( + tDelta uint64 + stDelta int64 + ) + num := binary.BigEndian.Uint16(a.b.bytes()) + switch num { + case 0: + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutVarint(buf, t)] { + a.b.writeByte(b) + } + for _, b := range buf[:binary.PutVarint(buf, st)] { + a.b.writeByte(b) + } + a.b.writeBits(math.Float64bits(v), 64) + case 1: + tDelta = uint64(t - a.t) + stDelta = st - a.st + + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutUvarint(buf, tDelta)] { + a.b.writeByte(b) + } + for _, b := range buf[:binary.PutVarint(buf, stDelta)] { + a.b.writeByte(b) + } + + a.writeVDelta(v) + default: + tDelta = uint64(t - a.t) + dod := int64(tDelta - a.tDelta) + + // Encode the regular timestamp dod. This may switch the mode to full. 
+ if a.mode == xor18111ModeCompact { + switch { + case dod == 0: + a.b.writeBit(zero) + case bitRange(dod, 14): + a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) + a.b.writeByte(uint8(dod)) + case bitRange(dod, 17): + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(dod), 17) + case bitRange(dod, 20): + a.b.writeBits(0b1110, 4) + a.b.writeBits(uint64(dod), 20) + default: + a.b.writeBits(0b1111, 4) + a.mode = xor18111ModeFull + a.writeTimestampDeltaFull(dod) + } + } else { + a.writeTimestampDeltaFull(dod) + } + + // Encode the start timestamp dod using the current mode, without + // switching the mode. + stDelta = st - a.st + stDod := stDelta - a.stDelta + a.writeSTDod(stDod) + + a.writeVDelta(v) + } + + a.t = t + a.st = st + if !value.IsStaleNaN(v) { + a.v = v + } + binary.BigEndian.PutUint16(a.b.bytes(), num+1) + a.tDelta = tDelta + a.stDelta = stDelta +} + +// writeTimestampDeltaFull encodes a timestamp dod in full mode. This is +// identical to the method in xor18111Appender. +func (a *xor18111stAppender) writeTimestampDeltaFull(dod int64) { + switch { + case dod == 0: + a.b.writeBit(zero) + case bitRange(dod, 7): + a.b.writeBits(0b10, 2) + a.b.writeBits(uint64(dod), 7) + case bitRange(dod, 14): + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(dod), 14) + case bitRange(dod, 20): + a.b.writeBits(0b1110, 4) + a.b.writeBits(uint64(dod), 20) + default: + encoded := false + if a.tDelta > 0 && dod != 0 { + multiplierF := float64(dod) / float64(a.tDelta) + multiplier := int64(multiplierF) + if multiplierF > 0 && multiplierF-float64(multiplier) >= 0.5 { + multiplier++ + } else if multiplierF < 0 && float64(multiplier)-multiplierF >= 0.5 { + multiplier-- + } + + if multiplier >= -15 && multiplier <= 15 && multiplier != 0 { + reconstructed := multiplier * int64(a.tDelta) + residual := dod - reconstructed + + if residual >= -128 && residual <= 127 { + a.b.writeBits(0b11110, 5) + if multiplier > 0 { + a.b.writeBit(zero) + a.b.writeBits(uint64(multiplier-1), 4) + } else 
{ + a.b.writeBit(one) + a.b.writeBits(uint64(-multiplier-1), 4) + } + a.b.writeBits(uint64(int8(residual)), 8) + encoded = true + } + } + } + + if !encoded { + a.b.writeBits(0b11111, 5) + a.b.writeBits(uint64(dod), 64) + } + } +} + +// writeSTDod encodes the start timestamp delta-of-delta using the current mode +// without ever triggering a mode switch. In compact mode the bit patterns are +// the same as the regular timestamp compact encoding, except that 0b1111 is +// the 64-bit fallback rather than a mode-switch marker. +func (a *xor18111stAppender) writeSTDod(stDod int64) { + if a.mode == xor18111ModeCompact { + switch { + case stDod == 0: + a.b.writeBit(zero) + case bitRange(stDod, 14): + a.b.writeByte(0b10<<6 | (uint8(stDod>>8) & (1<<6 - 1))) + a.b.writeByte(uint8(stDod)) + case bitRange(stDod, 17): + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(stDod), 17) + case bitRange(stDod, 20): + a.b.writeBits(0b1110, 4) + a.b.writeBits(uint64(stDod), 20) + default: + // 64-bit fallback: 1111 + 64 bits, no mode switch. + a.b.writeBits(0b1111, 4) + a.b.writeBits(uint64(stDod), 64) + } + } else { + // Full mode: same 5-bit encoding as the timestamp, no mode switch. + switch { + case stDod == 0: + a.b.writeBit(zero) + case bitRange(stDod, 7): + a.b.writeBits(0b10, 2) + a.b.writeBits(uint64(stDod), 7) + case bitRange(stDod, 14): + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(stDod), 14) + case bitRange(stDod, 20): + a.b.writeBits(0b1110, 4) + a.b.writeBits(uint64(stDod), 20) + default: + // Try multiplier encoding (uses tDelta as the reference, same as + // timestamp full mode). 
+ encoded := false + if a.tDelta > 0 && stDod != 0 { + multiplierF := float64(stDod) / float64(a.tDelta) + multiplier := int64(multiplierF) + if multiplierF > 0 && multiplierF-float64(multiplier) >= 0.5 { + multiplier++ + } else if multiplierF < 0 && float64(multiplier)-multiplierF >= 0.5 { + multiplier-- + } + + if multiplier >= -15 && multiplier <= 15 && multiplier != 0 { + reconstructed := multiplier * int64(a.tDelta) + residual := stDod - reconstructed + + if residual >= -128 && residual <= 127 { + a.b.writeBits(0b11110, 5) + if multiplier > 0 { + a.b.writeBit(zero) + a.b.writeBits(uint64(multiplier-1), 4) + } else { + a.b.writeBit(one) + a.b.writeBits(uint64(-multiplier-1), 4) + } + a.b.writeBits(uint64(int8(residual)), 8) + encoded = true + } + } + } + + if !encoded { + a.b.writeBits(0b11111, 5) + a.b.writeBits(uint64(stDod), 64) + } + } + } +} + +// writeVDelta encodes the value delta with optimized staleness handling. +// This is identical to the method in xor18111Appender. +func (a *xor18111stAppender) writeVDelta(v float64) { + if value.IsStaleNaN(v) { + a.b.writeBit(one) + a.b.writeBit(one) + a.b.writeBits(31, 5) + a.b.writeBits(63, 6) + return + } + + delta := math.Float64bits(v) ^ math.Float64bits(a.v) + + if delta == 0 { + a.b.writeBit(zero) + return + } + a.b.writeBit(one) + + newLeading := uint8(bits.LeadingZeros64(delta)) + newTrailing := uint8(bits.TrailingZeros64(delta)) + + if newLeading >= 32 { + newLeading = 31 + } + + if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing { + a.b.writeBit(zero) + a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing)) + return + } + + a.leading, a.trailing = newLeading, newTrailing + + a.b.writeBit(one) + a.b.writeBits(uint64(newLeading), 5) + + sigbits := 64 - newLeading - newTrailing + a.b.writeBits(uint64(sigbits), 6) + a.b.writeBits(delta>>newTrailing, int(sigbits)) +} + +func (*xor18111stAppender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, 
bool) (Chunk, bool, Appender, error) { + panic("appended a histogram sample to a float chunk") +} + +func (*xor18111stAppender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) { + panic("appended a float histogram sample to a float chunk") +} + +// xor18111stIterator decodes XOR18111ST chunks. +type xor18111stIterator struct { + br bstreamReader + numTotal uint16 + numRead uint16 + + t int64 + st int64 + val float64 + + leading uint8 + trailing uint8 + + tDelta uint64 + stDelta int64 + err error + + baselineV float64 + mode uint8 +} + +func (it *xor18111stIterator) Seek(t int64) ValueType { + if it.err != nil { + return ValNone + } + + for t > it.t || it.numRead == 0 { + if it.Next() == ValNone { + return ValNone + } + } + return ValFloat +} + +func (it *xor18111stIterator) At() (int64, float64) { + return it.t, it.val +} + +func (*xor18111stIterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) { + panic("cannot call xor18111stIterator.AtHistogram") +} + +func (*xor18111stIterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + panic("cannot call xor18111stIterator.AtFloatHistogram") +} + +func (it *xor18111stIterator) AtT() int64 { + return it.t +} + +func (it *xor18111stIterator) AtST() int64 { + return it.st +} + +func (it *xor18111stIterator) Err() error { + return it.err +} + +func (it *xor18111stIterator) Reset(b []byte) { + it.br = newBReader(b[chunkHeaderSize:]) + it.numTotal = binary.BigEndian.Uint16(b) + + it.numRead = 0 + it.t = 0 + it.st = 0 + it.val = 0 + it.leading = 0 + it.trailing = 0 + it.tDelta = 0 + it.stDelta = 0 + it.err = nil + it.baselineV = 0 + it.mode = xor18111ModeCompact +} + +func (it *xor18111stIterator) Next() ValueType { + if it.err != nil || it.numRead == it.numTotal { + return ValNone + } + + if it.numRead == 0 { + t, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } 
+ it.t = t + + st, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.st = st + + v, err := it.br.readBits(64) + if err != nil { + it.err = err + return ValNone + } + it.val = math.Float64frombits(v) + if !value.IsStaleNaN(it.val) { + it.baselineV = it.val + } + it.numRead++ + return ValFloat + } + + if it.numRead == 1 { + tDelta, err := binary.ReadUvarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.tDelta = tDelta + it.t += int64(it.tDelta) + + stDelta, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.stDelta = stDelta + it.st += it.stDelta + + return it.readValue() + } + + // Read the regular timestamp dod. This may switch the mode to full. + if it.mode == xor18111ModeCompact { + var d byte + for range 4 { + d <<= 1 + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + it.err = err + return ValNone + } + if bit == zero { + break + } + d |= 1 + } + + if d == 0b1111 { + // Mode switch marker: switch to full and read a full-mode dod. + it.mode = xor18111ModeFull + if err := it.readTimestampDeltaFull(); err != nil { + it.err = err + return ValNone + } + } else { + var sz uint8 + var dod int64 + switch d { + case 0b0: + // dod == 0. + case 0b10: + sz = 14 + case 0b110: + sz = 17 + case 0b1110: + sz = 20 + } + + if sz != 0 { + b, err := it.br.readBitsFast(sz) + if err != nil { + b, err = it.br.readBits(sz) + } + if err != nil { + it.err = err + return ValNone + } + if b > (1 << (sz - 1)) { + b -= 1 << sz + } + dod = int64(b) + } + + it.tDelta = uint64(int64(it.tDelta) + dod) + it.t += int64(it.tDelta) + } + } else { + if err := it.readTimestampDeltaFull(); err != nil { + it.err = err + return ValNone + } + } + + // Read the start timestamp dod using the current mode, without mode switch. 
+ if err := it.readSTDod(); err != nil { + it.err = err + return ValNone + } + + return it.readValue() +} + +// readTimestampDeltaFull reads a timestamp dod in full mode and updates it.t. +func (it *xor18111stIterator) readTimestampDeltaFull() error { + var d byte + for range 5 { + d <<= 1 + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + return err + } + if bit == zero { + break + } + d |= 1 + } + + var dod int64 + switch d { + case 0b0: + // dod == 0. + case 0b10: + b, err := it.br.readBitsFast(7) + if err != nil { + b, err = it.br.readBits(7) + } + if err != nil { + return err + } + if b > (1 << 6) { + b -= 1 << 7 + } + dod = int64(b) + case 0b110: + b, err := it.br.readBitsFast(14) + if err != nil { + b, err = it.br.readBits(14) + } + if err != nil { + return err + } + if b > (1 << 13) { + b -= 1 << 14 + } + dod = int64(b) + case 0b1110: + b, err := it.br.readBitsFast(20) + if err != nil { + b, err = it.br.readBits(20) + } + if err != nil { + return err + } + if b > (1 << 19) { + b -= 1 << 20 + } + dod = int64(b) + case 0b11110: + sign, err := it.br.readBit() + if err != nil { + return err + } + b, err := it.br.readBits(4) + if err != nil { + return err + } + multiplier := int64(b) + 1 + if sign == one { + multiplier = -multiplier + } + residualBits, err := it.br.readBits(8) + if err != nil { + return err + } + dod = multiplier*int64(it.tDelta) + int64(int8(residualBits)) + case 0b11111: + b, err := it.br.readBits(64) + if err != nil { + return err + } + dod = int64(b) + } + + it.tDelta = uint64(int64(it.tDelta) + dod) + it.t += int64(it.tDelta) + return nil +} + +// readSTDod reads the start timestamp dod using the current mode without +// triggering a mode switch, and updates it.st. 
+func (it *xor18111stIterator) readSTDod() error { + if it.mode == xor18111ModeCompact { + var d byte + for range 4 { + d <<= 1 + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + return err + } + if bit == zero { + break + } + d |= 1 + } + + var sz uint8 + var stDod int64 + switch d { + case 0b0: + // stDod == 0. + case 0b10: + sz = 14 + case 0b110: + sz = 17 + case 0b1110: + sz = 20 + case 0b1111: + // 64-bit fallback: no mode switch. + b, err := it.br.readBits(64) + if err != nil { + return err + } + stDod = int64(b) + it.stDelta += stDod + it.st += it.stDelta + return nil + } + + if sz != 0 { + b, err := it.br.readBitsFast(sz) + if err != nil { + b, err = it.br.readBits(sz) + } + if err != nil { + return err + } + if b > (1 << (sz - 1)) { + b -= 1 << sz + } + stDod = int64(b) + } + + it.stDelta += stDod + it.st += it.stDelta + return nil + } + + // Full mode: same 5-bit encoding as the timestamp, no mode switch. + var d byte + for range 5 { + d <<= 1 + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + return err + } + if bit == zero { + break + } + d |= 1 + } + + var stDod int64 + switch d { + case 0b0: + // stDod == 0. 
+ case 0b10: + b, err := it.br.readBitsFast(7) + if err != nil { + b, err = it.br.readBits(7) + } + if err != nil { + return err + } + if b > (1 << 6) { + b -= 1 << 7 + } + stDod = int64(b) + case 0b110: + b, err := it.br.readBitsFast(14) + if err != nil { + b, err = it.br.readBits(14) + } + if err != nil { + return err + } + if b > (1 << 13) { + b -= 1 << 14 + } + stDod = int64(b) + case 0b1110: + b, err := it.br.readBitsFast(20) + if err != nil { + b, err = it.br.readBits(20) + } + if err != nil { + return err + } + if b > (1 << 19) { + b -= 1 << 20 + } + stDod = int64(b) + case 0b11110: + sign, err := it.br.readBit() + if err != nil { + return err + } + b, err := it.br.readBits(4) + if err != nil { + return err + } + multiplier := int64(b) + 1 + if sign == one { + multiplier = -multiplier + } + residualBits, err := it.br.readBits(8) + if err != nil { + return err + } + stDod = multiplier*int64(it.tDelta) + int64(int8(residualBits)) + case 0b11111: + b, err := it.br.readBits(64) + if err != nil { + return err + } + stDod = int64(b) + } + + it.stDelta += stDod + it.st += it.stDelta + return nil +} + +// readValue reads a value with optimized staleness detection. +// This is identical to the method in xor18111Iterator. 
+func (it *xor18111stIterator) readValue() ValueType { + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + it.err = err + return ValNone + } + + if bit == zero { + it.val = it.baselineV + it.numRead++ + return ValFloat + } + + bit, err = it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + it.err = err + return ValNone + } + + if bit == zero { + sz := 64 - int(it.leading) - int(it.trailing) + b, err := it.br.readBitsFast(uint8(sz)) + if err != nil { + b, err = it.br.readBits(uint8(sz)) + } + if err != nil { + it.err = err + return ValNone + } + + vbits := math.Float64bits(it.baselineV) + vbits ^= b << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + it.numRead++ + return ValFloat + } + + newLeading, err := it.br.readBitsFast(5) + if err != nil { + newLeading, err = it.br.readBits(5) + } + if err != nil { + it.err = err + return ValNone + } + + sigbits, err := it.br.readBitsFast(6) + if err != nil { + sigbits, err = it.br.readBits(6) + } + if err != nil { + it.err = err + return ValNone + } + + if newLeading == 31 && sigbits == 63 { + it.val = math.Float64frombits(value.StaleNaN) + it.numRead++ + return ValFloat + } + + it.leading = uint8(newLeading) + + if sigbits == 0 { + sigbits = 64 + } + it.trailing = 64 - it.leading - uint8(sigbits) + + b, err := it.br.readBitsFast(uint8(sigbits)) + if err != nil { + b, err = it.br.readBits(uint8(sigbits)) + } + if err != nil { + it.err = err + return ValNone + } + + vbits := math.Float64bits(it.baselineV) + vbits ^= b << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + it.numRead++ + return ValFloat +} diff --git a/tsdb/chunkenc/xor18111st2.go b/tsdb/chunkenc/xor18111st2.go new file mode 100644 index 0000000000..d272545521 --- /dev/null +++ b/tsdb/chunkenc/xor18111st2.go @@ -0,0 +1,943 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the 
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file implements the XOR18111ST2 chunk encoding: XOR18111 with an +// additional start timestamp that has its own independent append mode. + +package chunkenc + +import ( + "encoding/binary" + "math" + "math/bits" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/value" +) + +// XOR18111ST2Chunk holds XOR18111ST2 encoded sample data: XOR18111 encoding +// with start timestamp stored alongside each sample's regular timestamp. +// The start timestamp has its own independent mode that can switch from compact +// to full independently of the regular timestamp mode. +type XOR18111ST2Chunk struct { + b bstream +} + +// NewXOR18111ST2Chunk returns a new chunk with XOR18111ST2 encoding. +func NewXOR18111ST2Chunk() *XOR18111ST2Chunk { + b := make([]byte, chunkHeaderSize, chunkAllocationSize) + return &XOR18111ST2Chunk{b: bstream{stream: b, count: 0}} +} + +func (c *XOR18111ST2Chunk) Reset(stream []byte) { + c.b.Reset(stream) +} + +// Encoding returns the encoding type. +func (*XOR18111ST2Chunk) Encoding() Encoding { + return EncXOR18111ST2 +} + +// Bytes returns the underlying byte slice of the chunk. +func (c *XOR18111ST2Chunk) Bytes() []byte { + return c.b.bytes() +} + +// NumSamples returns the number of samples in the chunk. +func (c *XOR18111ST2Chunk) NumSamples() int { + return int(binary.BigEndian.Uint16(c.Bytes())) +} + +// Compact implements the Chunk interface. 
+func (c *XOR18111ST2Chunk) Compact() { + if l := len(c.b.stream); cap(c.b.stream) > l+chunkCompactCapacityThreshold { + buf := make([]byte, l) + copy(buf, c.b.stream) + c.b.stream = buf + } +} + +// Appender implements the Chunk interface. +func (c *XOR18111ST2Chunk) Appender() (Appender, error) { + if len(c.b.stream) == chunkHeaderSize { + return &xor18111st2Appender{ + b: &c.b, + t: math.MinInt64, + leading: 0xff, + mode: xor18111ModeCompact, + stMode: xor18111ModeCompact, + }, nil + } + it := c.iterator(nil) + + // To get an appender we must know the state it would have if we had + // appended all existing data from scratch. + // We iterate through the end and populate via the iterator's state. + for it.Next() != ValNone { + } + if err := it.Err(); err != nil { + return nil, err + } + + // Set the bit position for continuing writes. + // The iterator's reader tracks how many bits remain unread in the last byte. + c.b.count = it.br.valid + + a := &xor18111st2Appender{ + b: &c.b, + t: it.t, + st: it.st, + v: it.baselineV, + tDelta: it.tDelta, + stDelta: it.stDelta, + leading: it.leading, + trailing: it.trailing, + mode: it.mode, + stMode: it.stMode, + } + return a, nil +} + +func (c *XOR18111ST2Chunk) iterator(it Iterator) *xor18111st2Iterator { + if xor18111st2Iter, ok := it.(*xor18111st2Iterator); ok { + xor18111st2Iter.Reset(c.b.bytes()) + return xor18111st2Iter + } + return &xor18111st2Iterator{ + br: newBReader(c.b.bytes()[chunkHeaderSize:]), + numTotal: binary.BigEndian.Uint16(c.b.bytes()), + t: math.MinInt64, + mode: xor18111ModeCompact, + stMode: xor18111ModeCompact, + } +} + +// Iterator implements the Chunk interface. +func (c *XOR18111ST2Chunk) Iterator(it Iterator) Iterator { + return c.iterator(it) +} + +// xor18111st2Appender appends samples with start timestamps using the +// XOR18111ST2 encoding. The start timestamp has its own independent mode. 
+type xor18111st2Appender struct { + b *bstream + + t int64 + st int64 + v float64 + tDelta uint64 + stDelta int64 + + leading uint8 + trailing uint8 + + mode uint8 + stMode uint8 +} + +func (a *xor18111st2Appender) Append(st, t int64, v float64) { + var ( + tDelta uint64 + stDelta int64 + ) + num := binary.BigEndian.Uint16(a.b.bytes()) + switch num { + case 0: + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutVarint(buf, t)] { + a.b.writeByte(b) + } + for _, b := range buf[:binary.PutVarint(buf, st)] { + a.b.writeByte(b) + } + a.b.writeBits(math.Float64bits(v), 64) + case 1: + tDelta = uint64(t - a.t) + stDelta = st - a.st + + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutUvarint(buf, tDelta)] { + a.b.writeByte(b) + } + for _, b := range buf[:binary.PutVarint(buf, stDelta)] { + a.b.writeByte(b) + } + + a.writeVDelta(v) + default: + tDelta = uint64(t - a.t) + dod := int64(tDelta - a.tDelta) + + // Encode the regular timestamp dod. This may switch the timestamp mode + // to full. + if a.mode == xor18111ModeCompact { + switch { + case dod == 0: + a.b.writeBit(zero) + case bitRange(dod, 14): + a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) + a.b.writeByte(uint8(dod)) + case bitRange(dod, 17): + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(dod), 17) + case bitRange(dod, 20): + a.b.writeBits(0b1110, 4) + a.b.writeBits(uint64(dod), 20) + default: + a.b.writeBits(0b1111, 4) + a.mode = xor18111ModeFull + a.writeTimestampDeltaFull(dod) + } + } else { + a.writeTimestampDeltaFull(dod) + } + + // Encode the start timestamp dod using the ST's own independent mode. + stDelta = st - a.st + stDod := stDelta - a.stDelta + a.writeSTDod(stDod) + + a.writeVDelta(v) + } + + a.t = t + a.st = st + if !value.IsStaleNaN(v) { + a.v = v + } + binary.BigEndian.PutUint16(a.b.bytes(), num+1) + a.tDelta = tDelta + a.stDelta = stDelta +} + +// writeTimestampDeltaFull encodes a timestamp dod in full mode. 
This is +// identical to the method in xor18111Appender. +func (a *xor18111st2Appender) writeTimestampDeltaFull(dod int64) { + switch { + case dod == 0: + a.b.writeBit(zero) + case bitRange(dod, 7): + a.b.writeBits(0b10, 2) + a.b.writeBits(uint64(dod), 7) + case bitRange(dod, 14): + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(dod), 14) + case bitRange(dod, 20): + a.b.writeBits(0b1110, 4) + a.b.writeBits(uint64(dod), 20) + default: + encoded := false + if a.tDelta > 0 && dod != 0 { + multiplierF := float64(dod) / float64(a.tDelta) + multiplier := int64(multiplierF) + if multiplierF > 0 && multiplierF-float64(multiplier) >= 0.5 { + multiplier++ + } else if multiplierF < 0 && float64(multiplier)-multiplierF >= 0.5 { + multiplier-- + } + + if multiplier >= -15 && multiplier <= 15 && multiplier != 0 { + reconstructed := multiplier * int64(a.tDelta) + residual := dod - reconstructed + + if residual >= -128 && residual <= 127 { + a.b.writeBits(0b11110, 5) + if multiplier > 0 { + a.b.writeBit(zero) + a.b.writeBits(uint64(multiplier-1), 4) + } else { + a.b.writeBit(one) + a.b.writeBits(uint64(-multiplier-1), 4) + } + a.b.writeBits(uint64(int8(residual)), 8) + encoded = true + } + } + } + + if !encoded { + a.b.writeBits(0b11111, 5) + a.b.writeBits(uint64(dod), 64) + } + } +} + +// writeSTDod encodes the start timestamp delta-of-delta using the ST's own +// independent mode. In compact mode, 0b1111 triggers a mode switch to full +// (same semantics as the regular timestamp). +func (a *xor18111st2Appender) writeSTDod(stDod int64) { + if a.stMode == xor18111ModeCompact { + switch { + case stDod == 0: + a.b.writeBit(zero) + case bitRange(stDod, 14): + a.b.writeByte(0b10<<6 | (uint8(stDod>>8) & (1<<6 - 1))) + a.b.writeByte(uint8(stDod)) + case bitRange(stDod, 17): + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(stDod), 17) + case bitRange(stDod, 20): + a.b.writeBits(0b1110, 4) + a.b.writeBits(uint64(stDod), 20) + default: + // Mode switch: write marker and switch to full. 
+ a.b.writeBits(0b1111, 4) + a.stMode = xor18111ModeFull + a.writeSTDodFull(stDod) + } + } else { + a.writeSTDodFull(stDod) + } +} + +// writeSTDodFull encodes a start timestamp dod in full mode. Uses stDelta as +// the multiplier reference since the ST advances independently of the +// timestamp. +func (a *xor18111st2Appender) writeSTDodFull(stDod int64) { + switch { + case stDod == 0: + a.b.writeBit(zero) + case bitRange(stDod, 7): + a.b.writeBits(0b10, 2) + a.b.writeBits(uint64(stDod), 7) + case bitRange(stDod, 14): + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(stDod), 14) + case bitRange(stDod, 20): + a.b.writeBits(0b1110, 4) + a.b.writeBits(uint64(stDod), 20) + default: + encoded := false + if a.stDelta != 0 && stDod != 0 { + multiplierF := float64(stDod) / float64(a.stDelta) + multiplier := int64(multiplierF) + if multiplierF > 0 && multiplierF-float64(multiplier) >= 0.5 { + multiplier++ + } else if multiplierF < 0 && float64(multiplier)-multiplierF >= 0.5 { + multiplier-- + } + + if multiplier >= -15 && multiplier <= 15 && multiplier != 0 { + reconstructed := multiplier * a.stDelta + residual := stDod - reconstructed + + if residual >= -128 && residual <= 127 { + a.b.writeBits(0b11110, 5) + if multiplier > 0 { + a.b.writeBit(zero) + a.b.writeBits(uint64(multiplier-1), 4) + } else { + a.b.writeBit(one) + a.b.writeBits(uint64(-multiplier-1), 4) + } + a.b.writeBits(uint64(int8(residual)), 8) + encoded = true + } + } + } + + if !encoded { + a.b.writeBits(0b11111, 5) + a.b.writeBits(uint64(stDod), 64) + } + } +} + +// writeVDelta encodes the value delta with optimized staleness handling. +// This is identical to the method in xor18111Appender. 
+func (a *xor18111st2Appender) writeVDelta(v float64) { + if value.IsStaleNaN(v) { + a.b.writeBit(one) + a.b.writeBit(one) + a.b.writeBits(31, 5) + a.b.writeBits(63, 6) + return + } + + delta := math.Float64bits(v) ^ math.Float64bits(a.v) + + if delta == 0 { + a.b.writeBit(zero) + return + } + a.b.writeBit(one) + + newLeading := uint8(bits.LeadingZeros64(delta)) + newTrailing := uint8(bits.TrailingZeros64(delta)) + + if newLeading >= 32 { + newLeading = 31 + } + + if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing { + a.b.writeBit(zero) + a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing)) + return + } + + a.leading, a.trailing = newLeading, newTrailing + + a.b.writeBit(one) + a.b.writeBits(uint64(newLeading), 5) + + sigbits := 64 - newLeading - newTrailing + a.b.writeBits(uint64(sigbits), 6) + a.b.writeBits(delta>>newTrailing, int(sigbits)) +} + +func (*xor18111st2Appender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) { + panic("appended a histogram sample to a float chunk") +} + +func (*xor18111st2Appender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) { + panic("appended a float histogram sample to a float chunk") +} + +// xor18111st2Iterator decodes XOR18111ST2 chunks. 
+type xor18111st2Iterator struct { + br bstreamReader + numTotal uint16 + numRead uint16 + + t int64 + st int64 + val float64 + + leading uint8 + trailing uint8 + + tDelta uint64 + stDelta int64 + err error + + baselineV float64 + mode uint8 + stMode uint8 +} + +func (it *xor18111st2Iterator) Seek(t int64) ValueType { + if it.err != nil { + return ValNone + } + + for t > it.t || it.numRead == 0 { + if it.Next() == ValNone { + return ValNone + } + } + return ValFloat +} + +func (it *xor18111st2Iterator) At() (int64, float64) { + return it.t, it.val +} + +func (*xor18111st2Iterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) { + panic("cannot call xor18111st2Iterator.AtHistogram") +} + +func (*xor18111st2Iterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + panic("cannot call xor18111st2Iterator.AtFloatHistogram") +} + +func (it *xor18111st2Iterator) AtT() int64 { + return it.t +} + +func (it *xor18111st2Iterator) AtST() int64 { + return it.st +} + +func (it *xor18111st2Iterator) Err() error { + return it.err +} + +func (it *xor18111st2Iterator) Reset(b []byte) { + it.br = newBReader(b[chunkHeaderSize:]) + it.numTotal = binary.BigEndian.Uint16(b) + + it.numRead = 0 + it.t = 0 + it.st = 0 + it.val = 0 + it.leading = 0 + it.trailing = 0 + it.tDelta = 0 + it.stDelta = 0 + it.err = nil + it.baselineV = 0 + it.mode = xor18111ModeCompact + it.stMode = xor18111ModeCompact +} + +func (it *xor18111st2Iterator) Next() ValueType { + if it.err != nil || it.numRead == it.numTotal { + return ValNone + } + + if it.numRead == 0 { + t, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.t = t + + st, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.st = st + + v, err := it.br.readBits(64) + if err != nil { + it.err = err + return ValNone + } + it.val = math.Float64frombits(v) + if !value.IsStaleNaN(it.val) { + it.baselineV = it.val + } + 
it.numRead++ + return ValFloat + } + + if it.numRead == 1 { + tDelta, err := binary.ReadUvarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.tDelta = tDelta + it.t += int64(it.tDelta) + + stDelta, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.stDelta = stDelta + it.st += it.stDelta + + return it.readValue() + } + + // Read the regular timestamp dod. This may switch the timestamp mode to + // full. + if it.mode == xor18111ModeCompact { + var d byte + for range 4 { + d <<= 1 + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + it.err = err + return ValNone + } + if bit == zero { + break + } + d |= 1 + } + + if d == 0b1111 { + // Mode switch marker: switch to full and read a full-mode dod. + it.mode = xor18111ModeFull + if err := it.readTimestampDeltaFull(); err != nil { + it.err = err + return ValNone + } + } else { + var sz uint8 + var dod int64 + switch d { + case 0b0: + // dod == 0. + case 0b10: + sz = 14 + case 0b110: + sz = 17 + case 0b1110: + sz = 20 + } + + if sz != 0 { + b, err := it.br.readBitsFast(sz) + if err != nil { + b, err = it.br.readBits(sz) + } + if err != nil { + it.err = err + return ValNone + } + if b > (1 << (sz - 1)) { + b -= 1 << sz + } + dod = int64(b) + } + + it.tDelta = uint64(int64(it.tDelta) + dod) + it.t += int64(it.tDelta) + } + } else { + if err := it.readTimestampDeltaFull(); err != nil { + it.err = err + return ValNone + } + } + + // Read the start timestamp dod using the ST's own independent mode. + if err := it.readSTDod(); err != nil { + it.err = err + return ValNone + } + + return it.readValue() +} + +// readTimestampDeltaFull reads a timestamp dod in full mode and updates it.t. 
+func (it *xor18111st2Iterator) readTimestampDeltaFull() error { + var d byte + for range 5 { + d <<= 1 + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + return err + } + if bit == zero { + break + } + d |= 1 + } + + var dod int64 + switch d { + case 0b0: + // dod == 0. + case 0b10: + b, err := it.br.readBitsFast(7) + if err != nil { + b, err = it.br.readBits(7) + } + if err != nil { + return err + } + if b > (1 << 6) { + b -= 1 << 7 + } + dod = int64(b) + case 0b110: + b, err := it.br.readBitsFast(14) + if err != nil { + b, err = it.br.readBits(14) + } + if err != nil { + return err + } + if b > (1 << 13) { + b -= 1 << 14 + } + dod = int64(b) + case 0b1110: + b, err := it.br.readBitsFast(20) + if err != nil { + b, err = it.br.readBits(20) + } + if err != nil { + return err + } + if b > (1 << 19) { + b -= 1 << 20 + } + dod = int64(b) + case 0b11110: + sign, err := it.br.readBit() + if err != nil { + return err + } + b, err := it.br.readBits(4) + if err != nil { + return err + } + multiplier := int64(b) + 1 + if sign == one { + multiplier = -multiplier + } + residualBits, err := it.br.readBits(8) + if err != nil { + return err + } + dod = multiplier*int64(it.tDelta) + int64(int8(residualBits)) + case 0b11111: + b, err := it.br.readBits(64) + if err != nil { + return err + } + dod = int64(b) + } + + it.tDelta = uint64(int64(it.tDelta) + dod) + it.t += int64(it.tDelta) + return nil +} + +// readSTDod reads the start timestamp dod using the ST's own independent mode +// and updates it.st. +func (it *xor18111st2Iterator) readSTDod() error { + if it.stMode == xor18111ModeCompact { + var d byte + for range 4 { + d <<= 1 + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + return err + } + if bit == zero { + break + } + d |= 1 + } + + if d == 0b1111 { + // Mode switch: switch to full and read a full-mode dod. 
+ it.stMode = xor18111ModeFull + return it.readSTDodFull() + } + + var sz uint8 + var stDod int64 + switch d { + case 0b0: + // stDod == 0. + case 0b10: + sz = 14 + case 0b110: + sz = 17 + case 0b1110: + sz = 20 + } + + if sz != 0 { + b, err := it.br.readBitsFast(sz) + if err != nil { + b, err = it.br.readBits(sz) + } + if err != nil { + return err + } + if b > (1 << (sz - 1)) { + b -= 1 << sz + } + stDod = int64(b) + } + + it.stDelta += stDod + it.st += it.stDelta + return nil + } + + return it.readSTDodFull() +} + +// readSTDodFull reads an ST dod in full mode and updates it.st. Uses stDelta +// as the multiplier reference. +func (it *xor18111st2Iterator) readSTDodFull() error { + var d byte + for range 5 { + d <<= 1 + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + return err + } + if bit == zero { + break + } + d |= 1 + } + + var stDod int64 + switch d { + case 0b0: + // stDod == 0. + case 0b10: + b, err := it.br.readBitsFast(7) + if err != nil { + b, err = it.br.readBits(7) + } + if err != nil { + return err + } + if b > (1 << 6) { + b -= 1 << 7 + } + stDod = int64(b) + case 0b110: + b, err := it.br.readBitsFast(14) + if err != nil { + b, err = it.br.readBits(14) + } + if err != nil { + return err + } + if b > (1 << 13) { + b -= 1 << 14 + } + stDod = int64(b) + case 0b1110: + b, err := it.br.readBitsFast(20) + if err != nil { + b, err = it.br.readBits(20) + } + if err != nil { + return err + } + if b > (1 << 19) { + b -= 1 << 20 + } + stDod = int64(b) + case 0b11110: + sign, err := it.br.readBit() + if err != nil { + return err + } + b, err := it.br.readBits(4) + if err != nil { + return err + } + multiplier := int64(b) + 1 + if sign == one { + multiplier = -multiplier + } + residualBits, err := it.br.readBits(8) + if err != nil { + return err + } + stDod = multiplier*it.stDelta + int64(int8(residualBits)) + case 0b11111: + b, err := it.br.readBits(64) + if err != nil { + return err + } + stDod = int64(b) 
+ } + + it.stDelta += stDod + it.st += it.stDelta + return nil +} + +// readValue reads a value with optimized staleness detection. +// This is identical to the method in xor18111Iterator. +func (it *xor18111st2Iterator) readValue() ValueType { + bit, err := it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + it.err = err + return ValNone + } + + if bit == zero { + it.val = it.baselineV + it.numRead++ + return ValFloat + } + + bit, err = it.br.readBitFast() + if err != nil { + bit, err = it.br.readBit() + } + if err != nil { + it.err = err + return ValNone + } + + if bit == zero { + sz := 64 - int(it.leading) - int(it.trailing) + b, err := it.br.readBitsFast(uint8(sz)) + if err != nil { + b, err = it.br.readBits(uint8(sz)) + } + if err != nil { + it.err = err + return ValNone + } + + vbits := math.Float64bits(it.baselineV) + vbits ^= b << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + it.numRead++ + return ValFloat + } + + newLeading, err := it.br.readBitsFast(5) + if err != nil { + newLeading, err = it.br.readBits(5) + } + if err != nil { + it.err = err + return ValNone + } + + sigbits, err := it.br.readBitsFast(6) + if err != nil { + sigbits, err = it.br.readBits(6) + } + if err != nil { + it.err = err + return ValNone + } + + if newLeading == 31 && sigbits == 63 { + it.val = math.Float64frombits(value.StaleNaN) + it.numRead++ + return ValFloat + } + + it.leading = uint8(newLeading) + + if sigbits == 0 { + sigbits = 64 + } + it.trailing = 64 - it.leading - uint8(sigbits) + + b, err := it.br.readBitsFast(uint8(sigbits)) + if err != nil { + b, err = it.br.readBits(uint8(sigbits)) + } + if err != nil { + it.err = err + return ValNone + } + + vbits := math.Float64bits(it.baselineV) + vbits ^= b << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + it.numRead++ + return ValFloat +} diff --git a/tsdb/chunkenc/xor18111st2_test.go b/tsdb/chunkenc/xor18111st2_test.go new file 
mode 100644 index 0000000000..91f1ef8417 --- /dev/null +++ b/tsdb/chunkenc/xor18111st2_test.go @@ -0,0 +1,22 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunkenc + +import "testing" + +func TestXOR18111ST2Chunk(t *testing.T) { + testChunkSTHandling(t, ValFloat, func() Chunk { + return NewXOR18111ST2Chunk() + }) +} diff --git a/tsdb/chunkenc/xor18111st_test.go b/tsdb/chunkenc/xor18111st_test.go new file mode 100644 index 0000000000..1fce56d4bc --- /dev/null +++ b/tsdb/chunkenc/xor18111st_test.go @@ -0,0 +1,22 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package chunkenc + +import "testing" + +func TestXOR18111STChunk(t *testing.T) { + testChunkSTHandling(t, ValFloat, func() Chunk { + return NewXOR18111STChunk() + }) +} diff --git a/tsdb/chunkenc/xor18238.go b/tsdb/chunkenc/xor18238.go new file mode 100644 index 0000000000..e2ddffad2d --- /dev/null +++ b/tsdb/chunkenc/xor18238.go @@ -0,0 +1,675 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file implements the XOR18238 chunk encoding, which corresponds to the +// encoding proposed in https://github.com/prometheus/prometheus/pull/18238. + +package chunkenc + +import ( + "encoding/binary" + "errors" + "math" + "math/bits" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/value" +) + +// xor18238STHeaderSize is the size in bytes of the ST header appended after +// the standard chunk header in an XOR18238 chunk. +const xor18238STHeaderSize = 1 + +// errXOR18238STNotSupported is returned when an XOR18238 chunk has +// first_st_known set. +var errXOR18238STNotSupported = errors.New("XOR18238 chunk with start timestamp not supported") + +// XOR18238Chunk implements XOR encoding with joint timestamp+value control bits +// and byte-packed dod encoding for efficient appending. 
+// +// Control prefix for samples >= 2: +// +// 0 → dod=0 AND value unchanged (1 bit) +// 10 → dod=0, value changed (2 bits, then value encoding) +// 110 → dod≠0, 13-bit signed [-4096, 4095] (prefix+dod packed into 2 bytes) +// 1110 → dod≠0, 20-bit signed [-524288, 524287] (prefix+dod packed into 3 bytes) +// 11110 → dod≠0, 64-bit escape (5+64 bits, then value encoding) +// 11111 → dod=0, stale NaN (5 bits, no value field) +// +// The dod bins are widened so that prefix+dod aligns to byte boundaries, +// replacing writeBit calls with writeByte for common cases. +// +// Value encoding for the dod≠0 cases (``): +// +// 0 → value unchanged +// 10 → reuse previous leading/trailing window +// 110 → new leading/trailing window +// 111 → stale NaN +// +// Value encoding for the dod=0, value-changed case (``): +// +// 0 → reuse previous leading/trailing window +// 1 → new leading/trailing window +type XOR18238Chunk struct { + b bstream +} + +// NewXOR18238Chunk returns a new chunk with XOR18238 encoding. +func NewXOR18238Chunk() *XOR18238Chunk { + b := make([]byte, chunkHeaderSize+xor18238STHeaderSize, chunkAllocationSize) + return &XOR18238Chunk{b: bstream{stream: b, count: 0}} +} + +func (c *XOR18238Chunk) Reset(stream []byte) { + c.b.Reset(stream) +} + +// Encoding returns the encoding type. +func (*XOR18238Chunk) Encoding() Encoding { + return EncXOR18238 +} + +// Bytes returns the underlying byte slice of the chunk. +func (c *XOR18238Chunk) Bytes() []byte { + return c.b.bytes() +} + +// NumSamples returns the number of samples in the chunk. +func (c *XOR18238Chunk) NumSamples() int { + return int(binary.BigEndian.Uint16(c.Bytes())) +} + +// Compact implements the Chunk interface. +func (c *XOR18238Chunk) Compact() { + if l := len(c.b.stream); cap(c.b.stream) > l+chunkCompactCapacityThreshold { + buf := make([]byte, l) + copy(buf, c.b.stream) + c.b.stream = buf + } +} + +// Appender implements the Chunk interface. 
+func (c *XOR18238Chunk) Appender() (Appender, error) { + if len(c.b.stream) == chunkHeaderSize+xor18238STHeaderSize { + return &xor18238Appender{ + b: &c.b, + t: math.MinInt64, + leading: 0xff, + num: 0, + }, nil + } + it := c.iterator(nil) + + for it.Next() != ValNone { + } + if err := it.Err(); err != nil { + return nil, err + } + + a := &xor18238Appender{ + b: &c.b, + t: it.t, + v: it.baselineV, + tDelta: it.tDelta, + leading: it.leading, + trailing: it.trailing, + num: binary.BigEndian.Uint16(c.b.bytes()), + } + return a, nil +} + +func (c *XOR18238Chunk) iterator(it Iterator) *xor18238Iterator { + if xor18238Iter, ok := it.(*xor18238Iterator); ok { + xor18238Iter.Reset(c.b.bytes()) + return xor18238Iter + } + return &xor18238Iterator{ + br: newBReader(c.b.bytes()[chunkHeaderSize+xor18238STHeaderSize:]), + numTotal: binary.BigEndian.Uint16(c.b.bytes()), + t: math.MinInt64, + baselineV: 0, + } +} + +// Iterator implements the Chunk interface. +func (c *XOR18238Chunk) Iterator(it Iterator) Iterator { + return c.iterator(it) +} + +// xor18238Appender uses joint timestamp+value control bits and byte-packed dod bins. +type xor18238Appender struct { + b *bstream + + t int64 + v float64 + tDelta uint64 + + leading uint8 + trailing uint8 + + num uint16 // Cached sample count, written back to b on each Append. +} + +func (a *xor18238Appender) Append(_, t int64, v float64) { + var tDelta uint64 + switch a.num { + case 0: + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutVarint(buf, t)] { + a.b.writeByte(b) + } + a.b.writeBits(math.Float64bits(v), 64) + case 1: + tDelta = uint64(t - a.t) + + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutUvarint(buf, tDelta)] { + a.b.writeByte(b) + } + + a.writeVDelta(v) + default: + tDelta = uint64(t - a.t) + dod := int64(tDelta - a.tDelta) + + if dod == 0 { + // Timestamp unchanged. + switch { + case value.IsStaleNaN(v): + // Stale NaN with dod=0: joint control `11111`. 
+ a.b.writeBits(0b11111, 5) + case math.Float64bits(v)^math.Float64bits(a.v) == 0: + // Both unchanged: single 0 bit. + a.b.writeBit(zero) + default: + // Value changed: joint control `10` + value. + a.b.writeBits(0b10, 2) + a.writeVDeltaKnownNonZero(v) + } + } else { + // Timestamp changed: byte-packed dod encoding + value. + switch { + case dod >= -(1<<12) && dod <= (1<<12)-1: + // 13-bit dod: prefix `110` packed with top 5 bits → 2 bytes total. + a.b.writeByte(0b110_00000 | byte(uint64(dod)>>8)&0x1F) + a.b.writeByte(byte(uint64(dod))) + case dod >= -(1<<19) && dod <= (1<<19)-1: + // 20-bit dod: prefix `1110` packed with top 4 bits → 3 bytes total. + a.b.writeByte(0b1110_0000 | byte(uint64(dod)>>16)&0x0F) + a.b.writeByte(byte(uint64(dod) >> 8)) + a.b.writeByte(byte(uint64(dod))) + default: + // 64-bit escape (rare): `11110`. + a.b.writeBits(0b11110, 5) + a.b.writeBits(uint64(dod), 64) + } + a.writeVDelta(v) + } + } + + a.t = t + if !value.IsStaleNaN(v) { + a.v = v + } + a.num++ + binary.BigEndian.PutUint16(a.b.bytes(), a.num) + a.tDelta = tDelta +} + +// writeVDelta encodes the value delta for the dod≠0 case. 
+// Encoding: +// +// `0` → value unchanged (XOR = 0) +// `10` → reuse previous leading/trailing window +// `110` → new leading/trailing window +// `111` → stale NaN marker +func (a *xor18238Appender) writeVDelta(v float64) { + if value.IsStaleNaN(v) { + a.b.writeBits(0b111, 3) + return + } + + delta := math.Float64bits(v) ^ math.Float64bits(a.v) + + if delta == 0 { + a.b.writeBit(zero) + return + } + + newLeading := uint8(bits.LeadingZeros64(delta)) + newTrailing := uint8(bits.TrailingZeros64(delta)) + + if newLeading >= 32 { + newLeading = 31 + } + + if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing { + a.b.writeBits(0b10, 2) + a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing)) + return + } + + a.leading, a.trailing = newLeading, newTrailing + + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(newLeading), 5) + + sigbits := 64 - newLeading - newTrailing + a.b.writeBits(uint64(sigbits), 6) + a.b.writeBits(delta>>newTrailing, int(sigbits)) +} + +// writeVDeltaKnownNonZero encodes the value delta when it is known to be +// non-zero and non-stale (dod=0, value-changed case). Skips the val=0 check, +// saving 1 bit on the reuse path. Stale NaN with dod=0 is handled at the +// joint control level (`11111`) and never reaches this function. 
//
// Encoding:
//
//	`0` → reuse previous leading/trailing window
//	`1` → new leading/trailing window
//
// The leading-zero count is capped at 31 so that it fits into five bits, and
// 64 significant bits wrap to 0 in the six-bit field.
func (a *xor18238Appender) writeVDeltaKnownNonZero(v float64) {
	delta := math.Float64bits(v) ^ math.Float64bits(a.v)

	newLeading := uint8(bits.LeadingZeros64(delta))
	newTrailing := uint8(bits.TrailingZeros64(delta))

	if newLeading >= 32 {
		newLeading = 31
	}

	// Reuse the current window whenever the new payload fits into it;
	// 0xff marks "no window written yet".
	if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing {
		a.b.writeBit(zero)
		a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing))
		return
	}

	a.leading, a.trailing = newLeading, newTrailing

	a.b.writeBit(one)
	a.b.writeBits(uint64(newLeading), 5)

	sigbits := 64 - newLeading - newTrailing
	a.b.writeBits(uint64(sigbits), 6)
	a.b.writeBits(delta>>newTrailing, int(sigbits))
}

// AppendHistogram always panics: this appender only accepts float samples.
func (*xor18238Appender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a histogram sample to a float chunk")
}

// AppendFloatHistogram always panics: this appender only accepts float samples.
func (*xor18238Appender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a float histogram sample to a float chunk")
}

// xor18238Iterator decodes XOR18238 chunks.
type xor18238Iterator struct {
	br       bstreamReader
	numTotal uint16 // Sample count taken from the chunk header.
	numRead  uint16 // Samples decoded so far.

	t   int64   // Timestamp of the current sample.
	val float64 // Value of the current sample (may be a stale NaN).

	leading  uint8 // Leading zeros of the current XOR window.
	trailing uint8 // Trailing zeros of the current XOR window.

	tDelta uint64 // Delta between the last two timestamps.
	err    error  // First error encountered; once set, Next returns ValNone.

	baselineV float64 // Last non-stale value for XOR baseline.
}

// Seek advances the iterator to the first sample with a timestamp >= t. It
// always decodes at least one sample if none has been read yet. It returns
// ValNone when the chunk is exhausted or an error occurred.
func (it *xor18238Iterator) Seek(t int64) ValueType {
	if it.err != nil {
		return ValNone
	}

	for t > it.t || it.numRead == 0 {
		if it.Next() == ValNone {
			return ValNone
		}
	}
	return ValFloat
}

// At returns the timestamp and value of the current sample.
func (it *xor18238Iterator) At() (int64, float64) {
	return it.t, it.val
}

// AtHistogram always panics: the chunk only holds float samples.
func (*xor18238Iterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) {
	panic("cannot call xor18238Iterator.AtHistogram")
}

// AtFloatHistogram always panics: the chunk only holds float samples.
func (*xor18238Iterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
	panic("cannot call xor18238Iterator.AtFloatHistogram")
}

// AtT returns the timestamp of the current sample.
func (it *xor18238Iterator) AtT() int64 {
	return it.t
}

// AtST always returns 0: this encoding stores no start timestamp.
func (*xor18238Iterator) AtST() int64 {
	return 0
}

// Err returns the error that stopped the iteration, if any.
func (it *xor18238Iterator) Err() error {
	return it.err
}

// Reset re-targets the iterator at the encoded chunk b and clears all decoding
// state. If the ST header announces start timestamp data, the iterator is put
// into the errXOR18238STNotSupported error state.
func (it *xor18238Iterator) Reset(b []byte) {
	it.br = newBReader(b[chunkHeaderSize+xor18238STHeaderSize:])
	it.numTotal = binary.BigEndian.Uint16(b)

	it.numRead = 0
	it.t = 0
	it.val = 0
	it.leading = 0
	it.trailing = 0
	it.tDelta = 0
	it.baselineV = 0

	// The ST header byte follows the standard chunk header. Bit 7
	// (first_st_known) indicates that start timestamp data is present, which
	// this implementation does not support.
	if b[chunkHeaderSize]&0x80 != 0 {
		it.err = errXOR18238STNotSupported
		return
	}
	it.err = nil
}

// Next decodes the next sample. It returns ValFloat on success and ValNone
// when all samples have been read or an error occurred (see Err).
//
// The first sample is a varint timestamp plus 64 raw value bits, the second a
// uvarint timestamp delta plus the long value encoding, and every later
// sample starts with the joint control prefix.
func (it *xor18238Iterator) Next() ValueType {
	if it.err != nil || it.numRead == it.numTotal {
		return ValNone
	}

	if it.numRead == 0 {
		t, err := binary.ReadVarint(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		v, err := it.br.readBits(64)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.t = t
		it.val = math.Float64frombits(v)
		// A stale first sample leaves the XOR baseline at zero.
		if !value.IsStaleNaN(it.val) {
			it.baselineV = it.val
		}
		it.numRead++
		return ValFloat
	}

	if it.numRead == 1 {
		tDelta, err := binary.ReadUvarint(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.tDelta = tDelta
		it.t += int64(it.tDelta)

		return it.readValue()
	}

	// ctrl numbers the joint prefixes in the order listed in the cases below.
	ctrl, err := it.br.readXOR18238Control()
	if err != nil {
		it.err = err
		return ValNone
	}

	switch ctrl {
	case 0:
		// dod=0, value unchanged: `0`.
		it.t += int64(it.tDelta)
		it.val = it.baselineV
		it.numRead++
		return ValFloat
	case 1:
		// dod=0, value changed: `10`.
		it.t += int64(it.tDelta)
		return it.readValueKnownNonZero()
	case 2:
		// 13-bit dod: `110`.
		if err := it.readDod(13); err != nil {
			it.err = err
			return ValNone
		}
	case 3:
		// 20-bit dod: `1110`.
		if err := it.readDod(20); err != nil {
			it.err = err
			return ValNone
		}
	case 4:
		// 64-bit escape: `11110`.
		if err := it.readDod(64); err != nil {
			it.err = err
			return ValNone
		}
	default:
		// dod=0, stale NaN: `11111`.
		it.t += int64(it.tDelta)
		it.val = math.Float64frombits(value.StaleNaN)
		it.numRead++
		return ValFloat
	}

	// All dod≠0 forms are followed by the long value encoding.
	return it.readValue()
}

// readDod reads a w-bit two's complement delta-of-delta and applies it to
// it.tDelta and it.t.
func (it *xor18238Iterator) readDod(w uint8) error {
	var b uint64
	// Fast path: take the bits straight out of the reader's buffered word.
	if it.br.valid >= w {
		it.br.valid -= w
		b = (it.br.buffer >> it.br.valid) & ((uint64(1) << w) - 1)
	} else {
		var err error
		b, err = it.br.readBits(w)
		if err != nil {
			return err
		}
	}

	// Sign-extend; a 64-bit value already is the full two's complement number.
	if w < 64 && b >= (1<<(w-1)) {
		b -= 1 << w
	}
	dod := int64(b)

	it.tDelta = uint64(int64(it.tDelta) + dod)
	it.t += int64(it.tDelta)
	return nil
}

// readValue decodes the long value encoding (`0`, `10`, `110`, `111`), stores
// the result in it.val and counts the sample as read. On a read failure it
// records the error and returns ValNone.
func (it *xor18238Iterator) readValue() ValueType {
	// First bit: `0` = value unchanged, `1` = value changed.
	var bit bit
	// Fast path: take the bit from the buffered word when one is available.
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			it.err = err
			return ValNone
		}
	}

	if bit == zero {
		// `0` = value unchanged.
		it.val = it.baselineV
		it.numRead++
		return ValFloat
	}

	// Second bit: `10` = reuse window, `11x` = new window or stale.
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			it.err = err
			return ValNone
		}
	}

	if bit == zero {
		// `10` = reuse previous leading/trailing window.
		sz := uint8(64 - int(it.leading) - int(it.trailing))
		var valueBits uint64
		if it.br.valid >= sz {
			it.br.valid -= sz
			valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
		} else {
			var err error
			valueBits, err = it.br.readBits(sz)
			if err != nil {
				it.err = err
				return ValNone
			}
		}

		vbits := math.Float64bits(it.baselineV)
		vbits ^= valueBits << it.trailing
		it.val = math.Float64frombits(vbits)
		it.baselineV = it.val
		it.numRead++
		return ValFloat
	}

	// Third bit: `110` = new window, `111` = stale NaN.
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			it.err = err
			return ValNone
		}
	}

	if bit == zero {
		// `110` = new leading/trailing window.
		return it.readNewLeadingTrailing()
	}

	// `111` = stale NaN. The XOR baseline is deliberately left untouched.
	it.val = math.Float64frombits(value.StaleNaN)
	it.numRead++
	return ValFloat
}

// readValueKnownNonZero decodes the short value encoding that follows the
// `10` joint prefix: `0` = XOR payload in the current window, `1` = new
// window. It stores the result in it.val and counts the sample as read.
func (it *xor18238Iterator) readValueKnownNonZero() ValueType {
	var bit bit
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			it.err = err
			return ValNone
		}
	}

	if bit == zero {
		// Reuse the current leading/trailing window.
		sz := uint8(64 - int(it.leading) - int(it.trailing))
		var valueBits uint64
		if it.br.valid >= sz {
			it.br.valid -= sz
			valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
		} else {
			var err error
			valueBits, err = it.br.readBits(sz)
			if err != nil {
				it.err = err
				return ValNone
			}
		}

		vbits := math.Float64bits(it.baselineV)
		vbits ^= valueBits << it.trailing
		it.val = math.Float64frombits(vbits)
		it.baselineV = it.val
		it.numRead++
		return ValFloat
	}

	return it.readNewLeadingTrailing()
}

// readNewLeadingTrailing reads a new window (5-bit leading zero count, 6-bit
// significant-bit count) followed by the XOR payload, updates the window and
// the baseline, stores the value in it.val and counts the sample as read.
func (it *xor18238Iterator) readNewLeadingTrailing() ValueType {
	var newLeading uint64
	if it.br.valid >= 5 {
		it.br.valid -= 5
		newLeading = (it.br.buffer >> it.br.valid) & 0x1f
	} else {
		var err error
		newLeading, err = it.br.readBits(5)
		if err != nil {
			it.err = err
			return ValNone
		}
	}

	var sigbits uint64
	if it.br.valid >= 6 {
		it.br.valid -= 6
		sigbits = (it.br.buffer >> it.br.valid) & 0x3f
	} else {
		var err error
		sigbits, err = it.br.readBits(6)
		if err != nil {
			it.err = err
			return ValNone
		}
	}

	it.leading = uint8(newLeading)

	// 64 significant bits do not fit into six bits; the appender stores them
	// as 0.
	if sigbits == 0 {
		sigbits = 64
	}
	it.trailing = 64 - it.leading - uint8(sigbits)

	n := uint8(sigbits)
	var valueBits uint64
	if it.br.valid >= n {
		it.br.valid -= n
		valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << n) - 1)
	} else {
		var err error
		valueBits, err = it.br.readBits(n)
		if err != nil {
			it.err = err
			return ValNone
		}
	}

	vbits := math.Float64bits(it.baselineV)
	vbits ^= valueBits << it.trailing
	it.val = math.Float64frombits(vbits)
	it.baselineV = it.val
	it.numRead++
	return ValFloat
}

// diff --git a/tsdb/chunkenc/xor18238optst.go b/tsdb/chunkenc/xor18238optst.go new file mode 100644 index 0000000000..f394647a02 --- /dev/null +++ b/tsdb/chunkenc/xor18238optst.go @@ -0,0 +1,761 @@

// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This file implements the XOR18238OPTST chunk encoding: XOR18238 with
// optional start timestamp encoding identical to XOROptST.
//
// The existing 1-byte ST header (at b[chunkHeaderSize]) is reused with the
// same layout as XOROptST:
//
//	bit 7 (0x80): firstSTKnown — ST for the first sample is present in the stream
//	bits 6-0: firstSTChangeOn — sample index where the first ST change begins
//
// When no ST is provided (st == 0 always), the header stays 0x00 and the
// chunk is byte-for-byte identical to XOR18238, ensuring there is no overhead
// for series that carry no start timestamp.
//
// When ST is present, the ST delta (prevT - st) is appended after each
// sample's joint timestamp+value encoding using putVarbitInt, exactly as in
// XOROptST.
package chunkenc

import (
	"encoding/binary"
	"math"
	"math/bits"

	"github.com/prometheus/prometheus/model/histogram"
	"github.com/prometheus/prometheus/model/value"
)

// XOR18238OPTSTChunk holds XOR18238 encoded samples with optional start
// timestamp per chunk or per sample. See XOROptST for the ST header format.
type XOR18238OPTSTChunk struct {
	b bstream
}

// NewXOR18238OPTSTChunk returns a new chunk with XOR18238OPTST encoding.
// The chunk starts out with the standard chunk header plus the ST header and
// no samples.
func NewXOR18238OPTSTChunk() *XOR18238OPTSTChunk {
	b := make([]byte, chunkHeaderSize+chunkSTHeaderSize, chunkAllocationSize)
	return &XOR18238OPTSTChunk{b: bstream{stream: b, count: 0}}
}

// Reset points the chunk at the given encoded bytes.
func (c *XOR18238OPTSTChunk) Reset(stream []byte) {
	c.b.Reset(stream)
}

// Encoding returns the encoding type.
func (*XOR18238OPTSTChunk) Encoding() Encoding {
	return EncXOR18238OPTST
}

// Bytes returns the underlying byte slice of the chunk.
func (c *XOR18238OPTSTChunk) Bytes() []byte {
	return c.b.bytes()
}

// NumSamples returns the number of samples in the chunk, read from the
// big-endian counter in the first two bytes.
func (c *XOR18238OPTSTChunk) NumSamples() int {
	return int(binary.BigEndian.Uint16(c.Bytes()))
}

// Compact implements the Chunk interface. It copies the data into a buffer of
// exactly the used length when the spare capacity exceeds
// chunkCompactCapacityThreshold.
func (c *XOR18238OPTSTChunk) Compact() {
	if l := len(c.b.stream); cap(c.b.stream) > l+chunkCompactCapacityThreshold {
		buf := make([]byte, l)
		copy(buf, c.b.stream)
		c.b.stream = buf
	}
}

// Appender implements the Chunk interface.
//
// An empty chunk gets a pristine appender. Otherwise the existing samples are
// decoded once so that the appender continues from the exact encoder state,
// including the ST bookkeeping; decoding errors are returned to the caller.
func (c *XOR18238OPTSTChunk) Appender() (Appender, error) {
	if len(c.b.stream) == chunkHeaderSize+chunkSTHeaderSize {
		return &xor18238OPTSTAppender{
			b:       &c.b,
			t:       math.MinInt64,
			leading: 0xff, // No leading/trailing window has been written yet.
		}, nil
	}
	it := c.iterator(nil)

	// Replay the whole chunk; only the final iterator state is of interest.
	for it.Next() != ValNone {
	}
	if err := it.Err(); err != nil {
		return nil, err
	}

	// Set the bit position for continuing writes. The iterator's reader tracks
	// how many bits remain unread in the last byte.
	c.b.count = it.br.valid

	a := &xor18238OPTSTAppender{
		b:               &c.b,
		st:              it.st,
		t:               it.t,
		v:               it.baselineV, // Stale NaNs never become the XOR baseline.
		tDelta:          it.tDelta,
		stDiff:          it.stDiff,
		leading:         it.leading,
		trailing:        it.trailing,
		numTotal:        binary.BigEndian.Uint16(c.b.bytes()),
		firstSTKnown:    it.firstSTKnown,
		firstSTChangeOn: uint16(it.firstSTChangeOn),
	}
	return a, nil
}

// iterator returns an iterator positioned before the first sample. A passed-in
// *xor18238OPTSTIterator is reused; otherwise a new one is allocated. Both
// paths initialise the iterator through Reset.
func (c *XOR18238OPTSTChunk) iterator(it Iterator) *xor18238OPTSTIterator {
	if iter, ok := it.(*xor18238OPTSTIterator); ok {
		iter.Reset(c.b.bytes())
		return iter
	}
	iter := &xor18238OPTSTIterator{}
	iter.Reset(c.b.bytes())
	return iter
}

// Iterator implements the Chunk interface.
func (c *XOR18238OPTSTChunk) Iterator(it Iterator) Iterator {
	return c.iterator(it)
}

// xor18238OPTSTAppender appends samples with optional start timestamps using
// the XOR18238 joint control bit encoding for regular timestamp and value,
// and putVarbitInt for the start timestamp delta.
type xor18238OPTSTAppender struct {
	b *bstream

	st     int64   // Start timestamp of the last appended sample.
	t      int64   // Timestamp of the last appended sample.
	v      float64 // Last appended non-stale value, the XOR baseline.
	tDelta uint64  // Delta between the last two timestamps.
	stDiff int64   // prevT - st for the previous sample.

	leading  uint8 // Leading zeros of the current XOR window; 0xff = no window yet.
	trailing uint8 // Trailing zeros of the current XOR window.

	numTotal        uint16 // Sample count, mirrored into the chunk header on each Append.
	firstSTChangeOn uint16 // Index of the first sample with per-sample ST data; 0 = none so far.
	firstSTKnown    bool   // Whether the first sample carried a non-zero ST.
}

// Append encodes one sample with its start timestamp st (0 meaning "none").
//
//   - Sample 0: varint t, raw value bits and, if st != 0, varint t-st; the
//     header is flagged with firstSTKnown.
//   - Sample 1: uvarint timestamp delta, long value encoding and, if st
//     differs from the first sample's, prevT-st via putVarbitInt.
//   - Later samples: joint timestamp+value encoding. Until the first ST change
//     nothing is written for the ST; at the first change prevT-st is written
//     in full, afterwards only its difference to the previous sample's
//     prevT-st.
//
// The sample counter in the chunk header is updated on every call.
func (a *xor18238OPTSTAppender) Append(st, t int64, v float64) {
	var (
		tDelta uint64
		stDiff int64
	)

	switch a.numTotal {
	case 0:
		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutVarint(buf, t)] {
			a.b.writeByte(b)
		}
		a.b.writeBits(math.Float64bits(v), 64)

		if st != 0 {
			// The first ST is stored relative to the sample's own timestamp.
			for _, b := range buf[:binary.PutVarint(buf, t-st)] {
				a.b.writeByte(b)
			}
			a.firstSTKnown = true
			writeHeaderFirstSTKnown(a.b.bytes()[chunkHeaderSize:])
		}

	case 1:
		tDelta = uint64(t - a.t)

		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutUvarint(buf, tDelta)] {
			a.b.writeByte(b)
		}

		a.writeVDelta(v)

		if st != a.st {
			// From here on the ST is stored relative to the previous sample's
			// timestamp (a.t still holds it at this point).
			stDiff = a.t - st
			a.firstSTChangeOn = 1
			writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], 1)
			putVarbitInt(a.b, stDiff)
		}

	default:
		tDelta = uint64(t - a.t)
		dod := int64(tDelta - a.tDelta)

		// Fast path: no ST involvement at all.
		if st == 0 && a.numTotal != maxFirstSTChangeOn && a.firstSTChangeOn == 0 && !a.firstSTKnown {
			a.encodeJoint(dod, v)
			a.t = t
			if !value.IsStaleNaN(v) {
				a.v = v
			}
			a.tDelta = tDelta
			a.numTotal++
			binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal)
			return
		}

		// Slow path: ST may be involved.
		a.encodeJoint(dod, v)

		if a.firstSTChangeOn == 0 {
			// NOTE(review): the change is forced at maxFirstSTChangeOn even if
			// the ST did not move — presumably because the header field cannot
			// hold a larger index; confirm against writeHeaderFirstSTChangeOn.
			if st != a.st || a.numTotal == maxFirstSTChangeOn {
				// First ST change: record prevT - st.
				stDiff = a.t - st
				a.firstSTChangeOn = a.numTotal
				writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], a.numTotal)
				putVarbitInt(a.b, stDiff)
			}
		} else {
			// Per-sample ST data is active: store the change of prevT - st.
			stDiff = a.t - st
			putVarbitInt(a.b, stDiff-a.stDiff)
		}
	}

	a.st = st
	a.t = t
	// A stale marker must not become the baseline for the next XOR.
	if !value.IsStaleNaN(v) {
		a.v = v
	}
	a.tDelta = tDelta
	a.stDiff = stDiff
	a.numTotal++
	binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal)
}

// encodeJoint writes the XOR18238 joint timestamp+value control sequence for
// samples >= 2.
//
//	0     → dod=0 and value unchanged
//	10    → dod=0, value changed (short value encoding follows)
//	110   → 13-bit signed dod, prefix+dod 2 bytes wide, then the long value encoding
//	1110  → 20-bit signed dod, prefix+dod 3 bytes wide, then the long value encoding
//	11110 → 64-bit dod escape, then the long value encoding
//	11111 → dod=0, stale NaN
func (a *xor18238OPTSTAppender) encodeJoint(dod int64, v float64) {
	if dod == 0 {
		switch {
		case value.IsStaleNaN(v):
			a.b.writeBits(0b11111, 5)
		case math.Float64bits(v)^math.Float64bits(a.v) == 0:
			a.b.writeBit(zero)
		default:
			a.b.writeBits(0b10, 2)
			a.writeVDeltaKnownNonZero(v)
		}
		return
	}

	switch {
	case dod >= -(1<<12) && dod <= (1<<12)-1:
		// 13-bit dod: prefix `110` packed with top 5 bits → 2 bytes total.
		a.b.writeByte(0b110_00000 | byte(uint64(dod)>>8)&0x1F)
		a.b.writeByte(byte(uint64(dod)))
	case dod >= -(1<<19) && dod <= (1<<19)-1:
		// 20-bit dod: prefix `1110` packed with top 4 bits → 3 bytes total.
		a.b.writeByte(0b1110_0000 | byte(uint64(dod)>>16)&0x0F)
		a.b.writeByte(byte(uint64(dod) >> 8))
		a.b.writeByte(byte(uint64(dod)))
	default:
		// 64-bit escape (rare): `11110`.
		a.b.writeBits(0b11110, 5)
		a.b.writeBits(uint64(dod), 64)
	}
	a.writeVDelta(v)
}

// writeVDelta encodes the value delta for the dod≠0 case.
+func (a *xor18238OPTSTAppender) writeVDelta(v float64) { + if value.IsStaleNaN(v) { + a.b.writeBits(0b111, 3) + return + } + + delta := math.Float64bits(v) ^ math.Float64bits(a.v) + + if delta == 0 { + a.b.writeBit(zero) + return + } + + newLeading := uint8(bits.LeadingZeros64(delta)) + newTrailing := uint8(bits.TrailingZeros64(delta)) + + if newLeading >= 32 { + newLeading = 31 + } + + if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing { + a.b.writeBits(0b10, 2) + a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing)) + return + } + + a.leading, a.trailing = newLeading, newTrailing + + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(newLeading), 5) + + sigbits := 64 - newLeading - newTrailing + a.b.writeBits(uint64(sigbits), 6) + a.b.writeBits(delta>>newTrailing, int(sigbits)) +} + +// writeVDeltaKnownNonZero encodes the value delta when it is known to be +// non-zero and non-stale (dod=0, value-changed case). +func (a *xor18238OPTSTAppender) writeVDeltaKnownNonZero(v float64) { + delta := math.Float64bits(v) ^ math.Float64bits(a.v) + + newLeading := uint8(bits.LeadingZeros64(delta)) + newTrailing := uint8(bits.TrailingZeros64(delta)) + + if newLeading >= 32 { + newLeading = 31 + } + + if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing { + a.b.writeBit(zero) + a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing)) + return + } + + a.leading, a.trailing = newLeading, newTrailing + + a.b.writeBit(one) + a.b.writeBits(uint64(newLeading), 5) + + sigbits := 64 - newLeading - newTrailing + a.b.writeBits(uint64(sigbits), 6) + a.b.writeBits(delta>>newTrailing, int(sigbits)) +} + +func (*xor18238OPTSTAppender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) { + panic("appended a histogram sample to a float chunk") +} + +func (*xor18238OPTSTAppender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, 
bool) (Chunk, bool, Appender, error) { + panic("appended a float histogram sample to a float chunk") +} + +// xor18238OPTSTIterator decodes XOR18238OPTST chunks. +type xor18238OPTSTIterator struct { + br bstreamReader + numTotal uint16 + numRead uint16 + + firstSTKnown bool + firstSTChangeOn uint8 + + leading uint8 + trailing uint8 + + st int64 + t int64 + val float64 + + tDelta uint64 + stDiff int64 // Accumulated prevT - st. + err error + + baselineV float64 // Last non-stale value for XOR baseline. +} + +func (it *xor18238OPTSTIterator) Seek(t int64) ValueType { + if it.err != nil { + return ValNone + } + + for t > it.t || it.numRead == 0 { + if it.Next() == ValNone { + return ValNone + } + } + return ValFloat +} + +func (it *xor18238OPTSTIterator) At() (int64, float64) { + return it.t, it.val +} + +func (*xor18238OPTSTIterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) { + panic("cannot call xor18238OPTSTIterator.AtHistogram") +} + +func (*xor18238OPTSTIterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + panic("cannot call xor18238OPTSTIterator.AtFloatHistogram") +} + +func (it *xor18238OPTSTIterator) AtT() int64 { + return it.t +} + +func (it *xor18238OPTSTIterator) AtST() int64 { + return it.st +} + +func (it *xor18238OPTSTIterator) Err() error { + return it.err +} + +func (it *xor18238OPTSTIterator) Reset(b []byte) { + it.br = newBReader(b[chunkHeaderSize+chunkSTHeaderSize:]) + it.numTotal = binary.BigEndian.Uint16(b) + it.firstSTKnown, it.firstSTChangeOn = readSTHeader(b[chunkHeaderSize:]) + + it.numRead = 0 + it.st = 0 + it.t = 0 + it.val = 0 + it.leading = 0 + it.trailing = 0 + it.tDelta = 0 + it.stDiff = 0 + it.baselineV = 0 + it.err = nil +} + +func (it *xor18238OPTSTIterator) Next() ValueType { + if it.err != nil || it.numRead == it.numTotal { + return ValNone + } + + if it.numRead == 0 { + t, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + v, err := 
it.br.readBits(64) + if err != nil { + it.err = err + return ValNone + } + it.t = t + it.val = math.Float64frombits(v) + if !value.IsStaleNaN(it.val) { + it.baselineV = it.val + } + + // Optional ST for sample 0. + if it.firstSTKnown { + stDiff, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.st = t - stDiff + } + + it.numRead++ + return ValFloat + } + + if it.numRead == 1 { + tDelta, err := binary.ReadUvarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + prevT := it.t + it.tDelta = tDelta + it.t += int64(it.tDelta) + + if err := it.decodeValue(); err != nil { + it.err = err + return ValNone + } + + // Optional ST delta for sample 1. + if it.firstSTChangeOn == 1 { + sdod, err := readVarbitInt(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.stDiff = sdod + it.st = prevT - sdod + } + + it.numRead++ + return ValFloat + } + + // Sample N >= 2: read joint XOR18238 control, then optional ST data. + prevT := it.t + savedNumRead := it.numRead + + ctrl, err := it.br.readXOR18238Control() + if err != nil { + it.err = err + return ValNone + } + + switch ctrl { + case 0: + // dod=0, value unchanged. + it.t += int64(it.tDelta) + it.val = it.baselineV + case 1: + // dod=0, value changed. + it.t += int64(it.tDelta) + if err := it.decodeValueKnownNonZero(); err != nil { + it.err = err + return ValNone + } + case 2: + // 13-bit dod. + if err := it.readDod(13); err != nil { + it.err = err + return ValNone + } + if err := it.decodeValue(); err != nil { + it.err = err + return ValNone + } + case 3: + // 20-bit dod. + if err := it.readDod(20); err != nil { + it.err = err + return ValNone + } + if err := it.decodeValue(); err != nil { + it.err = err + return ValNone + } + case 4: + // 64-bit escape. + if err := it.readDod(64); err != nil { + it.err = err + return ValNone + } + if err := it.decodeValue(); err != nil { + it.err = err + return ValNone + } + default: + // dod=0, stale NaN. 
+ it.t += int64(it.tDelta) + it.val = math.Float64frombits(value.StaleNaN) + } + + // Optional ST data, appended after the joint timestamp+value encoding. + // The ST delta was encoded as (prevT - st), using the PREVIOUS sample's t. + if it.firstSTChangeOn > 0 && savedNumRead >= uint16(it.firstSTChangeOn) { + sdod, err := readVarbitInt(&it.br) + if err != nil { + it.err = err + return ValNone + } + if savedNumRead == uint16(it.firstSTChangeOn) { + it.stDiff = sdod + } else { + it.stDiff += sdod + } + it.st = prevT - it.stDiff + } + + it.numRead++ + return ValFloat +} + +// readDod reads a signed dod of width w bits and updates it.tDelta and it.t. +func (it *xor18238OPTSTIterator) readDod(w uint8) error { + var b uint64 + if it.br.valid >= w { + it.br.valid -= w + b = (it.br.buffer >> it.br.valid) & ((uint64(1) << w) - 1) + } else { + var err error + b, err = it.br.readBits(w) + if err != nil { + return err + } + } + + if w < 64 && b >= (1<<(w-1)) { + b -= 1 << w + } + + it.tDelta = uint64(int64(it.tDelta) + int64(b)) + it.t += int64(it.tDelta) + return nil +} + +// decodeValue reads the XOR18238 value encoding for the dod≠0 case: +// +// `0` → value unchanged +// `10` → reuse previous leading/trailing window +// `110` → new leading/trailing window +// `111` → stale NaN +func (it *xor18238OPTSTIterator) decodeValue() error { + var bit bit + if it.br.valid > 0 { + it.br.valid-- + bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0 + } else { + var err error + bit, err = it.br.readBit() + if err != nil { + return err + } + } + + if bit == zero { + // `0` → value unchanged. + it.val = it.baselineV + return nil + } + + if it.br.valid > 0 { + it.br.valid-- + bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0 + } else { + var err error + bit, err = it.br.readBit() + if err != nil { + return err + } + } + + if bit == zero { + // `10` → reuse previous leading/trailing window. 
+ sz := uint8(64 - int(it.leading) - int(it.trailing)) + var valueBits uint64 + if it.br.valid >= sz { + it.br.valid -= sz + valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1) + } else { + var err error + valueBits, err = it.br.readBits(sz) + if err != nil { + return err + } + } + vbits := math.Float64bits(it.baselineV) + vbits ^= valueBits << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + return nil + } + + if it.br.valid > 0 { + it.br.valid-- + bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0 + } else { + var err error + bit, err = it.br.readBit() + if err != nil { + return err + } + } + + if bit == zero { + // `110` → new leading/trailing window. + return it.decodeNewLeadingTrailing() + } + + // `111` → stale NaN. + it.val = math.Float64frombits(value.StaleNaN) + return nil +} + +// decodeValueKnownNonZero reads the XOR18238 value encoding for the dod=0, +// value-changed case: +// +// `0` → reuse previous leading/trailing window +// `1` → new leading/trailing window +func (it *xor18238OPTSTIterator) decodeValueKnownNonZero() error { + var bit bit + if it.br.valid > 0 { + it.br.valid-- + bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0 + } else { + var err error + bit, err = it.br.readBit() + if err != nil { + return err + } + } + + if bit == zero { + // `0` → reuse previous leading/trailing window. + sz := uint8(64 - int(it.leading) - int(it.trailing)) + var valueBits uint64 + if it.br.valid >= sz { + it.br.valid -= sz + valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1) + } else { + var err error + valueBits, err = it.br.readBits(sz) + if err != nil { + return err + } + } + vbits := math.Float64bits(it.baselineV) + vbits ^= valueBits << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + return nil + } + + // `1` → new leading/trailing window. 
+ return it.decodeNewLeadingTrailing() +} + +// decodeNewLeadingTrailing reads a new leading/sigbits/value triple and +// updates it.leading, it.trailing, it.val, and it.baselineV. +func (it *xor18238OPTSTIterator) decodeNewLeadingTrailing() error { + var newLeading uint64 + if it.br.valid >= 5 { + it.br.valid -= 5 + newLeading = (it.br.buffer >> it.br.valid) & 0x1f + } else { + var err error + newLeading, err = it.br.readBits(5) + if err != nil { + return err + } + } + + var sigbits uint64 + if it.br.valid >= 6 { + it.br.valid -= 6 + sigbits = (it.br.buffer >> it.br.valid) & 0x3f + } else { + var err error + sigbits, err = it.br.readBits(6) + if err != nil { + return err + } + } + + it.leading = uint8(newLeading) + if sigbits == 0 { + sigbits = 64 + } + it.trailing = 64 - it.leading - uint8(sigbits) + + n := uint8(sigbits) + var valueBits uint64 + if it.br.valid >= n { + it.br.valid -= n + valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << n) - 1) + } else { + var err error + valueBits, err = it.br.readBits(n) + if err != nil { + return err + } + } + + vbits := math.Float64bits(it.baselineV) + vbits ^= valueBits << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + return nil +} diff --git a/tsdb/chunkenc/xor18238optst2.go b/tsdb/chunkenc/xor18238optst2.go new file mode 100644 index 0000000000..387c7ab82f --- /dev/null +++ b/tsdb/chunkenc/xor18238optst2.go @@ -0,0 +1,799 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// This file implements the XOR18238OPTST2 chunk encoding: XOR18238 with +// a combined ST encoding that captures the strengths of both XOR18238OPTST +// and the prefix-table approach. +// +// The ST header byte (at b[chunkHeaderSize]) uses the same layout as XOROptST: +// +// bit 7 (0x80): firstSTKnown — ST for the first sample is present in the stream +// bits 6-0: firstSTChangeOn — sample index where the first ST change begins +// +// When no ST is provided (st == 0 always), the header stays 0x00 and the +// chunk is byte-for-byte identical to XOR18238. +// +// Starting from the second sample, ST changes are encoded with a 1-bit prefix +// followed by XOR18238OPTST's varbit dod encoding for the non-zero case: +// +// 0 — d(ST) = 0: ST unchanged (efficient for constant-ST series) +// 1 — dod(prevT - st): the delta-of-delta of (prevT - st) +// +// This combines the two strengths: +// - The "0" prefix handles constant or mostly-zero ST cheaply (1 bit/sample). +// - The varbit dod(prevT-st) encoding handles delta-offset metrics efficiently, +// since (prevT - st) is nearly constant when ST tracks T at a fixed lag. + +package chunkenc + +import ( + "encoding/binary" + "math" + "math/bits" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/value" +) + +// XOR18238OPTST2Chunk holds XOR18238 encoded samples with optional start +// timestamp per chunk or per sample. See XOROptST for the ST header format. +type XOR18238OPTST2Chunk struct { + b bstream +} + +// NewXOR18238OPTST2Chunk returns a new chunk with XOR18238OPTST2 encoding. 
func NewXOR18238OPTST2Chunk() *XOR18238OPTST2Chunk {
	// The stream starts out holding only the chunk header and the ST header
	// (both zeroed); capacity is pre-allocated for the sample data.
	b := make([]byte, chunkHeaderSize+chunkSTHeaderSize, chunkAllocationSize)
	return &XOR18238OPTST2Chunk{b: bstream{stream: b, count: 0}}
}

// Reset replaces the chunk's underlying stream by delegating to
// bstream.Reset.
func (c *XOR18238OPTST2Chunk) Reset(stream []byte) {
	c.b.Reset(stream)
}

// Encoding returns the encoding type.
func (*XOR18238OPTST2Chunk) Encoding() Encoding {
	return EncXOR18238OPTST2
}

// Bytes returns the underlying byte slice of the chunk.
func (c *XOR18238OPTST2Chunk) Bytes() []byte {
	return c.b.bytes()
}

// NumSamples returns the number of samples in the chunk, read as a
// big-endian uint16 from the first two bytes of the stream.
func (c *XOR18238OPTST2Chunk) NumSamples() int {
	return int(binary.BigEndian.Uint16(c.Bytes()))
}

// Compact implements the Chunk interface. It copies the stream into an
// exactly-sized buffer when the spare capacity exceeds
// chunkCompactCapacityThreshold.
func (c *XOR18238OPTST2Chunk) Compact() {
	if l := len(c.b.stream); cap(c.b.stream) > l+chunkCompactCapacityThreshold {
		buf := make([]byte, l)
		copy(buf, c.b.stream)
		c.b.stream = buf
	}
}

// Appender implements the Chunk interface.
//
// For a chunk that holds only the two headers a fresh appender is returned.
// Otherwise the whole chunk is decoded once so that the appender can be
// seeded with the state the iterator ends up in; a decoding error is
// returned to the caller.
func (c *XOR18238OPTST2Chunk) Appender() (Appender, error) {
	if len(c.b.stream) == chunkHeaderSize+chunkSTHeaderSize {
		return &xor18238OPTST2Appender{
			b: &c.b,
			t: math.MinInt64,
			// 0xff marks "no leading/trailing window written yet"; the value
			// writers check for it before reusing a window.
			leading: 0xff,
		}, nil
	}
	it := c.iterator(nil)

	// Drain the iterator; only its final state is of interest.
	for it.Next() != ValNone {
	}
	if err := it.Err(); err != nil {
		return nil, err
	}

	// Set the bit position for continuing writes. The iterator's reader tracks
	// how many bits remain unread in the last byte.
	c.b.count = it.br.valid

	// NOTE(review): unlike the fresh-chunk path, leading comes from the
	// iterator, whose Reset sets it to 0 rather than 0xff. If no value window
	// was ever written, the next changed value reuses a 64-bit window instead
	// of emitting a new one. The decoder mirrors this, so it looks like a
	// space cost only — confirm this is intended.
	a := &xor18238OPTST2Appender{
		b:               &c.b,
		st:              it.st,
		t:               it.t,
		v:               it.baselineV,
		tDelta:          it.tDelta,
		stDiff:          it.stDiff,
		leading:         it.leading,
		trailing:        it.trailing,
		numTotal:        binary.BigEndian.Uint16(c.b.bytes()),
		firstSTKnown:    it.firstSTKnown,
		firstSTChangeOn: uint16(it.firstSTChangeOn),
	}
	return a, nil
}

// iterator returns a typed iterator over the chunk. If it is already an
// *xor18238OPTST2Iterator it is reset and reused; otherwise a new one is
// allocated.
func (c *XOR18238OPTST2Chunk) iterator(it Iterator) *xor18238OPTST2Iterator {
	if iter, ok := it.(*xor18238OPTST2Iterator); ok {
		iter.Reset(c.b.bytes())
		return iter
	}
	iter := &xor18238OPTST2Iterator{}
	iter.Reset(c.b.bytes())
	return iter
}

// Iterator implements the Chunk interface.
func (c *XOR18238OPTST2Chunk) Iterator(it Iterator) Iterator {
	return c.iterator(it)
}

// xor18238OPTST2Appender appends samples with optional start timestamps.
// ST encoding after the first change uses a 1-bit prefix: 0 = unchanged,
// 1 = varbit dod(prevT-st) (same quantity as XOR18238OPTST).
type xor18238OPTST2Appender struct {
	b *bstream

	st     int64   // Start timestamp of the last appended sample.
	t      int64   // Timestamp of the last appended sample.
	v      float64 // Last appended non-stale value; XOR baseline for the next one.
	tDelta uint64  // t minus the previous t, for the last appended sample.
	stDiff int64   // prevT - st for the previous sample (same as XOR18238OPTST).

	// Current XOR window. leading == 0xff means no window has been written.
	leading  uint8
	trailing uint8

	numTotal        uint16 // Samples appended so far; mirrored into the chunk header.
	firstSTChangeOn uint16 // Sample index at which per-sample ST data starts; 0 = not yet.
	firstSTKnown    bool   // True when sample 0 carried a non-zero ST.
}

// Append encodes the sample (st, t, v) at the end of the chunk and writes the
// incremented sample count into the first two bytes of the stream.
//
// Layout by sample index:
//   - 0:   varint t, raw 64-bit value, and varint (t-st) when st != 0.
//   - 1:   uvarint tDelta, value delta, and — only when st differs from the
//     first sample's st — a `1` bit followed by varbit (prevT-st).
//   - >=2: joint timestamp+value control sequence (see encodeJoint), followed
//     by the 1-bit-prefixed ST data once firstSTChangeOn has been set.
func (a *xor18238OPTST2Appender) Append(st, t int64, v float64) {
	var (
		tDelta uint64
		stDiff int64
	)

	switch a.numTotal {
	case 0:
		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutVarint(buf, t)] {
			a.b.writeByte(b)
		}
		a.b.writeBits(math.Float64bits(v), 64)

		if st != 0 {
			// ST is stored as the distance from t and flagged in the ST header.
			for _, b := range buf[:binary.PutVarint(buf, t-st)] {
				a.b.writeByte(b)
			}
			a.firstSTKnown = true
			writeHeaderFirstSTKnown(a.b.bytes()[chunkHeaderSize:])
		}

	case 1:
		tDelta = uint64(t - a.t)

		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutUvarint(buf, tDelta)] {
			a.b.writeByte(b)
		}

		a.writeVDelta(v)

		if st != a.st {
			// First ST change happens right on sample 1: record that in the
			// header and write the absolute (prevT - st).
			stDiff = a.t - st
			a.firstSTChangeOn = 1
			writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], 1)
			a.b.writeBit(one)
			putVarbitInt(a.b, stDiff)
		}

	default:
		tDelta = uint64(t - a.t)
		dod := int64(tDelta - a.tDelta)

		// Fast path: no ST involvement at all.
		// numTotal == maxFirstSTChangeOn is excluded so the forced header
		// write in the slow path still happens at that index.
		if st == 0 && a.numTotal != maxFirstSTChangeOn && a.firstSTChangeOn == 0 && !a.firstSTKnown {
			a.encodeJoint(dod, v)
			a.t = t
			if !value.IsStaleNaN(v) {
				a.v = v
			}
			a.tDelta = tDelta
			a.numTotal++
			binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal)
			return
		}

		// Slow path: ST may be involved.
		a.encodeJoint(dod, v)

		if a.firstSTChangeOn == 0 {
			// No per-sample ST data has been written so far. Start writing it
			// when ST changes, or unconditionally at maxFirstSTChangeOn
			// (presumably because the header field cannot hold a larger
			// index — confirm against writeHeaderFirstSTChangeOn).
			if st != a.st || a.numTotal == maxFirstSTChangeOn {
				stDiff = a.t - st
				a.firstSTChangeOn = a.numTotal
				writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], a.numTotal)
				if st == a.st {
					// Forced by maxFirstSTChangeOn; ST has not changed.
					a.b.writeBit(zero)
				} else {
					// First ST change: write absolute stDiff (no dod yet).
					a.b.writeBit(one)
					putVarbitInt(a.b, stDiff)
				}
			}
		} else {
			stDiff = a.t - st
			if st == a.st {
				// ST unchanged: 1-bit prefix only, advance tracking.
				a.b.writeBit(zero)
			} else {
				// ST changed: 1-bit prefix + varbit dod(prevT-st).
				a.b.writeBit(one)
				putVarbitInt(a.b, stDiff-a.stDiff)
			}
		}
	}

	a.st = st
	a.t = t
	// A stale NaN never becomes the XOR baseline.
	if !value.IsStaleNaN(v) {
		a.v = v
	}
	a.tDelta = tDelta
	a.stDiff = stDiff
	a.numTotal++
	binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal)
}

// encodeJoint writes the XOR18238 joint timestamp+value control sequence for
// samples >= 2.
//
// Prefixes written here:
//
//	`0`     → dod == 0, value unchanged
//	`10`    → dod == 0, value changed (followed by writeVDeltaKnownNonZero)
//	`11111` → dod == 0, stale NaN
//	`110`   → 13-bit dod, then value delta (writeVDelta)
//	`1110`  → 20-bit dod, then value delta (writeVDelta)
//	`11110` → 64-bit dod, then value delta (writeVDelta)
func (a *xor18238OPTST2Appender) encodeJoint(dod int64, v float64) {
	if dod == 0 {
		switch {
		case value.IsStaleNaN(v):
			a.b.writeBits(0b11111, 5)
		case math.Float64bits(v)^math.Float64bits(a.v) == 0:
			a.b.writeBit(zero)
		default:
			a.b.writeBits(0b10, 2)
			a.writeVDeltaKnownNonZero(v)
		}
		return
	}

	// In the writeByte expressions below `&` binds tighter than `|`, so the
	// mask applies to the dod bits only and the prefix bits are kept intact.
	switch {
	case dod >= -(1<<12) && dod <= (1<<12)-1:
		// 13-bit dod: prefix `110` packed with top 5 bits → 2 bytes total.
		a.b.writeByte(0b110_00000 | byte(uint64(dod)>>8)&0x1F)
		a.b.writeByte(byte(uint64(dod)))
	case dod >= -(1<<19) && dod <= (1<<19)-1:
		// 20-bit dod: prefix `1110` packed with top 4 bits → 3 bytes total.
		a.b.writeByte(0b1110_0000 | byte(uint64(dod)>>16)&0x0F)
		a.b.writeByte(byte(uint64(dod) >> 8))
		a.b.writeByte(byte(uint64(dod)))
	default:
		// 64-bit escape (rare): `11110`.
		a.b.writeBits(0b11110, 5)
		a.b.writeBits(uint64(dod), 64)
	}
	a.writeVDelta(v)
}

// writeVDelta encodes the value delta for the dod≠0 case.
func (a *xor18238OPTST2Appender) writeVDelta(v float64) {
	// Prefixes written by this function:
	//   `111` stale NaN, `0` value unchanged,
	//   `10` reuse current window, `110` new window.
	if value.IsStaleNaN(v) {
		a.b.writeBits(0b111, 3)
		return
	}

	delta := math.Float64bits(v) ^ math.Float64bits(a.v)

	if delta == 0 {
		a.b.writeBit(zero)
		return
	}

	newLeading := uint8(bits.LeadingZeros64(delta))
	newTrailing := uint8(bits.TrailingZeros64(delta))

	// The leading-zero count is stored in 5 bits below, so it is clamped.
	if newLeading >= 32 {
		newLeading = 31
	}

	// Reuse the current window when one exists (leading != 0xff) and the new
	// delta fits inside it.
	if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing {
		a.b.writeBits(0b10, 2)
		a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing))
		return
	}

	a.leading, a.trailing = newLeading, newTrailing

	a.b.writeBits(0b110, 3)
	a.b.writeBits(uint64(newLeading), 5)

	// NOTE(review): sigbits can be 64, which does not fit in 6 bits. This
	// presumably relies on writeBits keeping only the low 6 bits so that 64 is
	// stored as 0 — confirm against bstream.writeBits.
	sigbits := 64 - newLeading - newTrailing
	a.b.writeBits(uint64(sigbits), 6)
	a.b.writeBits(delta>>newTrailing, int(sigbits))
}

// writeVDeltaKnownNonZero encodes the value delta when it is known to be
// non-zero and non-stale (dod=0, value-changed case).
//
// Because the "unchanged" and "stale" cases are excluded by the caller, a
// single bit selects the mode: `0` reuses the current window, `1` writes a
// new one.
func (a *xor18238OPTST2Appender) writeVDeltaKnownNonZero(v float64) {
	delta := math.Float64bits(v) ^ math.Float64bits(a.v)

	newLeading := uint8(bits.LeadingZeros64(delta))
	newTrailing := uint8(bits.TrailingZeros64(delta))

	// The leading-zero count is stored in 5 bits below, so it is clamped.
	if newLeading >= 32 {
		newLeading = 31
	}

	if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing {
		a.b.writeBit(zero)
		a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing))
		return
	}

	a.leading, a.trailing = newLeading, newTrailing

	a.b.writeBit(one)
	a.b.writeBits(uint64(newLeading), 5)

	sigbits := 64 - newLeading - newTrailing
	a.b.writeBits(uint64(sigbits), 6)
	a.b.writeBits(delta>>newTrailing, int(sigbits))
}

// AppendHistogram always panics: this appender only accepts float samples.
func (*xor18238OPTST2Appender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a histogram sample to a float chunk")
}

// AppendFloatHistogram always panics: this appender only accepts float
// samples.
func (*xor18238OPTST2Appender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a float histogram sample to a float chunk")
}

// xor18238OPTST2Iterator decodes XOR18238OPTST2 chunks.
type xor18238OPTST2Iterator struct {
	br       bstreamReader
	numTotal uint16 // Sample count read from the chunk header.
	numRead  uint16 // Samples decoded so far.

	// Parsed from the ST header byte by Reset.
	firstSTKnown    bool
	firstSTChangeOn uint8

	// Current XOR window for value decoding.
	leading  uint8
	trailing uint8

	st  int64
	t   int64
	val float64

	tDelta uint64
	stDiff int64 // prevT - st for the previous sample (same as XOR18238OPTST).
	err    error

	baselineV float64 // Last non-stale value for XOR baseline.
}

// Seek advances the iterator until the current sample's timestamp is >= t.
// At least one sample is read if none has been read yet. It returns ValNone
// when the iterator is in an error state or runs out of samples.
func (it *xor18238OPTST2Iterator) Seek(t int64) ValueType {
	if it.err != nil {
		return ValNone
	}

	for t > it.t || it.numRead == 0 {
		if it.Next() == ValNone {
			return ValNone
		}
	}
	return ValFloat
}

// At returns the timestamp and value of the current sample.
func (it *xor18238OPTST2Iterator) At() (int64, float64) {
	return it.t, it.val
}

// AtHistogram always panics: this iterator only yields float samples.
func (*xor18238OPTST2Iterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) {
	panic("cannot call xor18238OPTST2Iterator.AtHistogram")
}

// AtFloatHistogram always panics: this iterator only yields float samples.
func (*xor18238OPTST2Iterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
	panic("cannot call xor18238OPTST2Iterator.AtFloatHistogram")
}

// AtT returns the timestamp of the current sample.
func (it *xor18238OPTST2Iterator) AtT() int64 {
	return it.t
}

// AtST returns the start timestamp of the current sample.
func (it *xor18238OPTST2Iterator) AtST() int64 {
	return it.st
}

// Err returns the first decoding error encountered, if any.
func (it *xor18238OPTST2Iterator) Err() error {
	return it.err
}

// Reset points the iterator at the chunk bytes b and clears all decoding
// state. The bit reader starts right after the chunk header and the ST
// header; the sample count and ST header fields are read from b directly.
func (it *xor18238OPTST2Iterator) Reset(b []byte) {
	it.br = newBReader(b[chunkHeaderSize+chunkSTHeaderSize:])
	it.numTotal = binary.BigEndian.Uint16(b)
	it.firstSTKnown, it.firstSTChangeOn = readSTHeader(b[chunkHeaderSize:])

	it.numRead = 0
	it.st = 0
	it.t = 0
	it.val = 0
	it.leading = 0
	it.trailing = 0
	it.tDelta = 0
	it.stDiff = 0
	it.baselineV = 0
	it.err = nil
}

// Next decodes the next sample and returns ValFloat, or ValNone when all
// samples have been read or an error occurred (then available via Err).
//
// Samples 0 and 1 use dedicated layouts; from sample 2 on, a joint control
// prefix selects how timestamp and value are decoded, after which optional
// ST data follows.
func (it *xor18238OPTST2Iterator) Next() ValueType {
	if it.err != nil || it.numRead == it.numTotal {
		return ValNone
	}

	if it.numRead == 0 {
		t, err := binary.ReadVarint(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		v, err := it.br.readBits(64)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.t = t
		it.val = math.Float64frombits(v)
		// A stale NaN never becomes the XOR baseline.
		if !value.IsStaleNaN(it.val) {
			it.baselineV = it.val
		}

		// Optional ST for sample 0.
		if it.firstSTKnown {
			stDiff, err := binary.ReadVarint(&it.br)
			if err != nil {
				it.err = err
				return ValNone
			}
			it.st = t - stDiff
		}

		it.numRead++
		return ValFloat
	}

	if it.numRead == 1 {
		prevT := it.t // t[0], needed for stDiff computation.
		tDelta, err := binary.ReadUvarint(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.tDelta = tDelta
		it.t += int64(it.tDelta)

		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}

		// Optional ST for sample 1.
		if it.firstSTChangeOn == 1 {
			if err := it.decodeST(prevT); err != nil {
				it.err = err
				return ValNone
			}
		}

		it.numRead++
		return ValFloat
	}

	// Sample N >= 2: read joint XOR18238 control, then optional ST data.
	prevT := it.t // save before the switch updates it.t.
	savedNumRead := it.numRead

	ctrl, err := it.br.readXOR18238Control()
	if err != nil {
		it.err = err
		return ValNone
	}

	switch ctrl {
	case 0:
		// dod=0, value unchanged.
		it.t += int64(it.tDelta)
		it.val = it.baselineV
	case 1:
		// dod=0, value changed.
		it.t += int64(it.tDelta)
		if err := it.decodeValueKnownNonZero(); err != nil {
			it.err = err
			return ValNone
		}
	case 2:
		// 13-bit dod.
		if err := it.readDod(13); err != nil {
			it.err = err
			return ValNone
		}
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
	case 3:
		// 20-bit dod.
		if err := it.readDod(20); err != nil {
			it.err = err
			return ValNone
		}
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
	case 4:
		// 64-bit escape.
		if err := it.readDod(64); err != nil {
			it.err = err
			return ValNone
		}
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
	default:
		// dod=0, stale NaN.
		it.t += int64(it.tDelta)
		it.val = math.Float64frombits(value.StaleNaN)
	}

	// Optional ST data, appended after the joint timestamp+value encoding.
	// It is present for every sample from index firstSTChangeOn onwards.
	if it.firstSTChangeOn > 0 && savedNumRead >= uint16(it.firstSTChangeOn) {
		if err := it.decodeST(prevT); err != nil {
			it.err = err
			return ValNone
		}
	}

	it.numRead++
	return ValFloat
}

// decodeST decodes the combined ST encoding and updates it.st and it.stDiff.
// prevT is the previous sample's timestamp (t[N-1] when decoding sample N).
//
// Format: 0 = ST unchanged; 1 + varbit-int = dod(prevT-st).
func (it *xor18238OPTST2Iterator) decodeST(prevT int64) error {
	bit, err := it.br.readBit()
	if err != nil {
		return err
	}
	if bit == zero {
		// ST unchanged; advance stDiff tracking so the next dod is correct.
		it.stDiff = prevT - it.st
		return nil
	}

	sdod, err := readVarbitInt(&it.br)
	if err != nil {
		return err
	}
	// it.numRead has not been incremented yet, so it still holds the index of
	// the sample being decoded.
	if it.numRead == uint16(it.firstSTChangeOn) {
		// First write: sdod is the absolute stDiff value, not a delta.
		it.stDiff = sdod
	} else {
		it.stDiff += sdod
	}
	it.st = prevT - it.stDiff
	return nil
}

// readDod reads a signed dod of width w bits and updates it.tDelta and it.t.
func (it *xor18238OPTST2Iterator) readDod(w uint8) error {
	var b uint64
	// Fast path: take the bits straight from the reader's buffer when it
	// already holds at least w valid bits; otherwise fall back to readBits.
	// For w == 64 the shift yields 0 in Go, so the mask becomes all ones.
	if it.br.valid >= w {
		it.br.valid -= w
		b = (it.br.buffer >> it.br.valid) & ((uint64(1) << w) - 1)
	} else {
		var err error
		b, err = it.br.readBits(w)
		if err != nil {
			return err
		}
	}

	// Sign-extend: if the top bit of the w-bit field is set, the value is
	// negative in two's complement. Not needed for a full 64-bit field.
	if w < 64 && b >= (1<<(w-1)) {
		b -= 1 << w
	}

	it.tDelta = uint64(int64(it.tDelta) + int64(b))
	it.t += int64(it.tDelta)
	return nil
}

// decodeValue reads the XOR18238 value encoding for the dod≠0 case:
//
//	`0`   → value unchanged
//	`10`  → reuse previous leading/trailing window
//	`110` → new leading/trailing window
//	`111` → stale NaN
//
// Each prefix bit is read with an inlined fast path on the reader's buffer,
// falling back to readBit when the buffer is empty.
func (it *xor18238OPTST2Iterator) decodeValue() error {
	var bit bit
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}

	if bit == zero {
		// `0` → value unchanged.
		it.val = it.baselineV
		return nil
	}

	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}

	if bit == zero {
		// `10` → reuse previous leading/trailing window.
		sz := uint8(64 - int(it.leading) - int(it.trailing))
		var valueBits uint64
		if it.br.valid >= sz {
			it.br.valid -= sz
			valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
		} else {
			var err error
			valueBits, err = it.br.readBits(sz)
			if err != nil {
				return err
			}
		}
		// XOR the significant bits back onto the last non-stale value.
		vbits := math.Float64bits(it.baselineV)
		vbits ^= valueBits << it.trailing
		it.val = math.Float64frombits(vbits)
		it.baselineV = it.val
		return nil
	}

	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}

	if bit == zero {
		// `110` → new leading/trailing window.
		return it.decodeNewLeadingTrailing()
	}

	// `111` → stale NaN. baselineV is left untouched so later XOR deltas
	// still apply to the last non-stale value.
	it.val = math.Float64frombits(value.StaleNaN)
	return nil
}

// decodeValueKnownNonZero reads the XOR18238 value encoding for the dod=0,
// value-changed case:
//
//	`0` → reuse previous leading/trailing window
//	`1` → new leading/trailing window
func (it *xor18238OPTST2Iterator) decodeValueKnownNonZero() error {
	var bit bit
	// Inlined fast path for reading one bit from the reader's buffer.
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}

	if bit == zero {
		// `0` → reuse previous leading/trailing window.
		sz := uint8(64 - int(it.leading) - int(it.trailing))
		var valueBits uint64
		if it.br.valid >= sz {
			it.br.valid -= sz
			valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
		} else {
			var err error
			valueBits, err = it.br.readBits(sz)
			if err != nil {
				return err
			}
		}
		// XOR the significant bits back onto the last non-stale value.
		vbits := math.Float64bits(it.baselineV)
		vbits ^= valueBits << it.trailing
		it.val = math.Float64frombits(vbits)
		it.baselineV = it.val
		return nil
	}

	// `1` → new leading/trailing window.
	return it.decodeNewLeadingTrailing()
}

// decodeNewLeadingTrailing reads a new leading/sigbits/value triple and
// updates it.leading, it.trailing, it.val, and it.baselineV.
+func (it *xor18238OPTST2Iterator) decodeNewLeadingTrailing() error { + var newLeading uint64 + if it.br.valid >= 5 { + it.br.valid -= 5 + newLeading = (it.br.buffer >> it.br.valid) & 0x1f + } else { + var err error + newLeading, err = it.br.readBits(5) + if err != nil { + return err + } + } + + var sigbits uint64 + if it.br.valid >= 6 { + it.br.valid -= 6 + sigbits = (it.br.buffer >> it.br.valid) & 0x3f + } else { + var err error + sigbits, err = it.br.readBits(6) + if err != nil { + return err + } + } + + it.leading = uint8(newLeading) + if sigbits == 0 { + sigbits = 64 + } + it.trailing = 64 - it.leading - uint8(sigbits) + + n := uint8(sigbits) + var valueBits uint64 + if it.br.valid >= n { + it.br.valid -= n + valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << n) - 1) + } else { + var err error + valueBits, err = it.br.readBits(n) + if err != nil { + return err + } + } + + vbits := math.Float64bits(it.baselineV) + vbits ^= valueBits << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + return nil +} diff --git a/tsdb/chunkenc/xor18238optst2_test.go b/tsdb/chunkenc/xor18238optst2_test.go new file mode 100644 index 0000000000..5be1a3c067 --- /dev/null +++ b/tsdb/chunkenc/xor18238optst2_test.go @@ -0,0 +1,81 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package chunkenc + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestXOR18238OPTST2Chunk(t *testing.T) { + testChunkSTHandling(t, ValFloat, func() Chunk { + return NewXOR18238OPTST2Chunk() + }) +} + +func TestXOR18238OPTST2Chunk_MoreThan127Samples(t *testing.T) { + const afterMax = maxFirstSTChangeOn + 3 + t.Run("zero ST", func(t *testing.T) { + chunk := NewXOR18238OPTST2Chunk() + app, err := chunk.Appender() + require.NoError(t, err) + for i := range afterMax { + app.Append(0, int64(i*10+1), float64(i)*1.5) + } + + it := chunk.Iterator(nil) + for i := range afterMax { + require.Equal(t, ValFloat, it.Next()) + st := it.AtST() + ts, v := it.At() + require.Equal(t, int64(0), st) + require.Equal(t, int64(i*10+1), ts) + require.Equal(t, float64(i)*1.5, v) + } + + require.Equal(t, ValNone, it.Next()) + require.NoError(t, it.Err()) + }) + + t.Run("non-zero ST after 127", func(t *testing.T) { + chunk := NewXOR18238OPTST2Chunk() + app, err := chunk.Appender() + require.NoError(t, err) + for i := range afterMax { + st := int64(0) + if i == afterMax-1 { + st = int64((afterMax - 1) * 10) + } + app.Append(st, int64(i*10+1), float64(i)*1.5) + } + + it := chunk.Iterator(nil) + for i := range afterMax { + require.Equal(t, ValFloat, it.Next()) + st := it.AtST() + ts, v := it.At() + if i == afterMax-1 { + require.Equal(t, int64((afterMax-1)*10), st) + } else { + require.Equal(t, int64(0), st) + } + require.Equal(t, int64(i*10+1), ts) + require.Equal(t, float64(i)*1.5, v) + } + + require.Equal(t, ValNone, it.Next()) + require.NoError(t, it.Err()) + }) +} diff --git a/tsdb/chunkenc/xor18238optst3.go b/tsdb/chunkenc/xor18238optst3.go new file mode 100644 index 0000000000..2f77d9e25a --- /dev/null +++ b/tsdb/chunkenc/xor18238optst3.go @@ -0,0 +1,761 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file implements the XOR18238OPTST3 chunk encoding: XOR18238 with +// optional start timestamp encoding using dod(st) directly, independent of t. +// +// The existing 1-byte ST header (at b[chunkHeaderSize]) is reused with the +// same layout as XOROptST: +// +// bit 7 (0x80): firstSTKnown — ST for the first sample is present in the stream +// bits 6-0: firstSTChangeOn — sample index where the first ST change begins +// +// When no ST is provided (st == 0 always), the header stays 0x00 and the +// chunk is byte-for-byte identical to XOR18238, ensuring there is no overhead +// for series that carry no start timestamp. +// +// When ST is present, the delta stDelta = st - prevST is appended after each +// sample's joint timestamp+value encoding using putVarbitInt. The first write +// encodes the absolute stDelta; subsequent writes encode dod(stDelta). This +// approach is independent of the sample timestamp t. + +package chunkenc + +import ( + "encoding/binary" + "math" + "math/bits" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/value" +) + +// XOR18238OPTST3Chunk holds XOR18238 encoded samples with optional start +// timestamp per chunk or per sample. See XOROptST for the ST header format. +type XOR18238OPTST3Chunk struct { + b bstream +} + +// NewXOR18238OPTST3Chunk returns a new chunk with XOR18238OPTST3 encoding. 
+func NewXOR18238OPTST3Chunk() *XOR18238OPTST3Chunk { + b := make([]byte, chunkHeaderSize+chunkSTHeaderSize, chunkAllocationSize) + return &XOR18238OPTST3Chunk{b: bstream{stream: b, count: 0}} +} + +func (c *XOR18238OPTST3Chunk) Reset(stream []byte) { + c.b.Reset(stream) +} + +// Encoding returns the encoding type. +func (*XOR18238OPTST3Chunk) Encoding() Encoding { + return EncXOR18238OPTST3 +} + +// Bytes returns the underlying byte slice of the chunk. +func (c *XOR18238OPTST3Chunk) Bytes() []byte { + return c.b.bytes() +} + +// NumSamples returns the number of samples in the chunk, read from the big-endian uint16 at the start of the chunk bytes. +func (c *XOR18238OPTST3Chunk) NumSamples() int { + return int(binary.BigEndian.Uint16(c.Bytes())) +} + +// Compact implements the Chunk interface: the stream is copied to an exact-length buffer when spare capacity exceeds chunkCompactCapacityThreshold. +func (c *XOR18238OPTST3Chunk) Compact() { + if l := len(c.b.stream); cap(c.b.stream) > l+chunkCompactCapacityThreshold { + buf := make([]byte, l) + copy(buf, c.b.stream) + c.b.stream = buf + } +} + +// Appender implements the Chunk interface. A header-only chunk gets a fresh appender; otherwise all samples are replayed through an iterator to recover the encoder state. +func (c *XOR18238OPTST3Chunk) Appender() (Appender, error) { + if len(c.b.stream) == chunkHeaderSize+chunkSTHeaderSize { + return &xor18238OPTST3Appender{ + b: &c.b, + t: math.MinInt64, + leading: 0xff, + }, nil + } + it := c.iterator(nil) + + for it.Next() != ValNone { + } + if err := it.Err(); err != nil { + return nil, err + } + + // Set the bit position for continuing writes. The iterator's reader tracks + // how many bits remain unread in the last byte.
+ c.b.count = it.br.valid + + a := &xor18238OPTST3Appender{ + b: &c.b, + st: it.st, + t: it.t, + v: it.baselineV, + tDelta: it.tDelta, + stDelta: it.stDelta, + leading: it.leading, + trailing: it.trailing, + numTotal: binary.BigEndian.Uint16(c.b.bytes()), + firstSTKnown: it.firstSTKnown, + firstSTChangeOn: uint16(it.firstSTChangeOn), + } + return a, nil +} + +func (c *XOR18238OPTST3Chunk) iterator(it Iterator) *xor18238OPTST3Iterator { + if iter, ok := it.(*xor18238OPTST3Iterator); ok { + iter.Reset(c.b.bytes()) + return iter + } + iter := &xor18238OPTST3Iterator{} + iter.Reset(c.b.bytes()) + return iter +} + +// Iterator implements the Chunk interface, reusing it when it is already a *xor18238OPTST3Iterator. +func (c *XOR18238OPTST3Chunk) Iterator(it Iterator) Iterator { + return c.iterator(it) +} + +// xor18238OPTST3Appender appends samples with optional start timestamps using +// the XOR18238 joint control bit encoding for regular timestamp and value, +// and putVarbitInt for dod(st - prevST). +type xor18238OPTST3Appender struct { + b *bstream + + st int64 + t int64 + v float64 + tDelta uint64 + stDelta int64 // st - prevST for the previous sample.
+ + leading uint8 + trailing uint8 + + numTotal uint16 + firstSTChangeOn uint16 + firstSTKnown bool +} + +func (a *xor18238OPTST3Appender) Append(st, t int64, v float64) { + var ( + tDelta uint64 + stDelta int64 + ) + + switch a.numTotal { + case 0: + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutVarint(buf, t)] { + a.b.writeByte(b) + } + a.b.writeBits(math.Float64bits(v), 64) + + if st != 0 { + for _, b := range buf[:binary.PutVarint(buf, t-st)] { + a.b.writeByte(b) + } + a.firstSTKnown = true + writeHeaderFirstSTKnown(a.b.bytes()[chunkHeaderSize:]) + } + + case 1: + tDelta = uint64(t - a.t) + + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutUvarint(buf, tDelta)] { + a.b.writeByte(b) + } + + a.writeVDelta(v) + + if st != a.st { + stDelta = st - a.st + a.firstSTChangeOn = 1 + writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], 1) + putVarbitInt(a.b, stDelta) + } + + default: + tDelta = uint64(t - a.t) + dod := int64(tDelta - a.tDelta) + + // Fast path: st is zero, no ST has been written so far, and the forced ST write at maxFirstSTChangeOn is not due. + if st == 0 && a.numTotal != maxFirstSTChangeOn && a.firstSTChangeOn == 0 && !a.firstSTKnown { + a.encodeJoint(dod, v) + a.t = t + if !value.IsStaleNaN(v) { + a.v = v + } + a.tDelta = tDelta + a.numTotal++ + binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal) + return + } + + // Slow path: ST may be involved; ST data always follows the joint encoding. + a.encodeJoint(dod, v) + + if a.firstSTChangeOn == 0 { + if st != a.st || a.numTotal == maxFirstSTChangeOn { + // First ST write (a real change, or forced at sample maxFirstSTChangeOn): record st - prevST.
+ stDelta = st - a.st + a.firstSTChangeOn = a.numTotal + writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], a.numTotal) + putVarbitInt(a.b, stDelta) + } + } else { + stDelta = st - a.st + putVarbitInt(a.b, stDelta-a.stDelta) + } + } + + a.st = st + a.t = t + if !value.IsStaleNaN(v) { + a.v = v + } + a.tDelta = tDelta + a.stDelta = stDelta + a.numTotal++ + binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal) +} + +// encodeJoint writes the XOR18238 joint timestamp+value control sequence for +// samples >= 2. +func (a *xor18238OPTST3Appender) encodeJoint(dod int64, v float64) { + if dod == 0 { + switch { + case value.IsStaleNaN(v): + a.b.writeBits(0b11111, 5) + case math.Float64bits(v)^math.Float64bits(a.v) == 0: + a.b.writeBit(zero) + default: + a.b.writeBits(0b10, 2) + a.writeVDeltaKnownNonZero(v) + } + return + } + + switch { + case dod >= -(1<<12) && dod <= (1<<12)-1: + // 13-bit dod: prefix `110` packed with top 5 bits → 2 bytes total. + a.b.writeByte(0b110_00000 | byte(uint64(dod)>>8)&0x1F) + a.b.writeByte(byte(uint64(dod))) + case dod >= -(1<<19) && dod <= (1<<19)-1: + // 20-bit dod: prefix `1110` packed with top 4 bits → 3 bytes total. + a.b.writeByte(0b1110_0000 | byte(uint64(dod)>>16)&0x0F) + a.b.writeByte(byte(uint64(dod) >> 8)) + a.b.writeByte(byte(uint64(dod))) + default: + // 64-bit escape (rare): `11110`. + a.b.writeBits(0b11110, 5) + a.b.writeBits(uint64(dod), 64) + } + a.writeVDelta(v) +} + +// writeVDelta encodes the value delta for sample 1 and for the dod≠0 case.
+func (a *xor18238OPTST3Appender) writeVDelta(v float64) { + if value.IsStaleNaN(v) { + a.b.writeBits(0b111, 3) + return + } + + delta := math.Float64bits(v) ^ math.Float64bits(a.v) + + if delta == 0 { + a.b.writeBit(zero) + return + } + + newLeading := uint8(bits.LeadingZeros64(delta)) + newTrailing := uint8(bits.TrailingZeros64(delta)) + + if newLeading >= 32 { + newLeading = 31 + } + + if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing { + a.b.writeBits(0b10, 2) + a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing)) + return + } + + a.leading, a.trailing = newLeading, newTrailing + + a.b.writeBits(0b110, 3) + a.b.writeBits(uint64(newLeading), 5) + + sigbits := 64 - newLeading - newTrailing + a.b.writeBits(uint64(sigbits), 6) + a.b.writeBits(delta>>newTrailing, int(sigbits)) +} + +// writeVDeltaKnownNonZero encodes the value delta when it is known to be +// non-zero and non-stale (dod=0, value-changed case). One control bit selects window reuse (0) or a new window (1). +func (a *xor18238OPTST3Appender) writeVDeltaKnownNonZero(v float64) { + delta := math.Float64bits(v) ^ math.Float64bits(a.v) + + newLeading := uint8(bits.LeadingZeros64(delta)) + newTrailing := uint8(bits.TrailingZeros64(delta)) + + if newLeading >= 32 { + newLeading = 31 + } + + if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing { + a.b.writeBit(zero) + a.b.writeBits(delta>>a.trailing, 64-int(a.leading)-int(a.trailing)) + return + } + + a.leading, a.trailing = newLeading, newTrailing + + a.b.writeBit(one) + a.b.writeBits(uint64(newLeading), 5) + + sigbits := 64 - newLeading - newTrailing + a.b.writeBits(uint64(sigbits), 6) + a.b.writeBits(delta>>newTrailing, int(sigbits)) +} + +func (*xor18238OPTST3Appender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) { + panic("appended a histogram sample to a float chunk") +} + +func (*xor18238OPTST3Appender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64,
*histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) { + panic("appended a float histogram sample to a float chunk") +} + +// xor18238OPTST3Iterator decodes XOR18238OPTST3 chunks. +type xor18238OPTST3Iterator struct { + br bstreamReader + numTotal uint16 + numRead uint16 + + firstSTKnown bool + firstSTChangeOn uint8 + + leading uint8 + trailing uint8 + + st int64 + t int64 + val float64 + + tDelta uint64 + stDelta int64 // Last st - prevST, accumulated from the decoded ST dods. + err error + + baselineV float64 // Last non-stale value for XOR baseline. +} + +func (it *xor18238OPTST3Iterator) Seek(t int64) ValueType { + if it.err != nil { + return ValNone + } + + for t > it.t || it.numRead == 0 { + if it.Next() == ValNone { + return ValNone + } + } + return ValFloat +} + +func (it *xor18238OPTST3Iterator) At() (int64, float64) { + return it.t, it.val +} + +func (*xor18238OPTST3Iterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) { + panic("cannot call xor18238OPTST3Iterator.AtHistogram") +} + +func (*xor18238OPTST3Iterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + panic("cannot call xor18238OPTST3Iterator.AtFloatHistogram") +} + +func (it *xor18238OPTST3Iterator) AtT() int64 { + return it.t +} + +func (it *xor18238OPTST3Iterator) AtST() int64 { + return it.st +} + +func (it *xor18238OPTST3Iterator) Err() error { + return it.err +} + +func (it *xor18238OPTST3Iterator) Reset(b []byte) { + it.br = newBReader(b[chunkHeaderSize+chunkSTHeaderSize:]) + it.numTotal = binary.BigEndian.Uint16(b) + it.firstSTKnown, it.firstSTChangeOn = readSTHeader(b[chunkHeaderSize:]) + + it.numRead = 0 + it.st = 0 + it.t = 0 + it.val = 0 + it.leading = 0 + it.trailing = 0 + it.tDelta = 0 + it.stDelta = 0 + it.baselineV = 0 + it.err = nil +} + +func (it *xor18238OPTST3Iterator) Next() ValueType { + if it.err != nil || it.numRead == it.numTotal { + return ValNone + } + + if it.numRead == 0 { + t, err := binary.ReadVarint(&it.br) + if err != nil { +
it.err = err + return ValNone + } + v, err := it.br.readBits(64) + if err != nil { + it.err = err + return ValNone + } + it.t = t + it.val = math.Float64frombits(v) + if !value.IsStaleNaN(it.val) { + it.baselineV = it.val + } + + // Optional ST for sample 0, stored as varint(t - st). + if it.firstSTKnown { + stDiff, err := binary.ReadVarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.st = t - stDiff + } + + it.numRead++ + return ValFloat + } + + if it.numRead == 1 { + tDelta, err := binary.ReadUvarint(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.tDelta = tDelta + it.t += int64(it.tDelta) + + if err := it.decodeValue(); err != nil { + it.err = err + return ValNone + } + + // Optional ST delta for sample 1: absolute stDelta = st[1] - st[0]. + if it.firstSTChangeOn == 1 { + sdod, err := readVarbitInt(&it.br) + if err != nil { + it.err = err + return ValNone + } + it.stDelta = sdod + it.st = it.st + it.stDelta + } + + it.numRead++ + return ValFloat + } + + // Sample N >= 2: read joint XOR18238 control, then optional ST data. + prevST := it.st + savedNumRead := it.numRead + + ctrl, err := it.br.readXOR18238Control() + if err != nil { + it.err = err + return ValNone + } + + switch ctrl { + case 0: + // dod=0, value unchanged. + it.t += int64(it.tDelta) + it.val = it.baselineV + case 1: + // dod=0, value changed. + it.t += int64(it.tDelta) + if err := it.decodeValueKnownNonZero(); err != nil { + it.err = err + return ValNone + } + case 2: + // 13-bit dod. + if err := it.readDod(13); err != nil { + it.err = err + return ValNone + } + if err := it.decodeValue(); err != nil { + it.err = err + return ValNone + } + case 3: + // 20-bit dod. + if err := it.readDod(20); err != nil { + it.err = err + return ValNone + } + if err := it.decodeValue(); err != nil { + it.err = err + return ValNone + } + case 4: + // 64-bit escape.
+ if err := it.readDod(64); err != nil { + it.err = err + return ValNone + } + if err := it.decodeValue(); err != nil { + it.err = err + return ValNone + } + default: + // dod=0, stale NaN. + it.t += int64(it.tDelta) + it.val = math.Float64frombits(value.StaleNaN) + } + + // Optional ST data, appended after the joint timestamp+value encoding. + // The first ST write (at index firstSTChangeOn) holds st - prevST itself; later writes hold its dod. Independent of t. + if it.firstSTChangeOn > 0 && savedNumRead >= uint16(it.firstSTChangeOn) { + sdod, err := readVarbitInt(&it.br) + if err != nil { + it.err = err + return ValNone + } + if savedNumRead == uint16(it.firstSTChangeOn) { + it.stDelta = sdod + } else { + it.stDelta += sdod + } + it.st = prevST + it.stDelta + } + + it.numRead++ + return ValFloat +} + +// readDod reads a signed dod of width w bits and updates it.tDelta and it.t. +func (it *xor18238OPTST3Iterator) readDod(w uint8) error { + var b uint64 + if it.br.valid >= w { + it.br.valid -= w + b = (it.br.buffer >> it.br.valid) & ((uint64(1) << w) - 1) + } else { + var err error + b, err = it.br.readBits(w) + if err != nil { + return err + } + } + + if w < 64 && b >= (1<<(w-1)) { + b -= 1 << w + } + + it.tDelta = uint64(int64(it.tDelta) + int64(b)) + it.t += int64(it.tDelta) + return nil +} + +// decodeValue reads the XOR18238 value encoding for sample 1 and for the dod≠0 case: +// +// `0` → value unchanged +// `10` → reuse previous leading/trailing window +// `110` → new leading/trailing window +// `111` → stale NaN +func (it *xor18238OPTST3Iterator) decodeValue() error { + var bit bit + if it.br.valid > 0 { + it.br.valid-- + bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0 + } else { + var err error + bit, err = it.br.readBit() + if err != nil { + return err + } + } + + if bit == zero { + // `0` → value unchanged.
+ it.val = it.baselineV + return nil + } + + if it.br.valid > 0 { + it.br.valid-- + bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0 + } else { + var err error + bit, err = it.br.readBit() + if err != nil { + return err + } + } + + if bit == zero { + // `10` → reuse previous leading/trailing window. + sz := uint8(64 - int(it.leading) - int(it.trailing)) + var valueBits uint64 + if it.br.valid >= sz { + it.br.valid -= sz + valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1) + } else { + var err error + valueBits, err = it.br.readBits(sz) + if err != nil { + return err + } + } + vbits := math.Float64bits(it.baselineV) + vbits ^= valueBits << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + return nil + } + + if it.br.valid > 0 { + it.br.valid-- + bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0 + } else { + var err error + bit, err = it.br.readBit() + if err != nil { + return err + } + } + + if bit == zero { + // `110` → new leading/trailing window. + return it.decodeNewLeadingTrailing() + } + + // `111` → stale NaN; baselineV is left untouched. + it.val = math.Float64frombits(value.StaleNaN) + return nil +} + +// decodeValueKnownNonZero reads the XOR18238 value encoding for the dod=0, +// value-changed case: +// +// `0` → reuse previous leading/trailing window +// `1` → new leading/trailing window +func (it *xor18238OPTST3Iterator) decodeValueKnownNonZero() error { + var bit bit + if it.br.valid > 0 { + it.br.valid-- + bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0 + } else { + var err error + bit, err = it.br.readBit() + if err != nil { + return err + } + } + + if bit == zero { + // `0` → reuse previous leading/trailing window.
+ sz := uint8(64 - int(it.leading) - int(it.trailing)) + var valueBits uint64 + if it.br.valid >= sz { + it.br.valid -= sz + valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1) + } else { + var err error + valueBits, err = it.br.readBits(sz) + if err != nil { + return err + } + } + vbits := math.Float64bits(it.baselineV) + vbits ^= valueBits << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + return nil + } + + // `1` → new leading/trailing window. + return it.decodeNewLeadingTrailing() +} + +// decodeNewLeadingTrailing reads a new leading/sigbits/value triple and +// updates it.leading, it.trailing, it.val, and it.baselineV. A stored sigbits of 0 denotes 64 significant bits. +func (it *xor18238OPTST3Iterator) decodeNewLeadingTrailing() error { + var newLeading uint64 + if it.br.valid >= 5 { + it.br.valid -= 5 + newLeading = (it.br.buffer >> it.br.valid) & 0x1f + } else { + var err error + newLeading, err = it.br.readBits(5) + if err != nil { + return err + } + } + + var sigbits uint64 + if it.br.valid >= 6 { + it.br.valid -= 6 + sigbits = (it.br.buffer >> it.br.valid) & 0x3f + } else { + var err error + sigbits, err = it.br.readBits(6) + if err != nil { + return err + } + } + + it.leading = uint8(newLeading) + if sigbits == 0 { + sigbits = 64 + } + it.trailing = 64 - it.leading - uint8(sigbits) + + n := uint8(sigbits) + var valueBits uint64 + if it.br.valid >= n { + it.br.valid -= n + valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << n) - 1) + } else { + var err error + valueBits, err = it.br.readBits(n) + if err != nil { + return err + } + } + + vbits := math.Float64bits(it.baselineV) + vbits ^= valueBits << it.trailing + it.val = math.Float64frombits(vbits) + it.baselineV = it.val + return nil +} diff --git a/tsdb/chunkenc/xor18238optst3_test.go b/tsdb/chunkenc/xor18238optst3_test.go new file mode 100644 index 0000000000..2c922bb29e --- /dev/null +++ b/tsdb/chunkenc/xor18238optst3_test.go @@ -0,0 +1,81 @@ +// Copyright The Prometheus Authors +// Licensed under
the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunkenc + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestXOR18238OPTST3Chunk(t *testing.T) { + testChunkSTHandling(t, ValFloat, func() Chunk { + return NewXOR18238OPTST3Chunk() + }) +} + +func TestXOR18238OPTST3Chunk_MoreThan127Samples(t *testing.T) { + const afterMax = maxFirstSTChangeOn + 3 + t.Run("zero ST", func(t *testing.T) { + chunk := NewXOR18238OPTST3Chunk() + app, err := chunk.Appender() + require.NoError(t, err) + for i := range afterMax { + app.Append(0, int64(i*10+1), float64(i)*1.5) + } + + it := chunk.Iterator(nil) + for i := range afterMax { + require.Equal(t, ValFloat, it.Next()) + st := it.AtST() + ts, v := it.At() + require.Equal(t, int64(0), st) + require.Equal(t, int64(i*10+1), ts) + require.Equal(t, float64(i)*1.5, v) + } + + require.Equal(t, ValNone, it.Next()) + require.NoError(t, it.Err()) + }) + + t.Run("non-zero ST after 127", func(t *testing.T) { + chunk := NewXOR18238OPTST3Chunk() + app, err := chunk.Appender() + require.NoError(t, err) + for i := range afterMax { + st := int64(0) + if i == afterMax-1 { + st = int64((afterMax - 1) * 10) + } + app.Append(st, int64(i*10+1), float64(i)*1.5) + } + + it := chunk.Iterator(nil) + for i := range afterMax { + require.Equal(t, ValFloat, it.Next()) + st := it.AtST() + ts, v := it.At() + if i == afterMax-1 { + require.Equal(t, int64((afterMax-1)*10), st) + } else { + require.Equal(t, int64(0), st) + } + require.Equal(t, 
int64(i*10+1), ts) + require.Equal(t, float64(i)*1.5, v) + } + + require.Equal(t, ValNone, it.Next()) + require.NoError(t, it.Err()) + }) +} diff --git a/tsdb/chunkenc/xor18238optst_test.go b/tsdb/chunkenc/xor18238optst_test.go new file mode 100644 index 0000000000..780ed7858f --- /dev/null +++ b/tsdb/chunkenc/xor18238optst_test.go @@ -0,0 +1,81 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunkenc + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestXOR18238OPTSTChunk(t *testing.T) { + testChunkSTHandling(t, ValFloat, func() Chunk { + return NewXOR18238OPTSTChunk() + }) +} + +func TestXOR18238OPTSTChunk_MoreThan127Samples(t *testing.T) { + const afterMax = maxFirstSTChangeOn + 3 + t.Run("zero ST", func(t *testing.T) { + chunk := NewXOR18238OPTSTChunk() + app, err := chunk.Appender() + require.NoError(t, err) + for i := range afterMax { + app.Append(0, int64(i*10+1), float64(i)*1.5) + } + + it := chunk.Iterator(nil) + for i := range afterMax { + require.Equal(t, ValFloat, it.Next()) + st := it.AtST() + ts, v := it.At() + require.Equal(t, int64(0), st) + require.Equal(t, int64(i*10+1), ts) + require.Equal(t, float64(i)*1.5, v) + } + + require.Equal(t, ValNone, it.Next()) + require.NoError(t, it.Err()) + }) + + t.Run("non-zero ST after 127", func(t *testing.T) { + chunk := NewXOR18238OPTSTChunk() + app, err := chunk.Appender() + require.NoError(t, err) + for i := range afterMax { + 
st := int64(0) + if i == afterMax-1 { + st = int64((afterMax - 1) * 10) + } + app.Append(st, int64(i*10+1), float64(i)*1.5) + } + + it := chunk.Iterator(nil) + for i := range afterMax { + require.Equal(t, ValFloat, it.Next()) + st := it.AtST() + ts, v := it.At() + if i == afterMax-1 { + require.Equal(t, int64((afterMax-1)*10), st) + } else { + require.Equal(t, int64(0), st) + } + require.Equal(t, int64(i*10+1), ts) + require.Equal(t, float64(i)*1.5, v) + } + + require.Equal(t, ValNone, it.Next()) + require.NoError(t, it.Err()) + }) +} diff --git a/tsdb/chunkenc/xor_test.go b/tsdb/chunkenc/xor_test.go index b30c65283d..5a3cabd9ce 100644 --- a/tsdb/chunkenc/xor_test.go +++ b/tsdb/chunkenc/xor_test.go @@ -40,3 +40,17 @@ func BenchmarkXorRead(b *testing.B) { _, _ = ts, v } } + +func TestXor2STChunk(t *testing.T) { + testChunkSTHandling(t, ValFloat, func() Chunk { + return NewXOR2STChunk() + }, + ) +} + +func TestXor2STotelChunk(t *testing.T) { + testChunkSTHandling(t, ValFloat, func() Chunk { + return NewXOR2STotelChunk() + }, + ) +} diff --git a/tsdb/chunkenc/xoroptst.go b/tsdb/chunkenc/xoroptst.go index b138ddbdf4..f5c34eaae3 100644 --- a/tsdb/chunkenc/xoroptst.go +++ b/tsdb/chunkenc/xoroptst.go @@ -274,31 +274,31 @@ func (a *xorOptSTAppender) Append(st, t int64, v float64) { default: tDelta = uint64(t - a.t) dod := int64(tDelta - a.tDelta) - - // Gorilla has a max resolution of seconds, Prometheus milliseconds. - // Thus we use higher value range steps with larger bit size. - // - // TODO(beorn7): This seems to needlessly jump to large bit - // sizes even for very small deviations from zero. Timestamp - // compression can probably benefit from some smaller bit - // buckets. See also what was done for histogram encoding in - // varbit.go. - switch { - case dod == 0: - a.b.writeBit(zero) - case bitRange(dod, 14): - a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. - a.b.writeByte(uint8(dod)) // Bottom 8 bits of dod. 
- case bitRange(dod, 17): - a.b.writeBits(0b110, 3) - a.b.writeBits(uint64(dod), 17) - case bitRange(dod, 20): - a.b.writeBits(0b1110, 4) - a.b.writeBits(uint64(dod), 20) - default: - a.b.writeBits(0b1111, 4) - a.b.writeBits(uint64(dod), 64) - } + putVarbitInt(a.b, dod) + // // Gorilla has a max resolution of seconds, Prometheus milliseconds. + // // Thus we use higher value range steps with larger bit size. + // // + // // TODO(beorn7): This seems to needlessly jump to large bit + // // sizes even for very small deviations from zero. Timestamp + // // compression can probably benefit from some smaller bit + // // buckets. See also what was done for histogram encoding in + // // varbit.go. + // switch { + // case dod == 0: + // a.b.writeBit(zero) + // case bitRange(dod, 14): + // a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + // a.b.writeByte(uint8(dod)) // Bottom 8 bits of dod. + // case bitRange(dod, 17): + // a.b.writeBits(0b110, 3) + // a.b.writeBits(uint64(dod), 17) + // case bitRange(dod, 20): + // a.b.writeBits(0b1110, 4) + // a.b.writeBits(uint64(dod), 20) + // default: + // a.b.writeBits(0b1111, 4) + // a.b.writeBits(uint64(dod), 64) + // } a.writeVDelta(v) } @@ -356,59 +356,61 @@ func (a *xorOptSTAppender) Append(st, t int64, v float64) { // a.b.writeByte(b) // } sdod := stDiff - // Gorilla has a max resolution of seconds, Prometheus milliseconds. - // Thus we use higher value range steps with larger bit size. - // - // TODO(beorn7): This seems to needlessly jump to large bit - // sizes even for very small deviations from zero. Timestamp - // compression can probably benefit from some smaller bit - // buckets. See also what was done for histogram encoding in - // varbit.go. - switch { - case sdod == 0: - a.b.writeBit(zero) - case bitRange(sdod, 14): - a.b.writeByte(0b10<<6 | (uint8(sdod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. 
- a.b.writeByte(uint8(sdod)) // Bottom 8 bits of dod. - case bitRange(sdod, 17): - a.b.writeBits(0b110, 3) - a.b.writeBits(uint64(sdod), 17) - case bitRange(sdod, 20): - a.b.writeBits(0b1110, 4) - a.b.writeBits(uint64(sdod), 20) - default: - a.b.writeBits(0b1111, 4) - a.b.writeBits(uint64(sdod), 64) - } + putVarbitInt(a.b, sdod) + // // Gorilla has a max resolution of seconds, Prometheus milliseconds. + // // Thus we use higher value range steps with larger bit size. + // // + // // TODO(beorn7): This seems to needlessly jump to large bit + // // sizes even for very small deviations from zero. Timestamp + // // compression can probably benefit from some smaller bit + // // buckets. See also what was done for histogram encoding in + // // varbit.go. + // switch { + // case sdod == 0: + // a.b.writeBit(zero) + // case bitRange(sdod, 14): + // a.b.writeByte(0b10<<6 | (uint8(sdod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + // a.b.writeByte(uint8(sdod)) // Bottom 8 bits of dod. + // case bitRange(sdod, 17): + // a.b.writeBits(0b110, 3) + // a.b.writeBits(uint64(sdod), 17) + // case bitRange(sdod, 20): + // a.b.writeBits(0b1110, 4) + // a.b.writeBits(uint64(sdod), 20) + // default: + // a.b.writeBits(0b1111, 4) + // a.b.writeBits(uint64(sdod), 64) + // } default: tDelta = uint64(t - a.t) dod := int64(tDelta - a.tDelta) + putVarbitInt(a.b, dod) - // Gorilla has a max resolution of seconds, Prometheus milliseconds. - // Thus we use higher value range steps with larger bit size. - // - // TODO(beorn7): This seems to needlessly jump to large bit - // sizes even for very small deviations from zero. Timestamp - // compression can probably benefit from some smaller bit - // buckets. See also what was done for histogram encoding in - // varbit.go. - switch { - case dod == 0: - a.b.writeBit(zero) - case bitRange(dod, 14): - a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. 
- a.b.writeByte(uint8(dod)) // Bottom 8 bits of dod. - case bitRange(dod, 17): - a.b.writeBits(0b110, 3) - a.b.writeBits(uint64(dod), 17) - case bitRange(dod, 20): - a.b.writeBits(0b1110, 4) - a.b.writeBits(uint64(dod), 20) - default: - a.b.writeBits(0b1111, 4) - a.b.writeBits(uint64(dod), 64) - } + // // Gorilla has a max resolution of seconds, Prometheus milliseconds. + // // Thus we use higher value range steps with larger bit size. + // // + // // TODO(beorn7): This seems to needlessly jump to large bit + // // sizes even for very small deviations from zero. Timestamp + // // compression can probably benefit from some smaller bit + // // buckets. See also what was done for histogram encoding in + // // varbit.go. + // switch { + // case dod == 0: + // a.b.writeBit(zero) + // case bitRange(dod, 14): + // a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + // a.b.writeByte(uint8(dod)) // Bottom 8 bits of dod. + // case bitRange(dod, 17): + // a.b.writeBits(0b110, 3) + // a.b.writeBits(uint64(dod), 17) + // case bitRange(dod, 20): + // a.b.writeBits(0b1110, 4) + // a.b.writeBits(uint64(dod), 20) + // default: + // a.b.writeBits(0b1111, 4) + // a.b.writeBits(uint64(dod), 64) + // } a.writeVDelta(v) @@ -418,58 +420,60 @@ func (a *xorOptSTAppender) Append(st, t int64, v float64) { a.firstSTChangeOn = a.numTotal writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], a.numTotal) sdod := stDiff - // Gorilla has a max resolution of seconds, Prometheus milliseconds. - // Thus we use higher value range steps with larger bit size. - // - // TODO(beorn7): This seems to needlessly jump to large bit - // sizes even for very small deviations from zero. Timestamp - // compression can probably benefit from some smaller bit - // buckets. See also what was done for histogram encoding in - // varbit.go. 
- switch { - case sdod == 0: - a.b.writeBit(zero) - case bitRange(sdod, 14): - a.b.writeByte(0b10<<6 | (uint8(sdod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. - a.b.writeByte(uint8(sdod)) // Bottom 8 bits of dod. - case bitRange(sdod, 17): - a.b.writeBits(0b110, 3) - a.b.writeBits(uint64(sdod), 17) - case bitRange(sdod, 20): - a.b.writeBits(0b1110, 4) - a.b.writeBits(uint64(sdod), 20) - default: - a.b.writeBits(0b1111, 4) - a.b.writeBits(uint64(sdod), 64) - } + putVarbitInt(a.b, sdod) + // // Gorilla has a max resolution of seconds, Prometheus milliseconds. + // // Thus we use higher value range steps with larger bit size. + // // + // // TODO(beorn7): This seems to needlessly jump to large bit + // // sizes even for very small deviations from zero. Timestamp + // // compression can probably benefit from some smaller bit + // // buckets. See also what was done for histogram encoding in + // // varbit.go. + // switch { + // case sdod == 0: + // a.b.writeBit(zero) + // case bitRange(sdod, 14): + // a.b.writeByte(0b10<<6 | (uint8(sdod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + // a.b.writeByte(uint8(sdod)) // Bottom 8 bits of dod. + // case bitRange(sdod, 17): + // a.b.writeBits(0b110, 3) + // a.b.writeBits(uint64(sdod), 17) + // case bitRange(sdod, 20): + // a.b.writeBits(0b1110, 4) + // a.b.writeBits(uint64(sdod), 20) + // default: + // a.b.writeBits(0b1111, 4) + // a.b.writeBits(uint64(sdod), 64) + // } } } else { stDiff = a.t - st sdod := stDiff - a.stDiff - // Gorilla has a max resolution of seconds, Prometheus milliseconds. - // Thus we use higher value range steps with larger bit size. - // - // TODO(beorn7): This seems to needlessly jump to large bit - // sizes even for very small deviations from zero. Timestamp - // compression can probably benefit from some smaller bit - // buckets. See also what was done for histogram encoding in - // varbit.go. 
- switch { - case sdod == 0: - a.b.writeBit(zero) - case bitRange(sdod, 14): - a.b.writeByte(0b10<<6 | (uint8(sdod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. - a.b.writeByte(uint8(sdod)) // Bottom 8 bits of dod. - case bitRange(sdod, 17): - a.b.writeBits(0b110, 3) - a.b.writeBits(uint64(sdod), 17) - case bitRange(sdod, 20): - a.b.writeBits(0b1110, 4) - a.b.writeBits(uint64(sdod), 20) - default: - a.b.writeBits(0b1111, 4) - a.b.writeBits(uint64(sdod), 64) - } + putVarbitInt(a.b, sdod) + // // Gorilla has a max resolution of seconds, Prometheus milliseconds. + // // Thus we use higher value range steps with larger bit size. + // // + // // TODO(beorn7): This seems to needlessly jump to large bit + // // sizes even for very small deviations from zero. Timestamp + // // compression can probably benefit from some smaller bit + // // buckets. See also what was done for histogram encoding in + // // varbit.go. + // switch { + // case sdod == 0: + // a.b.writeBit(zero) + // case bitRange(sdod, 14): + // a.b.writeByte(0b10<<6 | (uint8(sdod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + // a.b.writeByte(uint8(sdod)) // Bottom 8 bits of dod. + // case bitRange(sdod, 17): + // a.b.writeBits(0b110, 3) + // a.b.writeBits(uint64(sdod), 17) + // case bitRange(sdod, 20): + // a.b.writeBits(0b1110, 4) + // a.b.writeBits(uint64(sdod), 20) + // default: + // a.b.writeBits(0b1111, 4) + // a.b.writeBits(uint64(sdod), 64) + // } } } @@ -532,64 +536,62 @@ func (it *xorOptSTtIterator) Next() ValueType { // Optional ST delta read. 
if it.firstSTChangeOn == 1 { - // stDiff, err := binary.ReadVarint(&it.br) - // if err != nil { - // return it.retErr(err) + // var d byte + // // read delta-of-delta + // for range 4 { + // d <<= 1 + // bit, err := it.br.readBitFast() + // if err != nil { + // bit, err = it.br.readBit() + // if err != nil { + // return it.retErr(err) + // } + // } + // if bit == zero { + // break + // } + // d |= 1 // } - // it.stDiff = stDiff - // it.st = it.t - stDiff - var d byte - // read delta-of-delta - for range 4 { - d <<= 1 - bit, err := it.br.readBitFast() - if err != nil { - bit, err = it.br.readBit() - if err != nil { - return it.retErr(err) - } - } - if bit == zero { - break - } - d |= 1 - } - var sz uint8 - var sdod int64 - switch d { - case 0b0: - // dod == 0 - case 0b10: - sz = 14 - case 0b110: - sz = 17 - case 0b1110: - sz = 20 - case 0b1111: - // Do not use fast because it's very unlikely it will succeed. - bits, err := it.br.readBits(64) - if err != nil { - return it.retErr(err) - } + // var sz uint8 + // var sdod int64 + // switch d { + // case 0b0: + // // dod == 0 + // case 0b10: + // sz = 14 + // case 0b110: + // sz = 17 + // case 0b1110: + // sz = 20 + // case 0b1111: + // // Do not use fast because it's very unlikely it will succeed. + // bits, err := it.br.readBits(64) + // if err != nil { + // return it.retErr(err) + // } - sdod = int64(bits) - } + // sdod = int64(bits) + // } - if sz != 0 { - bits, err := it.br.readBitsFast(sz) - if err != nil { - bits, err = it.br.readBits(sz) - if err != nil { - return it.retErr(err) - } - } + // if sz != 0 { + // bits, err := it.br.readBitsFast(sz) + // if err != nil { + // bits, err = it.br.readBits(sz) + // if err != nil { + // return it.retErr(err) + // } + // } - // Account for negative numbers, which come back as high unsigned numbers. - // See docs/bstream.md. - if bits > (1 << (sz - 1)) { - bits -= 1 << sz - } - sdod = int64(bits) + // // Account for negative numbers, which come back as high unsigned numbers. 
+ // // See docs/bstream.md. + // if bits > (1 << (sz - 1)) { + // bits -= 1 << sz + // } + // sdod = int64(bits) + // } + sdod, err := readVarbitInt(&it.br) + if err != nil { + return it.retErr(err) } it.stDiff = sdod it.st = it.t - sdod @@ -600,58 +602,62 @@ func (it *xorOptSTtIterator) Next() ValueType { return ValFloat } - var d byte - // read delta-of-delta - for range 4 { - d <<= 1 - bit, err := it.br.readBitFast() - if err != nil { - bit, err = it.br.readBit() - } - if err != nil { - return it.retErr(err) - } - if bit == zero { - break - } - d |= 1 - } - var sz uint8 - var dod int64 - switch d { - case 0b0: - // dod == 0 - case 0b10: - sz = 14 - case 0b110: - sz = 17 - case 0b1110: - sz = 20 - case 0b1111: - // Do not use fast because it's very unlikely it will succeed. - bits, err := it.br.readBits(64) - if err != nil { - return it.retErr(err) - } + // var d byte + // // read delta-of-delta + // for range 4 { + // d <<= 1 + // bit, err := it.br.readBitFast() + // if err != nil { + // bit, err = it.br.readBit() + // } + // if err != nil { + // return it.retErr(err) + // } + // if bit == zero { + // break + // } + // d |= 1 + // } + // var sz uint8 + // var dod int64 + // switch d { + // case 0b0: + // // dod == 0 + // case 0b10: + // sz = 14 + // case 0b110: + // sz = 17 + // case 0b1110: + // sz = 20 + // case 0b1111: + // // Do not use fast because it's very unlikely it will succeed. + // bits, err := it.br.readBits(64) + // if err != nil { + // return it.retErr(err) + // } - dod = int64(bits) - } + // dod = int64(bits) + // } - if sz != 0 { - bits, err := it.br.readBitsFast(sz) - if err != nil { - bits, err = it.br.readBits(sz) - } - if err != nil { - return it.retErr(err) - } + // if sz != 0 { + // bits, err := it.br.readBitsFast(sz) + // if err != nil { + // bits, err = it.br.readBits(sz) + // } + // if err != nil { + // return it.retErr(err) + // } - // Account for negative numbers, which come back as high unsigned numbers. - // See docs/bstream.md. 
- if bits > (1 << (sz - 1)) { - bits -= 1 << sz - } - dod = int64(bits) + // // Account for negative numbers, which come back as high unsigned numbers. + // // See docs/bstream.md. + // if bits > (1 << (sz - 1)) { + // bits -= 1 << sz + // } + // dod = int64(bits) + // } + dod, err := readVarbitInt(&it.br) + if err != nil { + return it.retErr(err) } it.tDelta = uint64(int64(it.tDelta) + dod) @@ -661,58 +667,62 @@ func (it *xorOptSTtIterator) Next() ValueType { } if it.firstSTChangeOn > 0 && it.numRead >= uint16(it.firstSTChangeOn) { - var d byte - // read delta-of-delta - for range 4 { - d <<= 1 - bit, err := it.br.readBitFast() - if err != nil { - bit, err = it.br.readBit() - if err != nil { - return it.retErr(err) - } - } - if bit == zero { - break - } - d |= 1 - } - var sz uint8 - var sdod int64 - switch d { - case 0b0: - // dod == 0 - case 0b10: - sz = 14 - case 0b110: - sz = 17 - case 0b1110: - sz = 20 - case 0b1111: - // Do not use fast because it's very unlikely it will succeed. - bits, err := it.br.readBits(64) - if err != nil { - return it.retErr(err) - } + // var d byte + // // read delta-of-delta + // for range 4 { + // d <<= 1 + // bit, err := it.br.readBitFast() + // if err != nil { + // bit, err = it.br.readBit() + // if err != nil { + // return it.retErr(err) + // } + // } + // if bit == zero { + // break + // } + // d |= 1 + // } + // var sz uint8 + // var sdod int64 + // switch d { + // case 0b0: + // // dod == 0 + // case 0b10: + // sz = 14 + // case 0b110: + // sz = 17 + // case 0b1110: + // sz = 20 + // case 0b1111: + // // Do not use fast because it's very unlikely it will succeed. 
+ // bits, err := it.br.readBits(64) + // if err != nil { + // return it.retErr(err) + // } - sdod = int64(bits) - } + // sdod = int64(bits) + // } - if sz != 0 { - bits, err := it.br.readBitsFast(sz) - if err != nil { - bits, err = it.br.readBits(sz) - if err != nil { - return it.retErr(err) - } - } + // if sz != 0 { + // bits, err := it.br.readBitsFast(sz) + // if err != nil { + // bits, err = it.br.readBits(sz) + // if err != nil { + // return it.retErr(err) + // } + // } - // Account for negative numbers, which come back as high unsigned numbers. - // See docs/bstream.md. - if bits > (1 << (sz - 1)) { - bits -= 1 << sz - } - sdod = int64(bits) + // // Account for negative numbers, which come back as high unsigned numbers. + // // See docs/bstream.md. + // if bits > (1 << (sz - 1)) { + // bits -= 1 << sz + // } + // sdod = int64(bits) + // } + sdod, err := readVarbitInt(&it.br) + if err != nil { + return it.retErr(err) } if it.numRead == uint16(it.firstSTChangeOn) { it.stDiff = sdod diff --git a/tsdb/chunkenc/xoroptst_otel.go b/tsdb/chunkenc/xoroptst_otel.go new file mode 100644 index 0000000000..8e482d3803 --- /dev/null +++ b/tsdb/chunkenc/xoroptst_otel.go @@ -0,0 +1,759 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunkenc + +import ( + "encoding/binary" + "math" + + "github.com/prometheus/prometheus/model/histogram" +) + +// stHeader is a 16 bit header for start time (ST) in chunks. 
+// It can store a maximum of 0x7FF (2047) samples before the first ST change. +type stHeader uint16 + +func (h stHeader) stEqualsT() bool { + return (h & 0x8000) != 0 +} + +func (h *stHeader) setSTEqualsT() { + *h |= 0x8000 +} + +func (h stHeader) firstSTKnown() bool { + return (h & 0x1000) != 0 +} + +func (h *stHeader) setFirstSTKnown() { + *h |= 0x1000 +} + +func (h stHeader) firstSTDiffKnown() bool { + return (h & 0x800) != 0 +} + +func (h *stHeader) setFirstSTDiffKnown() { + *h |= 0x800 +} + +func (h stHeader) firstSTChangeOn() uint16 { + return uint16(h) & 0x7FF +} + +func (h *stHeader) setFirstSTChangeOn(pos uint16) { + *h |= stHeader(pos & 0x7FF) +} + +// nsHeader is a 16 bit header for number of samples (NS) in chunks. +// Maximum number of samples is 0x7FF (2047). +type nsHeader uint16 + +func readNSHeader(b []byte) nsHeader { + return nsHeader(b[1]) | nsHeader(b[0]&0x07)<<8 +} + +func readHeaders(b []byte) (stHeader, nsHeader) { + v := b[0] + numSamples := nsHeader(v&0x07)<<8 | nsHeader(b[1]) + stHeader := (stHeader(v>>3))<<8 | stHeader(b[2]) + return stHeader, numSamples +} + +func writeHeaders(b []byte, stHeader stHeader, nsHeader nsHeader) { + b[0] = (uint8(stHeader>>8) << 3) | (uint8(nsHeader>>8) & 0x07) + b[1] = uint8(nsHeader) + b[2] = uint8(stHeader) +} + +// XorOptSTotelChunk holds XOR encoded samples with optional start time (ST) +// per chunk or per sample. See tsdb/docs/format/chunks.md for details. +type XorOptSTotelChunk struct { + b bstream +} + +// NewXOROptSTotelChunk returns a new chunk with EncXOROptOtelST encoding. +func NewXOROptSTotelChunk() *XorOptSTotelChunk { + b := make([]byte, chunkHeaderSize+chunkSTHeaderSize, chunkAllocationSize) + return &XorOptSTotelChunk{b: bstream{stream: b, count: 0}} +} + +func (c *XorOptSTotelChunk) Reset(stream []byte) { + c.b.Reset(stream) +} + +// Encoding returns the encoding type.
+func (*XorOptSTotelChunk) Encoding() Encoding { + return EncXOROptOtelST +} + +// Bytes returns the underlying byte slice of the chunk. +func (c *XorOptSTotelChunk) Bytes() []byte { + return c.b.bytes() +} + +// NumSamples returns the number of samples in the chunk. +func (c *XorOptSTotelChunk) NumSamples() int { + return int(readNSHeader(c.b.bytes())) +} + +// Compact implements the Chunk interface. +func (c *XorOptSTotelChunk) Compact() { + if l := len(c.b.stream); cap(c.b.stream) > l+chunkCompactCapacityThreshold { + buf := make([]byte, l) + copy(buf, c.b.stream) + c.b.stream = buf + } +} + +// Appender implements the Chunk interface. +// It is not valid to call Appender() multiple times concurrently or to use multiple +// Appenders on the same chunk. +func (c *XorOptSTotelChunk) Appender() (Appender, error) { + if len(c.b.stream) == chunkHeaderSize+chunkSTHeaderSize { // Avoid allocating an Iterator when chunk is empty. + return &xorOptSTotelAppender{b: &c.b, t: math.MinInt64, leading: 0xff}, nil + } + it := c.iterator(nil) + + // To get an appender we must know the state it would have if we had + // appended all existing data from scratch. + // We iterate through the end and populate via the iterator's state. + for it.Next() != ValNone { + } + if err := it.Err(); err != nil { + return nil, err + } + + // Set the bit position for continuing writes. + // The iterator's reader tracks how many bits remain unread in the last byte. + c.b.count = it.br.valid + + a := &xorOptSTotelAppender{ + b: &c.b, + st: it.st, + t: it.t, + v: it.val, + stDiff: it.stDiff, + tDelta: it.tDelta, + leading: it.leading, + trailing: it.trailing, + + numTotal: it.numTotal, + stHeader: it.stHeader, + } + return a, nil +} + +func (c *XorOptSTotelChunk) iterator(it Iterator) *xorOptSTotelIterator { + xorIter, ok := it.(*xorOptSTotelIterator) + if !ok { + xorIter = &xorOptSTotelIterator{} + } + + xorIter.Reset(c.b.bytes()) + return xorIter +} + +// Iterator implements the Chunk interface. 
+// Iterator() must not be called concurrently with any modifications to the chunk, +// but after it returns you can use an Iterator concurrently with an Appender or +// other Iterators. +func (c *XorOptSTotelChunk) Iterator(it Iterator) Iterator { + return c.iterator(it) +} + +type xorOptSTotelAppender struct { + b *bstream + + st, t int64 + v float64 + + stDiff int64 // Difference between current ST and previous T. Undefined for first sample. + tDelta uint64 // Difference between current T and previous T. Undefined for first sample. + numTotal nsHeader + stHeader stHeader + leading uint8 + trailing uint8 +} + +func (a *xorOptSTotelAppender) writeVDelta(v float64) { + xorWrite(a.b, v, a.v, &a.leading, &a.trailing) +} + +func (*xorOptSTotelAppender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) { + panic("appended a histogram sample to a float chunk") +} + +func (*xorOptSTotelAppender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) { + panic("appended a float histogram sample to a float chunk") +} + +type xorOptSTotelIterator struct { + br bstreamReader + numTotal nsHeader + + stHeader stHeader + leading uint8 + trailing uint8 + + numRead uint16 + + st, t int64 + val float64 + + stDiff int64 + tDelta uint64 + err error +} + +func (it *xorOptSTotelIterator) Seek(t int64) ValueType { + if it.err != nil { + return ValNone + } + + for t > it.t || it.numRead == 0 { + if it.Next() == ValNone { + return ValNone + } + } + return ValFloat +} + +func (it *xorOptSTotelIterator) At() (int64, float64) { + return it.t, it.val +} + +func (*xorOptSTotelIterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) { + panic("cannot call xorIterator.AtHistogram") +} + +func (*xorOptSTotelIterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + panic("cannot call xorIterator.AtFloatHistogram") +} + 
+func (it *xorOptSTotelIterator) AtT() int64 { + return it.t +} + +func (it *xorOptSTotelIterator) AtST() int64 { + return it.st +} + +func (it *xorOptSTotelIterator) Err() error { + return it.err +} + +func (it *xorOptSTotelIterator) Reset(b []byte) { + // We skip initial headers for actual samples. + it.br = newBReader(b[chunkHeaderSize+chunkSTHeaderSize:]) + it.stHeader, it.numTotal = readHeaders(b) + it.numRead = 0 + it.st = 0 + it.t = 0 + it.val = 0 + it.leading = 0 + it.trailing = 0 + it.stDiff = 0 + it.tDelta = 0 + it.err = nil +} + +func (a *xorOptSTotelAppender) Append(st, t int64, v float64) { + if st == 0 && a.stHeader == 0 { + // Fast path for no ST usage at all. + // Same as classic XOR chunk appender. + + var tDelta uint64 + + switch a.numTotal { + case 0: + buf := make([]byte, binary.MaxVarintLen64) + for _, b := range buf[:binary.PutVarint(buf, t)] { + a.b.writeByte(b) + } + a.b.writeBits(math.Float64bits(v), 64) + case 1: + buf := make([]byte, binary.MaxVarintLen64) + tDelta = uint64(t - a.t) + for _, b := range buf[:binary.PutUvarint(buf, tDelta)] { + a.b.writeByte(b) + } + a.writeVDelta(v) + default: + tDelta = uint64(t - a.t) + dod := int64(tDelta - a.tDelta) + + // // Gorilla has a max resolution of seconds, Prometheus milliseconds. + // // Thus we use higher value range steps with larger bit size. + // // + // // TODO(beorn7): This seems to needlessly jump to large bit + // // sizes even for very small deviations from zero. Timestamp + // // compression can probably benefit from some smaller bit + // // buckets. See also what was done for histogram encoding in + // // varbit.go. + // switch { + // case dod == 0: + // a.b.writeBit(zero) + // case bitRange(dod, 14): + // a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + // a.b.writeByte(uint8(dod)) // Bottom 8 bits of dod. 
+ // case bitRange(dod, 17): + // a.b.writeBits(0b110, 3) + // a.b.writeBits(uint64(dod), 17) + // case bitRange(dod, 20): + // a.b.writeBits(0b1110, 4) + // a.b.writeBits(uint64(dod), 20) + // default: + // a.b.writeBits(0b1111, 4) + // a.b.writeBits(uint64(dod), 64) + // } + putVarbitInt(a.b, dod) + + a.writeVDelta(v) + } + + a.t = t + a.v = v + a.tDelta = tDelta + a.numTotal++ + binary.BigEndian.PutUint16(a.b.bytes(), uint16(a.numTotal)) + return + } + + var ( + stDiff int64 // Difference between current ST and previous T. Undefined for first sample. + tDelta uint64 // Difference between current T and previous T. Undefined for first sample. + ) + + // Slow path for ST usage. + switch a.numTotal { + case 0: + buf := make([]byte, binary.MaxVarintLen64) + + // Write T. + for _, b := range buf[:binary.PutVarint(buf, t)] { + a.b.writeByte(b) + } + + // Write V. + a.b.writeBits(math.Float64bits(v), 64) + + // Write ST. + for _, b := range buf[:binary.PutVarint(buf, t-st)] { + a.b.writeByte(b) + } + a.stHeader.setFirstSTKnown() + if st == t { + a.stHeader.setSTEqualsT() + } + + case 1: + buf := make([]byte, binary.MaxVarintLen64) + tDelta = uint64(t - a.t) + for _, b := range buf[:binary.PutUvarint(buf, tDelta)] { + a.b.writeByte(b) + } + a.writeVDelta(v) + if st != 0 && st != a.st && a.stHeader.firstSTKnown() { + a.stHeader.setFirstSTDiffKnown() + } + if (st == 0 && a.stHeader.firstSTKnown()) || (st != t && a.stHeader.stEqualsT()) || (st != 0 && !a.stHeader.firstSTKnown()) { + a.stHeader.setFirstSTChangeOn(1) + } + if !a.stHeader.firstSTDiffKnown() && a.stHeader.firstSTChangeOn() == 0 { + break + } + // Initialize double delta of st - prev_t. + stDiff = st - a.t + sdod := stDiff + // // Gorilla has a max resolution of seconds, Prometheus milliseconds. + // // Thus we use higher value range steps with larger bit size. + // // + // // TODO(beorn7): This seems to needlessly jump to large bit + // // sizes even for very small deviations from zero. 
Timestamp + // // compression can probably benefit from some smaller bit + // // buckets. See also what was done for histogram encoding in + // // varbit.go. + // switch { + // case sdod == 0: + // a.b.writeBit(zero) + // case bitRange(sdod, 14): + // a.b.writeByte(0b10<<6 | (uint8(sdod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + // a.b.writeByte(uint8(sdod)) // Bottom 8 bits of dod. + // case bitRange(sdod, 17): + // a.b.writeBits(0b110, 3) + // a.b.writeBits(uint64(sdod), 17) + // case bitRange(sdod, 20): + // a.b.writeBits(0b1110, 4) + // a.b.writeBits(uint64(sdod), 20) + // default: + // a.b.writeBits(0b1111, 4) + // a.b.writeBits(uint64(sdod), 64) + // } + putVarbitInt(a.b, sdod) + + default: + tDelta = uint64(t - a.t) + dod := int64(tDelta - a.tDelta) + + // // Gorilla has a max resolution of seconds, Prometheus milliseconds. + // // Thus we use higher value range steps with larger bit size. + // // + // // TODO(beorn7): This seems to needlessly jump to large bit + // // sizes even for very small deviations from zero. Timestamp + // // compression can probably benefit from some smaller bit + // // buckets. See also what was done for histogram encoding in + // // varbit.go. + // switch { + // case dod == 0: + // a.b.writeBit(zero) + // case bitRange(dod, 14): + // a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + // a.b.writeByte(uint8(dod)) // Bottom 8 bits of dod. 
+ // case bitRange(dod, 17): + // a.b.writeBits(0b110, 3) + // a.b.writeBits(uint64(dod), 17) + // case bitRange(dod, 20): + // a.b.writeBits(0b1110, 4) + // a.b.writeBits(uint64(dod), 20) + // default: + // a.b.writeBits(0b1111, 4) + // a.b.writeBits(uint64(dod), 64) + // } + putVarbitInt(a.b, dod) + a.writeVDelta(v) + + stDiff = st - a.t + if a.stHeader.firstSTChangeOn() == 0 { + if a.stHeader.firstSTKnown() { + if stDiff == a.stDiff && a.stHeader.firstSTDiffKnown() || st == t && a.stHeader.stEqualsT() { + // No stDiff change. + break + } + if !a.stHeader.firstSTDiffKnown() { + if st == a.st { + // No st change. + break + } + // This is the first ST diff change, reset the baseline. + a.stDiff = 0 + } + a.stHeader.setFirstSTChangeOn(uint16(a.numTotal)) + } else if st != 0 { + // First ST change. + a.stHeader.setFirstSTChangeOn(uint16(a.numTotal)) + } + } + + sdod := stDiff - a.stDiff + + // // Gorilla has a max resolution of seconds, Prometheus milliseconds. + // // Thus we use higher value range steps with larger bit size. + // // + // // TODO(beorn7): This seems to needlessly jump to large bit + // // sizes even for very small deviations from zero. Timestamp + // // compression can probably benefit from some smaller bit + // // buckets. See also what was done for histogram encoding in + // // varbit.go. + // switch { + // case sdod == 0: + // a.b.writeBit(zero) + // case bitRange(sdod, 14): + // a.b.writeByte(0b10<<6 | (uint8(sdod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + // a.b.writeByte(uint8(sdod)) // Bottom 8 bits of dod. 
+ // case bitRange(sdod, 17): + // a.b.writeBits(0b110, 3) + // a.b.writeBits(uint64(sdod), 17) + // case bitRange(sdod, 20): + // a.b.writeBits(0b1110, 4) + // a.b.writeBits(uint64(sdod), 20) + // default: + // a.b.writeBits(0b1111, 4) + // a.b.writeBits(uint64(sdod), 64) + // } + putVarbitInt(a.b, sdod) + } + + a.st = st + a.t = t + a.v = v + a.tDelta = tDelta + a.stDiff = stDiff + a.numTotal++ + writeHeaders(a.b.bytes(), a.stHeader, a.numTotal) +} + +func (it *xorOptSTotelIterator) retErr(err error) ValueType { + it.err = err + return ValNone +} + +func (it *xorOptSTotelIterator) Next() ValueType { + if it.err != nil || it.numRead == uint16(it.numTotal) { + return ValNone + } + + if it.numRead == 0 { + t, err := binary.ReadVarint(&it.br) + if err != nil { + return it.retErr(err) + } + + v, err := it.br.readBits(64) + if err != nil { + return it.retErr(err) + } + it.t = t + it.val = math.Float64frombits(v) + + // Optional ST read. + if it.stHeader.firstSTKnown() { + st, err := binary.ReadVarint(&it.br) + if err != nil { + return it.retErr(err) + } + it.st = t - st + if st == 0 { + it.stHeader.setSTEqualsT() + } + } + + it.numRead++ + return ValFloat + } + + if it.numRead == 1 { + tDelta, err := binary.ReadUvarint(&it.br) + if err != nil { + return it.retErr(err) + } + it.tDelta = tDelta + + if err := xorRead(&it.br, &it.val, &it.leading, &it.trailing); err != nil { + return it.retErr(err) + } + + if it.stHeader.firstSTDiffKnown() || it.stHeader.firstSTChangeOn() == 1 { + // var d byte + // // read delta-of-delta + // for range 4 { + // d <<= 1 + // bit, err := it.br.readBitFast() + // if err != nil { + // bit, err = it.br.readBit() + // if err != nil { + // return it.retErr(err) + // } + // } + // if bit == zero { + // break + // } + // d |= 1 + // } + // var sz uint8 + // var sdod int64 + // switch d { + // case 0b0: + // // dod == 0 + // case 0b10: + // sz = 14 + // case 0b110: + // sz = 17 + // case 0b1110: + // sz = 20 + // case 0b1111: + // // Do not use 
fast because it's very unlikely it will succeed. + // bits, err := it.br.readBits(64) + // if err != nil { + // return it.retErr(err) + // } + + // sdod = int64(bits) + // } + + // if sz != 0 { + // bits, err := it.br.readBitsFast(sz) + // if err != nil { + // bits, err = it.br.readBits(sz) + // if err != nil { + // return it.retErr(err) + // } + // } + + // // Account for negative numbers, which come back as high unsigned numbers. + // // See docs/bstream.md. + // if bits > (1 << (sz - 1)) { + // bits -= 1 << sz + // } + // sdod = int64(bits) + // } + sdod, err := readVarbitInt(&it.br) + if err != nil { + return it.retErr(err) + } + it.stDiff = sdod + it.st = it.t + sdod + } + + it.t += int64(it.tDelta) + it.numRead++ + return ValFloat + } + + // var d byte + // // read delta-of-delta + // for range 4 { + // d <<= 1 + // bit, err := it.br.readBitFast() + // if err != nil { + // bit, err = it.br.readBit() + // } + // if err != nil { + // return it.retErr(err) + // } + // if bit == zero { + // break + // } + // d |= 1 + // } + // var sz uint8 + // var dod int64 + // switch d { + // case 0b0: + // // dod == 0 + // case 0b10: + // sz = 14 + // case 0b110: + // sz = 17 + // case 0b1110: + // sz = 20 + // case 0b1111: + // // Do not use fast because it's very unlikely it will succeed. + // bits, err := it.br.readBits(64) + // if err != nil { + // return it.retErr(err) + // } + + // dod = int64(bits) + // } + + // if sz != 0 { + // bits, err := it.br.readBitsFast(sz) + // if err != nil { + // bits, err = it.br.readBits(sz) + // } + // if err != nil { + // return it.retErr(err) + // } + + // // Account for negative numbers, which come back as high unsigned numbers. + // // See docs/bstream.md. 
+ // if bits > (1 << (sz - 1)) { + // bits -= 1 << sz + // } + // dod = int64(bits) + // } + dod, err := readVarbitInt(&it.br) + if err != nil { + return it.retErr(err) + } + + it.tDelta = uint64(int64(it.tDelta) + dod) + + if err := xorRead(&it.br, &it.val, &it.leading, &it.trailing); err != nil { + return it.retErr(err) + } + + stChangeOn := it.stHeader.firstSTChangeOn() + if stChangeOn == 0 || it.numRead < stChangeOn { + // No ST change recorded. + if it.stHeader.firstSTKnown() { + if it.stHeader.stEqualsT() { + // ST equals T. + it.st = it.t + int64(it.tDelta) + } else if it.stHeader.firstSTDiffKnown() { + // First ST diff was known and hasn't changed. + it.st = it.t + it.stDiff + } + // Otherwise first ST was known and hasn't changed. + // Do nothing. + } + } else { + // if it.numRead > stChangeOn { + // Double delta of t - st continues. + // var d byte + // // read delta-of-delta + // for range 4 { + // d <<= 1 + // bit, err := it.br.readBitFast() + // if err != nil { + // bit, err = it.br.readBit() + // if err != nil { + // return it.retErr(err) + // } + // } + // if bit == zero { + // break + // } + // d |= 1 + // } + // var sz uint8 + // var sdod int64 + // switch d { + // case 0b0: + // // dod == 0 + // case 0b10: + // sz = 14 + // case 0b110: + // sz = 17 + // case 0b1110: + // sz = 20 + // case 0b1111: + // // Do not use fast because it's very unlikely it will succeed. + // bits, err := it.br.readBits(64) + // if err != nil { + // return it.retErr(err) + // } + + // sdod = int64(bits) + // } + + // if sz != 0 { + // bits, err := it.br.readBitsFast(sz) + // if err != nil { + // bits, err = it.br.readBits(sz) + // if err != nil { + // return it.retErr(err) + // } + // } + + // // Account for negative numbers, which come back as high unsigned numbers. + // // See docs/bstream.md. 
+ // if bits > (1 << (sz - 1)) { + // bits -= 1 << sz + // } + // sdod = int64(bits) + // } + sdod, err := readVarbitInt(&it.br) + if err != nil { + return it.retErr(err) + } + it.stDiff += sdod + it.st = it.t + it.stDiff + } + it.t += int64(it.tDelta) + + it.numRead++ + return ValFloat +} diff --git a/tsdb/chunkenc/xoroptst_otel_test.go b/tsdb/chunkenc/xoroptst_otel_test.go new file mode 100644 index 0000000000..a8cc40763a --- /dev/null +++ b/tsdb/chunkenc/xoroptst_otel_test.go @@ -0,0 +1,24 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package chunkenc + +import ( + "testing" +) + +func TestXorOptSTotelChunk(t *testing.T) { + testChunkSTHandling(t, ValFloat, func() Chunk { + return NewXOROptSTotelChunk() + }) +} diff --git a/tsdb/db_append_v2_test.go b/tsdb/db_append_v2_test.go index a3d74efefd..40b67e9e10 100644 --- a/tsdb/db_append_v2_test.go +++ b/tsdb/db_append_v2_test.go @@ -7553,7 +7553,7 @@ func TestCompactHeadWithSTStorage_AppendV2(t *testing.T) { for _, chk := range chks { c, _, err := chunkr.ChunkOrIterable(chk) require.NoError(t, err) - require.Equal(t, chunkenc.EncXOROptST, c.Encoding(), + require.Equal(t, chunkenc.EncodingForFloatST, c.Encoding(), "unexpected chunk encoding, got %s", c.Encoding()) chunkCount++ } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 7f0af8b41f..6d45fddbbc 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -7695,7 +7695,7 @@ func TestHeadAppender_STStorage_ChunkEncoding(t *testing.T) { encoding := chk.Encoding() if enableST { - require.Equal(t, chunkenc.EncXOROptST, encoding, + require.Equal(t, chunkenc.EncodingForFloatST, encoding, "Expected ST-capable encoding when EnableSTStorage is true") } else { require.Equal(t, chunkenc.EncXOR, encoding, diff --git a/tsdb/ooo_head.go b/tsdb/ooo_head.go index 04f859154f..77488c232a 100644 --- a/tsdb/ooo_head.go +++ b/tsdb/ooo_head.go @@ -123,7 +123,7 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64, storeST bool) (chks []memCh } } switch encoding { - case chunkenc.EncXOR, chunkenc.EncXOROptST: + case chunkenc.EncXOR, chunkenc.EncodingForFloatST: app.Append(s.st, s.t, s.f) case chunkenc.EncHistogram: // TODO(krajorama): handle ST capable histogram chunk. 
diff --git a/tsdb/ooo_head_test.go b/tsdb/ooo_head_test.go index f7e73233fb..b980a5649c 100644 --- a/tsdb/ooo_head_test.go +++ b/tsdb/ooo_head_test.go @@ -406,7 +406,7 @@ func TestOOOChunks_ToEncodedChunks_WithST(t *testing.T) { storeST bool expectedEncoding chunkenc.Encoding }{ - {"storeST=true", true, chunkenc.EncXOROptST}, + {"storeST=true", true, chunkenc.EncodingForFloatST}, {"storeST=false", false, chunkenc.EncXOR}, }