From eeef17ea0ab276cc8a651f344c42d3dd1e75431b Mon Sep 17 00:00:00 2001 From: bwplotka Date: Mon, 9 Dec 2024 11:45:46 +0000 Subject: [PATCH 1/4] docs: Added native histogram WAL record documentation. Signed-off-by: bwplotka --- tsdb/docs/format/wal.md | 136 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 13 deletions(-) diff --git a/tsdb/docs/format/wal.md b/tsdb/docs/format/wal.md index db1ce97a8b..5d93eb6d87 100644 --- a/tsdb/docs/format/wal.md +++ b/tsdb/docs/format/wal.md @@ -1,15 +1,32 @@ # WAL Disk Format +This document describes the official Prometheus WAL format. + The write ahead log operates in segments that are numbered and sequential, -e.g. `000000`, `000001`, `000002`, etc., and are limited to 128MB by default. -A segment is written to in pages of 32KB. Only the last page of the most recent segment +and are limited to 128MB by default. + +## Segment filename + +The sequence number is captured in the segment filename, +e.g. `000000`, `000001`, `000002`, etc. The first unsigned integer represents +the sequence number of the segment, typically encoded with six digits. + +## Segment encoding + +This section describes the segment encoding. + +A segment encodes an array of records. It does not contain any header. A segment +is written to pages of 32KB. Only the last page of the most recent segment may be partial. A WAL record is an opaque byte slice that gets split up into sub-records should it exceed the remaining space of the current page. Records are never split across segment boundaries. If a single record exceeds the default segment size, a segment with a larger size will be created. + The encoding of pages is largely borrowed from [LevelDB's/RocksDB's write ahead log.](https://github.com/facebook/rocksdb/wiki/Write-Ahead-Log-File-Format) -Notable deviations are that the record fragment is encoded as: +### Records encoding + +Each record fragment is encoded as: ``` ┌───────────┬──────────┬────────────┬──────────────┐ @@ -17,7 +34,8 @@ Notable deviations are that the record fragment is encoded as: └───────────┴──────────┴────────────┴──────────────┘ ``` -The initial type byte is made up of three components: a 3-bit reserved field, a 1-bit zstd compression flag, a 1-bit snappy compression flag, and a 3-bit type flag. +The initial type byte is made up of three components: a 3-bit reserved field, +a 1-bit zstd compression flag, a 1-bit snappy compression flag, and a 3-bit type flag. ``` ┌─────────────────┬──────────────────┬────────────────────┬──────────────────┐ @@ -25,7 +43,7 @@ The initial type byte is made up of three components: a 3-bit reserved field, a └─────────────────┴──────────────────┴────────────────────┴──────────────────┘ ``` -The lowest 3 bits within this flag represent the record type as follows: +The lowest 3 bits within the type flag represent the record type as follows: * `0`: rest of page will be empty * `1`: a full record encoded in a single fragment @@ -33,11 +51,16 @@ The lowest 3 bits within this flag represent the record type as follows: * `3`: middle fragment of a record * `4`: final fragment of a record -## Record encoding +After the type byte, 2-byte length and then 4-byte checksum of the following data are encoded. -The records written to the write ahead log are encoded as follows: +All float values are represented using the [IEEE 754 format](https://en.wikipedia.org/wiki/IEEE_754). -### Series records +### Record types + +In the following sections, all the known record types are described. New types, +can be added in the future. + +#### Series records Series records encode the labels that identifies a series and its unique ID. @@ -58,7 +81,7 @@ Series records encode the labels that identifies a series and its unique ID. └────────────────────────────────────────────┘ ``` -### Sample records +#### Sample records Sample records encode samples as a list of triples `(series_id, timestamp, value)`. Series reference and timestamp are encoded as deltas w.r.t the first sample. @@ -79,7 +102,7 @@ The first sample record begins at the second row. └──────────────────────────────────────────────────────────────────┘ ``` -### Tombstone records +#### Tombstone records Tombstone records encode tombstones as a list of triples `(series_id, min_time, max_time)` and specify an interval for which samples of a series got deleted. @@ -95,9 +118,9 @@ and specify an interval for which samples of a series got deleted. └─────────────────────────────────────────────────────┘ ``` -### Exemplar records +#### Exemplar records -Exemplar records encode exemplars as a list of triples `(series_id, timestamp, value)` +Exemplar records encode exemplars as a list of triples `(series_id, timestamp, value)` plus the length of the labels list, and all the labels. The first row stores the starting id and the starting timestamp. Series reference and timestamp are encoded as deltas w.r.t the first exemplar. @@ -127,7 +150,7 @@ See: https://github.com/OpenObservability/OpenMetrics/blob/main/specification/Op └──────────────────────────────────────────────────────────────────┘ ``` -### Metadata records +#### Metadata records Metadata records encode the metadata updates associated with a series. @@ -156,3 +179,90 @@ Metadata records encode the metadata updates associated with a series. └────────────────────────────────────────────┘ ``` +#### Histogram records + +Histogram records encode the integer and float native histogram samples. + +A record with the integer native histograms with the exponential bucketing: + +``` +┌───────────────────────────────────────────────────────────────────────┐ +│ type = 7 <1b> │ +├───────────────────────────────────────────────────────────────────────┤ +│ ┌────────────────────┬───────────────────────────┐ │ +│ │ id <8b> │ timestamp <8b> │ │ +│ └────────────────────┴───────────────────────────┘ │ +│ ┌────────────────────┬──────────────────────────────────────────────┐ │ +│ │ id_delta │ timestamp_delta │ │ +│ ├────────────────────┴────┬─────────────────────────────────────────┤ │ +│ │ counter_reset_hint <1b> │ schema │ │ +│ ├─────────────────────────┴────┬────────────────────────────────────┤ │ +│ │ zero_threshold (float) <8b> │ zero_count │ │ +│ ├─────────────────┬────────────┴────────────────────────────────────┤ │ +│ │ count │ sum (float) <8b> │ │ +│ ├─────────────────┴─────────────────────────────────────────────────┤ │ +│ │ positive_spans_num │ │ +│ ├─────────────────────────────────┬─────────────────────────────────┤ │ +│ │ positive_span_offset_1 │ positive_span_len_1 │ │ +│ ├─────────────────────────────────┴─────────────────────────────────┤ │ +│ │ . . . │ │ +│ ├───────────────────────────────────────────────────────────────────┤ │ +│ │ negative_spans_num │ │ +│ ├───────────────────────────────┬───────────────────────────────────┤ │ +│ │ negative_span_offset │ negative_span_len │ │ +│ ├───────────────────────────────┴───────────────────────────────────┤ │ +│ │ . . . │ │ +│ ├───────────────────────────────────────────────────────────────────┤ │ +│ │ positive_bkts_num │ │ +│ ├─────────────────────────┬───────┬─────────────────────────────────┤ │ +│ │ positive_bkt_1 │ . . . │ positive_bkt_n │ │ +│ ├─────────────────────────┴───────┴─────────────────────────────────┤ │ +│ │ negative_bkts_num │ │ +│ ├─────────────────────────┬───────┬─────────────────────────────────┤ │ +│ │ negative_bkt_1 │ . . . │ negative_bkt_n │ │ +│ └─────────────────────────┴───────┴─────────────────────────────────┘ │ +│ . . . │ +└───────────────────────────────────────────────────────────────────────┘ +``` + +A records with the Float histograms: + +``` +┌───────────────────────────────────────────────────────────────────────┐ +│ type = 8 <1b> │ +├───────────────────────────────────────────────────────────────────────┤ +│ ┌────────────────────┬───────────────────────────┐ │ +│ │ id <8b> │ timestamp <8b> │ │ +│ └────────────────────┴───────────────────────────┘ │ +│ ┌────────────────────┬──────────────────────────────────────────────┐ │ +│ │ id_delta │ timestamp_delta │ │ +│ ├────────────────────┴────┬─────────────────────────────────────────┤ │ +│ │ counter_reset_hint <1b> │ schema │ │ +│ ├─────────────────────────┴────┬────────────────────────────────────┤ │ +│ │ zero_threshold (float) <8b> │ zero_count (float) <8b> │ │ +│ ├────────────────────┬─────────┴────────────────────────────────────┤ │ +│ │ count (float) <8b> │ sum (float) <8b> │ │ +│ ├────────────────────┴──────────────────────────────────────────────┤ │ +│ │ positive_spans_num │ │ +│ ├─────────────────────────────────┬─────────────────────────────────┤ │ +│ │ positive_span_offset_1 │ positive_span_len_1 │ │ +│ ├─────────────────────────────────┴─────────────────────────────────┤ │ +│ │ . . . │ │ +│ ├───────────────────────────────────────────────────────────────────┤ │ +│ │ negative_spans_num │ │ +│ ├───────────────────────────────┬───────────────────────────────────┤ │ +│ │ negative_span_offset │ negative_span_len │ │ +│ ├───────────────────────────────┴───────────────────────────────────┤ │ +│ │ . . . │ │ +│ ├───────────────────────────────────────────────────────────────────┤ │ +│ │ positive_bkts_num │ │ +│ ├─────────────────────────────┬───────┬─────────────────────────────┤ │ +│ │ positive_bkt_1 (float) <8b> │ . . . │ positive_bkt_n (float) <8b> │ │ +│ ├─────────────────────────────┴───────┴─────────────────────────────┤ │ +│ │ negative_bkts_num │ │ +│ ├─────────────────────────────┬───────┬─────────────────────────────┤ │ +│ │ negative_bkt_1 (float) <8b> │ . . . │ negative_bkt_n (float) <8b> │ │ +│ └─────────────────────────────┴───────┴─────────────────────────────┘ │ +│ . . . │ +└───────────────────────────────────────────────────────────────────────┘ +``` From fe4e8170074940cf4249e03fa3cfa59d66c8653d Mon Sep 17 00:00:00 2001 From: avoidaway Date: Mon, 9 Dec 2024 21:07:47 +0800 Subject: [PATCH 2/4] chore: fix some function names in comment Signed-off-by: avoidaway --- storage/remote/queue_manager.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index f6d6cbc7e9..475c126eff 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -1688,7 +1688,7 @@ func (s *shards) updateMetrics(_ context.Context, err error, sampleCount, exempl s.enqueuedHistograms.Sub(int64(histogramCount)) } -// sendSamples to the remote storage with backoff for recoverable errors. +// sendSamplesWithBackoff to the remote storage with backoff for recoverable errors. func (s *shards) sendSamplesWithBackoff(ctx context.Context, samples []prompb.TimeSeries, sampleCount, exemplarCount, histogramCount, metadataCount int, pBuf *proto.Buffer, buf *[]byte, enc Compression) (WriteResponseStats, error) { // Build the WriteRequest with no metadata. req, highest, lowest, err := buildWriteRequest(s.qm.logger, samples, nil, pBuf, buf, nil, enc) @@ -1802,7 +1802,7 @@ func (s *shards) sendSamplesWithBackoff(ctx context.Context, samples []prompb.Ti return accumulatedStats, err } -// sendV2Samples to the remote storage with backoff for recoverable errors. +// sendV2SamplesWithBackoff to the remote storage with backoff for recoverable errors. func (s *shards) sendV2SamplesWithBackoff(ctx context.Context, samples []writev2.TimeSeries, labels []string, sampleCount, exemplarCount, histogramCount, metadataCount int, pBuf, buf *[]byte, enc Compression) (WriteResponseStats, error) { // Build the WriteRequest with no metadata. req, highest, lowest, err := buildV2WriteRequest(s.qm.logger, samples, labels, pBuf, buf, nil, enc) From 0a7729469d90fa2d2fa74b156052d2be5c89480e Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Thu, 5 Dec 2024 16:31:45 +0100 Subject: [PATCH 3/4] RELEASE.md: schedule 3.[1,2,3,4] releases Signed-off-by: Jan Fajerski --- RELEASE.md | 64 ++++++++---------------------------------------------- 1 file changed, 9 insertions(+), 55 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 8e78a6a3ec..b481c89c30 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -5,61 +5,15 @@ This page describes the release process and the currently planned schedule for u ## Release schedule Release cadence of first pre-releases being cut is 6 weeks. +Please see [the v2.55 RELEASE.md](https://github.com/prometheus/prometheus/blob/release-2.55/RELEASE.md) for the v2 release series schedule. -| release series | date of first pre-release (year-month-day) | release shepherd | -|----------------|--------------------------------------------|---------------------------------------------| -| v2.4 | 2018-09-06 | Goutham Veeramachaneni (GitHub: @gouthamve) | -| v2.5 | 2018-10-24 | Frederic Branczyk (GitHub: @brancz) | -| v2.6 | 2018-12-05 | Simon Pasquier (GitHub: @simonpasquier) | -| v2.7 | 2019-01-16 | Goutham Veeramachaneni (GitHub: @gouthamve) | -| v2.8 | 2019-02-27 | Ganesh Vernekar (GitHub: @codesome) | -| v2.9 | 2019-04-10 | Brian Brazil (GitHub: @brian-brazil) | -| v2.10 | 2019-05-22 | Björn Rabenstein (GitHub: @beorn7) | -| v2.11 | 2019-07-03 | Frederic Branczyk (GitHub: @brancz) | -| v2.12 | 2019-08-14 | Julius Volz (GitHub: @juliusv) | -| v2.13 | 2019-09-25 | Krasi Georgiev (GitHub: @krasi-georgiev) | -| v2.14 | 2019-11-06 | Chris Marchbanks (GitHub: @csmarchbanks) | -| v2.15 | 2019-12-18 | Bartek Plotka (GitHub: @bwplotka) | -| v2.16 | 2020-01-29 | Callum Styan (GitHub: @cstyan) | -| v2.17 | 2020-03-11 | Julien Pivotto (GitHub: @roidelapluie) | -| v2.18 | 2020-04-22 | Bartek Plotka (GitHub: @bwplotka) | -| v2.19 | 2020-06-03 | Ganesh Vernekar (GitHub: @codesome) | -| v2.20 | 2020-07-15 | Björn Rabenstein (GitHub: @beorn7) | -| v2.21 | 2020-08-26 | Julien Pivotto (GitHub: @roidelapluie) | -| v2.22 | 2020-10-07 | Frederic Branczyk (GitHub: @brancz) | -| v2.23 | 2020-11-18 | Ganesh Vernekar (GitHub: @codesome) | -| v2.24 | 2020-12-30 | Björn Rabenstein (GitHub: @beorn7) | -| v2.25 | 2021-02-10 | Julien Pivotto (GitHub: @roidelapluie) | -| v2.26 | 2021-03-24 | Bartek Plotka (GitHub: @bwplotka) | -| v2.27 | 2021-05-05 | Chris Marchbanks (GitHub: @csmarchbanks) | -| v2.28 | 2021-06-16 | Julius Volz (GitHub: @juliusv) | -| v2.29 | 2021-07-28 | Frederic Branczyk (GitHub: @brancz) | -| v2.30 | 2021-09-08 | Ganesh Vernekar (GitHub: @codesome) | -| v2.31 | 2021-10-20 | Julien Pivotto (GitHub: @roidelapluie) | -| v2.32 | 2021-12-01 | Julius Volz (GitHub: @juliusv) | -| v2.33 | 2022-01-12 | Björn Rabenstein (GitHub: @beorn7) | -| v2.34 | 2022-02-23 | Chris Marchbanks (GitHub: @csmarchbanks) | -| v2.35 | 2022-04-06 | Augustin Husson (GitHub: @nexucis) | -| v2.36 | 2022-05-18 | Matthias Loibl (GitHub: @metalmatze) | -| v2.37 LTS | 2022-06-29 | Julien Pivotto (GitHub: @roidelapluie) | -| v2.38 | 2022-08-10 | Julius Volz (GitHub: @juliusv) | -| v2.39 | 2022-09-21 | Ganesh Vernekar (GitHub: @codesome) | -| v2.40 | 2022-11-02 | Ganesh Vernekar (GitHub: @codesome) | -| v2.41 | 2022-12-14 | Julien Pivotto (GitHub: @roidelapluie) | -| v2.42 | 2023-01-25 | Kemal Akkoyun (GitHub: @kakkoyun) | -| v2.43 | 2023-03-08 | Julien Pivotto (GitHub: @roidelapluie) | -| v2.44 | 2023-04-19 | Bryan Boreham (GitHub: @bboreham) | -| v2.45 LTS | 2023-05-31 | Jesus Vazquez (Github: @jesusvazquez) | -| v2.46 | 2023-07-12 | Julien Pivotto (GitHub: @roidelapluie) | -| v2.47 | 2023-08-23 | Bryan Boreham (GitHub: @bboreham) | -| v2.48 | 2023-10-04 | Levi Harrison (GitHub: @LeviHarrison) | -| v2.49 | 2023-12-05 | Bartek Plotka (GitHub: @bwplotka) | -| v2.50 | 2024-01-16 | Augustin Husson (GitHub: @nexucis) | -| v2.51 | 2024-03-07 | Bryan Boreham (GitHub: @bboreham) | -| v2.52 | 2024-04-22 | Arthur Silva Sens (GitHub: @ArthurSens) | -| v2.53 LTS | 2024-06-03 | George Krajcsovits (GitHub: @krajorama) | -| v2.54 | 2024-07-17 | Bryan Boreham (GitHub: @bboreham) | -| v2.55 | 2024-09-17 | Bryan Boreham (GitHub: @bboreham) | +| release series | date of first pre-release (year-month-day) | release shepherd | +|----------------|--------------------------------------------|-----------------------------------| +| v3.0 | 2024-11-14 | Jan Fajerski (GitHub: @jan--f) | +| v3.1 | 2024-12-17 | Bryan Boreham (GitHub: @bboreham) | +| v3.2 | 2025-01-28 | Jan Fajerski (GitHub: @jan--f) | +| v3.3 | 2025-03-11 | Ayoub Mrini (Github: @machine424) | +| v3.4 | 2025-04-22 | **volunteer welcome** | If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice. @@ -204,7 +158,7 @@ Then release with `git tag-release`. Signing a tag with a GPG key is appreciated, but in case you can't add a GPG key to your Github account using the following [procedure](https://help.github.com/articles/generating-a-gpg-key/), you can replace the `-s` flag by `-a` flag of the `git tag` command to only annotate the tag without signing. -Once a tag is created, the release process through CircleCI will be triggered for this tag and Circle CI will draft the GitHub release using the `prombot` account. +Once a tag is created, the release process through Github Actions will be triggered for this tag and Github Actions will draft the GitHub release using the `prombot` account. Finally, wait for the build step for the tag to finish. The point here is to wait for tarballs to be uploaded to the Github release and the container images to be pushed to the Docker Hub and Quay.io. Once that has happened, click _Publish release_, which will make the release publicly visible and create a GitHub notification. **Note:** for a release candidate version ensure the _This is a pre-release_ box is checked when drafting the release in the Github UI. The CI job should take care of this but it's a good idea to double check before clicking _Publish release_.` From 437362a7a789a803b970238fce46e2cdc20319e2 Mon Sep 17 00:00:00 2001 From: TJ Hoplock Date: Tue, 10 Dec 2024 02:24:20 -0500 Subject: [PATCH 4/4] fix(deduper): use ptr to sync.RWMutex, fix panic during concurrent use Resolves: #15559 As accurately noted in the issue description, the map is shared among child loggers that get created when `WithAttr()`/`WithGroup()` are called on the underlying handler, which happens via `log.With()` and `log.WithGroup()` respectively. The RW mutex was a value in the previous implementation that used go-kit/log, and I should've updated it to use a pointer when I converted the deduper. Also adds a test. Signed-off-by: TJ Hoplock --- util/logging/dedupe.go | 5 ++++- util/logging/dedupe_test.go | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/util/logging/dedupe.go b/util/logging/dedupe.go index e7dff20f78..8137f4f22b 100644 --- a/util/logging/dedupe.go +++ b/util/logging/dedupe.go @@ -33,7 +33,7 @@ type Deduper struct { next *slog.Logger repeat time.Duration quit chan struct{} - mtx sync.RWMutex + mtx *sync.RWMutex seen map[string]time.Time } @@ -43,6 +43,7 @@ func Dedupe(next *slog.Logger, repeat time.Duration) *Deduper { next: next, repeat: repeat, quit: make(chan struct{}), + mtx: new(sync.RWMutex), seen: map[string]time.Time{}, } go d.run() @@ -88,6 +89,7 @@ func (d *Deduper) WithAttrs(attrs []slog.Attr) slog.Handler { repeat: d.repeat, quit: d.quit, seen: d.seen, + mtx: d.mtx, } } @@ -103,6 +105,7 @@ func (d *Deduper) WithGroup(name string) slog.Handler { repeat: d.repeat, quit: d.quit, seen: d.seen, + mtx: d.mtx, } } diff --git a/util/logging/dedupe_test.go b/util/logging/dedupe_test.go index 5baa90b038..a8774aefd3 100644 --- a/util/logging/dedupe_test.go +++ b/util/logging/dedupe_test.go @@ -56,3 +56,27 @@ func TestDedupe(t *testing.T) { } require.Len(t, lines, 2) } + +func TestDedupeConcurrent(t *testing.T) { + d := Dedupe(promslog.New(&promslog.Config{}), 250*time.Millisecond) + dlog := slog.New(d) + defer d.Stop() + + concurrentWriteFunc := func() { + go func() { + dlog1 := dlog.With("writer", 1) + for i := 0; i < 10; i++ { + dlog1.With("foo", "bar").Info("test", "hello", "world") + } + }() + + go func() { + dlog2 := dlog.With("writer", 2) + for i := 0; i < 10; i++ { + dlog2.With("foo", "bar").Info("test", "hello", "world") + } + }() + } + + require.NotPanics(t, func() { concurrentWriteFunc() }) +}