Merge branch 'main' into fix/consul-filter-health-endpoint

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2026-02-24 11:25:35 +00:00 committed by GitHub
commit d28d33afe0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
813 changed files with 71749 additions and 17103 deletions

View file

@ -7,3 +7,4 @@ data/
!.build/linux-arm64/
!.build/linux-ppc64le/
!.build/linux-s390x/
!.build/linux-riscv64/

1
.gitattributes vendored Normal file
View file

@ -0,0 +1 @@
web/api/v1/testdata/openapi_golden.yaml linguist-generated

10
.github/CODEOWNERS vendored
View file

@ -1,10 +0,0 @@
/web/ui @juliusv
/web/ui/module @juliusv @nexucis
/storage/remote @cstyan @bwplotka @tomwilkie
/storage/remote/otlptranslator @aknuds1 @jesusvazquez
/discovery/kubernetes @brancz
/tsdb @jesusvazquez
/promql @roidelapluie
/cmd/promtool @dgl
/documentation/prometheus-mixin @metalmatze

View file

@ -28,6 +28,7 @@ If no, just write "NONE" in the release-notes block below.
Otherwise, please describe what should be mentioned in the CHANGELOG. Use the following prefixes:
[FEATURE] [ENHANCEMENT] [PERF] [BUGFIX] [SECURITY] [CHANGE]
Refer to the existing CHANGELOG for inspiration: https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md
A concrete example may look as follows (be sure to leave out the surrounding quotes): "[FEATURE] API: Add /api/v1/features for clients to understand which features are supported".
If you need help formulating your entries, consult the reviewer(s).
-->
```release-notes

View file

@ -1,27 +0,0 @@
version: 2
updates:
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "monthly"
- package-ecosystem: "github-actions"
directories:
- "/"
- "/scripts"
schedule:
interval: "monthly"
- package-ecosystem: "gomod"
directories:
- "/"
- "/documentation/examples/remote_storage"
- "/internal/tools"
schedule:
interval: "monthly"
groups:
k8s.io:
patterns:
- "k8s.io/*"
go.opentelemetry.io:
patterns:
- "go.opentelemetry.io/*"
open-pull-requests-limit: 20

View file

@ -19,7 +19,7 @@ jobs:
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@08eff52bf64351f401fb50d4972fa95b9f2c2d1b # v2.4.0
uses: dependabot/fetch-metadata@21025c705c08248db411dc16f3619e6b5f9ea21a # v2.5.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
- name: Enable auto-merge for Dependabot PRs

View file

@ -12,7 +12,7 @@ jobs:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: bufbuild/buf-setup-action@a47c93e0b1648d5651a065437926377d060baa99 # v1.50.0

View file

@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: bufbuild/buf-setup-action@a47c93e0b1648d5651a065437926377d060baa99 # v1.50.0
@ -25,7 +25,7 @@ jobs:
with:
input: 'prompb'
against: 'https://github.com/prometheus/prometheus.git#branch=main,ref=HEAD~1,subdir=prompb'
- uses: bufbuild/buf-push-action@a654ff18effe4641ebea4a4ce242c49800728459 # v1.1.1
- uses: bufbuild/buf-push-action@a654ff18effe4641ebea4a4ce242c49800728459 # v1.2.0
with:
input: 'prompb'
buf_token: ${{ secrets.BUF_TOKEN }}

View file

@ -20,7 +20,7 @@ jobs:
# Don't run it on dependabot PRs either as humans would take control in case a bump introduces a breaking change.
if: (github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community') && github.event.pull_request.user.login != 'dependabot[bot]'
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- env:
PR_DESCRIPTION: ${{ github.event.pull_request.body }}
run: |

View file

@ -3,6 +3,8 @@ name: CI
on:
pull_request:
push:
branches: [main, 'release-*']
tags: ['v*']
permissions:
contents: read
@ -16,10 +18,10 @@ jobs:
# should also be updated.
image: quay.io/prometheus/golang-builder:1.25-base
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- uses: ./.github/promci/actions/setup_environment
with:
enable_npm: true
@ -34,10 +36,10 @@ jobs:
container:
image: quay.io/prometheus/golang-builder:1.25-base
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- uses: ./.github/promci/actions/setup_environment
- run: go test --tags=dedupelabels ./...
- run: go test --tags=slicelabels -race ./cmd/prometheus ./model/textparse ./prompb/...
@ -57,9 +59,9 @@ jobs:
GOEXPERIMENT: synctest
container:
# The go version in this image should be N-1 wrt test_go.
image: quay.io/prometheus/golang-builder:1.24-base
image: quay.io/prometheus/golang-builder:1.25-base
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- run: make build
@ -78,10 +80,10 @@ jobs:
image: quay.io/prometheus/golang-builder:1.25-base
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- uses: ./.github/promci/actions/setup_environment
with:
enable_go: false
@ -97,10 +99,10 @@ jobs:
name: Go tests on Windows
runs-on: windows-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
- uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: 1.25.x
- run: |
@ -116,7 +118,7 @@ jobs:
container:
image: quay.io/prometheus/golang-builder:1.25-base
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- run: go install ./cmd/promtool/.
@ -128,6 +130,27 @@ jobs:
- run: make -C documentation/prometheus-mixin
- run: git diff --exit-code
test-compliance:
name: Compliance testing
runs-on: ubuntu-latest
container:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
image: quay.io/prometheus/golang-builder:1.25-base
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- uses: ./.github/promci/actions/setup_environment
with:
enable_npm: false
# NOTE: These tests are based on https://github.com/prometheus/compliance and
# are executed against the ./cmd/prometheus main package.
- run: go test -skip ${SKIP_TESTS} -v --tags=compliance ./compliance/...
env:
SKIP_TESTS: "TestRemoteWriteSender/prometheus/samples/rw2/start_timestamp*" # TODO(bwplotka): PROM-60
build:
name: Build Prometheus for common architectures
runs-on: ubuntu-latest
@ -143,10 +166,10 @@ jobs:
matrix:
thread: [ 0, 1, 2 ]
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- uses: ./.github/promci/actions/build
with:
promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386"
@ -170,10 +193,10 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- uses: ./.github/promci/actions/build
with:
parallelism: 12
@ -202,30 +225,32 @@ jobs:
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
run: exit 1
check_generated_parser:
# Checks generated parser and UI functions list. Not renaming as it is a required check.
name: Check generated parser
runs-on: ubuntu-latest
container:
image: quay.io/prometheus/golang-builder:1.25-base
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Install Go
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- uses: ./.github/promci/actions/setup_environment
with:
cache: false
go-version: 1.25.x
- name: Run goyacc and check for diff
run: make install-goyacc check-generated-parser
enable_npm: true
- run: make install-goyacc check-generated-parser
- run: make check-generated-promql-functions
golangci:
name: golangci-lint
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Install Go
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version: 1.25.x
- name: Install snmp_exporter/generator dependencies
@ -235,21 +260,27 @@ jobs:
id: golangci-lint-version
run: echo "version=$(make print-golangci-lint-version)" >> $GITHUB_OUTPUT
- name: Lint
uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8.0.0
uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0
with:
args: --verbose
version: ${{ steps.golangci-lint-version.outputs.version }}
- name: Lint with slicelabels
uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8.0.0
uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0
with:
# goexperiment.synctest to ensure we don't miss files that depend on it.
args: --verbose --build-tags=slicelabels,goexperiment.synctest
version: ${{ steps.golangci-lint-version.outputs.version }}
- name: Lint with dedupelabels
uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8.0.0
uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0
with:
args: --verbose --build-tags=dedupelabels
version: ${{ steps.golangci-lint-version.outputs.version }}
- name: Lint in documentation/examples/remote_storage
uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0
with:
args: --verbose
working-directory: documentation/examples/remote_storage
version: ${{ steps.golangci-lint-version.outputs.version }}
fuzzing:
uses: ./.github/workflows/fuzzing.yml
if: github.event_name == 'pull_request'
@ -265,10 +296,10 @@ jobs:
needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && github.event.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- uses: ./.github/promci/actions/publish_main
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -284,10 +315,10 @@ jobs:
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- uses: ./.github/promci/actions/publish_release
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -301,16 +332,16 @@ jobs:
needs: [test_ui, codeql]
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
- uses: prometheus/promci@fc721ff8497a70a93a881cd552b71af7fb3a9d53 # v0.5.4
- name: Install nodejs
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
with:
node-version-file: "web/ui/.nvmrc"
registry-url: "https://registry.npmjs.org"
- uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
- uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}

View file

@ -24,17 +24,17 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Initialize CodeQL
uses: github/codeql-action/init@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2
uses: github/codeql-action/init@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
with:
languages: ${{ matrix.language }}
- name: Autobuild
uses: github/codeql-action/autobuild@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2
uses: github/codeql-action/autobuild@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2
uses: github/codeql-action/analyze@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4

View file

@ -18,7 +18,7 @@ jobs:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
steps:
- name: git checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Set docker hub repo name
@ -42,7 +42,7 @@ jobs:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
steps:
- name: git checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Set quay.io org name

View file

@ -1,30 +1,47 @@
name: CIFuzz
name: fuzzing
on:
workflow_call:
permissions:
contents: read
jobs:
Fuzzing:
fuzzing:
name: Run Go Fuzz Tests
runs-on: ubuntu-latest
strategy:
matrix:
fuzz_test: [FuzzParseMetricText, FuzzParseOpenMetric, FuzzParseMetricSelector, FuzzParseExpr]
steps:
- name: Build Fuzzers
id: build
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@cafd7a0eb8ecb4e007c56897996a9b65c49c972f # master
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
oss-fuzz-project-name: "prometheus"
dry-run: false
- name: Run Fuzzers
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@cafd7a0eb8ecb4e007c56897996a9b65c49c972f # master
# Note: Regularly check for updates to the pinned commit hash at:
# https://github.com/google/oss-fuzz/tree/master/infra/cifuzz/actions/run_fuzzers
persist-credentials: false
- name: Install Go
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
oss-fuzz-project-name: "prometheus"
fuzz-seconds: 600
dry-run: false
- name: Upload Crash
go-version: 1.25.x
- name: Run Fuzzing
run: go test -fuzz=${{ matrix.fuzz_test }}$ -fuzztime=5m ./util/fuzzing
continue-on-error: true
id: fuzz
- name: Upload Crash Artifacts
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
if: failure() && steps.build.outcome == 'success'
if: failure()
with:
name: artifacts
path: ./out/artifacts
name: fuzz-artifacts-${{ matrix.fuzz_test }}
path: util/fuzzing/testdata/fuzz/${{ matrix.fuzz_test }}
fuzzing_status:
# This status check aggregates the individual matrix jobs of the fuzzing
# step into a final status. Fails if a single matrix job fails, succeeds if
# all matrix jobs succeed.
name: Fuzzing
runs-on: ubuntu-latest
needs: [fuzzing]
if: always()
steps:
- name: Successful fuzzing
if: ${{ !(contains(needs.*.result, 'failure')) && !(contains(needs.*.result, 'cancelled')) }}
run: exit 0
- name: Failing or cancelled fuzzing
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
run: exit 1

View file

@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771 # v5.0.1
- uses: dessant/lock-threads@7266a7ce5c1df01b1c6db85bf8cd86c737dadbe7 # v6.0.0
with:
process-only: 'issues'
issue-inactive-days: '180'

View file

@ -38,8 +38,8 @@ jobs:
uses: docker://prominfra/prombench:master
with:
args: >-
until make all_nodes_deleted; do echo "waiting for nodepools to be deleted"; sleep 10; done;
make deploy;
until make all_nodes_running; do echo "waiting for nodepools to be created"; sleep 10; done;
- name: Update status to failure
if: failure()
run: >-
@ -73,8 +73,8 @@ jobs:
uses: docker://prominfra/prombench:master
with:
args: >-
until make all_nodes_running; do echo "waiting for nodepools to be created"; sleep 10; done;
make clean;
until make all_nodes_deleted; do echo "waiting for nodepools to be deleted"; sleep 10; done;
- name: Update status to failure
if: failure()
run: >-
@ -108,10 +108,10 @@ jobs:
uses: docker://prominfra/prombench:master
with:
args: >-
until make all_nodes_running; do echo "waiting for nodepools to be created"; sleep 10; done;
make clean;
until make all_nodes_deleted; do echo "waiting for nodepools to be deleted"; sleep 10; done;
make deploy;
until make all_nodes_running; do echo "waiting for nodepools to be created"; sleep 10; done;
- name: Update status to failure
if: failure()
run: >-

View file

@ -14,7 +14,7 @@ jobs:
container:
image: quay.io/prometheus/golang-builder
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- run: ./scripts/sync_repo_files.sh

View file

@ -21,7 +21,7 @@ jobs:
steps:
- name: "Checkout code"
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
@ -37,7 +37,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # tag=v5.0.0
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # tag=v6.0.0
with:
name: SARIF file
path: results.sarif
@ -45,6 +45,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@0499de31b99561a6d14a36a5f662c2a54f91beee # tag=v4.31.2
uses: github/codeql-action/upload-sarif@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4
with:
sarif_file: results.sarif

View file

@ -11,7 +11,7 @@ jobs:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
runs-on: ubuntu-latest
steps:
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
- uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
# opt out of defaults to avoid marking issues as stale and closing them

1
.gitignore vendored
View file

@ -26,6 +26,7 @@ npm_licenses.tar.bz2
/vendor
/.build
/go.work.sum
/**/node_modules

View file

@ -31,6 +31,7 @@ linters:
- govet
- loggercheck
- misspell
- modernize
- nilnesserr
# TODO(bwplotka): Enable once https://github.com/golangci/golangci-lint/issues/3228 is fixed.
# - nolintlint
@ -38,6 +39,7 @@ linters:
- predeclared
- revive
- sloglint
- staticcheck
- testifylint
- unconvert
- unused
@ -101,6 +103,10 @@ linters:
desc: "Use github.com/klauspost/compress instead of zlib"
- pkg: "golang.org/x/exp/slices"
desc: "Use 'slices' instead."
- pkg: "gopkg.in/yaml.v2"
desc: "Use go.yaml.in/yaml/v2 instead of gopkg.in/yaml.v2"
- pkg: "gopkg.in/yaml.v3"
desc: "Use go.yaml.in/yaml/v3 instead of gopkg.in/yaml.v3"
errcheck:
exclude-functions:
# Don't flag lines such as "io.Copy(io.Discard, resp.Body)".
@ -117,6 +123,12 @@ linters:
- shadow
- fieldalignment
enable-all: true
modernize:
disable:
# Suggest replacing omitempty with omitzero for struct fields.
# Disable this check for now since it introduces too many changes in our existing codebase.
# See https://pkg.go.dev/golang.org/x/tools/go/analysis/passes/modernize#hdr-Analyzer_omitzero for more details.
- omitzero
perfsprint:
# Optimizes even if it requires an int or uint type cast.
int-conversion: true
@ -175,6 +187,11 @@ linters:
- name: unused-receiver
- name: var-declaration
- name: var-naming
# TODO(SuperQ): See: https://github.com/prometheus/prometheus/issues/17766
arguments:
- []
- []
- - skip-package-name-checks: true
testifylint:
disable:
- float-compare

View file

@ -2,6 +2,7 @@
extends: default
ignore: |
**/node_modules
web/api/v1/testdata/openapi_*_golden.yaml
rules:
braces:

View file

@ -1,9 +1,87 @@
# Changelog
## main / unreleased
## 3.9.1 / 2026-01-07
* [BUGFIX] Discovery/Consul: Fix filter parameter not being applied to health service endpoint, causing Node and Node.Meta filters to be ignored. #16087
* [FEATURE] Templates: Add urlQueryEscape to template functions. #17403
- [BUGFIX] Agent: fix crash shortly after startup from invalid type of object. #17802
- [BUGFIX] Scraping: fix relabel keep/drop not working. #17807
## 3.9.0 / 2026-01-06
- [CHANGE] Native Histograms are no longer experimental! Make the `native-histogram` feature flag a no-op. Use `scrape_native_histograms` config option instead. #17528
- [CHANGE] API: Add maximum limit of 10,000 sets of statistics to TSDB status endpoint. #17647
- [FEATURE] API: Add /api/v1/features for clients to understand which features are supported. #17427
- [FEATURE] Promtool: Add `start_timestamp` field for unit tests. #17636
- [FEATURE] Promtool: Add `--format seriesjson` option to `tsdb dump` to output just series labels in JSON format. #13409
- [FEATURE] Add `--storage.tsdb.delay-compact-file.path` flag for better interoperability with Thanos. #17435
- [FEATURE] UI: Add an option on the query drop-down menu to duplicate that query panel. #17714
- [ENHANCEMENT] TSDB: Add flag `--storage.tsdb.block-reload-interval` to configure TSDB Block Reload Interval. #16728
- [ENHANCEMENT] UI: Add graph option to start the chart's Y axis at zero. #17565
- [ENHANCEMENT] Scraping: Classic protobuf format no longer requires the unit in the metric name. #16834
- [ENHANCEMENT] PromQL, Rules, SD, Scraping: Add native histograms to complement existing summaries. #17374
- [ENHANCEMENT] Notifications: Add a histogram `prometheus_notifications_latency_histogram_seconds` to complement the existing summary. #16637
- [ENHANCEMENT] Remote-write: Add custom scope support for AzureAD authentication. #17483
- [ENHANCEMENT] SD: add a `config` label with job name for most `prometheus_sd_refresh` metrics. #17138
- [ENHANCEMENT] TSDB: New histogram `prometheus_tsdb_sample_ooo_delta`, the distribution of out-of-order samples in seconds. Collected for all samples, accepted or not. #17477
- [ENHANCEMENT] Remote-read: Validate histograms received via remote-read. #17561
- [PERF] TSDB: Small optimizations to postings index. #17439
- [PERF] Scraping: Speed up relabelling of series. #17530
- [PERF] PromQL: Small optimisations in binary operators. #17524, #17519
- [BUGFIX] UI: PromQL autocomplete now shows the correct type and HELP text for OpenMetrics counters whose samples end in `_total`. #17682
- [BUGFIX] UI: Fixed codemirror-promql incorrectly showing label completion suggestions after the closing curly brace of a vector selector. #17602
- [BUGFIX] UI: Query editor no longer suggests a duration unit if one is already present after a number. #17605
- [BUGFIX] PromQL: Fix some "vector cannot contain metrics with the same labelset" errors when experimental delayed name removal is enabled. #17678
- [BUGFIX] PromQL: Fix possible corruption of PromQL text if the query had an empty `ignoring()` and non-empty grouping. #17643
- [BUGFIX] PromQL: Fix resets/changes to return empty results for anchored selectors when all samples are outside the range. #17479
- [BUGFIX] PromQL: Check more consistently for many-to-one matching in filter binary operators. #17668
- [BUGFIX] PromQL: Fix collision in unary negation with non-overlapping series. #17708
- [BUGFIX] PromQL: Fix collision in label_join and label_replace with non-overlapping series. #17703
- [BUGFIX] PromQL: Fix bug with inconsistent results for queries with OR expression when experimental delayed name removal is enabled. #17161
- [BUGFIX] PromQL: Ensure that `rate`/`increase`/`delta` of histograms results in a gauge histogram. #17608
- [BUGFIX] PromQL: Do not panic while iterating over invalid histograms. #17559
- [BUGFIX] TSDB: Reject chunk files whose encoded chunk length overflows int. #17533
- [BUGFIX] TSDB: Do not panic during resolution reduction of invalid histograms. #17561
- [BUGFIX] Remote-write Receive: Avoid duplicate labels when experimental type-and-unit-label feature is enabled. #17546
- [BUGFIX] OTLP Receiver: Only write metadata to disk when experimental metadata-wal-records feature is enabled. #17472
## 3.8.1 / 2025-12-16
* [BUGFIX] remote: Fix Remote Write receiver, so it does not send wrong response headers for v1 flow and cause Prometheus senders to emit false partial error log and metrics. #17683
## 3.8.0 / 2025-11-28
* [CHANGE] Remote-write: Update receiving to [2.0-rc.4 spec](https://github.com/prometheus/docs/blob/60c24e450010df38cfcb4f65df874f6f9b26dbcb/docs/specs/prw/remote_write_spec_2_0.md). "created timestamp" (CT) is now called "start timestamp" (ST). #17411
* [CHANGE] TSDB: Native Histogram Custom Bounds with a NaN threshold are now rejected. #17287
* [FEATURE] OAuth2: support jwt-bearer grant-type (RFC7523 3.1). #17592
* [FEATURE] Dockerfile: Add OpenContainers spec labels to Dockerfile. #16483
* [FEATURE] SD: Add unified AWS service discovery for ec2, lightsail and ecs services. #17406
* [FEATURE] Native histograms are now a stable, but optional feature, use the `scrape_native_histograms` config setting. #17232 #17315
* [FEATURE] UI: Support anchored and smoothed keyword in promql editor. #17239
* [FEATURE] UI: Show detailed relabeling steps for each discovered target. #17337
* [FEATURE] Alerting: Add urlQueryEscape to template functions. #17403
* [FEATURE] Promtool: Add Remote-Write 2.0 support to `promtool push metrics` via the `--protobuf_message` flag. #17417
* [ENHANCEMENT] Clarify the docs about handling negative native histograms. #17249
* [ENHANCEMENT] Mixin: Add static UID to the remote-write dashboard. #17256
* [ENHANCEMENT] PromQL: Reconcile mismatched NHCB bounds in `Add` and `Sub`. #17278
* [ENHANCEMENT] Alerting: Add "unknown" state for alerting rules that haven't been evaluated yet. #17282
* [ENHANCEMENT] Scrape: Allow simultaneous use of classic histogram → NHCB conversion and zero-timestamp ingestion. #17305
* [ENHANCEMENT] UI: Add smoothed/anchored in explain. #17334
* [ENHANCEMENT] OTLP: De-duplicate any `target_info` samples with the same timestamp for the same series. #17400
* [ENHANCEMENT] Document `use_fips_sts_endpoint` in `sigv4` config sections. #17304
* [ENHANCEMENT] Document Prometheus Agent. #14519
* [PERF] PromQL: Speed up parsing of variadic functions. #17316
* [PERF] UI: Speed up alerts/rules/... pages by not rendering collapsed content. #17485
* [PERF] UI: Performance improvement when getting label name and values in promql editor. #17194
* [PERF] UI: Speed up /alerts for many firing alerts via virtual scrolling. #17254
* [BUGFIX] PromQL: Fix slice indexing bug in info function on churning series. #17199
* [BUGFIX] API: Reduce lock contention on `/api/v1/targets`. #17306
* [BUGFIX] PromQL: Consistent handling of gauge vs. counter histograms in aggregations. #17312
* [BUGFIX] TSDB: Allow NHCB with -Inf as the first custom value. #17320
* [BUGFIX] UI: Fix duplicate loading of data from the API, speeding up rendering of some pages. #17357
* [BUGFIX] Old UI: Fix createExpressionLink to correctly build /graph URLs so links from Alerts/Rules work again. #17365
* [BUGFIX] PromQL: Avoid panic when parsing malformed `info` call. #17379
* [BUGFIX] PromQL: Include histograms when enforcing sample_limit. #17390
* [BUGFIX] Config: Fix panic if TLS CA file is absent. #17418
* [BUGFIX] PromQL: Fix `histogram_fraction` for classic histograms and NHCB if lower bound is in the first bucket. #17424
## 3.7.3 / 2025-10-29
@ -201,7 +279,7 @@
## 3.2.1 / 2025-02-25
* [BUGFIX] Don't send Accept` header `escape=allow-utf-8` when `metric_name_validation_scheme: legacy` is configured. #16061
* [BUGFIX] Don't send `Accept` header `escape=allow-utf-8` when `metric_name_validation_scheme: legacy` is configured. #16061
## 3.2.0 / 2025-02-17
@ -212,10 +290,10 @@
* [ENHANCEMENT] scrape: Add metadata for automatic metrics to WAL for `metadata-wal-records` feature. #15837
* [ENHANCEMENT] promtool: Support linting of scrape interval, through lint option `too-long-scrape-interval`. #15719
* [ENHANCEMENT] promtool: Add --ignore-unknown-fields option. #15706
* [ENHANCEMENT] ui: Make "hide empty rules" and hide empty rules" persistent #15807
* [ENHANCEMENT] ui: Make "hide empty rules" and "hide empty rules" persistent #15807
* [ENHANCEMENT] web/api: Add a limit parameter to `/query` and `/query_range`. #15552
* [ENHANCEMENT] api: Add fields Node and ServerTime to `/status`. #15784
* [PERF] Scraping: defer computing labels for dropped targets until they are needed by the UI. #15261
* [PERF] Scraping: defer computing labels for dropped targets until they are needed by the UI. #15261
* [BUGFIX] remotewrite2: Fix invalid metadata bug for metrics without metadata. #15829
* [BUGFIX] remotewrite2: Fix the unit field propagation. #15825
* [BUGFIX] scrape: Fix WAL metadata for histograms and summaries. #15832
@ -232,9 +310,9 @@
* [ENHANCEMENT] TSDB: Improve calculation of space used by labels. #13880
* [ENHANCEMENT] Rules: new metric rule_group_last_rule_duration_sum_seconds. #15672
* [ENHANCEMENT] Observability: Export 'go_sync_mutex_wait_total_seconds_total' metric. #15339
* [ENHANCEMEN] Remote-Write: optionally use a DNS resolver that picks a random IP. #15329
* [ENHANCEMENT] Remote-Write: optionally use a DNS resolver that picks a random IP. #15329
* [PERF] Optimize `l=~".+"` matcher. #15474, #15684
* [PERF] TSDB: Cache all symbols for compaction . #15455
* [PERF] TSDB: Cache all symbols for compaction. #15455
* [PERF] TSDB: MemPostings: keep a map of label values slices. #15426
* [PERF] Remote-Write: Remove interning hook. #15456
* [PERF] Scrape: optimize string manipulation for experimental native histograms with custom buckets. #15453

View file

@ -1,10 +1,29 @@
#
# Please keep this file in sync with the MAINTAINERS.md file!
#
# Prometheus team members are members of the "default maintainers" github team.
# They are code owners by default for the whole repo.
* @prometheus/default-maintainers
# Example adding a dedicated maintainer for AWS SD, and also "default
# maintainers" so that they do not need to bypass codeowners check to merge
# something.
# Example comes from
# https://github.com/prometheus/prometheus/pull/17105#issuecomment-3248209452
# /discovery/aws/ @matt-gp @prometheus/default-maintainers
# Subsystems.
/Makefile @prometheus/default-maintainers @simonpasquier @SuperQ
/cmd/promtool @prometheus/default-maintainers @dgl
/documentation/prometheus-mixin @prometheus/default-maintainers @metalmatze
/model/histogram @prometheus/default-maintainers @beorn7 @krajorama
/web/ui @prometheus/default-maintainers @juliusv
/web/ui/module @prometheus/default-maintainers @juliusv @nexucis
/promql @prometheus/default-maintainers @roidelapluie
/storage/remote @prometheus/default-maintainers @cstyan @bwplotka @tomwilkie @alexgreenbank
/storage/remote/otlptranslator @prometheus/default-maintainers @aknuds1 @jesusvazquez @ArthurSens
/tsdb @prometheus/default-maintainers @jesusvazquez @codesome @bwplotka @krajorama
# Service discovery.
/discovery/kubernetes @prometheus/default-maintainers @brancz
/discovery/stackit @prometheus/default-maintainers @jkroepke
/discovery/aws/ @prometheus/default-maintainers @matt-gp @sysadmind
# Pending
# https://github.com/prometheus/prometheus/pull/15212#issuecomment-3575225179
# /discovery/aliyun @prometheus/default-maintainers @KeyOfSpectator
# https://github.com/prometheus/prometheus/pull/14108#issuecomment-2639515421
# /discovery/nomad @prometheus/default-maintainers @jaloren @jrasell

View file

@ -14,7 +14,7 @@ Prometheus uses GitHub to manage reviews of pull requests.
of inspiration. Also please see our [non-goals issue](https://github.com/prometheus/docs/issues/149) on areas that the Prometheus community doesn't plan to work on.
* Relevant coding style guidelines are the [Go Code Review
Comments](https://code.google.com/p/go-wiki/wiki/CodeReviewComments)
Comments](https://go.dev/wiki/CodeReviewComments)
and the _Formatting and style_ section of Peter Bourgon's [Go: Best
Practices for Production
Environments](https://peter.bourgon.org/go-in-production/#formatting-and-style).
@ -78,8 +78,7 @@ go get example.com/some/module/pkg@vX.Y.Z
Tidy up the `go.mod` and `go.sum` files:
```bash
# The GO111MODULE variable can be omitted when the code isn't located in GOPATH.
GO111MODULE=on go mod tidy
go mod tidy
```
You have to commit the changes to `go.mod` and `go.sum` before submitting the pull request.

View file

@ -9,7 +9,8 @@ LABEL org.opencontainers.image.authors="The Prometheus Authors" \
org.opencontainers.image.source="https://github.com/prometheus/prometheus" \
org.opencontainers.image.url="https://github.com/prometheus/prometheus" \
org.opencontainers.image.documentation="https://prometheus.io/docs" \
org.opencontainers.image.licenses="Apache License 2.0"
org.opencontainers.image.licenses="Apache License 2.0" \
io.prometheus.image.variant="busybox"
ARG ARCH="amd64"
ARG OS="linux"

29
Dockerfile.distroless Normal file
View file

@ -0,0 +1,29 @@
# Distroless variant of the Prometheus container image.
# DISTROLESS_ARCH is declared before FROM so that it can be used in the base image tag.
ARG DISTROLESS_ARCH="amd64"
# Use DISTROLESS_ARCH for base image selection (handles armv7->arm mapping).
FROM gcr.io/distroless/static-debian13:nonroot-${DISTROLESS_ARCH}
# Base image sets USER to 65532:65532 (nonroot user).
# ARCH and OS select the .build/<os>-<arch>/ directory the binaries are copied from below.
ARG ARCH="amd64"
ARG OS="linux"
# OCI image metadata.
LABEL org.opencontainers.image.authors="The Prometheus Authors"
LABEL org.opencontainers.image.vendor="Prometheus"
LABEL org.opencontainers.image.title="Prometheus"
LABEL org.opencontainers.image.description="The Prometheus monitoring system and time series database"
LABEL org.opencontainers.image.source="https://github.com/prometheus/prometheus"
LABEL org.opencontainers.image.url="https://github.com/prometheus/prometheus"
LABEL org.opencontainers.image.documentation="https://prometheus.io/docs"
LABEL org.opencontainers.image.licenses="Apache License 2.0"
# Variant label: the dockerfile_variant helper in Makefile.common extracts this value to name the image variant.
LABEL io.prometheus.image.variant="distroless"
# Example configuration, installed at the path passed to --config.file in CMD below.
COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml
# License material is placed in the image root.
COPY LICENSE NOTICE npm_licenses.tar.bz2 /
# Binaries are taken from the build output directory for the selected OS and architecture.
COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus
COPY .build/${OS}-${ARCH}/promtool /bin/promtool
# /prometheus is both the working directory and the TSDB path passed in CMD below.
WORKDIR /prometheus
EXPOSE 9090
ENTRYPOINT [ "/bin/prometheus" ]
# Default arguments appended to ENTRYPOINT when the container is run without arguments.
CMD [ "--config.file=/etc/prometheus/prometheus.yml", \
"--storage.tsdb.path=/prometheus" ]

View file

@ -1,9 +1,12 @@
# Maintainers
## Please keep this file in sync with the CODEOWNERS file!
General maintainers:
* Bryan Boreham (bjboreham@gmail.com / @bboreham)
* Ayoub Mrini (ayoubmrini424@gmail.com / @machine424)
* Julien Pivotto (roidelapluie@prometheus.io / @roidelapluie)
* György Krajcsovits (<gyorgy.krajcsovits@grafana.com> / @krajorama)
Maintainers for specific parts of the codebase:
* `cmd`
@ -13,15 +16,13 @@ Maintainers for specific parts of the codebase:
* `stackit`: Jan-Otto Kröpke (<mail@jkroepke.de> / @jkroepke)
* `documentation`
* `prometheus-mixin`: Matthias Loibl (<mail@matthiasloibl.com> / @metalmatze)
* `model/histogram` and other code related to native histograms: Björn Rabenstein (<beorn@grafana.com> / @beorn7),
George Krajcsovits (<gyorgy.krajcsovits@grafana.com> / @krajorama)
* `storage`
* `remote`: Callum Styan (<callumstyan@gmail.com> / @cstyan), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( <npazosmendez@gmail.com> / @npazosmendez), Alex Greenbank ( <alexgreenbank@yahoo.com> / @alexgreenbank)
* `remote`: Callum Styan (<callumstyan@gmail.com> / @cstyan), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Alex Greenbank (<alexgreenbank@yahoo.com> / @alexgreenbank)
* `otlptranslator`: Arthur Silva Sens (<arthursens2005@gmail.com> / @ArthurSens), Arve Knudsen (<arve.knudsen@gmail.com> / @aknuds1), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `tsdb`: Ganesh Vernekar (<ganesh@grafana.com> / @codesome), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `web`
* `ui`: Julius Volz (<julius.volz@gmail.com> / @juliusv)
* `module`: Augustin Husson (<husson.augustin@gmail.com> @nexucis)
* `module`: Augustin Husson (<husson.augustin@gmail.com> / @nexucis)
* `Makefile` and related build configuration: Simon Pasquier (<pasquier.simon@gmail.com> / @simonpasquier), Ben Kochie (<superq@gmail.com> / @SuperQ)
For the sake of brevity, not all subtrees are explicitly listed. Due to the

View file

@ -1,4 +1,4 @@
# Copyright 2018 The Prometheus Authors
# Copyright The Prometheus Authors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@ -12,7 +12,7 @@
# limitations under the License.
# Needs to be defined before including Makefile.common to auto-generate targets
DOCKER_ARCHS ?= amd64 armv7 arm64 ppc64le s390x
DOCKER_ARCHS ?= amd64 armv7 arm64 ppc64le riscv64 s390x
UI_PATH = web/ui
UI_NODE_MODULES_PATH = $(UI_PATH)/node_modules
@ -79,6 +79,20 @@ ui-lint:
# new Mantine-based UI is fully integrated and the old app can be removed.
cd $(UI_PATH)/react-app && npm run lint
# Regenerate functionSignatures.ts and functionDocs.tsx for the Mantine UI by
# running the Go generators under mantine-ui/src/promql/tools, then format both
# files with prettier. Requires ui-install to have run first.
.PHONY: generate-promql-functions
generate-promql-functions: ui-install
@echo ">> generating PromQL function signatures"
@cd $(UI_PATH)/mantine-ui/src/promql/tools && $(GO) run ./gen_functions_list > ../functionSignatures.ts
@echo ">> generating PromQL function documentation"
@cd $(UI_PATH)/mantine-ui/src/promql/tools && $(GO) run ./gen_functions_docs $(CURDIR)/docs/querying/functions.md > ../functionDocs.tsx
@echo ">> formatting generated files"
@cd $(UI_PATH)/mantine-ui && npx prettier --write --print-width 120 src/promql/functionSignatures.ts src/promql/functionDocs.tsx
# Regenerate the PromQL function files and fail if git reports any difference
# in them, i.e. if the committed generated files are stale.
.PHONY: check-generated-promql-functions
check-generated-promql-functions: generate-promql-functions
@echo ">> checking generated PromQL functions"
@git diff --exit-code -- $(UI_PATH)/mantine-ui/src/promql/functionSignatures.ts $(UI_PATH)/mantine-ui/src/promql/functionDocs.tsx || (echo "Generated PromQL function files are out of date. Please run 'make generate-promql-functions' and commit the changes." && false)
.PHONY: assets
ifndef SKIP_UI_BUILD
assets: check-node-version ui-install ui-build
@ -152,15 +166,8 @@ tarball: npm_licenses common-tarball
.PHONY: docker
docker: npm_licenses common-docker
plugins/plugins.go: plugins.yml plugins/generate.go
@echo ">> creating plugins list"
$(GO) generate -tags plugins ./plugins
.PHONY: plugins
plugins: plugins/plugins.go
.PHONY: build
build: assets npm_licenses assets-compress plugins common-build
build: assets npm_licenses assets-compress common-build
.PHONY: bench_tsdb
bench_tsdb: $(PROMU)
@ -184,14 +191,26 @@ check-go-mod-version:
@echo ">> checking go.mod version matching"
@./scripts/check-go-mod-version.sh
# Run TestFeaturesAPI in ./cmd/prometheus with the -update-features flag, which
# makes the test rewrite its golden file instead of comparing against it.
.PHONY: update-features-testdata
update-features-testdata:
@echo ">> updating features testdata"
@$(GO) test ./cmd/prometheus -run TestFeaturesAPI -update-features
GO_SUBMODULE_DIRS := documentation/examples/remote_storage internal/tools web/ui/mantine-ui/src/promql/tools
.PHONY: update-all-go-deps
update-all-go-deps:
@$(MAKE) update-go-deps
@echo ">> updating Go dependencies in ./documentation/examples/remote_storage/"
@cd ./documentation/examples/remote_storage/ && for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \
update-all-go-deps: update-go-deps
$(foreach dir,$(GO_SUBMODULE_DIRS),$(MAKE) update-go-deps-in-dir DIR=$(dir);)
@echo ">> syncing Go workspace"
@$(GO) work sync
.PHONY: update-go-deps-in-dir
update-go-deps-in-dir:
@echo ">> updating Go dependencies in ./$(DIR)/"
@cd ./$(DIR) && for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \
$(GO) get $$m; \
done
@cd ./documentation/examples/remote_storage/ && $(GO) mod tidy
@cd ./$(DIR) && $(GO) mod tidy
.PHONY: check-node-version
check-node-version:
@ -201,3 +220,8 @@ check-node-version:
bump-go-version:
@echo ">> bumping Go minor version"
@./scripts/bump_go_version.sh
# Run `go generate` with the `fuzzing` build tag on ./util/fuzzing/corpus_gen.
.PHONY: generate-fuzzing-seed-corpus
generate-fuzzing-seed-corpus:
@echo ">> Generating fuzzing seed corpus"
@$(GO) generate -tags fuzzing ./util/fuzzing/corpus_gen

View file

@ -1,4 +1,4 @@
# Copyright 2018 The Prometheus Authors
# Copyright The Prometheus Authors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@ -55,13 +55,13 @@ ifneq ($(shell command -v gotestsum 2> /dev/null),)
endif
endif
PROMU_VERSION ?= 0.17.0
PROMU_VERSION ?= 0.18.0
PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz
SKIP_GOLANGCI_LINT :=
GOLANGCI_LINT :=
GOLANGCI_LINT_OPTS ?=
GOLANGCI_LINT_VERSION ?= v2.6.0
GOLANGCI_LINT_VERSION ?= v2.10.1
GOLANGCI_FMT_OPTS ?=
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64.
# windows isn't included here because of the path separator being different.
@ -82,11 +82,32 @@ endif
PREFIX ?= $(shell pwd)
BIN_DIR ?= $(shell pwd)
DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD))
DOCKERFILE_PATH ?= ./Dockerfile
DOCKERBUILD_CONTEXT ?= ./
DOCKER_REPO ?= prom
# Check if deprecated DOCKERFILE_PATH is set
ifdef DOCKERFILE_PATH
$(error DOCKERFILE_PATH is deprecated. Use DOCKERFILE_VARIANTS ?= $(DOCKERFILE_PATH) in the Makefile)
endif
DOCKER_ARCHS ?= amd64
DOCKERFILE_VARIANTS ?= Dockerfile $(wildcard Dockerfile.*)
# Function to extract variant from Dockerfile label.
# Returns the variant name from io.prometheus.image.variant label, or "default" if not found.
define dockerfile_variant
$(strip $(or $(shell sed -n 's/.*io\.prometheus\.image\.variant="\([^"]*\)".*/\1/p' $(1)),default))
endef
# Check for duplicate variant names (including default for Dockerfiles without labels).
DOCKERFILE_VARIANT_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df)))
DOCKERFILE_VARIANT_NAMES_SORTED := $(sort $(DOCKERFILE_VARIANT_NAMES))
ifneq ($(words $(DOCKERFILE_VARIANT_NAMES)),$(words $(DOCKERFILE_VARIANT_NAMES_SORTED)))
$(error Duplicate variant names found. Each Dockerfile must have a unique io.prometheus.image.variant label, and only one can be without a label (default))
endif
# Build variant:dockerfile pairs for shell iteration.
DOCKERFILE_VARIANTS_WITH_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df)):$(df))
BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS))
PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS))
@ -112,7 +133,7 @@ common-all: precheck style check_license lint yamllint unused build test
.PHONY: common-style
common-style:
@echo ">> checking code style"
@fmtRes=$$($(GOFMT) -d $$(find . -path ./vendor -prune -o -name '*.go' -print)); \
@fmtRes=$$($(GOFMT) -d $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -name '*.go' -print)); \
if [ -n "$${fmtRes}" ]; then \
echo "gofmt checking failed!"; echo "$${fmtRes}"; echo; \
echo "Please ensure you are using $$($(GO) version) for formatting code."; \
@ -122,13 +143,19 @@ common-style:
.PHONY: common-check_license
common-check_license:
@echo ">> checking license header"
@licRes=$$(for file in $$(find . -type f -iname '*.go' ! -path './vendor/*') ; do \
@licRes=$$(for file in $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -type f -iname '*.go' -print) ; do \
awk 'NR<=3' $$file | grep -Eq "(Copyright|generated|GENERATED)" || echo $$file; \
done); \
if [ -n "$${licRes}" ]; then \
echo "license header checking failed:"; echo "$${licRes}"; \
exit 1; \
fi
@echo ">> checking for copyright years 2026 or later"
@futureYearRes=$$(git grep -E 'Copyright (202[6-9]|20[3-9][0-9])' -- '*.go' ':!:vendor/*' || true); \
if [ -n "$${futureYearRes}" ]; then \
echo "Files with copyright year 2026 or later found (should use 'Copyright The Prometheus Authors'):"; echo "$${futureYearRes}"; \
exit 1; \
fi
.PHONY: common-deps
common-deps:
@ -220,28 +247,110 @@ common-docker-repo-name:
.PHONY: common-docker $(BUILD_DOCKER_ARCHS)
common-docker: $(BUILD_DOCKER_ARCHS)
$(BUILD_DOCKER_ARCHS): common-docker-%:
docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \
-f $(DOCKERFILE_PATH) \
--build-arg ARCH="$*" \
--build-arg OS="linux" \
$(DOCKERBUILD_CONTEXT)
# Build one image per Dockerfile variant for the architecture given by the
# pattern stem. The plain "Dockerfile" produces the untagged default image (and
# an extra variant-suffixed tag when it carries a variant label); every other
# Dockerfile produces a variant-suffixed image only.
# "set -e" aborts the recipe on the first failing docker command. Without it
# the loop's exit status is that of the last iteration only, so a failed build
# of an earlier variant would go unnoticed.
	@set -e; \
	for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \
		dockerfile=$${variant#*:}; \
		variant_name=$${variant%%:*}; \
		distroless_arch="$*"; \
		if [ "$*" = "armv7" ]; then \
			distroless_arch="arm"; \
		fi; \
		if [ "$$dockerfile" = "Dockerfile" ]; then \
			echo "Building default variant ($$variant_name) for linux-$* using $$dockerfile"; \
			docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \
				-f $$dockerfile \
				--build-arg ARCH="$*" \
				--build-arg OS="linux" \
				--build-arg DISTROLESS_ARCH="$$distroless_arch" \
				$(DOCKERBUILD_CONTEXT); \
			if [ "$$variant_name" != "default" ]; then \
				echo "Tagging default variant with $$variant_name suffix"; \
				docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \
					"$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \
			fi; \
		else \
			echo "Building $$variant_name variant for linux-$* using $$dockerfile"; \
			docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" \
				-f $$dockerfile \
				--build-arg ARCH="$*" \
				--build-arg OS="linux" \
				--build-arg DISTROLESS_ARCH="$$distroless_arch" \
				$(DOCKERBUILD_CONTEXT); \
		fi; \
	done
.PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS)
common-docker-publish: $(PUBLISH_DOCKER_ARCHS)
$(PUBLISH_DOCKER_ARCHS): common-docker-publish-%:
docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)"
# Push every image built for the architecture given by the pattern stem: the
# variant-suffixed tag for each labelled variant, the unsuffixed tag for the
# plain "Dockerfile", and the matching major-version tags when
# DOCKER_IMAGE_TAG is "latest".
# "set -e" aborts the recipe on the first failing push. Without it only the
# exit status of the last loop iteration would reach make.
	@set -e; \
	for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \
		dockerfile=$${variant#*:}; \
		variant_name=$${variant%%:*}; \
		if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
			echo "Pushing $$variant_name variant for linux-$*"; \
			docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \
		fi; \
		if [ "$$dockerfile" = "Dockerfile" ]; then \
			echo "Pushing default variant ($$variant_name) for linux-$*"; \
			docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)"; \
		fi; \
		if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \
			if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
				echo "Pushing $$variant_name variant version tags for linux-$*"; \
				docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \
			fi; \
			if [ "$$dockerfile" = "Dockerfile" ]; then \
				echo "Pushing default variant version tag for linux-$*"; \
				docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \
			fi; \
		fi; \
	done
DOCKER_MAJOR_VERSION_TAG = $(firstword $(subst ., ,$(shell cat VERSION)))
.PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS)
common-docker-tag-latest: $(TAG_DOCKER_ARCHS)
$(TAG_DOCKER_ARCHS): common-docker-tag-latest-%:
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest"
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"
# Tag every image built for the architecture given by the pattern stem as
# "latest" and as the major-version tag, with a variant suffix for labelled
# variants and without one for the plain "Dockerfile".
# "set -e" aborts the recipe on the first failing docker tag. Without it only
# the exit status of the last loop iteration would reach make.
	@set -e; \
	for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \
		dockerfile=$${variant#*:}; \
		variant_name=$${variant%%:*}; \
		if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
			echo "Tagging $$variant_name variant for linux-$* as latest"; \
			docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest-$$variant_name"; \
			docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \
		fi; \
		if [ "$$dockerfile" = "Dockerfile" ]; then \
			echo "Tagging default variant ($$variant_name) for linux-$* as latest"; \
			docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest"; \
			docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \
		fi; \
	done
.PHONY: common-docker-manifest
common-docker-manifest:
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(SANITIZED_DOCKER_IMAGE_TAG))
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)"
# Create and push a multi-arch manifest list per Dockerfile variant, covering
# every architecture in DOCKER_ARCHS. Labelled variants get a variant-suffixed
# manifest and the plain "Dockerfile" gets the unsuffixed one. When
# DOCKER_IMAGE_TAG is "latest" the same is repeated for the major-version tag.
# "set -e" aborts the recipe on the first failing docker manifest command.
# Without it only the exit status of the last loop iteration would reach make.
	@set -e; \
	for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \
		dockerfile=$${variant#*:}; \
		variant_name=$${variant%%:*}; \
		if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
			echo "Creating manifest for $$variant_name variant"; \
			DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name); \
			DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \
		fi; \
		if [ "$$dockerfile" = "Dockerfile" ]; then \
			echo "Creating default variant ($$variant_name) manifest"; \
			DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(SANITIZED_DOCKER_IMAGE_TAG)); \
			DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)"; \
		fi; \
		if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \
			if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
				echo "Creating manifest for $$variant_name variant version tag"; \
				DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name); \
				DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \
			fi; \
			if [ "$$dockerfile" = "Dockerfile" ]; then \
				echo "Creating default variant version tag manifest"; \
				DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):v$(DOCKER_MAJOR_VERSION_TAG)); \
				DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)"; \
			fi; \
		fi; \
	done
.PHONY: promu
promu: $(PROMU)

View file

@ -69,7 +69,7 @@ To build Prometheus from source code, You need:
* Go: Version specified in [go.mod](./go.mod) or greater.
* NodeJS: Version specified in [.nvmrc](./web/ui/.nvmrc) or greater.
* npm: Version 8 or greater (check with `npm --version` and [here](https://www.npmjs.com/)).
* npm: Version 10 or greater (check with `npm --version` and [here](https://www.npmjs.com/)).
Start by cloning the repository:
@ -82,15 +82,15 @@ You can use the `go` tool to build and install the `prometheus`
and `promtool` binaries into your `GOPATH`:
```bash
GO111MODULE=on go install github.com/prometheus/prometheus/cmd/...
go install github.com/prometheus/prometheus/cmd/...
prometheus --config.file=your_config.yml
```
*However*, when using `go install` to build Prometheus, Prometheus will expect to be able to
read its web assets from local filesystem directories under `web/ui/static` and
`web/ui/templates`. In order for these assets to be found, you will have to run Prometheus
from the root of the cloned repository. Note also that these directories do not include the
React UI unless it has been built explicitly using `make assets` or `make build`.
read its web assets from local filesystem directories under `web/ui/static`. In order for
these assets to be found, you will have to run Prometheus from the root of the cloned
repository. Note also that this directory does not include the React UI unless it has been
built explicitly using `make assets` or `make build`.
An example of the above configuration file can be found [here](https://github.com/prometheus/prometheus/blob/main/documentation/examples/prometheus.yml).
@ -113,16 +113,31 @@ The Makefile provides several targets:
### Service discovery plugins
Prometheus is bundled with many service discovery plugins.
When building Prometheus from source, you can edit the [plugins.yml](./plugins.yml)
file to disable some service discoveries. The file is a yaml-formatted list of go
import path that will be built into the Prometheus binary.
Prometheus is bundled with many service discovery plugins. You can customize
which service discoveries are included in your build using Go build tags.
After you have changed the file, you
need to run `make build` again.
To exclude service discoveries when building with `make build`, add the desired
tags to the `.promu.yml` file under `build.tags.all`:
If you are using another method to compile Prometheus, `make plugins` will
generate the plugins file accordingly.
```yaml
build:
tags:
all:
- netgo
- builtinassets
- remove_all_sd # Exclude all optional SDs
- enable_kubernetes_sd # Re-enable only kubernetes
```
Then run `make build` as usual. Alternatively, when using `go build` directly:
```bash
go build -tags "remove_all_sd,enable_kubernetes_sd" ./cmd/prometheus
```
Available build tags:
* `remove_all_sd` - Exclude all optional service discoveries (keeps file_sd, static_sd, and http_sd)
* `enable_<name>_sd` - Re-enable a specific SD when using `remove_all_sd`
If you add out-of-tree plugins, which we do not endorse at the moment,
additional steps might be needed to adjust the `go.mod` and `go.sum` files. As
@ -144,6 +159,15 @@ produce a fully working image when run locally.
## Using Prometheus as a Go Library
Within the Prometheus project, repositories such as [prometheus/common](https://github.com/prometheus/common) and
[prometheus/client_golang](https://github.com/prometheus/client_golang) are designed as re-usable libraries.
The [prometheus/prometheus](https://github.com/prometheus/prometheus) repository builds a stand-alone program and is not
designed for use as a library. We are aware that people do use parts as such,
and we do not put any deliberate inconvenience in the way, but we want you to be
aware that no care has been taken to make it work well as a library. For instance,
you may encounter errors that only surface when used as a library.
### Remote Write
We are publishing our Remote Write protobuf independently at

View file

@ -7,18 +7,20 @@ This page describes the release process and the currently planned schedule for u
Release cadence of first pre-releases being cut is 6 weeks.
Please see [the v2.55 RELEASE.md](https://github.com/prometheus/prometheus/blob/release-2.55/RELEASE.md) for the v2 release series schedule.
| release series | date of first pre-release (year-month-day) | release shepherd |
|----------------|--------------------------------------------|------------------------------------|
| v3.0 | 2024-11-14 | Jan Fajerski (GitHub: @jan--f) |
| v3.1 | 2024-12-17 | Bryan Boreham (GitHub: @bboreham) |
| v3.2 | 2025-01-28 | Jan Fajerski (GitHub: @jan--f) |
| v3.3 | 2025-03-11 | Ayoub Mrini (Github: @machine424) |
| v3.4 | 2025-04-29 | Jan-Otto Kröpke (Github: @jkroepke)|
| v3.5 LTS | 2025-06-03 | Bryan Boreham (GitHub: @bboreham) |
| v3.6 | 2025-08-01 | Ayoub Mrini (Github: @machine424) |
| v3.7 | 2025-09-25 | Arthur Sens and George Krajcsovits (Github: @ArthurSens and @krajorama)|
| v3.8 | 2025-11-06 | Jan Fajerski (GitHub: @jan--f) |
| v3.9 | 2025-12-18 | **volunteer welcome** |
| release series | date of first pre-release (year-month-day) | release shepherd |
|----------------|--------------------------------------------|-------------------------------------------------------------------------|
| v3.0 | 2024-11-14 | Jan Fajerski (GitHub: @jan--f) |
| v3.1 | 2024-12-17 | Bryan Boreham (GitHub: @bboreham) |
| v3.2 | 2025-01-28 | Jan Fajerski (GitHub: @jan--f) |
| v3.3 | 2025-03-11 | Ayoub Mrini (GitHub: @machine424) |
| v3.4 | 2025-04-29 | Jan-Otto Kröpke (GitHub: @jkroepke) |
| v3.5 LTS | 2025-06-03 | Bryan Boreham (GitHub: @bboreham) |
| v3.6 | 2025-08-01 | Ayoub Mrini (GitHub: @machine424) |
| v3.7 | 2025-09-25 | Arthur Sens and George Krajcsovits (GitHub: @ArthurSens and @krajorama) |
| v3.8 | 2025-11-06 | Jan Fajerski (GitHub: @jan--f) |
| v3.9 | 2025-12-18 | Bryan Boreham (GitHub: @bboreham) |
| v3.10 | 2026-02-05 | Ganesh Vernekar (GitHub: @codesome) |
| v3.11 | 2026-03-19 | **volunteer welcome** |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.

View file

@ -1 +1 @@
3.7.3
3.9.1

View file

@ -0,0 +1,125 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"bytes"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/util/testutil"
)
var updateFeatures = flag.Bool("update-features", false, "update features.json golden file")
// TestFeaturesAPI starts the command returned by prometheusCommandWithLogging
// with an empty configuration file, fetches /api/v1/features and compares the
// returned feature map with the golden file testdata/features.json.
//
// When the -update-features flag is set, the golden file is rewritten from the
// live response and no comparison is made. The test is skipped in short mode.
func TestFeaturesAPI(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}
	t.Parallel()

	tmpDir := t.TempDir()
	configFile := filepath.Join(tmpDir, "prometheus.yml")
	require.NoError(t, os.WriteFile(configFile, []byte{}, 0o644))

	port := testutil.RandomUnprivilegedPort(t)
	prom := prometheusCommandWithLogging(
		t,
		configFile,
		port,
		fmt.Sprintf("--storage.tsdb.path=%s", tmpDir),
	)
	require.NoError(t, prom.Start())

	baseURL := fmt.Sprintf("http://127.0.0.1:%d", port)

	// http.DefaultClient has no timeout, so a server that accepts the
	// connection but never answers would block the test until the global test
	// timeout. Bound every request explicitly instead.
	client := &http.Client{Timeout: 10 * time.Second}

	// Wait for Prometheus to be ready.
	require.Eventually(t, func() bool {
		resp, err := client.Get(baseURL + "/-/ready")
		if err != nil {
			return false
		}
		defer resp.Body.Close()
		return resp.StatusCode == http.StatusOK
	}, 10*time.Second, 100*time.Millisecond, "Prometheus didn't become ready in time")

	// Fetch features from the API.
	resp, err := client.Get(baseURL + "/api/v1/features")
	require.NoError(t, err)
	defer resp.Body.Close()
	require.Equal(t, http.StatusOK, resp.StatusCode)

	body, err := io.ReadAll(resp.Body)
	require.NoError(t, err)

	// Parse API response.
	var apiResponse struct {
		Status string                     `json:"status"`
		Data   map[string]map[string]bool `json:"data"`
	}
	require.NoError(t, json.Unmarshal(body, &apiResponse))
	require.Equal(t, "success", apiResponse.Status)

	goldenPath := filepath.Join("testdata", "features.json")

	// If update flag is set, write the current features to the golden file.
	if *updateFeatures {
		var buf bytes.Buffer
		encoder := json.NewEncoder(&buf)
		// Keep characters such as '<', '>' and '&' readable in the golden file.
		encoder.SetEscapeHTML(false)
		encoder.SetIndent("", " ")
		require.NoError(t, encoder.Encode(apiResponse.Data))
		// Ensure testdata directory exists.
		require.NoError(t, os.MkdirAll(filepath.Dir(goldenPath), 0o755))
		require.NoError(t, os.WriteFile(goldenPath, buf.Bytes(), 0o644))
		t.Logf("Updated golden file: %s", goldenPath)
		return
	}

	// Load golden file.
	goldenData, err := os.ReadFile(goldenPath)
	require.NoError(t, err, "Failed to read golden file %s. Run 'make update-features-testdata' to generate it.", goldenPath)

	var expectedFeatures map[string]map[string]bool
	require.NoError(t, json.Unmarshal(goldenData, &expectedFeatures))

	// The labels implementation depends on build tags (stringlabels, slicelabels, or dedupelabels).
	// We need to update the expected features to match the current build.
	if prometheusFeatures, ok := expectedFeatures["prometheus"]; ok {
		// Remove all label implementation features from expected.
		delete(prometheusFeatures, "stringlabels")
		delete(prometheusFeatures, "slicelabels")
		delete(prometheusFeatures, "dedupelabels")
		// Add the current implementation.
		if actualPrometheus, ok := apiResponse.Data["prometheus"]; ok {
			for _, impl := range []string{"stringlabels", "slicelabels", "dedupelabels"} {
				if actualPrometheus[impl] {
					prometheusFeatures[impl] = true
				}
			}
		}
	}

	// Compare the features data with the golden file.
	require.Equal(t, expectedFeatures, apiResponse.Data, "Features mismatch. Run 'make update-features-testdata' to update the golden file.")
}

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -16,6 +16,7 @@ package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
@ -72,11 +73,13 @@ import (
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/storage/remote"
"github.com/prometheus/prometheus/template"
"github.com/prometheus/prometheus/tracing"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/agent"
"github.com/prometheus/prometheus/util/compression"
"github.com/prometheus/prometheus/util/documentcli"
"github.com/prometheus/prometheus/util/features"
"github.com/prometheus/prometheus/util/logging"
"github.com/prometheus/prometheus/util/notifications"
prom_runtime "github.com/prometheus/prometheus/util/runtime"
@ -215,6 +218,8 @@ type flagConfig struct {
promqlEnableDelayedNameRemoval bool
parserOpts parser.Options
promslogConfig promslog.Config
}
@ -230,11 +235,14 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
c.tsdb.EnableMemorySnapshotOnShutdown = true
logger.Info("Experimental memory snapshot on shutdown enabled")
case "extra-scrape-metrics":
c.scrape.ExtraMetrics = true
logger.Info("Experimental additional scrape metrics enabled")
t := true
config.DefaultConfig.GlobalConfig.ExtraScrapeMetrics = &t
config.DefaultGlobalConfig.ExtraScrapeMetrics = &t
logger.Warn("This option for --enable-feature is being phased out. It currently changes the default for the extra_scrape_metrics config setting to true, but will become a no-op in a future version. Stop using this option and set extra_scrape_metrics in the config instead.", "option", o)
case "metadata-wal-records":
c.scrape.AppendMetadata = true
c.web.AppendMetadata = true
features.Enable(features.TSDB, "metadata_wal_records")
logger.Info("Experimental metadata records in WAL enabled")
case "promql-per-step-stats":
c.enablePerStepStats = true
@ -249,26 +257,36 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
c.enableConcurrentRuleEval = true
logger.Info("Experimental concurrent rule evaluation enabled.")
case "promql-experimental-functions":
parser.EnableExperimentalFunctions = true
c.parserOpts.EnableExperimentalFunctions = true
logger.Info("Experimental PromQL functions enabled.")
case "promql-duration-expr":
parser.ExperimentalDurationExpr = true
c.parserOpts.ExperimentalDurationExpr = true
logger.Info("Experimental duration expression parsing enabled.")
case "native-histograms":
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
t := true
config.DefaultConfig.GlobalConfig.ScrapeNativeHistograms = &t
config.DefaultGlobalConfig.ScrapeNativeHistograms = &t
logger.Warn("This option for --enable-feature is being phased out. It currently changes the default for the scrape_native_histograms scrape config setting to true, but will become a no-op in v3.9+. Stop using this option and set scrape_native_histograms in the scrape config instead.", "option", o)
logger.Warn("This option for --enable-feature is a no-op. To scrape native histograms, set the scrape_native_histograms scrape config setting to true.", "option", o)
case "ooo-native-histograms":
logger.Warn("This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", o)
case "created-timestamp-zero-ingestion":
c.scrape.EnableCreatedTimestampZeroIngestion = true
c.web.CTZeroIngestionEnabled = true
// NOTE(bwplotka): Once AppendableV1 is removed, there will be only the TSDB and agent flags.
c.scrape.EnableStartTimestampZeroIngestion = true
c.web.STZeroIngestionEnabled = true
c.tsdb.EnableSTAsZeroSample = true
c.agent.EnableSTAsZeroSample = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
// This is to widen the ST support surface.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
logger.Info("Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
logger.Info("Experimental start timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "st-storage":
// TODO(bwplotka): Implement ST Storage as per PROM-60 and document this hidden feature flag.
c.tsdb.EnableSTStorage = true
c.agent.EnableSTStorage = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. This is to widen the ST support surface.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
logger.Info("Experimental start timestamp storage enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "delayed-compaction":
c.tsdb.EnableDelayedCompaction = true
logger.Info("Experimental delayed compaction is enabled.")
@ -276,8 +294,11 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
c.promqlEnableDelayedNameRemoval = true
logger.Info("Experimental PromQL delayed name removal enabled.")
case "promql-extended-range-selectors":
parser.EnableExtendedRangeSelectors = true
c.parserOpts.EnableExtendedRangeSelectors = true
logger.Info("Experimental PromQL extended range selectors enabled.")
case "promql-binop-fill-modifiers":
c.parserOpts.EnableBinopFillModifiers = true
logger.Info("Experimental PromQL binary operator fill modifiers enabled.")
case "":
continue
case "old-ui":
@ -345,10 +366,14 @@ func main() {
Registerer: prometheus.DefaultRegisterer,
},
web: web.Options{
Registerer: prometheus.DefaultRegisterer,
Gatherer: prometheus.DefaultGatherer,
Registerer: prometheus.DefaultRegisterer,
Gatherer: prometheus.DefaultGatherer,
FeatureRegistry: features.DefaultRegistry,
},
promslogConfig: promslog.Config{},
scrape: scrape.Options{
FeatureRegistry: features.DefaultRegistry,
},
}
a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server").UsageWriter(os.Stdout)
@ -460,8 +485,9 @@ func main() {
Default("true").Hidden().BoolVar(&cfg.tsdb.EnableOverlappingCompaction)
var (
tsdbWALCompression bool
tsdbWALCompressionType string
tsdbWALCompression bool
tsdbWALCompressionType string
tsdbDelayCompactFilePath string
)
serverOnlyFlag(a, "storage.tsdb.wal-compression", "Compress the tsdb WAL. If false, the --storage.tsdb.wal-compression-type flag is ignored.").
Hidden().Default("true").BoolVar(&tsdbWALCompression)
@ -478,6 +504,12 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.delayed-compaction.max-percent", "Sets the upper limit for the random compaction delay, specified as a percentage of the head chunk range. 100 means the compaction can be delayed by up to the entire head chunk range. Only effective when the delayed-compaction feature flag is enabled.").
Default("10").Hidden().IntVar(&cfg.tsdb.CompactionDelayMaxPercent)
serverOnlyFlag(a, "storage.tsdb.delay-compact-file.path", "Path to a JSON file with uploaded TSDB blocks e.g. Thanos shipper meta file. If set TSDB will only compact 1 level blocks that are marked as uploaded in that file, improving external storage integrations e.g. with Thanos sidecar. 1+ level compactions won't be delayed.").
Default("").StringVar(&tsdbDelayCompactFilePath)
serverOnlyFlag(a, "storage.tsdb.block-reload-interval", "Interval at which to check for new or removed blocks in storage. Users who manually backfill or drop blocks must wait up to this duration before changes become available.").
Default("1m").Hidden().SetValue(&cfg.tsdb.BlockReloadInterval)
agentOnlyFlag(a, "storage.agent.path", "Base path for metrics storage.").
Default("data-agent/").StringVar(&cfg.agentStoragePath)
@ -564,7 +596,7 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors, promql-binop-fill-modifiers. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode)
@ -600,6 +632,8 @@ func main() {
os.Exit(1)
}
promqlParser := parser.NewParser(cfg.parserOpts)
if agentMode && len(serverOnlyFlags) > 0 {
fmt.Fprintf(os.Stderr, "The following flag(s) can not be used in agent mode: %q", serverOnlyFlags)
os.Exit(3)
@ -654,7 +688,7 @@ func main() {
}
// Parse rule files to verify they exist and contain valid rules.
if err := rules.ParseFiles(cfgFile.RuleFiles, cfgFile.GlobalConfig.MetricNameValidationScheme); err != nil {
if err := rules.ParseFiles(cfgFile.RuleFiles, cfgFile.GlobalConfig.MetricNameValidationScheme, promqlParser); err != nil {
absPath, pathErr := filepath.Abs(cfg.configFile)
if pathErr != nil {
absPath = cfg.configFile
@ -669,8 +703,13 @@ func main() {
}
cfg.tsdb.MaxExemplars = cfgFile.StorageConfig.ExemplarsConfig.MaxExemplars
}
if cfg.tsdb.BlockReloadInterval < model.Duration(1*time.Second) {
logger.Warn("The option --storage.tsdb.block-reload-interval is set to a value less than 1s. Setting it to 1s to avoid overload.")
cfg.tsdb.BlockReloadInterval = model.Duration(1 * time.Second)
}
if cfgFile.StorageConfig.TSDBConfig != nil {
cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold
if cfgFile.StorageConfig.TSDBConfig.Retention != nil {
if cfgFile.StorageConfig.TSDBConfig.Retention.Time > 0 {
cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time
@ -707,6 +746,12 @@ func main() {
}
}
if tsdbDelayCompactFilePath != "" {
logger.Info("Compactions will be delayed for blocks not marked as uploaded in the file tracking uploads", "path", tsdbDelayCompactFilePath)
cfg.tsdb.BlockCompactionExcludeFunc = exludeBlocksPendingUpload(
logger, tsdbDelayCompactFilePath)
}
// Now that the validity of the config is established, set the config
// success metrics accordingly, although the config isn't really loaded
// yet. This will happen later (including setting these metrics again),
@ -790,6 +835,12 @@ func main() {
"vm_limits", prom_runtime.VMLimits(),
)
features.Set(features.Prometheus, "agent_mode", agentMode)
features.Set(features.Prometheus, "server_mode", !agentMode)
features.Set(features.Prometheus, "auto_reload_config", cfg.enableAutoReload)
features.Enable(features.Prometheus, labels.ImplementationName)
template.RegisterFeatures(features.DefaultRegistry)
var (
localStorage = &readyStorage{stats: tsdb.NewDBStats()}
scraper = &readyScrapeManager{}
@ -826,13 +877,13 @@ func main() {
os.Exit(1)
}
discoveryManagerScrape = discovery.NewManager(ctxScrape, logger.With("component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"))
discoveryManagerScrape = discovery.NewManager(ctxScrape, logger.With("component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"), discovery.FeatureRegistry(features.DefaultRegistry))
if discoveryManagerScrape == nil {
logger.Error("failed to create a discovery manager scrape")
os.Exit(1)
}
discoveryManagerNotify = discovery.NewManager(ctxNotify, logger.With("component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"))
discoveryManagerNotify = discovery.NewManager(ctxNotify, logger.With("component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"), discovery.FeatureRegistry(features.DefaultRegistry))
if discoveryManagerNotify == nil {
logger.Error("failed to create a discovery manager notify")
os.Exit(1)
@ -842,7 +893,7 @@ func main() {
&cfg.scrape,
logger.With("component", "scrape manager"),
logging.NewJSONFileLogger,
fanoutStorage,
nil, fanoutStorage,
prometheus.DefaultRegisterer,
)
if err != nil {
@ -873,6 +924,8 @@ func main() {
EnablePerStepStats: cfg.enablePerStepStats,
EnableDelayedNameRemoval: cfg.promqlEnableDelayedNameRemoval,
EnableTypeAndUnitLabels: cfg.scrape.EnableTypeAndUnitLabels,
FeatureRegistry: features.DefaultRegistry,
Parser: promqlParser,
}
queryEngine = promql.NewEngine(opts)
@ -895,6 +948,8 @@ func main() {
DefaultRuleQueryOffset: func() time.Duration {
return time.Duration(cfgFile.GlobalConfig.RuleQueryOffset)
},
FeatureRegistry: features.DefaultRegistry,
Parser: promqlParser,
})
}
@ -914,6 +969,7 @@ func main() {
cfg.web.LookbackDelta = time.Duration(cfg.lookbackDelta)
cfg.web.IsAgent = agentMode
cfg.web.AppName = modeAppName
cfg.web.Parser = promqlParser
cfg.web.Version = &web.PrometheusVersion{
Version: version.Version,
@ -1135,9 +1191,11 @@ func main() {
func() error {
<-reloadReady.C
ruleManager.Run()
logger.Info("Rule manager stopped")
return nil
},
func(error) {
logger.Info("Stopping rule manager manager...")
ruleManager.Stop()
},
)
@ -1172,9 +1230,11 @@ func main() {
func() error {
<-reloadReady.C
tracingManager.Run()
logger.Info("Tracing manager stopped")
return nil
},
func(error) {
logger.Info("Stopping tracing manager...")
tracingManager.Stop()
},
)
@ -1251,6 +1311,7 @@ func main() {
checksum = currentChecksum
}
case <-cancel:
logger.Info("Reloaders stopped")
return nil
}
}
@ -1258,6 +1319,7 @@ func main() {
func(error) {
// Wait for any in-progress reloads to complete to avoid
// reloading things after they have been shutdown.
logger.Info("Stopping reloaders...")
cancel <- struct{}{}
},
)
@ -1331,6 +1393,9 @@ func main() {
"RetentionDuration", cfg.tsdb.RetentionDuration,
"WALSegmentSize", cfg.tsdb.WALSegmentSize,
"WALCompressionType", cfg.tsdb.WALCompressionType,
"BlockReloadInterval", cfg.tsdb.BlockReloadInterval,
"EnableSTAsZeroSample", cfg.tsdb.EnableSTAsZeroSample,
"EnableSTStorage", cfg.tsdb.EnableSTStorage,
)
startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000)
@ -1338,9 +1403,11 @@ func main() {
db.SetWriteNotified(remoteStorage)
close(dbOpen)
<-cancel
logger.Info("TSDB stopped")
return nil
},
func(error) {
logger.Info("Stopping storage...")
if err := fanoutStorage.Close(); err != nil {
logger.Error("Error stopping storage", "err", err)
}
@ -1387,15 +1454,19 @@ func main() {
"MinWALTime", cfg.agent.MinWALTime,
"MaxWALTime", cfg.agent.MaxWALTime,
"OutOfOrderTimeWindow", cfg.agent.OutOfOrderTimeWindow,
"EnableSTAsZeroSample", cfg.agent.EnableSTAsZeroSample,
"EnableSTStorage", cfg.tsdb.EnableSTStorage,
)
localStorage.Set(db, 0)
db.SetWriteNotified(remoteStorage)
close(dbOpen)
<-cancel
logger.Info("Agent WAL storage stopped")
return nil
},
func(error) {
logger.Info("Stopping agent WAL storage...")
if err := fanoutStorage.Close(); err != nil {
logger.Error("Error stopping storage", "err", err)
}
@ -1410,9 +1481,11 @@ func main() {
if err := webHandler.Run(ctxWeb, listeners, *webConfig); err != nil {
return fmt.Errorf("error starting web server: %w", err)
}
logger.Info("Web handler stopped")
return nil
},
func(error) {
logger.Info("Stopping web handler...")
cancelWeb()
},
)
@ -1435,6 +1508,7 @@ func main() {
return nil
},
func(error) {
logger.Info("Stopping notifier manager...")
notifierManager.Stop()
},
)
@ -1538,7 +1612,7 @@ func reloadConfig(filename string, enableExemplarStorage bool, logger *slog.Logg
logger.Error("Failed to apply configuration", "err", err)
failed = true
}
timingsLogger = timingsLogger.With((rl.name), time.Since(rstart))
timingsLogger = timingsLogger.With(rl.name, time.Since(rstart))
}
if failed {
return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
@ -1712,6 +1786,14 @@ func (s *readyStorage) Appender(ctx context.Context) storage.Appender {
return notReadyAppender{}
}
// AppenderV2 implements the Storage interface. It delegates to the storage
// returned by s.get(); while that is still nil it hands out a
// notReadyAppenderV2 instead.
func (s *readyStorage) AppenderV2(ctx context.Context) storage.AppenderV2 {
	db := s.get()
	if db == nil {
		return notReadyAppenderV2{}
	}
	return db.AppenderV2(ctx)
}
type notReadyAppender struct{}
// SetOptions does nothing in this appender implementation.
@ -1729,7 +1811,7 @@ func (notReadyAppender) AppendHistogram(storage.SeriesRef, labels.Labels, int64,
return 0, tsdb.ErrNotReady
}
func (notReadyAppender) AppendHistogramCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64, *histogram.Histogram, *histogram.FloatHistogram) (storage.SeriesRef, error) {
func (notReadyAppender) AppendHistogramSTZeroSample(storage.SeriesRef, labels.Labels, int64, int64, *histogram.Histogram, *histogram.FloatHistogram) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
@ -1737,7 +1819,7 @@ func (notReadyAppender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadat
return 0, tsdb.ErrNotReady
}
func (notReadyAppender) AppendCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) {
func (notReadyAppender) AppendSTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
@ -1745,6 +1827,15 @@ func (notReadyAppender) Commit() error { return tsdb.ErrNotReady }
func (notReadyAppender) Rollback() error { return tsdb.ErrNotReady }
// notReadyAppenderV2 is the storage.AppenderV2 returned by
// readyStorage.AppenderV2 while no underlying storage is available.
// Every method fails with tsdb.ErrNotReady.
type notReadyAppenderV2 struct{}
// Append rejects the sample: it always returns a zero series reference and tsdb.ErrNotReady.
func (notReadyAppenderV2) Append(storage.SeriesRef, labels.Labels, int64, int64, float64, *histogram.Histogram, *histogram.FloatHistogram, storage.AOptions) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
// Commit always returns tsdb.ErrNotReady.
func (notReadyAppenderV2) Commit() error { return tsdb.ErrNotReady }
// Rollback always returns tsdb.ErrNotReady.
func (notReadyAppenderV2) Rollback() error { return tsdb.ErrNotReady }
// Close implements the Storage interface.
func (s *readyStorage) Close() error {
if x := s.get(); x != nil {
@ -1887,6 +1978,11 @@ type tsdbOptions struct {
CompactionDelayMaxPercent int
EnableOverlappingCompaction bool
UseUncachedIO bool
BlockCompactionExcludeFunc tsdb.BlockExcludeFilterFunc
BlockReloadInterval model.Duration
EnableSTAsZeroSample bool
EnableSTStorage bool
StaleSeriesCompactionThreshold float64
}
func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
@ -1910,6 +2006,12 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
CompactionDelayMaxPercent: opts.CompactionDelayMaxPercent,
EnableOverlappingCompaction: opts.EnableOverlappingCompaction,
UseUncachedIO: opts.UseUncachedIO,
BlockCompactionExcludeFunc: opts.BlockCompactionExcludeFunc,
BlockReloadInterval: time.Duration(opts.BlockReloadInterval),
FeatureRegistry: features.DefaultRegistry,
EnableSTAsZeroSample: opts.EnableSTAsZeroSample,
EnableSTStorage: opts.EnableSTStorage,
StaleSeriesCompactionThreshold: opts.StaleSeriesCompactionThreshold,
}
}
@ -1922,7 +2024,9 @@ type agentOptions struct {
TruncateFrequency model.Duration
MinWALTime, MaxWALTime model.Duration
NoLockfile bool
OutOfOrderTimeWindow int64
OutOfOrderTimeWindow int64 // TODO(bwplotka): Unused option, fix it or remove.
EnableSTAsZeroSample bool
EnableSTStorage bool
}
func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Options {
@ -1938,6 +2042,8 @@ func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Option
MaxWALTime: durationToInt64Millis(time.Duration(opts.MaxWALTime)),
NoLockfile: opts.NoLockfile,
OutOfOrderTimeWindow: outOfOrderTimeWindow,
EnableSTAsZeroSample: opts.EnableSTAsZeroSample,
EnableSTStorage: opts.EnableSTStorage,
}
}
@ -1974,3 +2080,48 @@ func (p *rwProtoMsgFlagParser) Set(opt string) error {
*p.msgs = append(*p.msgs, t)
return nil
}
// UploadMeta is the portion of the TSDB upload meta file (the JSON file given
// via --storage.tsdb.delay-compact-file.path) that is decoded by
// exludeBlocksPendingUpload.
type UploadMeta struct {
// Uploaded lists the blocks marked as uploaded. Entries are compared
// against the string form of a block's ULID.
Uploaded []string `json:"uploaded"`
}
// Cache of the last successfully parsed UploadMeta. exludeBlocksPendingUpload
// reuses it for up to one minute instead of re-reading the file for every block.
// NOTE(review): these variables are read and written without any locking;
// presumably the exclude filter is only invoked from one goroutine at a time — confirm.
var (
tsdbDelayCompactLastMeta *UploadMeta // The content of uploadMetaPath from the last time we've opened and parsed it; nil until then.
tsdbDelayCompactLastMetaTime time.Time // The timestamp (UTC) at which we stored tsdbDelayCompactLastMeta last time.
)
// exludeBlocksPendingUpload returns a tsdb.BlockExcludeFilterFunc that reports
// true (exclude the block from compaction) for blocks of compaction level 1 or
// lower whose ULID is not listed as uploaded in the JSON file at uploadMetaPath.
//
// The parsed file is kept in tsdbDelayCompactLastMeta and reused for one
// minute. If the file cannot be read or parsed, a warning is logged through
// logger and the block is not excluded.
func exludeBlocksPendingUpload(logger *slog.Logger, uploadMetaPath string) tsdb.BlockExcludeFilterFunc {
	return func(meta *tsdb.BlockMeta) bool {
		// Blocks above level 1 are assumed to be not uploaded, so there is
		// nothing to wait for. See the `storage.tsdb.delay-compact-file.path`
		// flag for details.
		if meta.Compaction.Level > 1 {
			return false
		}

		// If the cached content was stored within the last minute, then use it.
		if cached := tsdbDelayCompactLastMeta; cached != nil {
			oneMinuteAgo := time.Now().UTC().Add(-time.Minute)
			if oneMinuteAgo.Before(tsdbDelayCompactLastMetaTime) {
				return !slices.Contains(cached.Uploaded, meta.ULID.String())
			}
		}

		// Nothing cached, or the cache is older than a minute: read and parse
		// the file at uploadMetaPath again.
		raw, readErr := os.ReadFile(uploadMetaPath)
		if readErr != nil {
			logger.Warn("cannot open TSDB upload meta file", slog.String("path", uploadMetaPath), slog.Any("err", readErr))
			return false
		}

		parsed := new(UploadMeta)
		if parseErr := json.Unmarshal(raw, parsed); parseErr != nil {
			logger.Warn("cannot parse TSDB upload meta file", slog.String("path", uploadMetaPath), slog.Any("err", parseErr))
			return false
		}

		// Remember the freshly parsed content for subsequent calls.
		tsdbDelayCompactLastMeta = parsed
		tsdbDelayCompactLastMetaTime = time.Now().UTC()

		return !slices.Contains(parsed.Uploaded, meta.ULID.String())
	}
}

View file

@ -1,4 +1,4 @@
// Copyright 2017 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -395,6 +395,7 @@ func TestTimeMetrics(t *testing.T) {
}
func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames ...string) map[string]float64 {
t.Helper()
f, err := reg.Gather()
require.NoError(t, err)
@ -426,7 +427,7 @@ func TestAgentSuccessfulStartup(t *testing.T) {
go func() { done <- prom.Wait() }()
select {
case err := <-done:
t.Logf("prometheus agent should be still running: %v", err)
t.Logf("prometheus agent exited early: %v", err)
actualExitStatus = prom.ProcessState.ExitCode()
case <-time.After(startupTime):
prom.Process.Kill()
@ -571,12 +572,7 @@ func TestDocumentation(t *testing.T) {
var stdout bytes.Buffer
cmd.Stdout = &stdout
if err := cmd.Run(); err != nil {
var exitError *exec.ExitError
if errors.As(err, &exitError) && exitError.ExitCode() != 0 {
fmt.Println("Command failed with non-zero exit code")
}
}
require.NoError(t, cmd.Run(), "failed to generate CLI documentation via --write-documentation")
generatedContent := strings.ReplaceAll(stdout.String(), filepath.Base(promPath), strings.TrimSuffix(filepath.Base(promPath), ".test"))
@ -753,7 +749,7 @@ global:
configFile := filepath.Join(tmpDir, "prometheus.yml")
port := testutil.RandomUnprivilegedPort(t)
os.WriteFile(configFile, []byte(tc.config), 0o777)
require.NoError(t, os.WriteFile(configFile, []byte(tc.config), 0o777))
prom := prometheusCommandWithLogging(
t,
configFile,
@ -801,7 +797,7 @@ global:
newConfig := `
runtime:
gogc: 99`
os.WriteFile(configFile, []byte(newConfig), 0o777)
require.NoError(t, os.WriteFile(configFile, []byte(newConfig), 0o777))
reloadPrometheusConfig(t, reloadURL)
ensureGOGCValue(99.0)
})
@ -834,7 +830,7 @@ scrape_configs:
static_configs:
- targets: ['localhost:%d']
`, port, port)
os.WriteFile(configFile, []byte(config), 0o777)
require.NoError(t, os.WriteFile(configFile, []byte(config), 0o777))
prom := prometheusCommandWithLogging(
t,
@ -968,7 +964,18 @@ remote_write:
// TestRemoteWrite_ReshardingWithoutDeadlock ensures that resharding (scaling up) doesn't block when the shards are full.
// See: https://github.com/prometheus/prometheus/issues/17384.
//
// The following shows key resharding metrics before and after the fix.
// In v3.7.0, the deadlock prevented the resharding logic from observing the true incoming data rate.
//
// | Metric | v3.7.0 | after the fix |
// |---------------------|---------------|---------------------|
// | dataInRate | 0.6 | 307.2 |
// | dataPendingRate | 0.2 | 306.8 |
// | dataPending | 0 | 1228.8 |
// | desiredShards | 0.6 | 369.2 |.
func TestRemoteWrite_ReshardingWithoutDeadlock(t *testing.T) {
t.Skip("flaky test, see https://github.com/prometheus/prometheus/issues/17489")
t.Parallel()
tmpDir := t.TempDir()
@ -983,7 +990,8 @@ func TestRemoteWrite_ReshardingWithoutDeadlock(t *testing.T) {
config := fmt.Sprintf(`
global:
scrape_interval: 100ms
# Using a smaller interval may cause the scrape to time out.
scrape_interval: 1s
scrape_configs:
- job_name: 'self'
static_configs:
@ -994,6 +1002,8 @@ remote_write:
queue_config:
# Speed up the queue being full.
capacity: 1
# Helps keep the time to send one sample low so it doesnt influence the resharding logic.
max_samples_per_send: 1
`, port, server.URL)
require.NoError(t, os.WriteFile(configFile, []byte(config), 0o777))
@ -1002,36 +1012,52 @@ remote_write:
configFile,
port,
fmt.Sprintf("--storage.tsdb.path=%s", tmpDir),
"--log.level=debug",
)
require.NoError(t, prom.Start())
var checkInitialDesiredShardsOnce sync.Once
require.Eventually(t, func() bool {
const desiredShardsMetric = "prometheus_remote_storage_shards_desired"
getMetrics := func() ([]byte, error) {
r, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/metrics", port))
if err != nil {
return false
return nil, err
}
defer r.Body.Close()
if r.StatusCode != http.StatusOK {
return false
return nil, fmt.Errorf("unexpected status code: %d", r.StatusCode)
}
metrics, err := io.ReadAll(r.Body)
if err != nil {
return nil, err
}
return metrics, nil
}
// Ensure the initial desired shards is 1.
require.Eventually(t, func() bool {
metrics, err := getMetrics()
if err != nil {
return false
}
initialDesiredShards, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeGauge, desiredShardsMetric)
if err != nil {
return false
}
return initialDesiredShards == 1.0
}, 10*time.Second, 100*time.Millisecond)
checkInitialDesiredShardsOnce.Do(func() {
s, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeGauge, "prometheus_remote_storage_shards_desired")
require.NoError(t, err)
require.Equal(t, 1.0, s)
})
desiredShards, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeGauge, "prometheus_remote_storage_shards_desired")
if err != nil || desiredShards <= 1 {
// Ensure scaling up is triggered after some time.
require.Eventually(t, func() bool {
metrics, err := getMetrics()
if err != nil {
return false
}
desiredShards, err := getMetricValue(t, bytes.NewReader(metrics), model.MetricTypeGauge, desiredShardsMetric)
if err != nil || desiredShards <= 1.0 {
return false
}
return true
// 3*shardUpdateDuration to allow for the resharding logic to run.
}, 30*time.Second, 1*time.Second)
}, 30*time.Second, time.Second)
}

View file

@ -1,4 +1,4 @@
// Copyright 2020 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2020 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -334,7 +334,8 @@ func (p *queryLogTest) run(t *testing.T) {
p.query(t)
ql := readQueryLog(t, queryLogFile.Name())
// Wait for query log entry to be written (avoid race with file I/O).
ql := waitForQueryLog(t, queryLogFile.Name(), 1)
qc := len(ql)
if p.exactQueryCount() {
require.Equal(t, 1, qc)
@ -361,7 +362,8 @@ func (p *queryLogTest) run(t *testing.T) {
p.query(t)
qc++
ql = readQueryLog(t, queryLogFile.Name())
// Wait for query log entry to be written (avoid race with file I/O).
ql = waitForQueryLog(t, queryLogFile.Name(), qc)
if p.exactQueryCount() {
require.Len(t, ql, qc)
} else {
@ -392,7 +394,8 @@ func (p *queryLogTest) run(t *testing.T) {
qc++
ql = readQueryLog(t, newFile.Name())
// Wait for query log entry to be written (avoid race with file I/O).
ql = waitForQueryLog(t, newFile.Name(), qc)
if p.exactQueryCount() {
require.Len(t, ql, qc)
} else {
@ -404,7 +407,8 @@ func (p *queryLogTest) run(t *testing.T) {
p.query(t)
ql = readQueryLog(t, queryLogFile.Name())
// Wait for query log entry to be written (avoid race with file I/O).
ql = waitForQueryLog(t, queryLogFile.Name(), 1)
qc = len(ql)
if p.exactQueryCount() {
require.Equal(t, 1, qc)
@ -446,6 +450,18 @@ func readQueryLog(t *testing.T, path string) []queryLogLine {
return ql
}
// waitForQueryLog polls the query log at path until it contains at least
// minEntries entries and returns the entries read by the successful poll.
// The log is read immediately and then every 100ms; if it is still too short
// after 5s the test is failed.
//
// The loop is written out by hand instead of using require.Eventually because
// message arguments passed to require.Eventually are evaluated once, before
// polling starts, so a "got %d" argument built from the polled slice would
// always report 0. Polling here also keeps readQueryLog, which takes t, on the
// test goroutine.
func waitForQueryLog(t *testing.T, path string, minEntries int) []queryLogLine {
	t.Helper()

	const (
		timeout  = 5 * time.Second
		interval = 100 * time.Millisecond
	)

	deadline := time.Now().Add(timeout)
	for {
		ql := readQueryLog(t, path)
		if len(ql) >= minEntries {
			return ql
		}
		if !time.Now().Before(deadline) {
			// Report the count seen by the last poll, not the initial zero.
			t.Fatalf("timed out waiting for query log to have at least %d entries, got %d", minEntries, len(ql))
		}
		time.Sleep(interval)
	}
}
func TestQueryLog(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")

View file

@ -1,4 +1,4 @@
// Copyright 2024 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2024 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

257
cmd/prometheus/testdata/features.json vendored Normal file
View file

@ -0,0 +1,257 @@
{
"api": {
"admin": false,
"exclude_alerts": true,
"label_values_match": true,
"lifecycle": false,
"openapi_3.1": true,
"openapi_3.2": true,
"otlp_write_receiver": false,
"query_stats": true,
"query_warnings": true,
"remote_write_receiver": false,
"time_range_labels": true,
"time_range_series": true
},
"otlp_receiver": {
"delta_conversion": false,
"native_delta_ingestion": false
},
"prometheus": {
"agent_mode": false,
"auto_reload_config": false,
"server_mode": true,
"stringlabels": true
},
"promql": {
"anchored": false,
"at_modifier": true,
"bool": true,
"by": true,
"delayed_name_removal": false,
"duration_expr": false,
"fill": false,
"fill_left": false,
"fill_right": false,
"group_left": true,
"group_right": true,
"ignoring": true,
"negative_offset": true,
"offset": true,
"on": true,
"per_query_lookback_delta": true,
"per_step_stats": false,
"smoothed": false,
"subqueries": true,
"type_and_unit_labels": false,
"without": true
},
"promql_functions": {
"abs": true,
"absent": true,
"absent_over_time": true,
"acos": true,
"acosh": true,
"asin": true,
"asinh": true,
"atan": true,
"atanh": true,
"avg_over_time": true,
"ceil": true,
"changes": true,
"clamp": true,
"clamp_max": true,
"clamp_min": true,
"cos": true,
"cosh": true,
"count_over_time": true,
"day_of_month": true,
"day_of_week": true,
"day_of_year": true,
"days_in_month": true,
"deg": true,
"delta": true,
"deriv": true,
"double_exponential_smoothing": false,
"exp": true,
"first_over_time": false,
"floor": true,
"histogram_avg": true,
"histogram_count": true,
"histogram_fraction": true,
"histogram_quantile": true,
"histogram_quantiles": false,
"histogram_stddev": true,
"histogram_stdvar": true,
"histogram_sum": true,
"hour": true,
"idelta": true,
"increase": true,
"info": false,
"irate": true,
"label_join": true,
"label_replace": true,
"last_over_time": true,
"ln": true,
"log10": true,
"log2": true,
"mad_over_time": false,
"max_over_time": true,
"min_over_time": true,
"minute": true,
"month": true,
"pi": true,
"predict_linear": true,
"present_over_time": true,
"quantile_over_time": true,
"rad": true,
"rate": true,
"resets": true,
"round": true,
"scalar": true,
"sgn": true,
"sin": true,
"sinh": true,
"sort": true,
"sort_by_label": false,
"sort_by_label_desc": false,
"sort_desc": true,
"sqrt": true,
"stddev_over_time": true,
"stdvar_over_time": true,
"sum_over_time": true,
"tan": true,
"tanh": true,
"time": true,
"timestamp": true,
"ts_of_first_over_time": false,
"ts_of_last_over_time": false,
"ts_of_max_over_time": false,
"ts_of_min_over_time": false,
"vector": true,
"year": true
},
"promql_operators": {
"!=": true,
"!~": true,
"%": true,
"*": true,
"+": true,
"-": true,
"/": true,
"<": true,
"<=": true,
"==": true,
"=~": true,
">": true,
">=": true,
"@": true,
"^": true,
"and": true,
"atan2": true,
"avg": true,
"bottomk": true,
"count": true,
"count_values": true,
"group": true,
"limit_ratio": false,
"limitk": false,
"max": true,
"min": true,
"or": true,
"quantile": true,
"stddev": true,
"stdvar": true,
"sum": true,
"topk": true,
"unless": true
},
"rules": {
"concurrent_rule_eval": false,
"keep_firing_for": true,
"query_offset": true
},
"scrape": {
"extra_scrape_metrics": true,
"start_timestamp_zero_ingestion": false,
"type_and_unit_labels": false
},
"service_discovery_providers": {
"aws": true,
"azure": true,
"consul": true,
"digitalocean": true,
"dns": true,
"docker": true,
"dockerswarm": true,
"ec2": true,
"ecs": true,
"elasticache": true,
"eureka": true,
"file": true,
"gce": true,
"hetzner": true,
"http": true,
"ionos": true,
"kubernetes": true,
"kuma": true,
"lightsail": true,
"linode": true,
"marathon": true,
"msk": true,
"nerve": true,
"nomad": true,
"openstack": true,
"ovhcloud": true,
"puppetdb": true,
"scaleway": true,
"serverset": true,
"stackit": true,
"static": true,
"triton": true,
"uyuni": true,
"vultr": true
},
"templating_functions": {
"args": true,
"externalURL": true,
"first": true,
"graphLink": true,
"humanize": true,
"humanize1024": true,
"humanizeDuration": true,
"humanizePercentage": true,
"humanizeTimestamp": true,
"label": true,
"match": true,
"now": true,
"parseDuration": true,
"pathPrefix": true,
"query": true,
"reReplaceAll": true,
"safeHtml": true,
"sortByLabel": true,
"stripDomain": true,
"stripPort": true,
"strvalue": true,
"tableLink": true,
"title": true,
"toDuration": true,
"toLower": true,
"toTime": true,
"toUpper": true,
"urlQueryEscape": true,
"value": true
},
"tsdb": {
"delayed_compaction": false,
"exemplar_storage": false,
"isolation": true,
"native_histograms": true,
"use_uncached_io": false
},
"ui": {
"ui_v2": false,
"ui_v3": true
}
}

View file

@ -0,0 +1,144 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"encoding/json"
"os"
"path"
"testing"
"time"
"github.com/oklog/ulid/v2"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/tsdb"
)
// TestBlockExcludeFilter checks the filter returned by exludeBlocksPendingUpload
// against a shipper file written to a temporary directory, covering a missing
// file, a corrupt file, different uploaded lists and compaction levels, and the
// package-level cache of the last read upload meta.
func TestBlockExcludeFilter(t *testing.T) {
	for _, test := range []struct {
		summary    string                    // Description of the test case.
		uploaded   []ulid.ULID               // List of blocks marked as uploaded inside the shipper file.
		setupFn    func(*testing.T, string)  // Optional function to run before the test, takes the subtest and the path to the shipper file.
		meta       tsdb.BlockMeta            // Meta of the block we're checking.
		isExcluded bool                      // What do we expect to be returned.
	}{
		{
			summary: "missing file",
			setupFn: func(t *testing.T, shipperFile string) {
				// Delete shipper file to test error handling.
				require.NoError(t, os.Remove(shipperFile))
			},
			meta:       tsdb.BlockMeta{ULID: ulid.MustNew(1, nil)},
			isExcluded: false,
		},
		{
			summary: "corrupt file",
			setupFn: func(t *testing.T, shipperFile string) {
				// Overwrite the shipper file content with invalid JSON.
				require.NoError(t, os.WriteFile(shipperFile, []byte("{["), 0o644))
			},
			meta:       tsdb.BlockMeta{ULID: ulid.MustNew(1, nil)},
			isExcluded: false,
		},
		{
			summary:    "empty uploaded list",
			uploaded:   []ulid.ULID{},
			meta:       tsdb.BlockMeta{ULID: ulid.MustNew(1, nil)},
			isExcluded: true,
		},
		{
			summary:  "block meta not present in the uploaded list, level=1",
			uploaded: []ulid.ULID{ulid.MustNew(1, nil), ulid.MustNew(3, nil)},
			meta: tsdb.BlockMeta{
				ULID:       ulid.MustNew(2, nil),
				Compaction: tsdb.BlockMetaCompaction{Level: 1},
			},
			isExcluded: true,
		},
		{
			summary:  "block meta not present in the uploaded list, level=2",
			uploaded: []ulid.ULID{ulid.MustNew(1, nil), ulid.MustNew(3, nil)},
			meta: tsdb.BlockMeta{
				ULID:       ulid.MustNew(2, nil),
				Compaction: tsdb.BlockMetaCompaction{Level: 2},
			},
			isExcluded: false,
		},
		{
			summary:    "block meta present in the uploaded list",
			uploaded:   []ulid.ULID{ulid.MustNew(1, nil), ulid.MustNew(2, nil), ulid.MustNew(3, nil)},
			meta:       tsdb.BlockMeta{ULID: ulid.MustNew(2, nil)},
			isExcluded: false,
		},
		{
			summary: "don't read the file if there's valid cache",
			setupFn: func(t *testing.T, shipperFile string) {
				// Remove the shipper file, cache should be used instead.
				require.NoError(t, os.Remove(shipperFile))
				// Set cached values
				tsdbDelayCompactLastMeta = &UploadMeta{
					Uploaded: []string{
						ulid.MustNew(1, nil).String(),
						ulid.MustNew(2, nil).String(),
						ulid.MustNew(3, nil).String(),
					},
				}
				tsdbDelayCompactLastMetaTime = time.Now().UTC().Add(time.Second * -1)
			},
			uploaded:   []ulid.ULID{},
			meta:       tsdb.BlockMeta{ULID: ulid.MustNew(2, nil)},
			isExcluded: false,
		},
		{
			summary: "read the file if there's cache but expired",
			setupFn: func(_ *testing.T, _ string) {
				// Set the cache but make it too old
				tsdbDelayCompactLastMeta = &UploadMeta{
					Uploaded: []string{},
				}
				tsdbDelayCompactLastMetaTime = time.Now().UTC().Add(time.Second * -61)
			},
			uploaded:   []ulid.ULID{ulid.MustNew(1, nil), ulid.MustNew(2, nil), ulid.MustNew(3, nil)},
			meta:       tsdb.BlockMeta{ULID: ulid.MustNew(2, nil)},
			isExcluded: false,
		},
	} {
		t.Run(test.summary, func(t *testing.T) {
			dir := t.TempDir()
			shipperPath := path.Join(dir, "shipper.json")

			uploaded := make([]string, 0, len(test.uploaded))
			for _, ul := range test.uploaded {
				uploaded = append(uploaded, ul.String())
			}

			ts := UploadMeta{Uploaded: uploaded}
			data, err := json.Marshal(ts)
			require.NoError(t, err, "failed to marshall upload meta file")
			require.NoError(t, os.WriteFile(shipperPath, data, 0o644), "failed to write upload meta file")

			// Start every case from an empty cache, and clear it again when the
			// case finishes so the package-level state does not outlive the test.
			tsdbDelayCompactLastMeta = nil
			tsdbDelayCompactLastMetaTime = time.Time{}
			t.Cleanup(func() {
				tsdbDelayCompactLastMeta = nil
				tsdbDelayCompactLastMetaTime = time.Time{}
			})

			// The setup function receives the subtest's t so that a failing
			// assertion stops this subtest rather than the parent test.
			if test.setupFn != nil {
				test.setupFn(t, shipperPath)
			}

			fn := exludeBlocksPendingUpload(promslog.NewNopLogger(), shipperPath)
			isExcluded := fn(&test.meta)
			require.Equal(t, test.isExcluded, isExcluded)
		})
	}
}

View file

@ -1,4 +1,4 @@
// Copyright 2023 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2023 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2020 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -27,7 +27,6 @@ import (
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/textparse"
"github.com/prometheus/prometheus/tsdb"
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
)
func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) {
@ -94,7 +93,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
return err
}
defer func() {
returnErr = tsdb_errors.NewMulti(returnErr, db.Close()).Err()
returnErr = errors.Join(returnErr, db.Close())
}()
var (
@ -125,7 +124,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
return fmt.Errorf("block writer: %w", err)
}
defer func() {
err = tsdb_errors.NewMulti(err, w.Close()).Err()
err = errors.Join(err, w.Close())
}()
ctx := context.Background()

View file

@ -1,4 +1,4 @@
// Copyright 2020 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -61,7 +61,10 @@ import (
"github.com/prometheus/prometheus/util/documentcli"
)
var promqlEnableDelayedNameRemoval = false
var (
promqlEnableDelayedNameRemoval = false
promtoolParserOpts parser.Options
)
func init() {
// This can be removed when the legacy global mode is fully deprecated.
@ -162,7 +165,11 @@ func main() {
checkRulesIgnoreUnknownFields := checkRulesCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the rule files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool()
checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
checkMetricsExtended := checkMetricsCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
checkMetricsLint := checkMetricsCmd.Flag(
"lint",
"Linting checks to apply for metrics. Available options are: all, none. Use --lint=none to disable metrics linting.",
).Default(lintOptionAll).String()
agentMode := checkConfigCmd.Flag("agent", "Check config file for Prometheus in Agent mode.").Bool()
queryCmd := app.Command("query", "Run query against a Prometheus server.")
@ -257,12 +264,13 @@ func main() {
listHumanReadable := tsdbListCmd.Flag("human-readable", "Print human readable values.").Short('r').Bool()
listPath := tsdbListCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
tsdbDumpCmd := tsdbCmd.Command("dump", "Dump samples from a TSDB.")
tsdbDumpCmd := tsdbCmd.Command("dump", "Dump data (series+samples or optionally just series) from a TSDB.")
dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end.").String()
dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump, in milliseconds since the Unix epoch.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump, in milliseconds since the Unix epoch.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
dumpFormat := tsdbDumpCmd.Flag("format", "Output format of the dump (prom (default) or seriesjson).").Default("prom").Enum("prom", "seriesjson")
tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics.")
dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
@ -309,7 +317,7 @@ func main() {
promQLLabelsDeleteQuery := promQLLabelsDeleteCmd.Arg("query", "PromQL query.").Required().String()
promQLLabelsDeleteName := promQLLabelsDeleteCmd.Arg("name", "Name of the label to delete.").Required().String()
featureList := app.Flag("enable-feature", "Comma separated feature names to enable. Valid options: promql-experimental-functions, promql-delayed-name-removal. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details").Default("").Strings()
featureList := app.Flag("enable-feature", "Comma separated feature names to enable. Valid options: promql-experimental-functions, promql-delayed-name-removal, promql-duration-expr, promql-extended-range-selectors. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details").Default("").Strings()
documentationCmd := app.Command("write-documentation", "Generate command line documentation. Internal use.").Hidden()
@ -343,9 +351,13 @@ func main() {
for o := range strings.SplitSeq(f, ",") {
switch o {
case "promql-experimental-functions":
parser.EnableExperimentalFunctions = true
promtoolParserOpts.EnableExperimentalFunctions = true
case "promql-delayed-name-removal":
promqlEnableDelayedNameRemoval = true
case "promql-duration-expr":
promtoolParserOpts.ExperimentalDurationExpr = true
case "promql-extended-range-selectors":
promtoolParserOpts.EnableExtendedRangeSelectors = true
case "":
continue
default:
@ -353,13 +365,14 @@ func main() {
}
}
}
promtoolParser := parser.NewParser(promtoolParserOpts)
switch parsedCmd {
case sdCheckCmd.FullCommand():
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer))
case checkConfigCmd.FullCommand():
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newConfigLintConfig(*checkConfigLint, *checkConfigLintFatal, *checkConfigIgnoreUnknownFields, model.UTF8Validation, model.Duration(*checkLookbackDelta)), *configFiles...))
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newConfigLintConfig(*checkConfigLint, *checkConfigLintFatal, *checkConfigIgnoreUnknownFields, model.UTF8Validation, model.Duration(*checkLookbackDelta)), promtoolParser, *configFiles...))
case checkServerHealthCmd.FullCommand():
os.Exit(checkErr(CheckServerStatus(serverURL, checkHealth, httpRoundTripper)))
@ -371,10 +384,10 @@ func main() {
os.Exit(CheckWebConfig(*webConfigFiles...))
case checkRulesCmd.FullCommand():
os.Exit(CheckRules(newRulesLintConfig(*checkRulesLint, *checkRulesLintFatal, *checkRulesIgnoreUnknownFields, model.UTF8Validation), *ruleFiles...))
os.Exit(CheckRules(newRulesLintConfig(*checkRulesLint, *checkRulesLintFatal, *checkRulesIgnoreUnknownFields, model.UTF8Validation), promtoolParser, *ruleFiles...))
case checkMetricsCmd.FullCommand():
os.Exit(CheckMetrics(*checkMetricsExtended))
os.Exit(CheckMetrics(*checkMetricsExtended, *checkMetricsLint))
case pushMetricsCmd.FullCommand():
os.Exit(PushMetrics(remoteWriteURL, httpRoundTripper, *pushMetricsHeaders, *pushMetricsTimeout, *pushMetricsProtoMsg, *pushMetricsLabels, *metricFiles...))
@ -411,6 +424,7 @@ func main() {
EnableNegativeOffset: true,
EnableDelayedNameRemoval: promqlEnableDelayedNameRemoval,
},
promtoolParser,
*testRulesRun,
*testRulesDiff,
*testRulesDebug,
@ -422,15 +436,20 @@ func main() {
os.Exit(checkErr(benchmarkWrite(*benchWriteOutPath, *benchSamplesFile, *benchWriteNumMetrics, *benchWriteNumScrapes)))
case tsdbAnalyzeCmd.FullCommand():
os.Exit(checkErr(analyzeBlock(ctx, *analyzePath, *analyzeBlockID, *analyzeLimit, *analyzeRunExtended, *analyzeMatchers)))
os.Exit(checkErr(analyzeBlock(ctx, *analyzePath, *analyzeBlockID, *analyzeLimit, *analyzeRunExtended, *analyzeMatchers, promtoolParser)))
case tsdbListCmd.FullCommand():
os.Exit(checkErr(listBlocks(*listPath, *listHumanReadable)))
case tsdbDumpCmd.FullCommand():
os.Exit(checkErr(dumpSamples(ctx, *dumpPath, *dumpSandboxDirRoot, *dumpMinTime, *dumpMaxTime, *dumpMatch, formatSeriesSet)))
format := formatSeriesSet
if *dumpFormat == "seriesjson" {
format = formatSeriesSetLabelsToJSON
}
os.Exit(checkErr(dumpTSDBData(ctx, *dumpPath, *dumpSandboxDirRoot, *dumpMinTime, *dumpMaxTime, *dumpMatch, format, promtoolParser)))
case tsdbDumpOpenMetricsCmd.FullCommand():
os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics)))
os.Exit(checkErr(dumpTSDBData(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics, promtoolParser)))
// TODO(aSquare14): Work on adding support for custom block size.
case openMetricsImportCmd.FullCommand():
os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration, *openMetricsLabels))
@ -446,15 +465,15 @@ func main() {
case promQLFormatCmd.FullCommand():
checkExperimental(*experimental)
os.Exit(checkErr(formatPromQL(*promQLFormatQuery)))
os.Exit(checkErr(formatPromQL(*promQLFormatQuery, promtoolParser)))
case promQLLabelsSetCmd.FullCommand():
checkExperimental(*experimental)
os.Exit(checkErr(labelsSetPromQL(*promQLLabelsSetQuery, *promQLLabelsSetType, *promQLLabelsSetName, *promQLLabelsSetValue)))
os.Exit(checkErr(labelsSetPromQL(*promQLLabelsSetQuery, *promQLLabelsSetType, *promQLLabelsSetName, *promQLLabelsSetValue, promtoolParser)))
case promQLLabelsDeleteCmd.FullCommand():
checkExperimental(*experimental)
os.Exit(checkErr(labelsDeletePromQL(*promQLLabelsDeleteQuery, *promQLLabelsDeleteName)))
os.Exit(checkErr(labelsDeletePromQL(*promQLLabelsDeleteQuery, *promQLLabelsDeleteName, promtoolParser)))
}
}
@ -579,7 +598,7 @@ func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper ht
}
// CheckConfig validates configuration files.
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings configLintConfig, files ...string) int {
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings configLintConfig, p parser.Parser, files ...string) int {
failed := false
hasErrors := false
@ -600,7 +619,7 @@ func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings configLintConfig,
if !checkSyntaxOnly {
scrapeConfigsFailed := lintScrapeConfigs(scrapeConfigs, lintSettings)
failed = failed || scrapeConfigsFailed
rulesFailed, rulesHaveErrors := checkRules(ruleFiles, lintSettings.rulesLintConfig)
rulesFailed, rulesHaveErrors := checkRules(ruleFiles, lintSettings.rulesLintConfig, p)
failed = failed || rulesFailed
hasErrors = hasErrors || rulesHaveErrors
}
@ -827,13 +846,13 @@ func checkSDFile(filename string) ([]*targetgroup.Group, error) {
}
// CheckRules validates rule files.
func CheckRules(ls rulesLintConfig, files ...string) int {
func CheckRules(ls rulesLintConfig, p parser.Parser, files ...string) int {
failed := false
hasErrors := false
if len(files) == 0 {
failed, hasErrors = checkRulesFromStdin(ls)
failed, hasErrors = checkRulesFromStdin(ls, p)
} else {
failed, hasErrors = checkRules(files, ls)
failed, hasErrors = checkRules(files, ls, p)
}
if failed && hasErrors {
@ -847,7 +866,7 @@ func CheckRules(ls rulesLintConfig, files ...string) int {
}
// checkRulesFromStdin validates rule from stdin.
func checkRulesFromStdin(ls rulesLintConfig) (bool, bool) {
func checkRulesFromStdin(ls rulesLintConfig, p parser.Parser) (bool, bool) {
failed := false
hasErrors := false
fmt.Println("Checking standard input")
@ -856,7 +875,7 @@ func checkRulesFromStdin(ls rulesLintConfig) (bool, bool) {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return true, true
}
rgs, errs := rulefmt.Parse(data, ls.ignoreUnknownFields, ls.nameValidationScheme)
rgs, errs := rulefmt.Parse(data, ls.ignoreUnknownFields, ls.nameValidationScheme, p)
if errs != nil {
failed = true
fmt.Fprintln(os.Stderr, " FAILED:")
@ -885,12 +904,12 @@ func checkRulesFromStdin(ls rulesLintConfig) (bool, bool) {
}
// checkRules validates rule files.
func checkRules(files []string, ls rulesLintConfig) (bool, bool) {
func checkRules(files []string, ls rulesLintConfig, p parser.Parser) (bool, bool) {
failed := false
hasErrors := false
for _, f := range files {
fmt.Println("Checking", f)
rgs, errs := rulefmt.ParseFile(f, ls.ignoreUnknownFields, ls.nameValidationScheme)
rgs, errs := rulefmt.ParseFile(f, ls.ignoreUnknownFields, ls.nameValidationScheme, p)
if errs != nil {
failed = true
fmt.Fprintln(os.Stderr, " FAILED:")
@ -929,11 +948,11 @@ func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings rulesLintConfig) (int
dRules := checkDuplicates(rgs.Groups)
if len(dRules) != 0 {
var errMessage strings.Builder
errMessage.WriteString(fmt.Sprintf("%d duplicate rule(s) found.\n", len(dRules)))
fmt.Fprintf(&errMessage, "%d duplicate rule(s) found.\n", len(dRules))
for _, n := range dRules {
errMessage.WriteString(fmt.Sprintf("Metric: %s\nLabel(s):\n", n.metric))
fmt.Fprintf(&errMessage, "Metric: %s\nLabel(s):\n", n.metric)
n.label.Range(func(l labels.Label) {
errMessage.WriteString(fmt.Sprintf("\t%s: %s\n", l.Name, l.Value))
fmt.Fprintf(&errMessage, "\t%s: %s\n", l.Name, l.Value)
})
}
errMessage.WriteString("Might cause inconsistency while recording expressions")
@ -1012,36 +1031,53 @@ func ruleMetric(rule rulefmt.Rule) string {
}
var checkMetricsUsage = strings.TrimSpace(`
Pass Prometheus metrics over stdin to lint them for consistency and correctness.
Pass Prometheus metrics over stdin to lint them for consistency and correctness, and optionally perform cardinality analysis.
examples:
$ cat metrics.prom | promtool check metrics
$ curl -s http://localhost:9090/metrics | promtool check metrics
$ curl -s http://localhost:9090/metrics | promtool check metrics --extended
$ curl -s http://localhost:9100/metrics | promtool check metrics --extended --lint=none
`)
// CheckMetrics performs a linting pass on input metrics.
func CheckMetrics(extended bool) int {
var buf bytes.Buffer
tee := io.TeeReader(os.Stdin, &buf)
l := promlint.New(tee)
problems, err := l.Lint()
if err != nil {
fmt.Fprintln(os.Stderr, "error while linting:", err)
func CheckMetrics(extended bool, lint string) int {
// Validate that at least one feature is enabled.
if !extended && lint == lintOptionNone {
fmt.Fprintln(os.Stderr, "error: at least one of --extended or linting must be enabled")
fmt.Fprintln(os.Stderr, "Use --extended for cardinality analysis, or remove --lint=none to enable linting")
return failureExitCode
}
for _, p := range problems {
fmt.Fprintln(os.Stderr, p.Metric, p.Text)
var buf bytes.Buffer
var (
problems []promlint.Problem
reader io.Reader
err error
)
if lint != lintOptionNone {
tee := io.TeeReader(os.Stdin, &buf)
l := promlint.New(tee)
problems, err = l.Lint()
if err != nil {
fmt.Fprintln(os.Stderr, "error while linting:", err)
return failureExitCode
}
for _, p := range problems {
fmt.Fprintln(os.Stderr, p.Metric, p.Text)
}
reader = &buf
} else {
reader = os.Stdin
}
if len(problems) > 0 {
return lintErrExitCode
}
hasLintProblems := len(problems) > 0
if extended {
stats, total, err := checkMetricsExtended(&buf)
stats, total, err := checkMetricsExtended(reader)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return failureExitCode
@ -1055,6 +1091,10 @@ func CheckMetrics(extended bool) int {
w.Flush()
}
if hasLintProblems {
return lintErrExitCode
}
return successExitCode
}
@ -1310,8 +1350,8 @@ func checkTargetGroupsForScrapeConfig(targetGroups []*targetgroup.Group, scfg *c
return nil
}
func formatPromQL(query string) error {
expr, err := parser.ParseExpr(query)
func formatPromQL(query string, p parser.Parser) error {
expr, err := p.ParseExpr(query)
if err != nil {
return err
}
@ -1320,8 +1360,8 @@ func formatPromQL(query string) error {
return nil
}
func labelsSetPromQL(query, labelMatchType, name, value string) error {
expr, err := parser.ParseExpr(query)
func labelsSetPromQL(query, labelMatchType, name, value string, p parser.Parser) error {
expr, err := p.ParseExpr(query)
if err != nil {
return err
}
@ -1365,8 +1405,8 @@ func labelsSetPromQL(query, labelMatchType, name, value string) error {
return nil
}
func labelsDeletePromQL(query, name string) error {
expr, err := parser.ParseExpr(query)
func labelsDeletePromQL(query, name string, p parser.Parser) error {
expr, err := p.ParseExpr(query)
if err != nil {
return err
}

View file

@ -1,4 +1,4 @@
// Copyright 2018 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -18,6 +18,7 @@ import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/http/httptest"
"net/url"
@ -36,6 +37,7 @@ import (
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/promql/promqltest"
)
@ -186,7 +188,7 @@ func TestCheckDuplicates(t *testing.T) {
c := test
t.Run(c.name, func(t *testing.T) {
t.Parallel()
rgs, err := rulefmt.ParseFile(c.ruleFile, false, model.UTF8Validation)
rgs, err := rulefmt.ParseFile(c.ruleFile, false, model.UTF8Validation, parser.NewParser(parser.Options{}))
require.Empty(t, err)
dups := checkDuplicates(rgs.Groups)
require.Equal(t, c.expectedDups, dups)
@ -195,7 +197,7 @@ func TestCheckDuplicates(t *testing.T) {
}
func BenchmarkCheckDuplicates(b *testing.B) {
rgs, err := rulefmt.ParseFile("./testdata/rules_large.yml", false, model.UTF8Validation)
rgs, err := rulefmt.ParseFile("./testdata/rules_large.yml", false, model.UTF8Validation, parser.NewParser(parser.Options{}))
require.Empty(b, err)
for b.Loop() {
@ -402,6 +404,99 @@ func TestCheckMetricsExtended(t *testing.T) {
}, stats)
}
// TestCheckMetricsLintOptions runs CheckMetrics with different combinations of
// the lint option and the extended flag. The metrics are fed through a pipe
// installed as os.Stdin, and os.Stdout/os.Stderr are redirected to pipes so the
// exit code, the lint output and the cardinality output can be asserted.
func TestCheckMetricsLintOptions(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("Skipping on windows")
	}

	const testMetrics = `
# HELP testMetric_CamelCase A test metric with camelCase
# TYPE testMetric_CamelCase gauge
testMetric_CamelCase{label="value1"} 1
`

	tests := []struct {
		name        string
		lint        string
		extended    bool
		wantErrCode int
		wantLint    bool
		wantCard    bool
	}{
		{
			name:        "default_all_with_extended",
			lint:        lintOptionAll,
			extended:    true,
			wantErrCode: lintErrExitCode,
			wantLint:    true,
			wantCard:    true,
		},
		{
			name:        "lint_none_with_extended",
			lint:        lintOptionNone,
			extended:    true,
			wantErrCode: successExitCode,
			wantLint:    false,
			wantCard:    true,
		},
		{
			name:        "both_disabled_fails",
			lint:        lintOptionNone,
			extended:    false,
			wantErrCode: failureExitCode,
			wantLint:    false,
			wantCard:    false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Restore the process-wide streams even if an assertion fails or
			// CheckMetrics panics.
			oldStdin, oldStdout, oldStderr := os.Stdin, os.Stdout, os.Stderr
			defer func() {
				os.Stdin, os.Stdout, os.Stderr = oldStdin, oldStdout, oldStderr
			}()

			// Input pipe: write the metrics and close the write end so the
			// reader sees EOF.
			r, w, err := os.Pipe()
			require.NoError(t, err)
			defer r.Close()
			_, err = w.WriteString(testMetrics)
			require.NoError(t, err)
			require.NoError(t, w.Close())

			rOut, wOut, err := os.Pipe()
			require.NoError(t, err)
			defer rOut.Close()
			rErr, wErr, err := os.Pipe()
			require.NoError(t, err)
			defer rErr.Close()

			os.Stdin, os.Stdout, os.Stderr = r, wOut, wErr
			code := CheckMetrics(tt.extended, tt.lint)
			// Put the real streams back before asserting so failure output is
			// not written into the capture pipes.
			os.Stdout, os.Stderr = oldStdout, oldStderr

			// Closing the write ends lets the copies below terminate.
			require.NoError(t, wOut.Close())
			require.NoError(t, wErr.Close())

			var outBuf, errBuf bytes.Buffer
			_, err = io.Copy(&outBuf, rOut)
			require.NoError(t, err)
			_, err = io.Copy(&errBuf, rErr)
			require.NoError(t, err)

			require.Equal(t, tt.wantErrCode, code)
			if tt.wantLint {
				require.Contains(t, errBuf.String(), "testMetric_CamelCase")
			} else {
				require.NotContains(t, errBuf.String(), "testMetric_CamelCase")
			}

			if tt.wantCard {
				require.Contains(t, outBuf.String(), "Cardinality")
			} else {
				require.NotContains(t, outBuf.String(), "Cardinality")
			}
		})
	}
}
func TestExitCodes(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
@ -508,7 +603,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false, model.UTF8Validation))
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false, model.UTF8Validation), parser.NewParser(parser.Options{}))
require.Equal(t, successExitCode, exitCode)
})
@ -530,7 +625,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false, model.UTF8Validation))
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false, model.UTF8Validation), parser.NewParser(parser.Options{}))
require.Equal(t, failureExitCode, exitCode)
})
@ -552,7 +647,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false, model.UTF8Validation))
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false, model.UTF8Validation), parser.NewParser(parser.Options{}))
require.Equal(t, lintErrExitCode, exitCode)
})
}
@ -561,7 +656,7 @@ func TestCheckRulesWithFeatureFlag(t *testing.T) {
// As opposed to TestCheckRules calling CheckRules directly we run promtool
// so the feature flag parsing can be tested.
args := []string{"-test.main", "--enable-feature=promql-experimental-functions", "check", "rules", "testdata/features.yml"}
args := []string{"-test.main", "--enable-feature=promql-experimental-functions", "--enable-feature=promql-duration-expr", "--enable-feature=promql-extended-range-selectors", "check", "rules", "testdata/features.yml"}
tool := exec.Command(promtoolPath, args...)
err := tool.Run()
require.NoError(t, err)
@ -570,19 +665,19 @@ func TestCheckRulesWithFeatureFlag(t *testing.T) {
func TestCheckRulesWithRuleFiles(t *testing.T) {
t.Run("rules-good", func(t *testing.T) {
t.Parallel()
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false, model.UTF8Validation), "./testdata/rules.yml")
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false, model.UTF8Validation), parser.NewParser(parser.Options{}), "./testdata/rules.yml")
require.Equal(t, successExitCode, exitCode)
})
t.Run("rules-bad", func(t *testing.T) {
t.Parallel()
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false, model.UTF8Validation), "./testdata/rules-bad.yml")
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false, model.UTF8Validation), parser.NewParser(parser.Options{}), "./testdata/rules-bad.yml")
require.Equal(t, failureExitCode, exitCode)
})
t.Run("rules-lint-fatal", func(t *testing.T) {
t.Parallel()
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false, model.UTF8Validation), "./testdata/prometheus-rules.lint.yml")
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false, model.UTF8Validation), parser.NewParser(parser.Options{}), "./testdata/prometheus-rules.lint.yml")
require.Equal(t, lintErrExitCode, exitCode)
})
}
@ -611,20 +706,21 @@ func TestCheckScrapeConfigs(t *testing.T) {
} {
t.Run(tc.name, func(t *testing.T) {
// Non-fatal linting.
code := CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, false, false, model.UTF8Validation, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
p := parser.NewParser(parser.Options{})
code := CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, false, false, model.UTF8Validation, tc.lookbackDelta), p, "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
require.Equal(t, successExitCode, code, "Non-fatal linting should return success")
// Fatal linting.
code = CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, false, model.UTF8Validation, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
code = CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, false, model.UTF8Validation, tc.lookbackDelta), p, "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
if tc.expectError {
require.Equal(t, lintErrExitCode, code, "Fatal linting should return error")
} else {
require.Equal(t, successExitCode, code, "Fatal linting should return success when there are no problems")
}
// Check syntax only, no linting.
code = CheckConfig(false, true, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, false, model.UTF8Validation, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
code = CheckConfig(false, true, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, false, model.UTF8Validation, tc.lookbackDelta), p, "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
require.Equal(t, successExitCode, code, "Fatal linting should return success when checking syntax only")
// Lint option "none" should disable linting.
code = CheckConfig(false, false, newConfigLintConfig(lintOptionNone+","+lintOptionTooLongScrapeInterval, true, false, model.UTF8Validation, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
code = CheckConfig(false, false, newConfigLintConfig(lintOptionNone+","+lintOptionTooLongScrapeInterval, true, false, model.UTF8Validation, tc.lookbackDelta), p, "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
require.Equal(t, successExitCode, code, `Fatal linting should return success when lint option "none" is specified`)
})
}
@ -640,7 +736,6 @@ func TestTSDBDumpCommand(t *testing.T) {
load 1m
metric{foo="bar"} 1 2 3
`)
t.Cleanup(func() { storage.Close() })
for _, c := range []struct {
name string

View file

@ -1,4 +1,4 @@
// Copyright 2023 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2025 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2023 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2020 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -15,6 +15,7 @@ package main
import (
"context"
"errors"
"fmt"
"log/slog"
"time"
@ -28,7 +29,6 @@ import (
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb"
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
)
const maxSamplesInMemory = 5000
@ -143,7 +143,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName
var closed bool
defer func() {
if !closed {
err = tsdb_errors.NewMulti(err, w.Close()).Err()
err = errors.Join(err, w.Close())
}
}()
app := newMultipleAppender(ctx, w)
@ -181,7 +181,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName
if err := app.flushAndCommit(ctx); err != nil {
return fmt.Errorf("flush and commit: %w", err)
}
err = tsdb_errors.NewMulti(err, w.Close()).Err()
err = errors.Join(err, w.Close())
closed = true
}

View file

@ -1,4 +1,4 @@
// Copyright 2020 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2021 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2021 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -0,0 +1,3 @@
{"__name__":"heavy_metric","foo":"bar"}
{"__name__":"heavy_metric","foo":"foo"}
{"__name__":"metric","baz":"abc","foo":"bar"}

View file

@ -0,0 +1,2 @@
{"__name__":"heavy_metric","foo":"foo"}
{"__name__":"metric","baz":"abc","foo":"bar"}

View file

@ -0,0 +1 @@
{"__name__":"metric","baz":"abc","foo":"bar"}

View file

@ -1,6 +1,10 @@
groups:
- name: features
rules:
- record: x
# We don't expect anything from this, just want to check the function parses.
# We don't expect anything from these, just want to check the syntax parses.
- record: promql-experimental-functions
expr: sort_by_label(up, "instance")
- record: promql-duration-expr
expr: rate(up[1m * 2])
- record: promql-extended-range-selectors
expr: rate(up[1m] anchored)

View file

@ -0,0 +1,76 @@
rule_files:
- rules.yml
evaluation_interval: 1m
tests:
# Test with the default start_timestamp (field omitted: 0 / Unix epoch).
- name: default_start_time
interval: 1m
promql_expr_test:
- expr: time()
eval_time: 0m
exp_samples:
- value: 0
- expr: time()
eval_time: 5m
exp_samples:
- value: 300
# Test with RFC3339 start_timestamp.
- name: rfc3339_start_timestamp
interval: 1m
start_timestamp: "2024-01-01T00:00:00Z"
promql_expr_test:
- expr: time()
eval_time: 0m
exp_samples:
- value: 1704067200
- expr: time()
eval_time: 5m
exp_samples:
- value: 1704067500
# Test with Unix timestamp start_timestamp.
- name: unix_timestamp_start_timestamp
interval: 1m
start_timestamp: 1609459200
input_series:
- series: test_metric
values: "1 1 1"
promql_expr_test:
- expr: time()
eval_time: 0m
exp_samples:
- value: 1609459200
- expr: time()
eval_time: 10m
exp_samples:
- value: 1609459800
# Test that input series samples are correctly timestamped with custom start_timestamp.
- name: samples_with_start_timestamp
interval: 1m
start_timestamp: "2024-01-01T00:00:00Z"
input_series:
- series: 'my_metric{label="test"}'
values: "10+10x15"
promql_expr_test:
# Query at absolute timestamp (start_timestamp = 1704067200).
- expr: my_metric@1704067200
eval_time: 5m
exp_samples:
- labels: 'my_metric{label="test"}'
value: 10
# Query at 2 minutes after start_timestamp (1704067200 + 120 = 1704067320).
- expr: my_metric@1704067320
eval_time: 5m
exp_samples:
- labels: 'my_metric{label="test"}'
value: 30
# Verify timestamp() function returns the absolute timestamp.
- expr: timestamp(my_metric)
eval_time: 5m
exp_samples:
- labels: '{label="test"}'
value: 1704067500

View file

@ -1,4 +1,4 @@
// Copyright 2017 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -17,6 +17,7 @@ import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
@ -42,7 +43,6 @@ import (
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/chunks"
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
"github.com/prometheus/prometheus/tsdb/fileutil"
"github.com/prometheus/prometheus/tsdb/index"
)
@ -159,17 +159,14 @@ func (b *writeBenchmark) ingestScrapes(lbls []labels.Labels, scrapeCount int) (u
batch := lbls[:l]
lbls = lbls[l:]
wg.Add(1)
go func() {
defer wg.Done()
wg.Go(func() {
n, err := b.ingestScrapesShard(batch, 100, int64(timeDelta*i))
if err != nil {
// exitWithError(err)
fmt.Println(" err", err)
}
total.Add(n)
}()
})
}
wg.Wait()
}
@ -338,7 +335,7 @@ func listBlocks(path string, humanReadable bool) error {
return err
}
defer func() {
err = tsdb_errors.NewMulti(err, db.Close()).Err()
err = errors.Join(err, db.Close())
}()
blocks, err := db.Blocks()
if err != nil {
@ -408,13 +405,13 @@ func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error)
return db, b, nil
}
func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExtended bool, matchers string) error {
func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExtended bool, matchers string, p parser.Parser) error {
var (
selectors []*labels.Matcher
err error
)
if len(matchers) > 0 {
selectors, err = parser.ParseMetricSelector(matchers)
selectors, err = p.ParseMetricSelector(matchers)
if err != nil {
return err
}
@ -424,7 +421,7 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
return err
}
defer func() {
err = tsdb_errors.NewMulti(err, db.Close()).Err()
err = errors.Join(err, db.Close())
}()
meta := block.Meta()
@ -478,24 +475,24 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
labelpairsCount := map[string]uint64{}
entries := 0
var (
p index.Postings
refs []storage.SeriesRef
postings index.Postings
refs []storage.SeriesRef
)
if len(matchers) > 0 {
p, err = tsdb.PostingsForMatchers(ctx, ir, selectors...)
postings, err = tsdb.PostingsForMatchers(ctx, ir, selectors...)
if err != nil {
return err
}
// Expand refs first and cache in memory.
// So later we don't have to expand again.
refs, err = index.ExpandPostings(p)
refs, err = index.ExpandPostings(postings)
if err != nil {
return err
}
fmt.Printf("Matched series: %d\n", len(refs))
p = index.NewListPostings(refs)
postings = index.NewListPostings(refs)
} else {
p, err = ir.Postings(ctx, "", "") // The special all key.
postings, err = ir.Postings(ctx, "", "") // The special all key.
if err != nil {
return err
}
@ -503,8 +500,8 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
chks := []chunks.Meta{}
builder := labels.ScratchBuilder{}
for p.Next() {
if err = ir.Series(p.At(), &builder, &chks); err != nil {
for postings.Next() {
if err = ir.Series(postings.At(), &builder, &chks); err != nil {
return err
}
// Amount of the block time range not covered by this series.
@ -517,8 +514,8 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
entries++
})
}
if p.Err() != nil {
return p.Err()
if postings.Err() != nil {
return postings.Err()
}
fmt.Printf("Postings (unique label pairs): %d\n", len(labelpairsUncovered))
fmt.Printf("Postings entries (total label pairs): %d\n", entries)
@ -624,7 +621,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
return err
}
defer func() {
err = tsdb_errors.NewMulti(err, chunkr.Close()).Err()
err = errors.Join(err, chunkr.Close())
}()
totalChunks := 0
@ -706,13 +703,13 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
type SeriesSetFormatter func(series storage.SeriesSet) error
func dumpSamples(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt int64, match []string, formatter SeriesSetFormatter) (err error) {
func dumpTSDBData(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt int64, match []string, formatter SeriesSetFormatter, p parser.Parser) (err error) {
db, err := tsdb.OpenDBReadOnly(dbDir, sandboxDirRoot, nil)
if err != nil {
return err
}
defer func() {
err = tsdb_errors.NewMulti(err, db.Close()).Err()
err = errors.Join(err, db.Close())
}()
q, err := db.Querier(mint, maxt)
if err != nil {
@ -720,7 +717,7 @@ func dumpSamples(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt i
}
defer q.Close()
matcherSets, err := parser.ParseMetricSelectors(match)
matcherSets, err := p.ParseMetricSelectors(match)
if err != nil {
return err
}
@ -742,7 +739,7 @@ func dumpSamples(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt i
}
if ws := ss.Warnings(); len(ws) > 0 {
return tsdb_errors.NewMulti(ws.AsErrors()...).Err()
return errors.Join(ws.AsErrors()...)
}
if ss.Err() != nil {
@ -794,6 +791,30 @@ func CondensedString(ls labels.Labels) string {
return b.String()
}
// formatSeriesSetLabelsToJSON writes the label set of every series in ss to
// stdout, one JSON document per line. Each distinct JSON encoding is printed
// only once, so a series yielded several times appears a single time.
//
// It returns the first JSON marshaling error encountered. Iteration errors
// and warnings of ss are not inspected here.
func formatSeriesSetLabelsToJSON(ss storage.SeriesSet) error {
	// Encodings that have already been printed.
	seen := map[string]struct{}{}
	for ss.Next() {
		encoded, err := json.Marshal(ss.At().Labels())
		if err != nil {
			return err
		}
		if len(encoded) == 0 {
			continue
		}
		line := string(encoded)
		if _, dup := seen[line]; dup {
			continue
		}
		seen[line] = struct{}{}
		fmt.Println(line)
	}
	return nil
}
func formatSeriesSetOpenMetrics(ss storage.SeriesSet) error {
for ss.Next() {
series := ss.At()

View file

@ -1,4 +1,4 @@
// Copyright 2017 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2017 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -27,6 +27,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/tsdb"
)
@ -63,7 +64,7 @@ func getDumpedSamples(t *testing.T, databasePath, sandboxDirRoot string, mint, m
r, w, _ := os.Pipe()
os.Stdout = w
err := dumpSamples(
err := dumpTSDBData(
context.Background(),
databasePath,
sandboxDirRoot,
@ -71,6 +72,7 @@ func getDumpedSamples(t *testing.T, databasePath, sandboxDirRoot string, mint, m
maxt,
match,
formatter,
parser.NewParser(parser.Options{}),
)
require.NoError(t, err)
@ -97,7 +99,6 @@ func TestTSDBDump(t *testing.T) {
heavy_metric{foo="bar"} 5 4 3 2 1
heavy_metric{foo="foo"} 5 4 3 2 1
`)
t.Cleanup(func() { storage.Close() })
tests := []struct {
name string
@ -106,13 +107,15 @@ func TestTSDBDump(t *testing.T) {
sandboxDirRoot string
match []string
expectedDump string
expectedSeries string
}{
{
name: "default match",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__=~'(?s:.*)'}"},
expectedDump: "testdata/dump-test-1.prom",
name: "default match",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__=~'(?s:.*)'}"},
expectedDump: "testdata/dump-test-1.prom",
expectedSeries: "testdata/dump-series-1.prom",
},
{
name: "default match with sandbox dir root set",
@ -121,41 +124,47 @@ func TestTSDBDump(t *testing.T) {
sandboxDirRoot: t.TempDir(),
match: []string{"{__name__=~'(?s:.*)'}"},
expectedDump: "testdata/dump-test-1.prom",
expectedSeries: "testdata/dump-series-1.prom",
},
{
name: "same matcher twice",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{foo=~'.+'}", "{foo=~'.+'}"},
expectedDump: "testdata/dump-test-1.prom",
name: "same matcher twice",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{foo=~'.+'}", "{foo=~'.+'}"},
expectedDump: "testdata/dump-test-1.prom",
expectedSeries: "testdata/dump-series-1.prom",
},
{
name: "no duplication",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__=~'(?s:.*)'}", "{baz='abc'}"},
expectedDump: "testdata/dump-test-1.prom",
name: "no duplication",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__=~'(?s:.*)'}", "{baz='abc'}"},
expectedDump: "testdata/dump-test-1.prom",
expectedSeries: "testdata/dump-series-1.prom",
},
{
name: "well merged",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__='heavy_metric'}", "{baz='abc'}"},
expectedDump: "testdata/dump-test-1.prom",
name: "well merged",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__='heavy_metric'}", "{baz='abc'}"},
expectedDump: "testdata/dump-test-1.prom",
expectedSeries: "testdata/dump-series-1.prom",
},
{
name: "multi matchers",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__='heavy_metric',foo='foo'}", "{__name__='metric'}"},
expectedDump: "testdata/dump-test-2.prom",
name: "multi matchers",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__='heavy_metric',foo='foo'}", "{__name__='metric'}"},
expectedDump: "testdata/dump-test-2.prom",
expectedSeries: "testdata/dump-series-2.prom",
},
{
name: "with reduced mint and maxt",
mint: int64(60000),
maxt: int64(120000),
match: []string{"{__name__='metric'}"},
expectedDump: "testdata/dump-test-3.prom",
name: "with reduced mint and maxt",
mint: int64(60000),
maxt: int64(120000),
match: []string{"{__name__='metric'}"},
expectedDump: "testdata/dump-test-3.prom",
expectedSeries: "testdata/dump-series-3.prom",
},
}
for _, tt := range tests {
@ -166,6 +175,12 @@ func TestTSDBDump(t *testing.T) {
expectedMetrics = normalizeNewLine(expectedMetrics)
// Sort both, because Prometheus does not guarantee the output order.
require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics))
dumpedSeries := getDumpedSamples(t, storage.Dir(), tt.sandboxDirRoot, tt.mint, tt.maxt, tt.match, formatSeriesSetLabelsToJSON)
expectedSeries, err := os.ReadFile(tt.expectedSeries)
require.NoError(t, err)
expectedSeries = normalizeNewLine(expectedSeries)
require.Equal(t, sortLines(string(expectedSeries)), sortLines(dumpedSeries))
})
}
}
@ -182,7 +197,6 @@ func TestTSDBDumpOpenMetrics(t *testing.T) {
my_counter{foo="bar", baz="abc"} 1 2 3 4 5
my_gauge{bar="foo", abc="baz"} 9 8 0 4 7
`)
t.Cleanup(func() { storage.Close() })
tests := []struct {
name string

View file

@ -1,4 +1,4 @@
// Copyright 2018 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -47,11 +47,11 @@ import (
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
// More info about the file format can be found in the docs.
func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int {
return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, ignoreUnknownFields, files...)
func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, p parser.Parser, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int {
return RulesUnitTestResult(io.Discard, queryOpts, p, runStrings, diffFlag, debug, ignoreUnknownFields, files...)
}
func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int {
func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, p parser.Parser, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int {
failed := false
junit := &junitxml.JUnitXML{}
@ -61,7 +61,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts,
}
for _, f := range files {
if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, ignoreUnknownFields, junit.Suite(f)); errs != nil {
if errs := ruleUnitTest(f, queryOpts, p, run, diffFlag, debug, ignoreUnknownFields, junit.Suite(f)); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
@ -83,7 +83,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts,
return successExitCode
}
func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug, ignoreUnknownFields bool, ts *junitxml.TestSuite) []error {
func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, p parser.Parser, run *regexp.Regexp, diffFlag, debug, ignoreUnknownFields bool, ts *junitxml.TestSuite) []error {
b, err := os.ReadFile(filename)
if err != nil {
ts.Abort(err)
@ -132,6 +132,7 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
t.parser = p
ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, ignoreUnknownFields, unitTestInp.FuzzyCompare, unitTestInp.RuleFiles...)
if ers != nil {
for _, e := range ers {
@ -188,15 +189,39 @@ func resolveAndGlobFilepaths(baseDir string, utf *unitTestFile) error {
return nil
}
// testStartTimestamp wraps time.Time to support custom YAML unmarshaling of
// a test group's start_timestamp field.
// It can parse both RFC3339 timestamps and Unix timestamps.
// The zero value means no start timestamp was configured; testGroup.test
// then uses the Unix epoch as the lower bound for rule evaluation.
type testStartTimestamp struct {
time.Time
}
// UnmarshalYAML implements custom YAML unmarshaling for testStartTimestamp.
// The YAML value is decoded as a string and handed to parseTime, so the
// accepted formats are whatever parseTime understands (the test data uses
// RFC3339 strings and numeric Unix timestamps).
// On any error the receiver is left unmodified.
func (t *testStartTimestamp) UnmarshalYAML(unmarshal func(any) error) error {
	var raw string
	err := unmarshal(&raw)
	if err != nil {
		return err
	}
	ts, err := parseTime(raw)
	if err != nil {
		return err
	}
	t.Time = ts
	return nil
}
// testGroup is a group of input series and tests associated with it.
type testGroup struct {
Interval model.Duration `yaml:"interval"`
InputSeries []series `yaml:"input_series"`
AlertRuleTests []alertTestCase `yaml:"alert_rule_test,omitempty"`
PromqlExprTests []promqlTestCase `yaml:"promql_expr_test,omitempty"`
ExternalLabels labels.Labels `yaml:"external_labels,omitempty"`
ExternalURL string `yaml:"external_url,omitempty"`
TestGroupName string `yaml:"name,omitempty"`
Interval model.Duration `yaml:"interval"`
InputSeries []series `yaml:"input_series"`
AlertRuleTests []alertTestCase `yaml:"alert_rule_test,omitempty"`
PromqlExprTests []promqlTestCase `yaml:"promql_expr_test,omitempty"`
ExternalLabels labels.Labels `yaml:"external_labels,omitempty"`
ExternalURL string `yaml:"external_url,omitempty"`
TestGroupName string `yaml:"name,omitempty"`
StartTimestamp testStartTimestamp `yaml:"start_timestamp,omitempty"`
parser parser.Parser `yaml:"-"`
}
// test performs the unit tests.
@ -209,6 +234,8 @@ func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrde
}()
}
// Setup testing suite.
// Set the start time from the test group.
queryOpts.StartTime = tg.StartTimestamp.Time
suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts)
if err != nil {
return []error{err}
@ -228,6 +255,7 @@ func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrde
Context: context.Background(),
NotifyFunc: func(context.Context, string, ...*rules.Alert) {},
Logger: promslog.NewNopLogger(),
Parser: tg.parser,
}
m := rules.NewManager(opts)
groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ignoreUnknownFields, ruleFiles...)
@ -237,7 +265,12 @@ func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrde
groups := orderedGroups(groupsMap, groupOrderMap)
// Bounds for evaluating the rules.
mint := time.Unix(0, 0).UTC()
var mint time.Time
if tg.StartTimestamp.IsZero() {
mint = time.Unix(0, 0).UTC()
} else {
mint = tg.StartTimestamp.Time
}
maxt := mint.Add(tg.maxEvalTime())
// Optional floating point compare fuzzing.
@ -453,10 +486,10 @@ Outer:
var expSamples []parsedSample
for _, s := range testCase.ExpSamples {
lb, err := parser.ParseMetric(s.Labels)
lb, err := tg.parser.ParseMetric(s.Labels)
var hist *histogram.FloatHistogram
if err == nil && s.Histogram != "" {
_, values, parseErr := parser.ParseSeriesDesc("{} " + s.Histogram)
_, values, parseErr := tg.parser.ParseSeriesDesc("{} " + s.Histogram)
switch {
case parseErr != nil:
err = parseErr
@ -528,9 +561,9 @@ Outer:
// seriesLoadingString returns the input series in PromQL notation.
func (tg *testGroup) seriesLoadingString() string {
var result strings.Builder
result.WriteString(fmt.Sprintf("load %v\n", shortDuration(tg.Interval)))
fmt.Fprintf(&result, "load %v\n", shortDuration(tg.Interval))
for _, is := range tg.InputSeries {
result.WriteString(fmt.Sprintf(" %v %v\n", is.Series, is.Values))
fmt.Fprintf(&result, " %v %v\n", is.Series, is.Values)
}
return result.String()
}
@ -631,13 +664,14 @@ func (la labelsAndAnnotations) String() string {
if len(la) == 0 {
return "[]"
}
s := "[\n0:" + indentLines("\n"+la[0].String(), " ")
var s strings.Builder
s.WriteString("[\n0:" + indentLines("\n"+la[0].String(), " "))
for i, l := range la[1:] {
s += ",\n" + strconv.Itoa(i+1) + ":" + indentLines("\n"+l.String(), " ")
s.WriteString(",\n" + strconv.Itoa(i+1) + ":" + indentLines("\n"+l.String(), " "))
}
s += "\n]"
s.WriteString("\n]")
return s
return s.String()
}
type labelAndAnnotation struct {
@ -688,11 +722,12 @@ func parsedSamplesString(pss []parsedSample) string {
if len(pss) == 0 {
return "nil"
}
s := pss[0].String()
var s strings.Builder
s.WriteString(pss[0].String())
for _, ps := range pss[1:] {
s += ", " + ps.String()
s.WriteString(", " + ps.String())
}
return s
return s.String()
}
func (ps *parsedSample) String() string {

View file

@ -1,4 +1,4 @@
// Copyright 2018 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -21,6 +21,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/util/junitxml"
)
@ -129,6 +130,16 @@ func TestRulesUnitTest(t *testing.T) {
},
want: 0,
},
{
name: "Start time tests",
args: args{
files: []string{"./testdata/start-time-test.yml"},
},
queryOpts: promqltest.LazyLoaderOpts{
EnableAtModifier: true,
},
want: 0,
},
}
reuseFiles := []string{}
reuseCount := [2]int{}
@ -143,7 +154,7 @@ func TestRulesUnitTest(t *testing.T) {
}
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
if got := RulesUnitTest(tt.queryOpts, nil, false, false, false, tt.args.files...); got != tt.want {
if got := RulesUnitTest(tt.queryOpts, parser.NewParser(parser.Options{}), nil, false, false, false, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})
@ -151,7 +162,7 @@ func TestRulesUnitTest(t *testing.T) {
t.Run("Junit xml output ", func(t *testing.T) {
t.Parallel()
var buf bytes.Buffer
if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, false, reuseFiles...); got != 1 {
if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, parser.NewParser(parser.Options{}), nil, false, false, false, reuseFiles...); got != 1 {
t.Errorf("RulesUnitTestResults() = %v, want 1", got)
}
var test junitxml.JUnitXML
@ -267,7 +278,7 @@ func TestRulesUnitTestRun(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.ignoreUnknownFields, tt.args.files...)
got := RulesUnitTest(tt.queryOpts, parser.NewParser(parser.Options{}), tt.args.run, false, false, tt.ignoreUnknownFields, tt.args.files...)
require.Equal(t, tt.want, got)
})
}

26
compliance/go.mod Normal file
View file

@ -0,0 +1,26 @@
module compliance
go 1.25.5
require github.com/prometheus/compliance/remotewrite v0.0.0-20260220101514-bccaa3a70275
require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/snappy v1.0.0 // indirect
github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853 // indirect
github.com/klauspost/compress v1.18.1 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/oklog/run v1.2.0 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang/exp v0.0.0-20250914183048-a974e0d45e0a // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.67.2 // indirect
github.com/prometheus/prometheus v0.307.4-0.20251119130332-1174b0ce4f1f // indirect
github.com/stretchr/testify v1.11.1 // indirect
go.yaml.in/yaml/v2 v2.4.3 // indirect
golang.org/x/text v0.30.0 // indirect
google.golang.org/protobuf v1.36.10 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

79
compliance/go.sum Normal file
View file

@ -0,0 +1,79 @@
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs=
github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853 h1:cLN4IBkmkYZNnk7EAJ0BHIethd+J6LqxFNw5mSiI2bM=
github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/oklog/run v1.2.0 h1:O8x3yXwah4A73hJdlrwo/2X6J62gE5qTMusH0dvz60E=
github.com/oklog/run v1.2.0/go.mod h1:mgDbKRSwPhJfesJ4PntqFUbKQRZ50NgmZTSPlFA0YFk=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang/exp v0.0.0-20250914183048-a974e0d45e0a h1:RF1vfKM34/3DbGNis22BGd6sDDY3XBi0eM7pYqmOEO0=
github.com/prometheus/client_golang/exp v0.0.0-20250914183048-a974e0d45e0a/go.mod h1:FGJuwvfcPY0V5enm+w8zF1RNS062yugQtPPQp1c4Io4=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.67.2 h1:PcBAckGFTIHt2+L3I33uNRTlKTplNzFctXcWhPyAEN8=
github.com/prometheus/common v0.67.2/go.mod h1:63W3KZb1JOKgcjlIr64WW/LvFGAqKPj0atm+knVGEko=
github.com/prometheus/compliance/remotewrite v0.0.0-20260220101514-bccaa3a70275 h1:NLTtFqM00EuqtisYX9P+hQkjoxNxsR2oUQWDluyD2Xw=
github.com/prometheus/compliance/remotewrite v0.0.0-20260220101514-bccaa3a70275/go.mod h1:VEPZGvpSBbzTKc5acnBj9ng4gfo1DZ4qBsCQnoNFiSc=
github.com/prometheus/prometheus v0.307.4-0.20251119130332-1174b0ce4f1f h1:ERPCnBglv9Z4IjkEBTNbcHmZPlryMldXVWLkk7TeBIY=
github.com/prometheus/prometheus v0.307.4-0.20251119130332-1174b0ce4f1f/go.mod h1:7hcXiGf9AXIKW2ehWWzxkvRYJTGmc2StUIJ8mprfxjg=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View file

@ -0,0 +1,93 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package compliance
import (
"bytes"
"context"
"fmt"
"html/template"
"os"
"path/filepath"
"testing"
"github.com/prometheus/compliance/remotewrite/sender"
)
const (
// scrapeConfigTemplate is the Prometheus configuration rendered for a
// compliance run. The placeholders RemoteWriteEndpointURL,
// RemoteWriteMessage, ScrapeTargetJobName and ScrapeTargetHostPort are
// filled in from the value passed to the template's Execute call.
scrapeConfigTemplate = `
global:
scrape_interval: 1s
remote_write:
- url: "{{.RemoteWriteEndpointURL}}"
protobuf_message: "{{.RemoteWriteMessage}}"
send_exemplars: true
queue_config:
retry_on_http_429: true
metadata_config:
send: true
scrape_configs:
- job_name: "{{.ScrapeTargetJobName}}"
scrape_interval: 1s
scrape_protocols:
- PrometheusProto
- OpenMetricsText1.0.0
- PrometheusText0.0.4
static_configs:
- targets: ["{{.ScrapeTargetHostPort}}"]
`
)
// scrapeConfigTmpl is the parsed form of scrapeConfigTemplate. template.Must
// panics during package initialisation if the template fails to parse.
// NOTE(review): this file imports html/template, which HTML-escapes
// interpolated values (e.g. "&" in a URL); text/template looks like the better
// fit for YAML output — confirm before relying on URLs with query strings.
var scrapeConfigTmpl = template.Must(template.New("config").Parse(scrapeConfigTemplate))
// internalPrometheus is a stateless sender target that runs the Prometheus
// main package from ../cmd/prometheus (see Run).
type internalPrometheus struct{}
// Name returns the fixed identifier of this sender target.
func (p internalPrometheus) Name() string { return "internal-prometheus" }
// Run starts the cmd/prometheus main package (via "go run ." in
// ../cmd/prometheus) as a test sender target and returns whatever
// sender.RunCommand returns. ctx is handed to sender.RunCommand, which is
// expected to stop the process once ctx is done.
//
// The configuration is rendered from scrapeConfigTmpl using opts and written
// to config.yaml inside a fresh temporary directory; the same directory is
// used as the TSDB path. The directory is removed when Run returns, including
// on every early error return after it has been created.
func (p internalPrometheus) Run(ctx context.Context, opts sender.Options) error {
	var buf bytes.Buffer
	if err := scrapeConfigTmpl.Execute(&buf, opts); err != nil {
		return fmt.Errorf("failed to execute config template: %w", err)
	}

	dir, err := os.MkdirTemp("", "test-*")
	if err != nil {
		return fmt.Errorf("creating temporary directory: %w", err)
	}
	// Register the cleanup immediately so that a failed config write below
	// does not leave the temporary directory behind.
	defer os.RemoveAll(dir)

	configFile := filepath.Join(dir, "config.yaml")
	if err := os.WriteFile(configFile, buf.Bytes(), 0o600); err != nil {
		return fmt.Errorf("writing config file %q: %w", configFile, err)
	}

	return sender.RunCommand(ctx, "../cmd/prometheus", nil,
		"go", "run", ".",
		"--web.listen-address=0.0.0.0:0",
		fmt.Sprintf("--storage.tsdb.path=%v", dir),
		fmt.Sprintf("--config.file=%s", configFile),
		// Set important flags for the full remote write compliance:
		"--enable-feature=st-storage",
	)
}
// Compile-time check that internalPrometheus satisfies sender.Sender.
var _ sender.Sender = internalPrometheus{}
// TestRemoteWriteSender runs the remote write sender compliance tests defined in
// https://github.com/prometheus/compliance/tree/main/remotewrite/sender
// with internalPrometheus as the sender under test.
func TestRemoteWriteSender(t *testing.T) {
sender.RunTests(t, internalPrometheus{}, sender.ComplianceTests())
}

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -149,6 +149,10 @@ func LoadFile(filename string, agentMode bool, logger *slog.Logger) (*Config, er
return cfg, nil
}
// boolPtr returns a pointer to a freshly allocated bool holding the value b.
// Each call yields a distinct pointer, so callers may mutate the result
// without affecting other values obtained from boolPtr.
func boolPtr(b bool) *bool {
	p := new(bool)
	*p = b
	return p
}
// The defaults applied before parsing the respective config sections.
var (
// DefaultConfig is the default top-level configuration.
@ -158,7 +162,6 @@ var (
OTLPConfig: DefaultOTLPConfig,
}
f bool
// DefaultGlobalConfig is the default global configuration.
DefaultGlobalConfig = GlobalConfig{
ScrapeInterval: model.Duration(1 * time.Minute),
@ -173,9 +176,10 @@ var (
ScrapeProtocols: nil,
// When the native histogram feature flag is enabled,
// ScrapeNativeHistograms default changes to true.
ScrapeNativeHistograms: &f,
ScrapeNativeHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: false,
AlwaysScrapeClassicHistograms: false,
ExtraScrapeMetrics: boolPtr(false),
MetricNameValidationScheme: model.UTF8Validation,
MetricNameEscapingScheme: model.AllowUTF8,
}
@ -513,6 +517,10 @@ type GlobalConfig struct {
ConvertClassicHistogramsToNHCB bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"`
// Whether to scrape a classic histogram, even if it is also exposed as a native histogram.
AlwaysScrapeClassicHistograms bool `yaml:"always_scrape_classic_histograms,omitempty"`
// Whether to enable additional scrape metrics.
// When enabled, Prometheus stores samples for scrape_timeout_seconds,
// scrape_sample_limit, and scrape_body_size_bytes.
ExtraScrapeMetrics *bool `yaml:"extra_scrape_metrics,omitempty"`
}
// ScrapeProtocol represents supported protocol for scraping metrics.
@ -652,6 +660,9 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(any) error) error {
if gc.ScrapeNativeHistograms == nil {
gc.ScrapeNativeHistograms = DefaultGlobalConfig.ScrapeNativeHistograms
}
if gc.ExtraScrapeMetrics == nil {
gc.ExtraScrapeMetrics = DefaultGlobalConfig.ExtraScrapeMetrics
}
if gc.ScrapeProtocols == nil {
if DefaultGlobalConfig.ScrapeProtocols != nil {
// This is the case where the defaults are set due to a feature flag.
@ -687,7 +698,17 @@ func (c *GlobalConfig) isZero() bool {
c.ScrapeProtocols == nil &&
c.ScrapeNativeHistograms == nil &&
!c.ConvertClassicHistogramsToNHCB &&
!c.AlwaysScrapeClassicHistograms
!c.AlwaysScrapeClassicHistograms &&
c.BodySizeLimit == 0 &&
c.SampleLimit == 0 &&
c.TargetLimit == 0 &&
c.LabelLimit == 0 &&
c.LabelNameLengthLimit == 0 &&
c.LabelValueLengthLimit == 0 &&
c.KeepDroppedTargets == 0 &&
c.MetricNameValidationScheme == model.UnsetValidation &&
c.MetricNameEscapingScheme == "" &&
c.ExtraScrapeMetrics == nil
}
const DefaultGoGCPercentage = 75
@ -796,6 +817,11 @@ type ScrapeConfig struct {
// blank in config files but must have a value if a ScrapeConfig is created
// programmatically.
MetricNameEscapingScheme string `yaml:"metric_name_escaping_scheme,omitempty"`
// Whether to enable additional scrape metrics.
// When enabled, Prometheus stores samples for scrape_timeout_seconds,
// scrape_sample_limit, and scrape_body_size_bytes.
// If not set (nil), inherits the value from the global configuration.
ExtraScrapeMetrics *bool `yaml:"extra_scrape_metrics,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@ -897,6 +923,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
if c.ScrapeNativeHistograms == nil {
c.ScrapeNativeHistograms = globalConfig.ScrapeNativeHistograms
}
if c.ExtraScrapeMetrics == nil {
c.ExtraScrapeMetrics = globalConfig.ExtraScrapeMetrics
}
if c.ScrapeProtocols == nil {
switch {
@ -1022,7 +1051,7 @@ func ToEscapingScheme(s string, v model.ValidationScheme) (model.EscapingScheme,
case model.LegacyValidation:
return model.UnderscoreEscaping, nil
case model.UnsetValidation:
return model.NoEscaping, fmt.Errorf("v is unset: %s", v)
return model.NoEscaping, fmt.Errorf("ValidationScheme is unset: %s", v)
default:
panic(fmt.Errorf("unhandled validation scheme: %s", v))
}
@ -1045,6 +1074,11 @@ func (c *ScrapeConfig) AlwaysScrapeClassicHistogramsEnabled() bool {
return c.AlwaysScrapeClassicHistograms != nil && *c.AlwaysScrapeClassicHistograms
}
// ExtraScrapeMetricsEnabled reports whether extra scrape metrics are enabled
// for this scrape config. An unset (nil) ExtraScrapeMetrics counts as disabled.
func (c *ScrapeConfig) ExtraScrapeMetricsEnabled() bool {
	if enabled := c.ExtraScrapeMetrics; enabled != nil {
		return *enabled
	}
	return false
}
// StorageConfig configures runtime reloadable configuration options.
type StorageConfig struct {
TSDBConfig *TSDBConfig `yaml:"tsdb,omitempty"`
@ -1073,6 +1107,10 @@ type TSDBConfig struct {
// This should not be used directly and must be converted into OutOfOrderTimeWindow.
OutOfOrderTimeWindowFlag model.Duration `yaml:"out_of_order_time_window,omitempty"`
// StaleSeriesCompactionThreshold is a number between 0.0 and 1.0 giving the fraction of stale series in
// the in-memory Head block. If the fraction of stale series crosses this threshold, stale series compaction is run immediately.
StaleSeriesCompactionThreshold float64 `yaml:"stale_series_compaction_threshold,omitempty"`
Retention *TSDBRetentionConfig `yaml:"retention,omitempty"`
}

View file

@ -1,4 +1,4 @@
// Copyright 2017 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -74,10 +74,6 @@ func mustParseURL(u string) *config.URL {
return &config.URL{URL: parsed}
}
// boolPtr copies b into a new variable and returns its address.
func boolPtr(b bool) *bool {
	v := b
	return &v
}
const (
globBodySizeLimit = 15 * units.MiB
globSampleLimit = 1500
@ -109,6 +105,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: false,
ConvertClassicHistogramsToNHCB: false,
ExtraScrapeMetrics: boolPtr(false),
MetricNameValidationScheme: model.UTF8Validation,
},
@ -236,6 +233,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -360,6 +358,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
HTTPClientConfig: config.HTTPClientConfig{
BasicAuth: &config.BasicAuth{
@ -470,6 +469,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -532,6 +532,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: "/metrics",
Scheme: "http",
@ -571,6 +572,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -616,6 +618,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -661,6 +664,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -696,6 +700,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -739,6 +744,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -779,6 +785,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -826,6 +833,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -863,6 +871,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -903,6 +912,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -936,6 +946,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -972,6 +983,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: "/federate",
Scheme: DefaultScrapeConfig.Scheme,
@ -1008,6 +1020,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1044,6 +1057,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1077,6 +1091,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1118,6 +1133,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1158,6 +1174,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1195,6 +1212,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1231,6 +1249,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1271,6 +1290,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1314,6 +1334,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(true),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1377,6 +1398,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1410,6 +1432,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@ -1454,6 +1477,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@ -1504,6 +1528,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1544,6 +1569,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1585,6 +1611,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@ -1621,6 +1648,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1659,6 +1687,7 @@ var expectedConf = &Config{
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1704,8 +1733,9 @@ var expectedConf = &Config{
},
StorageConfig: StorageConfig{
TSDBConfig: &TSDBConfig{
OutOfOrderTimeWindow: 30 * time.Minute.Milliseconds(),
OutOfOrderTimeWindowFlag: model.Duration(30 * time.Minute),
OutOfOrderTimeWindow: 30 * time.Minute.Milliseconds(),
OutOfOrderTimeWindowFlag: model.Duration(30 * time.Minute),
StaleSeriesCompactionThreshold: 0.5,
Retention: &TSDBRetentionConfig{
Time: model.Duration(24 * time.Hour),
Size: 1 * units.GiB,
@ -2663,12 +2693,87 @@ func TestAgentMode(t *testing.T) {
)
}
func TestEmptyGlobalBlock(t *testing.T) {
c, err := Load("global:\n", promslog.NewNopLogger())
require.NoError(t, err)
exp := DefaultConfig
exp.loaded = true
require.Equal(t, exp, *c)
// TestGlobalConfig covers loading of the global configuration block and the
// isZero helper used to detect an empty block.
func TestGlobalConfig(t *testing.T) {
	// An empty "global:" block must load to exactly the default configuration.
	t.Run("empty block restores defaults", func(t *testing.T) {
		c, err := Load("global:\n", promslog.NewNopLogger())
		require.NoError(t, err)
		exp := DefaultConfig
		exp.loaded = true
		require.Equal(t, exp, *c)
	})

	// Verify that isZero() correctly identifies non-zero configurations for the
	// GlobalConfig fields listed below. This is important because isZero() is
	// used during YAML unmarshaling to detect empty global blocks that should
	// be replaced with defaults. Pointer fields are set to a non-nil false
	// value on purpose: being set at all must make the config non-zero.
	t.Run("isZero", func(t *testing.T) {
		for _, tc := range []struct {
			name       string
			config     GlobalConfig
			expectZero bool
		}{
			{
				name:       "empty GlobalConfig",
				config:     GlobalConfig{},
				expectZero: true,
			},
			{
				name:       "ScrapeInterval set",
				config:     GlobalConfig{ScrapeInterval: model.Duration(30 * time.Second)},
				expectZero: false,
			},
			{
				name:       "ScrapeProtocols set",
				config:     GlobalConfig{ScrapeProtocols: []ScrapeProtocol{PrometheusText0_0_4}},
				expectZero: false,
			},
			{
				name:       "ScrapeNativeHistograms set",
				config:     GlobalConfig{ScrapeNativeHistograms: boolPtr(false)},
				expectZero: false,
			},
			{
				name:       "ConvertClassicHistogramsToNHCB set",
				config:     GlobalConfig{ConvertClassicHistogramsToNHCB: true},
				expectZero: false,
			},
			{
				name:       "AlwaysScrapeClassicHistograms set",
				config:     GlobalConfig{AlwaysScrapeClassicHistograms: true},
				expectZero: false,
			},
			{
				name:       "BodySizeLimit set",
				config:     GlobalConfig{BodySizeLimit: 1 * units.MiB},
				expectZero: false,
			},
			{
				name:       "SampleLimit set",
				config:     GlobalConfig{SampleLimit: 1000},
				expectZero: false,
			},
			{
				name:       "TargetLimit set",
				config:     GlobalConfig{TargetLimit: 500},
				expectZero: false,
			},
			{
				name:       "LabelLimit set",
				config:     GlobalConfig{LabelLimit: 100},
				expectZero: false,
			},
			{
				name:       "LabelNameLengthLimit set",
				config:     GlobalConfig{LabelNameLengthLimit: 50},
				expectZero: false,
			},
			{
				name:       "LabelValueLengthLimit set",
				config:     GlobalConfig{LabelValueLengthLimit: 200},
				expectZero: false,
			},
			{
				name:       "KeepDroppedTargets set",
				config:     GlobalConfig{KeepDroppedTargets: 10},
				expectZero: false,
			},
			{
				name:       "MetricNameValidationScheme set",
				config:     GlobalConfig{MetricNameValidationScheme: model.LegacyValidation},
				expectZero: false,
			},
			{
				name:       "MetricNameEscapingScheme set",
				config:     GlobalConfig{MetricNameEscapingScheme: model.EscapeUnderscores},
				expectZero: false,
			},
			{
				name:       "ExtraScrapeMetrics set",
				config:     GlobalConfig{ExtraScrapeMetrics: boolPtr(false)},
				expectZero: false,
			},
		} {
			t.Run(tc.name, func(t *testing.T) {
				result := tc.config.isZero()
				require.Equal(t, tc.expectZero, result)
			})
		}
	})
}
// ScrapeConfigOptions contains options for creating a scrape config.
@ -2680,6 +2785,7 @@ type ScrapeConfigOptions struct {
ScrapeNativeHistograms bool
AlwaysScrapeClassicHistograms bool
ConvertClassicHistToNHCB bool
ExtraScrapeMetrics bool
}
func TestGetScrapeConfigs(t *testing.T) {
@ -2713,6 +2819,7 @@ func TestGetScrapeConfigs(t *testing.T) {
ScrapeNativeHistograms: boolPtr(opts.ScrapeNativeHistograms),
AlwaysScrapeClassicHistograms: boolPtr(opts.AlwaysScrapeClassicHistograms),
ConvertClassicHistogramsToNHCB: boolPtr(opts.ConvertClassicHistToNHCB),
ExtraScrapeMetrics: boolPtr(opts.ExtraScrapeMetrics),
}
if opts.ScrapeProtocols == nil {
sc.ScrapeProtocols = DefaultScrapeProtocols
@ -2796,6 +2903,7 @@ func TestGetScrapeConfigs(t *testing.T) {
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -2834,6 +2942,7 @@ func TestGetScrapeConfigs(t *testing.T) {
ScrapeNativeHistograms: boolPtr(false),
AlwaysScrapeClassicHistograms: boolPtr(false),
ConvertClassicHistogramsToNHCB: boolPtr(false),
ExtraScrapeMetrics: boolPtr(false),
HTTPClientConfig: config.HTTPClientConfig{
TLSConfig: config.TLSConfig{
@ -2946,6 +3055,26 @@ func TestGetScrapeConfigs(t *testing.T) {
configFile: "testdata/global_scrape_protocols_and_local_disable_scrape_native_hist.good.yml",
expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), ScrapeNativeHistograms: false, ScrapeProtocols: []ScrapeProtocol{PrometheusText0_0_4}})},
},
{
name: "A global config that enables extra scrape metrics",
configFile: "testdata/global_enable_extra_scrape_metrics.good.yml",
expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), ExtraScrapeMetrics: true})},
},
{
name: "A global config that disables extra scrape metrics",
configFile: "testdata/global_disable_extra_scrape_metrics.good.yml",
expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), ExtraScrapeMetrics: false})},
},
{
name: "A global config that disables extra scrape metrics and scrape config that enables it",
configFile: "testdata/local_enable_extra_scrape_metrics.good.yml",
expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), ExtraScrapeMetrics: true})},
},
{
name: "A global config that enables extra scrape metrics and scrape config that disables it",
configFile: "testdata/local_disable_extra_scrape_metrics.good.yml",
expectedResult: []*ScrapeConfig{sc(ScrapeConfigOptions{JobName: "prometheus", ScrapeInterval: model.Duration(60 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), ExtraScrapeMetrics: false})},
},
}
for _, tc := range testCases {
@ -2962,6 +3091,99 @@ func TestGetScrapeConfigs(t *testing.T) {
}
}
// TestExtraScrapeMetrics loads inline configurations through Load and checks
// two things per case: the extra_scrape_metrics value stored on the global
// config, and the effective value reported for the single scrape config by
// ExtraScrapeMetricsEnabled.
func TestExtraScrapeMetrics(t *testing.T) {
tests := []struct {
name string
config string
// expectGlobal is compared against cfg.GlobalConfig.ExtraScrapeMetrics.
expectGlobal *bool
// expectEnabled is compared against ExtraScrapeMetricsEnabled() of the only scrape config.
expectEnabled bool
}{
{
name: "default values (not set)",
config: `
scrape_configs:
- job_name: test
static_configs:
- targets: ['localhost:9090']
`,
expectGlobal: boolPtr(false), // inherits from DefaultGlobalConfig
expectEnabled: false,
},
{
name: "global enabled",
config: `
global:
extra_scrape_metrics: true
scrape_configs:
- job_name: test
static_configs:
- targets: ['localhost:9090']
`,
expectGlobal: boolPtr(true),
expectEnabled: true,
},
{
name: "global disabled",
config: `
global:
extra_scrape_metrics: false
scrape_configs:
- job_name: test
static_configs:
- targets: ['localhost:9090']
`,
expectGlobal: boolPtr(false),
expectEnabled: false,
},
{
// The scrape config sets the option itself; the expected effective value follows it, not the global one.
name: "scrape override enabled",
config: `
global:
extra_scrape_metrics: false
scrape_configs:
- job_name: test
extra_scrape_metrics: true
static_configs:
- targets: ['localhost:9090']
`,
expectGlobal: boolPtr(false),
expectEnabled: true,
},
{
// Mirror image of the previous case: global on, scrape config off.
name: "scrape override disabled",
config: `
global:
extra_scrape_metrics: true
scrape_configs:
- job_name: test
extra_scrape_metrics: false
static_configs:
- targets: ['localhost:9090']
`,
expectGlobal: boolPtr(true),
expectEnabled: false,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
cfg, err := Load(tc.config, promslog.NewNopLogger())
require.NoError(t, err)
// Check global config
require.Equal(t, tc.expectGlobal, cfg.GlobalConfig.ExtraScrapeMetrics)
// Check scrape config
scfgs, err := cfg.GetScrapeConfigs()
require.NoError(t, err)
// Every case defines exactly one job, so exactly one scrape config is expected.
require.Len(t, scfgs, 1)
// Check the effective value via the helper method
require.Equal(t, tc.expectEnabled, scfgs[0].ExtraScrapeMetricsEnabled())
})
}
}
func kubernetesSDHostURL() config.URL {
tURL, _ := url.Parse("https://localhost:1234")
return config.URL{URL: tURL}

View file

@ -1,4 +1,4 @@
// Copyright 2017 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2024 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -1,4 +1,4 @@
// Copyright 2024 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -453,6 +453,7 @@ alerting:
storage:
tsdb:
out_of_order_time_window: 30m
stale_series_compaction_threshold: 0.5
retention:
time: 1d
size: 1GB

View file

@ -0,0 +1,6 @@
global:
extra_scrape_metrics: false
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ['localhost:8080']

View file

@ -0,0 +1,6 @@
global:
extra_scrape_metrics: true
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ['localhost:8080']

View file

@ -0,0 +1,7 @@
global:
extra_scrape_metrics: true
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ['localhost:8080']
extra_scrape_metrics: false

View file

@ -0,0 +1,7 @@
global:
extra_scrape_metrics: false
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ['localhost:8080']
extra_scrape_metrics: true

View file

@ -50,7 +50,7 @@ file for use with `file_sd`.
The general principle with SD is to extract all the potentially useful
information we can out of the SD, and let the user choose what they need of it
using
[relabelling](https://prometheus.io/docs/operating/configuration/#<relabel_config>).
[relabelling](https://prometheus.io/docs/operating/configuration/#relabel_config).
This information is generally termed metadata.
Metadata is exposed as a set of key/value pairs (labels) per target. The keys

336
discovery/aws/aws.go Normal file
View file

@ -0,0 +1,336 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"context"
"errors"
"fmt"
"time"
awsConfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/feature/ec2/imds"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery"
)
// DefaultSDConfig holds the default values of the role-agnostic AWS SD
// configuration: a one minute refresh interval and the default HTTP client
// settings.
var DefaultSDConfig = SDConfig{
	HTTPClientConfig: config.DefaultHTTPClientConfig,
	RefreshInterval:  model.Duration(time.Minute),
}

// init registers the AWS SD configuration type with the discovery package.
func init() {
	discovery.RegisterConfig(new(SDConfig))
}
// Role names the AWS service a discovery configuration targets.
type Role string

// The valid options for Role.
const (
	RoleEC2         Role = "ec2"
	RoleECS         Role = "ecs"
	RoleElasticache Role = "elasticache"
	RoleLightsail   Role = "lightsail"
	RoleMSK         Role = "msk"
)

// UnmarshalYAML implements the yaml.Unmarshaler interface.
//
// The value is decoded as a plain string straight into the receiver and then
// checked against the known roles. An unknown value yields an error; note that
// the receiver has already been overwritten with that value at this point.
func (c *Role) UnmarshalYAML(unmarshal func(any) error) error {
	err := unmarshal((*string)(c))
	if err != nil {
		return err
	}

	decoded := *c
	for _, known := range [...]Role{RoleEC2, RoleECS, RoleElasticache, RoleLightsail, RoleMSK} {
		if decoded == known {
			return nil
		}
	}
	return fmt.Errorf("unknown AWS SD role %q", decoded)
}

// String returns the role as a plain string.
func (c Role) String() string {
	s := string(c)
	return s
}
// SDConfig is the configuration for AWS service discovery.
//
// It is a flat, role-agnostic view of the settings. UnmarshalYAML copies the
// relevant fields into the embedded sub-config that matches Role, and
// NewDiscoverer hands that sub-config to the role-specific discovery.
type SDConfig struct {
// Role selects which of the embedded sub-configs is populated and used.
Role Role `yaml:"role"`
// Region is resolved through loadRegion during unmarshalling when left empty.
Region string `yaml:"region,omitempty"`
Endpoint string `yaml:"endpoint,omitempty"`
AccessKey string `yaml:"access_key,omitempty"`
SecretKey config.Secret `yaml:"secret_key,omitempty"`
Profile string `yaml:"profile,omitempty"`
RoleARN string `yaml:"role_arn,omitempty"`
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
Port int `yaml:"port,omitempty"`
// HTTPClientConfig is inlined, so its options appear at the same YAML level as the fields above.
HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
// ec2 specific
Filters []*EC2Filter `yaml:"filters,omitempty"`
// ecs, msk specific
// NOTE(review): UnmarshalYAML also copies Clusters for the elasticache role.
Clusters []string `yaml:"clusters,omitempty"`
// Embedded sub-configs (internal use only, not serialized)
// UnmarshalYAML allocates only the one matching Role; the others stay nil.
*EC2SDConfig `yaml:"-"`
*ECSSDConfig `yaml:"-"`
*ElasticacheSDConfig `yaml:"-"`
*LightsailSDConfig `yaml:"-"`
*MSKSDConfig `yaml:"-"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for SDConfig.
//
// It decodes the flat fields, resolves the region through loadRegion (which
// may consult the AWS SDK configuration and IMDS when no region is given), and
// then builds the sub-config for the configured role: a fresh copy of that
// role's defaults, overlaid with every flat field the user set. An unknown or
// missing role is rejected with an error.
func (c *SDConfig) UnmarshalYAML(unmarshal func(any) error) error {
	// Alias without methods so that unmarshal does not recurse into this function.
	type plain SDConfig

	// Start from the default HTTP client settings rather than the zero value.
	// HTTPClientConfig is copied unconditionally into the role-specific config
	// below, so a zero-valued struct would replace that config's own HTTP
	// client defaults whenever the user sets none of the HTTP options.
	aux := plain{HTTPClientConfig: DefaultSDConfig.HTTPClientConfig}
	if err := unmarshal(&aux); err != nil {
		return err
	}
	*c = SDConfig(aux)

	var err error
	c.Region, err = loadRegion(context.Background(), c.Region)
	if err != nil {
		return fmt.Errorf("could not determine AWS region: %w", err)
	}

	// Each case copies the role defaults by value first, so that separate
	// SDConfig instances never share one sub-config object. Optional fields
	// are only overlaid when set, which keeps the role's own defaults intact.
	switch c.Role {
	case RoleEC2:
		if c.EC2SDConfig == nil {
			ec2Config := DefaultEC2SDConfig
			c.EC2SDConfig = &ec2Config
		}
		c.EC2SDConfig.HTTPClientConfig = c.HTTPClientConfig
		c.EC2SDConfig.Region = c.Region
		if c.Endpoint != "" {
			c.EC2SDConfig.Endpoint = c.Endpoint
		}
		if c.AccessKey != "" {
			c.EC2SDConfig.AccessKey = c.AccessKey
		}
		if c.SecretKey != "" {
			c.EC2SDConfig.SecretKey = c.SecretKey
		}
		if c.Profile != "" {
			c.EC2SDConfig.Profile = c.Profile
		}
		if c.RoleARN != "" {
			c.EC2SDConfig.RoleARN = c.RoleARN
		}
		if c.Port != 0 {
			c.EC2SDConfig.Port = c.Port
		}
		if c.RefreshInterval != 0 {
			c.EC2SDConfig.RefreshInterval = c.RefreshInterval
		}
		if c.Filters != nil {
			c.EC2SDConfig.Filters = c.Filters
		}
	case RoleECS:
		if c.ECSSDConfig == nil {
			ecsConfig := DefaultECSSDConfig
			c.ECSSDConfig = &ecsConfig
		}
		c.ECSSDConfig.HTTPClientConfig = c.HTTPClientConfig
		c.ECSSDConfig.Region = c.Region
		if c.Endpoint != "" {
			c.ECSSDConfig.Endpoint = c.Endpoint
		}
		if c.AccessKey != "" {
			c.ECSSDConfig.AccessKey = c.AccessKey
		}
		if c.SecretKey != "" {
			c.ECSSDConfig.SecretKey = c.SecretKey
		}
		if c.Profile != "" {
			c.ECSSDConfig.Profile = c.Profile
		}
		if c.RoleARN != "" {
			c.ECSSDConfig.RoleARN = c.RoleARN
		}
		if c.Port != 0 {
			c.ECSSDConfig.Port = c.Port
		}
		if c.RefreshInterval != 0 {
			c.ECSSDConfig.RefreshInterval = c.RefreshInterval
		}
		if c.Clusters != nil {
			c.ECSSDConfig.Clusters = c.Clusters
		}
	case RoleElasticache:
		if c.ElasticacheSDConfig == nil {
			elasticacheConfig := DefaultElasticacheSDConfig
			c.ElasticacheSDConfig = &elasticacheConfig
		}
		c.ElasticacheSDConfig.HTTPClientConfig = c.HTTPClientConfig
		c.ElasticacheSDConfig.Region = c.Region
		if c.Endpoint != "" {
			c.ElasticacheSDConfig.Endpoint = c.Endpoint
		}
		if c.AccessKey != "" {
			c.ElasticacheSDConfig.AccessKey = c.AccessKey
		}
		if c.SecretKey != "" {
			c.ElasticacheSDConfig.SecretKey = c.SecretKey
		}
		if c.Profile != "" {
			c.ElasticacheSDConfig.Profile = c.Profile
		}
		if c.RoleARN != "" {
			c.ElasticacheSDConfig.RoleARN = c.RoleARN
		}
		if c.Port != 0 {
			c.ElasticacheSDConfig.Port = c.Port
		}
		if c.RefreshInterval != 0 {
			c.ElasticacheSDConfig.RefreshInterval = c.RefreshInterval
		}
		if c.Clusters != nil {
			c.ElasticacheSDConfig.Clusters = c.Clusters
		}
	case RoleLightsail:
		if c.LightsailSDConfig == nil {
			lightsailConfig := DefaultLightsailSDConfig
			c.LightsailSDConfig = &lightsailConfig
		}
		c.LightsailSDConfig.HTTPClientConfig = c.HTTPClientConfig
		c.LightsailSDConfig.Region = c.Region
		if c.Endpoint != "" {
			c.LightsailSDConfig.Endpoint = c.Endpoint
		}
		if c.AccessKey != "" {
			c.LightsailSDConfig.AccessKey = c.AccessKey
		}
		if c.SecretKey != "" {
			c.LightsailSDConfig.SecretKey = c.SecretKey
		}
		if c.Profile != "" {
			c.LightsailSDConfig.Profile = c.Profile
		}
		if c.RoleARN != "" {
			c.LightsailSDConfig.RoleARN = c.RoleARN
		}
		if c.Port != 0 {
			c.LightsailSDConfig.Port = c.Port
		}
		if c.RefreshInterval != 0 {
			c.LightsailSDConfig.RefreshInterval = c.RefreshInterval
		}
	case RoleMSK:
		if c.MSKSDConfig == nil {
			mskConfig := DefaultMSKSDConfig
			c.MSKSDConfig = &mskConfig
		}
		c.MSKSDConfig.HTTPClientConfig = c.HTTPClientConfig
		c.MSKSDConfig.Region = c.Region
		if c.Endpoint != "" {
			c.MSKSDConfig.Endpoint = c.Endpoint
		}
		if c.AccessKey != "" {
			c.MSKSDConfig.AccessKey = c.AccessKey
		}
		if c.SecretKey != "" {
			c.MSKSDConfig.SecretKey = c.SecretKey
		}
		if c.Profile != "" {
			c.MSKSDConfig.Profile = c.Profile
		}
		if c.RoleARN != "" {
			c.MSKSDConfig.RoleARN = c.RoleARN
		}
		if c.Port != 0 {
			c.MSKSDConfig.Port = c.Port
		}
		if c.RefreshInterval != 0 {
			c.MSKSDConfig.RefreshInterval = c.RefreshInterval
		}
		if c.Clusters != nil {
			c.MSKSDConfig.Clusters = c.Clusters
		}
	default:
		// Also reached when the role key is absent and Role is still empty.
		return fmt.Errorf("unknown AWS SD role %q", c.Role)
	}
	return nil
}
// Name returns the name of the AWS Config, which is always "aws".
func (*SDConfig) Name() string {
	return "aws"
}
// NewDiscovererMetrics implements discovery.Config.
//
// The registerer is not used; the returned metrics only carry the refresh
// metrics instantiator, which NewDiscoverer later hands to the role-specific
// metrics.
func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
	m := new(awsMetrics)
	m.refreshMetrics = rmi
	return m
}
// NewDiscoverer returns a Discoverer for the AWS Config.
//
// opts.Metrics must be the *awsMetrics created by NewDiscovererMetrics. It is
// replaced by the metrics type of the configured role, reusing the same
// refresh metrics, before the role-specific discovery is constructed. An
// unknown role results in an error.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
	shared, isAWS := opts.Metrics.(*awsMetrics)
	if !isAWS {
		return nil, errors.New("invalid discovery metrics type for AWS SD")
	}

	switch role := c.Role; role {
	case RoleEC2:
		opts.Metrics = &ec2Metrics{refreshMetrics: shared.refreshMetrics}
		return NewEC2Discovery(c.EC2SDConfig, opts)
	case RoleECS:
		opts.Metrics = &ecsMetrics{refreshMetrics: shared.refreshMetrics}
		return NewECSDiscovery(c.ECSSDConfig, opts)
	case RoleElasticache:
		opts.Metrics = &elasticacheMetrics{refreshMetrics: shared.refreshMetrics}
		return NewElasticacheDiscovery(c.ElasticacheSDConfig, opts)
	case RoleLightsail:
		opts.Metrics = &lightsailMetrics{refreshMetrics: shared.refreshMetrics}
		return NewLightsailDiscovery(c.LightsailSDConfig, opts)
	case RoleMSK:
		opts.Metrics = &mskMetrics{refreshMetrics: shared.refreshMetrics}
		return NewMSKDiscovery(c.MSKSDConfig, opts)
	default:
		return nil, fmt.Errorf("unknown AWS SD role %q", role)
	}
}
// loadRegion determines the AWS region to use. Sources are tried in order:
// the explicitly specified region, the region of the default AWS SDK
// configuration, and finally the instance metadata service (IMDS).
//
// An error is returned when the SDK configuration cannot be loaded, when the
// IMDS lookup fails, or when IMDS answers with an empty region.
func loadRegion(ctx context.Context, specifiedRegion string) (string, error) {
	// An explicit region wins; no SDK or IMDS lookup happens in that case.
	if specifiedRegion != "" {
		return specifiedRegion, nil
	}

	sdkCfg, err := awsConfig.LoadDefaultConfig(ctx)
	if err != nil {
		return "", fmt.Errorf("failed to load AWS config: %w", err)
	}
	if sdkRegion := sdkCfg.Region; sdkRegion != "" {
		return sdkRegion, nil
	}

	// Fallback (may fail in non-AWS environments)
	out, err := imds.NewFromConfig(sdkCfg).GetRegion(ctx, &imds.GetRegionInput{})
	switch {
	case err != nil:
		return "", fmt.Errorf("failed to get region from IMDS: %w", err)
	case out.Region == "":
		return "", errors.New("region not found in AWS config or IMDS")
	}
	return out.Region, nil
}

489
discovery/aws/aws_test.go Normal file
View file

@ -0,0 +1,489 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"context"
"errors"
"math/rand/v2"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"time"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.yaml.in/yaml/v3"
)
// TestRoleUnmarshalYAML checks that Role.UnmarshalYAML accepts every supported
// role and rejects anything else, including the empty string.
func TestRoleUnmarshalYAML(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected Role
		wantErr  bool
	}{
		{
			name:     "EC2Role",
			input:    "ec2",
			expected: RoleEC2,
			wantErr:  false,
		},
		{
			name:     "LightsailRole",
			input:    "lightsail",
			expected: RoleLightsail,
			wantErr:  false,
		},
		{
			name:     "ECSRole",
			input:    "ecs",
			expected: RoleECS,
			wantErr:  false,
		},
		{
			name:     "ElasticacheRole",
			input:    "elasticache",
			expected: RoleElasticache,
			wantErr:  false,
		},
		{
			name:     "MSKRole",
			input:    "msk",
			expected: RoleMSK,
			wantErr:  false,
		},
		{
			name:     "InvalidRole",
			input:    "invalid",
			expected: "invalid",
			wantErr:  true,
		},
		{
			name:     "EmptyRole",
			input:    "",
			expected: "",
			wantErr:  true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var r Role
			// Stand-in for the YAML decoder: it writes the test input into the
			// *string the method passes in.
			err := r.UnmarshalYAML(func(v any) error {
				ptr, ok := v.(*string)
				if !ok {
					return errors.New("not a string pointer")
				}
				*ptr = tt.input
				return nil
			})
			if tt.wantErr {
				require.Error(t, err, "expected error for input %q", tt.input)
				return
			}
			require.NoError(t, err, "unexpected error for input %q", tt.input)
			require.Equal(t, tt.expected, r, "unexpected role for input %q", tt.input)
		})
	}
}
// TestRoleString checks the string form of every supported role.
func TestRoleString(t *testing.T) {
	tests := []struct {
		name     string
		role     Role
		expected string
	}{
		{
			name:     "EC2",
			role:     RoleEC2,
			expected: "ec2",
		},
		{
			name:     "Lightsail",
			role:     RoleLightsail,
			expected: "lightsail",
		},
		{
			name:     "ECS",
			role:     RoleECS,
			expected: "ecs",
		},
		{
			name:     "Elasticache",
			role:     RoleElasticache,
			expected: "elasticache",
		},
		{
			name:     "MSK",
			role:     RoleMSK,
			expected: "msk",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			require.Equal(t, tt.expected, tt.role.String())
		})
	}
}
// TestSDConfigName checks the name reported by the AWS SD configuration.
func TestSDConfigName(t *testing.T) {
	got := (&SDConfig{}).Name()
	require.Equal(t, "aws", got)
}
// TestDefaultSDConfig checks that the defaults carry no role and a one minute
// refresh interval.
func TestDefaultSDConfig(t *testing.T) {
	wantInterval := model.Duration(time.Minute)

	require.Equal(t, wantInterval, DefaultSDConfig.RefreshInterval)
	require.Equal(t, Role(""), DefaultSDConfig.Role)
}
// TestSDConfigUnmarshalYAML decodes flat AWS SD configurations and checks that
// the values end up in the sub-config that belongs to the configured role.
// Every case sets the region explicitly, so loadRegion returns it without
// consulting the AWS SDK or IMDS.
func TestSDConfigUnmarshalYAML(t *testing.T) {
tests := []struct {
name string
yaml string
// validateFunc receives the decoded config and asserts on the role-specific sub-config.
validateFunc func(t *testing.T, cfg *SDConfig)
}{
{
name: "EC2WithFlatFields",
yaml: `role: ec2
region: us-west-2
port: 9100
filters:
- name: instance-state-name
values: [running]`,
validateFunc: func(t *testing.T, cfg *SDConfig) {
require.Equal(t, RoleEC2, cfg.Role)
require.NotNil(t, cfg.EC2SDConfig)
require.Equal(t, "us-west-2", cfg.EC2SDConfig.Region)
require.Equal(t, 9100, cfg.EC2SDConfig.Port)
require.Len(t, cfg.EC2SDConfig.Filters, 1)
require.Equal(t, "instance-state-name", cfg.EC2SDConfig.Filters[0].Name)
require.Equal(t, []string{"running"}, cfg.EC2SDConfig.Filters[0].Values)
},
},
{
name: "ECSWithFlatFields",
yaml: `role: ecs
region: us-east-1
port: 9200
clusters: ["some-cluster"]`,
validateFunc: func(t *testing.T, cfg *SDConfig) {
require.Equal(t, RoleECS, cfg.Role)
require.NotNil(t, cfg.ECSSDConfig)
require.Equal(t, "us-east-1", cfg.ECSSDConfig.Region)
require.Equal(t, 9200, cfg.ECSSDConfig.Port)
require.Equal(t, []string{"some-cluster"}, cfg.ECSSDConfig.Clusters)
},
},
{
name: "LightsailWithFlatFields",
yaml: `role: lightsail
region: eu-central-1
port: 9300`,
validateFunc: func(t *testing.T, cfg *SDConfig) {
require.Equal(t, RoleLightsail, cfg.Role)
require.NotNil(t, cfg.LightsailSDConfig)
require.Equal(t, "eu-central-1", cfg.LightsailSDConfig.Region)
require.Equal(t, 9300, cfg.LightsailSDConfig.Port)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var cfg SDConfig
// Decoding goes through SDConfig.UnmarshalYAML, which fills the sub-config.
require.NoError(t, yaml.Unmarshal([]byte(tt.yaml), &cfg))
tt.validateFunc(t, &cfg)
})
}
}
// TestMultipleSDConfigsDoNotShareState verifies that multiple AWS SD configs
// don't share the same underlying configuration object. This was a bug where
// all configs pointed to the same global default, causing port and other
// settings from one job to overwrite settings in another job.
//
// Each case decodes a YAML list of two configs with the same role and region
// but different ports (and filters or clusters where the role has them), then
// asserts that both sub-configs kept their own values and are distinct
// pointers.
func TestMultipleSDConfigsDoNotShareState(t *testing.T) {
tests := []struct {
name string
yaml string
// validateFunc receives the two decoded configs in document order.
validateFunc func(t *testing.T, cfg1, cfg2 *SDConfig)
}{
{
name: "EC2MultipleJobsDifferentPorts",
yaml: `
- role: ec2
region: us-west-2
port: 9100
filters:
- name: tag:Name
values: [host-1]
- role: ec2
region: us-west-2
port: 9101
filters:
- name: tag:Name
values: [host-2]`,
validateFunc: func(t *testing.T, cfg1, cfg2 *SDConfig) {
require.Equal(t, RoleEC2, cfg1.Role)
require.Equal(t, RoleEC2, cfg2.Role)
require.NotNil(t, cfg1.EC2SDConfig)
require.NotNil(t, cfg2.EC2SDConfig)
// Verify ports are different and not shared
require.Equal(t, 9100, cfg1.EC2SDConfig.Port)
require.Equal(t, 9101, cfg2.EC2SDConfig.Port)
// Verify filters are different and not shared
require.Len(t, cfg1.EC2SDConfig.Filters, 1)
require.Len(t, cfg2.EC2SDConfig.Filters, 1)
require.Equal(t, []string{"host-1"}, cfg1.EC2SDConfig.Filters[0].Values)
require.Equal(t, []string{"host-2"}, cfg2.EC2SDConfig.Filters[0].Values)
// Most importantly: verify they're not the same pointer
require.NotSame(t, cfg1.EC2SDConfig, cfg2.EC2SDConfig,
"EC2SDConfig objects should not share the same memory address")
},
},
{
name: "ECSMultipleJobsDifferentPorts",
yaml: `
- role: ecs
region: us-east-1
port: 8080
clusters: [cluster-a]
- role: ecs
region: us-east-1
port: 8081
clusters: [cluster-b]`,
validateFunc: func(t *testing.T, cfg1, cfg2 *SDConfig) {
require.Equal(t, RoleECS, cfg1.Role)
require.Equal(t, RoleECS, cfg2.Role)
require.NotNil(t, cfg1.ECSSDConfig)
require.NotNil(t, cfg2.ECSSDConfig)
require.Equal(t, 8080, cfg1.ECSSDConfig.Port)
require.Equal(t, 8081, cfg2.ECSSDConfig.Port)
require.Equal(t, []string{"cluster-a"}, cfg1.ECSSDConfig.Clusters)
require.Equal(t, []string{"cluster-b"}, cfg2.ECSSDConfig.Clusters)
require.NotSame(t, cfg1.ECSSDConfig, cfg2.ECSSDConfig,
"ECSSDConfig objects should not share the same memory address")
},
},
{
name: "LightsailMultipleJobsDifferentPorts",
yaml: `
- role: lightsail
region: eu-west-1
port: 7070
- role: lightsail
region: eu-west-1
port: 7071`,
validateFunc: func(t *testing.T, cfg1, cfg2 *SDConfig) {
require.Equal(t, RoleLightsail, cfg1.Role)
require.Equal(t, RoleLightsail, cfg2.Role)
require.NotNil(t, cfg1.LightsailSDConfig)
require.NotNil(t, cfg2.LightsailSDConfig)
require.Equal(t, 7070, cfg1.LightsailSDConfig.Port)
require.Equal(t, 7071, cfg2.LightsailSDConfig.Port)
require.NotSame(t, cfg1.LightsailSDConfig, cfg2.LightsailSDConfig,
"LightsailSDConfig objects should not share the same memory address")
},
},
{
name: "MSKMultipleJobsDifferentPorts",
yaml: `
- role: msk
region: ap-south-1
port: 6060
clusters: ["cluster-1"]
- role: msk
region: ap-south-1
port: 6061
clusters: ["cluster-2"]`,
validateFunc: func(t *testing.T, cfg1, cfg2 *SDConfig) {
require.Equal(t, RoleMSK, cfg1.Role)
require.Equal(t, RoleMSK, cfg2.Role)
require.NotNil(t, cfg1.MSKSDConfig)
require.NotNil(t, cfg2.MSKSDConfig)
require.Equal(t, 6060, cfg1.MSKSDConfig.Port)
require.Equal(t, []string{"cluster-1"}, cfg1.MSKSDConfig.Clusters)
require.Equal(t, 6061, cfg2.MSKSDConfig.Port)
require.Equal(t, []string{"cluster-2"}, cfg2.MSKSDConfig.Clusters)
require.NotSame(t, cfg1.MSKSDConfig, cfg2.MSKSDConfig,
"MSKSDConfig objects should not share the same memory address")
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var configs []SDConfig
require.NoError(t, yaml.Unmarshal([]byte(tt.yaml), &configs))
// Guard the fixed indexing below: every case defines exactly two configs.
require.Len(t, configs, 2)
tt.validateFunc(t, &configs[0], &configs[1])
})
}
}
// getRandomRegion is a helper to return a pseudo-random AWS region for testing.
func getRandomRegion() string {
regions := []string{
"us-east-1",
"us-east-2",
"us-west-1",
"us-west-2",
"eu-west-1",
"eu-west-2",
"ap-southeast-1",
"ap-southeast-2",
"ap-northeast-1",
"ap-northeast-2",
}
return regions[rand.IntN(len(regions))]
}
// TestLoadRegion exercises every source loadRegion consults: an explicitly
// specified region, the AWS_REGION environment variable, a shared config file
// (default and named profile), and the IMDS fallback against a mock server.
func TestLoadRegion(t *testing.T) {
	// otherRegion returns a random region that is guaranteed to differ from
	// exclude. Decoy values must never equal the expected region, otherwise a
	// precedence check passes no matter which source was actually used.
	otherRegion := func(exclude string) string {
		for {
			if r := getRandomRegion(); r != exclude {
				return r
			}
		}
	}

	// newMockIMDS starts a fake metadata service that answers the instance
	// identity document request with the given region and everything else
	// with 404. The server is closed when the calling test finishes.
	newMockIMDS := func(t *testing.T, region string) *httptest.Server {
		t.Helper()
		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Handle instance identity document (contains region info)
			if r.URL.Path != "/latest/dynamic/instance-identity/document" {
				w.WriteHeader(http.StatusNotFound)
				return
			}
			w.Header().Set("Content-Type", "application/json")
			w.WriteHeader(http.StatusOK)
			// A failed write only means the client went away; nothing to do in a mock.
			_, _ = w.Write([]byte(`{"region": "` + region + `"}`))
		}))
		t.Cleanup(srv.Close)
		return srv
	}

	t.Run("with_env_region", func(t *testing.T) {
		randomRegion := getRandomRegion()
		t.Setenv("AWS_REGION", randomRegion)
		t.Setenv("AWS_ACCESS_KEY_ID", "dummy")
		t.Setenv("AWS_SECRET_ACCESS_KEY", "dummy")
		t.Setenv("AWS_CONFIG_FILE", "") // Ensure no config file is used
		t.Setenv("AWS_PROFILE", "")     // Ensure no profile file is used

		region, err := loadRegion(context.Background(), "")
		require.NoError(t, err)
		require.Equal(t, randomRegion, region)
	})

	t.Run("with_config_file_default_profile", func(t *testing.T) {
		randomRegion := getRandomRegion()

		// Create a temporary AWS config file; t.TempDir removes it after the test.
		configFile := filepath.Join(t.TempDir(), "config")
		configContent := `[default]
region = ` + randomRegion + `
`
		require.NoError(t, os.WriteFile(configFile, []byte(configContent), 0o644))

		// Set up environment to use the config file
		t.Setenv("AWS_CONFIG_FILE", configFile)
		t.Setenv("AWS_ACCESS_KEY_ID", "dummy")
		t.Setenv("AWS_SECRET_ACCESS_KEY", "dummy")
		// Clear any region environment variables to force config file usage
		t.Setenv("AWS_REGION", "")
		t.Setenv("AWS_PROFILE", "") // Ensure no profile file is used
		t.Setenv("AWS_DEFAULT_REGION", "")

		region, err := loadRegion(context.Background(), "")
		require.NoError(t, err)
		require.Equal(t, randomRegion, region)
	})

	t.Run("with_config_file_named_profile", func(t *testing.T) {
		randomRegion := getRandomRegion()

		// Create a temporary AWS config file. The default profile carries a
		// different region so that picking the wrong profile is detected.
		configFile := filepath.Join(t.TempDir(), "config")
		configContent := `[default]
region = ` + otherRegion(randomRegion) + `
[profile ` + randomRegion + `-profile]
region = ` + randomRegion + `
`
		require.NoError(t, os.WriteFile(configFile, []byte(configContent), 0o644))

		// Set up environment to use the config file
		t.Setenv("AWS_CONFIG_FILE", configFile)
		t.Setenv("AWS_PROFILE", randomRegion+"-profile")
		t.Setenv("AWS_ACCESS_KEY_ID", "dummy")
		t.Setenv("AWS_SECRET_ACCESS_KEY", "dummy")
		// Clear any region environment variables to force config file usage
		t.Setenv("AWS_REGION", "")
		t.Setenv("AWS_DEFAULT_REGION", "")

		region, err := loadRegion(context.Background(), "")
		require.NoError(t, err)
		require.Equal(t, randomRegion, region)
	})

	t.Run("with_specified_region", func(t *testing.T) {
		specifiedRegion := getRandomRegion()
		// Even with environment region set differently, specified region should take precedence
		t.Setenv("AWS_REGION", otherRegion(specifiedRegion))
		t.Setenv("AWS_ACCESS_KEY_ID", "dummy")
		t.Setenv("AWS_SECRET_ACCESS_KEY", "dummy")

		region, err := loadRegion(context.Background(), specifiedRegion)
		require.NoError(t, err)
		require.Equal(t, specifiedRegion, region)
	})

	t.Run("imds_fallback", func(t *testing.T) {
		randomRegion := getRandomRegion()
		// Mock IMDS server that returns a region
		mockIMDS := newMockIMDS(t, randomRegion)

		// Set up environment with no region but valid credentials
		// This will force fallback to IMDS
		t.Setenv("AWS_ACCESS_KEY_ID", "dummy")
		t.Setenv("AWS_SECRET_ACCESS_KEY", "dummy")
		// Unset any existing region
		t.Setenv("AWS_REGION", "")
		t.Setenv("AWS_DEFAULT_REGION", "")
		t.Setenv("AWS_CONFIG_FILE", "") // Ensure no config file is used
		t.Setenv("AWS_PROFILE", "")     // Ensure no profile file is used
		// Point IMDS to our mock server
		t.Setenv("AWS_EC2_METADATA_SERVICE_ENDPOINT", mockIMDS.URL)

		region, err := loadRegion(context.Background(), "")
		require.NoError(t, err)
		require.Equal(t, randomRegion, region)
	})

	t.Run("imds_empty_region", func(t *testing.T) {
		// Mock IMDS server that returns empty region
		mockIMDS := newMockIMDS(t, "")

		// Set up environment with no region but valid credentials
		t.Setenv("AWS_ACCESS_KEY_ID", "dummy")
		t.Setenv("AWS_SECRET_ACCESS_KEY", "dummy")
		// Unset any existing region
		t.Setenv("AWS_REGION", "")
		t.Setenv("AWS_DEFAULT_REGION", "")
		t.Setenv("AWS_CONFIG_FILE", "") // Ensure no config file is used
		t.Setenv("AWS_PROFILE", "")     // Ensure no profile file is used
		// Point IMDS to our mock server
		t.Setenv("AWS_EC2_METADATA_SERVICE_ENDPOINT", mockIMDS.URL)

		_, err := loadRegion(context.Background(), "")
		require.Error(t, err)
		require.Contains(t, err.Error(), "failed to get region from IMDS")
	})
}

View file

@ -1,4 +1,4 @@
// Copyright 2021 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -27,7 +27,6 @@ import (
awsConfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/credentials/stscreds"
"github.com/aws/aws-sdk-go-v2/feature/ec2/imds"
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2Types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/aws/aws-sdk-go-v2/service/sts"
@ -113,7 +112,7 @@ func (*EC2SDConfig) Name() string { return "ec2" }
// NewDiscoverer returns a Discoverer for the EC2 Config.
func (c *EC2SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewEC2Discovery(c, opts.Logger, opts.Metrics)
return NewEC2Discovery(c, opts)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the EC2 Config.
@ -125,31 +124,10 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(any) error) error {
return err
}
if c.Region == "" {
cfg, err := awsConfig.LoadDefaultConfig(context.Background())
if err != nil {
return err
}
if cfg.Region != "" {
// If the region is already set in the config, use it.
// This can happen if the user has set the region in the AWS config file or environment variables.
c.Region = cfg.Region
}
if c.Region == "" {
// Try to get the region from the instance metadata service (IMDS).
imdsClient := imds.NewFromConfig(cfg)
region, err := imdsClient.GetRegion(context.Background(), &imds.GetRegionInput{})
if err != nil {
return err
}
c.Region = region.Region
}
}
if c.Region == "" {
return errors.New("EC2 SD configuration requires a region")
// Check if the region is set, if not attempt to load it from the AWS SDK.
c.Region, err = loadRegion(context.Background(), c.Region)
if err != nil {
return fmt.Errorf("could not determine AWS region: %w", err)
}
for _, f := range c.Filters {
@ -180,23 +158,24 @@ type EC2Discovery struct {
}
// NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets.
func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) {
m, ok := metrics.(*ec2Metrics)
func NewEC2Discovery(conf *EC2SDConfig, opts discovery.DiscovererOptions) (*EC2Discovery, error) {
m, ok := opts.Metrics.(*ec2Metrics)
if !ok {
return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
logger = promslog.NewNopLogger()
if opts.Logger == nil {
opts.Logger = promslog.NewNopLogger()
}
d := &EC2Discovery{
logger: logger,
logger: opts.Logger,
cfg: conf,
}
d.Discovery = refresh.NewDiscovery(
refresh.Options{
Logger: logger,
Logger: opts.Logger,
Mech: "ec2",
SetName: opts.SetName,
Interval: time.Duration(d.cfg.RefreshInterval),
RefreshF: d.refresh,
MetricsInstantiator: m.refreshMetrics,
@ -245,7 +224,12 @@ func (d *EC2Discovery) ec2Client(ctx context.Context) (ec2Client, error) {
cfg.Credentials = aws.NewCredentialsCache(assumeProvider)
}
d.ec2 = ec2.NewFromConfig(cfg)
d.ec2 = ec2.NewFromConfig(cfg, func(options *ec2.Options) {
if d.cfg.Endpoint != "" {
options.BaseEndpoint = &d.cfg.Endpoint
}
options.HTTPClient = httpClient
})
return d.ec2, nil
}
@ -255,8 +239,15 @@ func (d *EC2Discovery) refreshAZIDs(ctx context.Context) error {
if err != nil {
return err
}
if azs.AvailabilityZones == nil {
d.azToAZID = make(map[string]string)
return nil
}
d.azToAZID = make(map[string]string, len(azs.AvailabilityZones))
for _, az := range azs.AvailabilityZones {
if az.ZoneName == nil || az.ZoneId == nil {
continue
}
d.azToAZID[*az.ZoneName] = *az.ZoneId
}
return nil

View file

@ -1,4 +1,4 @@
// Copyright 2024 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

995
discovery/aws/ecs.go Normal file
View file

@ -0,0 +1,995 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"slices"
"strconv"
"strings"
"sync"
"time"
"github.com/aws/aws-sdk-go-v2/aws"
awsConfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/credentials/stscreds"
"github.com/aws/aws-sdk-go-v2/service/ec2"
"github.com/aws/aws-sdk-go-v2/service/ecs"
"github.com/aws/aws-sdk-go-v2/service/ecs/types"
"github.com/aws/aws-sdk-go-v2/service/sts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"golang.org/x/sync/errgroup"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)
// Names of the meta labels that the ECS discovery attaches to its targets.
// Every name is built from the shared ecsLabel prefix.
const (
	// ecsLabel is the common prefix of all ECS meta labels.
	ecsLabel = model.MetaLabelPrefix + "ecs_"
	// Cluster, service and task identity.
	ecsLabelCluster = ecsLabel + "cluster"
	ecsLabelClusterARN = ecsLabel + "cluster_arn"
	ecsLabelService = ecsLabel + "service"
	ecsLabelServiceARN = ecsLabel + "service_arn"
	ecsLabelServiceStatus = ecsLabel + "service_status"
	ecsLabelTaskGroup = ecsLabel + "task_group"
	ecsLabelTaskARN = ecsLabel + "task_arn"
	ecsLabelTaskDefinition = ecsLabel + "task_definition"
	// Placement, networking and status of the task.
	ecsLabelRegion = ecsLabel + "region"
	ecsLabelAvailabilityZone = ecsLabel + "availability_zone"
	ecsLabelSubnetID = ecsLabel + "subnet_id"
	ecsLabelIPAddress = ecsLabel + "ip_address"
	ecsLabelLaunchType = ecsLabel + "launch_type"
	ecsLabelDesiredStatus = ecsLabel + "desired_status"
	ecsLabelLastStatus = ecsLabel + "last_status"
	ecsLabelHealthStatus = ecsLabel + "health_status"
	ecsLabelPlatformFamily = ecsLabel + "platform_family"
	ecsLabelPlatformVersion = ecsLabel + "platform_version"
	// Tag label prefixes. The sanitized tag key is appended to the prefix
	// that matches the resource the tag was read from.
	ecsLabelTag = ecsLabel + "tag_"
	ecsLabelTagCluster = ecsLabelTag + "cluster_"
	ecsLabelTagService = ecsLabelTag + "service_"
	ecsLabelTagTask = ecsLabelTag + "task_"
	ecsLabelTagEC2 = ecsLabelTag + "ec2_"
	// Labels that describe the network mode and, for tasks that run on a
	// container instance, the EC2 host.
	ecsLabelNetworkMode = ecsLabel + "network_mode"
	ecsLabelContainerInstanceARN = ecsLabel + "container_instance_arn"
	ecsLabelEC2InstanceID = ecsLabel + "ec2_instance_id"
	ecsLabelEC2InstanceType = ecsLabel + "ec2_instance_type"
	ecsLabelEC2InstancePrivateIP = ecsLabel + "ec2_instance_private_ip"
	ecsLabelEC2InstancePublicIP = ecsLabel + "ec2_instance_public_ip"
	ecsLabelPublicIP = ecsLabel + "public_ip"
)
// DefaultECSSDConfig is the default ECS SD configuration.
// UnmarshalYAML copies it into the receiver before decoding, so any field that
// is absent from the YAML document keeps the value set here.
var DefaultECSSDConfig = ECSSDConfig{
	Port: 80,
	RefreshInterval: model.Duration(60 * time.Second),
	RequestConcurrency: 20, // Aligned with AWS ECS API sustained rate limits (20 req/sec)
	HTTPClientConfig: config.DefaultHTTPClientConfig,
}
// init registers the ECS SD configuration type with the discovery package.
func init() {
	discovery.RegisterConfig(&ECSSDConfig{})
}
// ECSSDConfig is the configuration for ECS based service discovery.
type ECSSDConfig struct {
	// Region is the AWS region to query. UnmarshalYAML passes it through
	// loadRegion; discovery refuses to start when it is still empty.
	Region string `yaml:"region"`
	// Endpoint, when non-empty, overrides the base endpoint of the ECS client.
	// The EC2 client is not affected by it.
	Endpoint string `yaml:"endpoint"`
	// AccessKey and SecretKey are used as static credentials only when both
	// are set; otherwise the AWS SDK default credential chain applies.
	AccessKey string `yaml:"access_key,omitempty"`
	SecretKey config.Secret `yaml:"secret_key,omitempty"`
	// Profile, when non-empty, selects the shared AWS config profile.
	Profile string `yaml:"profile,omitempty"`
	// RoleARN, when non-empty, is assumed through STS and its credentials are
	// used for all requests.
	RoleARN string `yaml:"role_arn,omitempty"`
	// Clusters restricts discovery to the listed clusters. When empty, all
	// clusters returned by ListClusters are discovered.
	Clusters []string `yaml:"clusters,omitempty"`
	// Port is combined with the task IP address to form the target address.
	Port int `yaml:"port"`
	// RefreshInterval is the interval between two discovery refreshes.
	RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
	// RequestConcurrency controls the maximum number of concurrent ECS API requests.
	// Default is 20, which aligns with AWS ECS sustained rate limits:
	// - Cluster read actions (DescribeClusters, ListClusters): 20 req/sec sustained
	// - Service read actions (DescribeServices, ListServices): 20 req/sec sustained
	// - Cluster resource read actions (DescribeTasks, ListTasks): 20 req/sec sustained
	// See: https://docs.aws.amazon.com/AmazonECS/latest/APIReference/request-throttling.html
	RequestConcurrency int `yaml:"request_concurrency,omitempty"`
	// HTTPClientConfig configures the HTTP client shared by the ECS and EC2
	// API clients. Its fields are inlined into the ECS SD YAML block.
	HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
}
// NewDiscovererMetrics implements discovery.Config.
// The registerer argument is not used; the returned metrics only carry the
// refresh metrics instantiator.
func (*ECSSDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
	metrics := new(ecsMetrics)
	metrics.refreshMetrics = rmi
	return metrics
}
// Name returns the name of the ECS Config.
func (*ECSSDConfig) Name() string {
	const name = "ecs"
	return name
}
// NewDiscoverer returns a Discoverer for the ECS Config.
// It delegates to NewECSDiscovery with the receiver as configuration.
func (c *ECSSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
	return NewECSDiscovery(c, opts)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the ECS Config.
//
// The receiver is first reset to DefaultECSSDConfig so that fields missing
// from the YAML document keep their defaults. After decoding, the request
// concurrency is validated, the region is resolved through loadRegion and the
// HTTP client configuration is validated.
//
// It returns the decoding error, an error when request_concurrency is zero,
// an error when the region cannot be determined, or the result of validating
// the HTTP client configuration.
func (c *ECSSDConfig) UnmarshalYAML(unmarshal func(any) error) error {
	*c = DefaultECSSDConfig
	// plain has the fields of ECSSDConfig but none of its methods, so decoding
	// into it does not call UnmarshalYAML again.
	type plain ECSSDConfig
	err := unmarshal((*plain)(c))
	if err != nil {
		return err
	}
	// The value is passed to errgroup.Group.SetLimit. A limit of zero prevents
	// any goroutine from being started, so every concurrent ECS request would
	// block forever. Negative values mean "no limit" and remain accepted.
	if c.RequestConcurrency == 0 {
		return errors.New("ECS SD configuration requires request_concurrency to be non-zero")
	}
	c.Region, err = loadRegion(context.Background(), c.Region)
	if err != nil {
		return fmt.Errorf("could not determine AWS region: %w", err)
	}
	return c.HTTPClientConfig.Validate()
}
// ecsClient is the set of ECS API operations used by ECSDiscovery.
// ECSDiscovery stores the client built by ecs.NewFromConfig behind this
// interface.
type ecsClient interface {
	ListClusters(context.Context, *ecs.ListClustersInput, ...func(*ecs.Options)) (*ecs.ListClustersOutput, error)
	DescribeClusters(context.Context, *ecs.DescribeClustersInput, ...func(*ecs.Options)) (*ecs.DescribeClustersOutput, error)
	ListServices(context.Context, *ecs.ListServicesInput, ...func(*ecs.Options)) (*ecs.ListServicesOutput, error)
	DescribeServices(context.Context, *ecs.DescribeServicesInput, ...func(*ecs.Options)) (*ecs.DescribeServicesOutput, error)
	ListTasks(context.Context, *ecs.ListTasksInput, ...func(*ecs.Options)) (*ecs.ListTasksOutput, error)
	DescribeTasks(context.Context, *ecs.DescribeTasksInput, ...func(*ecs.Options)) (*ecs.DescribeTasksOutput, error)
	DescribeContainerInstances(context.Context, *ecs.DescribeContainerInstancesInput, ...func(*ecs.Options)) (*ecs.DescribeContainerInstancesOutput, error)
}
// ecsEC2Client is the set of EC2 API operations used by ECSDiscovery to look
// up the instances and network interfaces that ECS tasks run on.
// ECSDiscovery stores the client built by ec2.NewFromConfig behind this
// interface.
type ecsEC2Client interface {
	DescribeInstances(context.Context, *ec2.DescribeInstancesInput, ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error)
	DescribeNetworkInterfaces(context.Context, *ec2.DescribeNetworkInterfacesInput, ...func(*ec2.Options)) (*ec2.DescribeNetworkInterfacesOutput, error)
}
// ECSDiscovery periodically performs ECS-SD requests. It implements
// the Discoverer interface.
type ECSDiscovery struct {
	// Discovery drives the periodic refresh; it is set up by NewECSDiscovery
	// with the refresh method of this type as its refresh function.
	*refresh.Discovery
	logger *slog.Logger
	cfg *ECSSDConfig
	// ecs and ec2 are not set by NewECSDiscovery. initEcsClient creates them
	// when refresh runs and either of them is still nil.
	ecs ecsClient
	ec2 ecsEC2Client
}
// NewECSDiscovery returns a new ECSDiscovery which periodically refreshes its targets.
// It fails when opts.Metrics was not created by ECSSDConfig.NewDiscovererMetrics.
// A nil opts.Logger is replaced by a no-op logger.
func NewECSDiscovery(conf *ECSSDConfig, opts discovery.DiscovererOptions) (*ECSDiscovery, error) {
	metrics, isECSMetrics := opts.Metrics.(*ecsMetrics)
	if !isECSMetrics {
		return nil, errors.New("invalid discovery metrics type")
	}

	logger := opts.Logger
	if logger == nil {
		logger = promslog.NewNopLogger()
	}

	disc := &ECSDiscovery{logger: logger, cfg: conf}
	refreshOpts := refresh.Options{
		Logger:              logger,
		Mech:                "ecs",
		Interval:            time.Duration(conf.RefreshInterval),
		RefreshF:            disc.refresh,
		MetricsInstantiator: metrics.refreshMetrics,
	}
	disc.Discovery = refresh.NewDiscovery(refreshOpts)
	return disc, nil
}
// initEcsClient creates the ECS and EC2 API clients unless both already exist.
//
// The clients share one HTTP client built from the configured
// HTTPClientConfig. Credentials come from the static access/secret key pair
// when both are configured, otherwise from the AWS SDK default chain
// (optionally using the configured shared profile); a configured role ARN is
// assumed on top of them. A custom endpoint is applied to the ECS client only.
//
// After the clients are stored, a DescribeClusters call bounded by a ten
// second timeout checks that the credentials work. Its failure is returned,
// but the clients stay assigned.
func (d *ECSDiscovery) initEcsClient(ctx context.Context) error {
	if d.ecs != nil && d.ec2 != nil {
		return nil
	}
	if d.cfg.Region == "" {
		return errors.New("region must be set for ECS service discovery")
	}

	// One HTTP client, built from HTTPClientConfig, serves both AWS clients.
	httpClient, err := config.NewClientFromConfig(d.cfg.HTTPClientConfig, "ecs_sd")
	if err != nil {
		return err
	}

	loadOpts := []func(*awsConfig.LoadOptions) error{
		awsConfig.WithRegion(d.cfg.Region),
		awsConfig.WithHTTPClient(httpClient),
	}
	// Static credentials need both halves of the key pair; with anything less
	// the SDK's default credential chain is left in charge.
	if d.cfg.AccessKey != "" && d.cfg.SecretKey != "" {
		static := credentials.NewStaticCredentialsProvider(d.cfg.AccessKey, string(d.cfg.SecretKey), "")
		loadOpts = append(loadOpts, awsConfig.WithCredentialsProvider(static))
	}
	if d.cfg.Profile != "" {
		loadOpts = append(loadOpts, awsConfig.WithSharedConfigProfile(d.cfg.Profile))
	}

	awsCfg, err := awsConfig.LoadDefaultConfig(ctx, loadOpts...)
	if err != nil {
		d.logger.Error("Failed to create AWS config", "error", err)
		return fmt.Errorf("could not create aws config: %w", err)
	}

	// With a role ARN, swap the credentials for cached assume-role credentials.
	if d.cfg.RoleARN != "" {
		stsClient := sts.NewFromConfig(awsCfg)
		awsCfg.Credentials = aws.NewCredentialsCache(stscreds.NewAssumeRoleProvider(stsClient, d.cfg.RoleARN))
	}

	d.ecs = ecs.NewFromConfig(awsCfg, func(o *ecs.Options) {
		if d.cfg.Endpoint != "" {
			o.BaseEndpoint = &d.cfg.Endpoint
		}
		o.HTTPClient = httpClient
	})
	d.ec2 = ec2.NewFromConfig(awsCfg, func(o *ec2.Options) {
		o.HTTPClient = httpClient
	})

	// Probe the credentials with a cheap call that cannot hang longer than 10s.
	probeCtx, cancelProbe := context.WithTimeout(ctx, 10*time.Second)
	defer cancelProbe()
	if _, err := d.ecs.DescribeClusters(probeCtx, &ecs.DescribeClustersInput{}); err != nil {
		d.logger.Error("Failed to test ECS credentials", "error", err)
		return fmt.Errorf("ECS credential test failed: %w", err)
	}
	return nil
}
// listClusterARNs returns the ARNs of all clusters reported by ListClusters.
// Pages of up to 100 ARNs are fetched one after another, without concurrency,
// until the API stops returning a continuation token.
func (d *ECSDiscovery) listClusterARNs(ctx context.Context) ([]string, error) {
	var (
		arns  []string
		token *string
	)
	// The first page is always requested; further pages only while a token is set.
	for first := true; first || token != nil; first = false {
		page, err := d.ecs.ListClusters(ctx, &ecs.ListClustersInput{
			NextToken:  token,
			MaxResults: aws.Int32(100),
		})
		if err != nil {
			return nil, fmt.Errorf("could not list clusters: %w", err)
		}
		arns = append(arns, page.ClusterArns...)
		token = page.NextToken
	}
	return arns, nil
}
// describeClusters returns a map of cluster ARN to cluster description,
// including cluster tags.
// The clusters are described in batches of 100 (the most DescribeClusters
// accepts per call), with at most RequestConcurrency batches in flight.
// When a batch fails, the first error is returned together with whatever
// other batches managed to store.
func (d *ECSDiscovery) describeClusters(ctx context.Context, clusters []string) (map[string]types.Cluster, error) {
	var lock sync.Mutex
	byARN := make(map[string]types.Cluster)

	group, groupCtx := errgroup.WithContext(ctx)
	group.SetLimit(d.cfg.RequestConcurrency)

	for chunk := range slices.Chunk(clusters, 100) {
		group.Go(func() error {
			out, err := d.ecs.DescribeClusters(groupCtx, &ecs.DescribeClustersInput{
				Clusters: chunk,
				Include:  []types.ClusterField{"TAGS"},
			})
			if err != nil {
				d.logger.Error("Failed to describe clusters", "clusters", chunk, "error", err)
				return fmt.Errorf("could not describe clusters %v: %w", chunk, err)
			}

			lock.Lock()
			defer lock.Unlock()
			for _, described := range out.Clusters {
				if described.ClusterArn == nil {
					continue
				}
				byARN[*described.ClusterArn] = described
			}
			return nil
		})
	}

	err := group.Wait()
	return byARN, err
}
// listServiceARNs returns a map of cluster ARN to the ARNs of its services.
// Clusters are processed concurrently, at most RequestConcurrency at a time.
// Within one cluster, ListServices is paginated sequentially with pages of up
// to 100 ARNs.
func (d *ECSDiscovery) listServiceARNs(ctx context.Context, clusters []string) (map[string][]string, error) {
	var lock sync.Mutex
	byCluster := make(map[string][]string)

	group, groupCtx := errgroup.WithContext(ctx)
	group.SetLimit(d.cfg.RequestConcurrency)

	for _, cluster := range clusters {
		group.Go(func() error {
			var (
				arns  []string
				token *string
			)
			for {
				page, err := d.ecs.ListServices(groupCtx, &ecs.ListServicesInput{
					Cluster:    aws.String(cluster),
					NextToken:  token,
					MaxResults: aws.Int32(100),
				})
				if err != nil {
					return fmt.Errorf("could not list services for cluster %q: %w", cluster, err)
				}
				arns = append(arns, page.ServiceArns...)
				if token = page.NextToken; token == nil {
					break
				}
			}

			lock.Lock()
			defer lock.Unlock()
			byCluster[cluster] = arns
			return nil
		})
	}

	err := group.Wait()
	return byCluster, err
}
// describeServices returns a map of service name to service, including the
// service tags, for the given services of one cluster.
// Uses concurrent requests limited by RequestConcurrency to respect AWS API throttling.
// Services are described in batches of 10 to respect AWS API limits (DescribeServices allows up to 10 services per call).
// When a batch fails, the first error is returned together with whatever
// other batches managed to store.
func (d *ECSDiscovery) describeServices(ctx context.Context, clusterARN string, serviceARNS []string) (map[string]types.Service, error) {
	mu := sync.Mutex{}
	services := make(map[string]types.Service)
	errg, ectx := errgroup.WithContext(ctx)
	errg.SetLimit(d.cfg.RequestConcurrency)
	for batch := range slices.Chunk(serviceARNS, 10) {
		errg.Go(func() error {
			resp, err := d.ecs.DescribeServices(ectx, &ecs.DescribeServicesInput{
				Cluster:  aws.String(clusterARN),
				Services: batch,
				Include:  []types.ServiceField{"TAGS"},
			})
			if err != nil {
				d.logger.Error("Failed to describe services", "cluster", clusterARN, "batch", batch, "error", err)
				return fmt.Errorf("could not describe services for cluster %q: batch %v: %w", clusterARN, batch, err)
			}
			for _, service := range resp.Services {
				// The map is keyed by name, so the name is the pointer that
				// must be non-nil before it is dereferenced. Services without
				// an ARN are skipped as before.
				if service.ServiceArn == nil || service.ServiceName == nil {
					continue
				}
				mu.Lock()
				services[*service.ServiceName] = service
				mu.Unlock()
			}
			return nil
		})
	}
	err := errg.Wait()
	return services, err
}
// listTaskARNs returns a map of cluster ARN to the ARNs of its tasks.
// Clusters are processed concurrently, at most RequestConcurrency at a time.
// Within one cluster, ListTasks is paginated sequentially with pages of up to
// 100 ARNs, so clusters with more than 100 tasks are listed completely.
func (d *ECSDiscovery) listTaskARNs(ctx context.Context, clusterARNs []string) (map[string][]string, error) {
	var lock sync.Mutex
	byCluster := make(map[string][]string)

	group, groupCtx := errgroup.WithContext(ctx)
	group.SetLimit(d.cfg.RequestConcurrency)

	for _, cluster := range clusterARNs {
		group.Go(func() error {
			var (
				arns  []string
				token *string
			)
			for {
				page, err := d.ecs.ListTasks(groupCtx, &ecs.ListTasksInput{
					Cluster:    aws.String(cluster),
					NextToken:  token,
					MaxResults: aws.Int32(100),
				})
				if err != nil {
					return fmt.Errorf("could not list tasks for cluster %q: %w", cluster, err)
				}
				arns = append(arns, page.TaskArns...)
				if token = page.NextToken; token == nil {
					break
				}
			}

			lock.Lock()
			defer lock.Unlock()
			byCluster[cluster] = arns
			return nil
		})
	}

	err := group.Wait()
	return byCluster, err
}
// describeTasks returns the descriptions, including tags, of the given tasks
// of one cluster. The order of the returned tasks is not defined because
// batches complete concurrently.
// Uses concurrent requests limited by RequestConcurrency to respect AWS API throttling.
// Tasks are described in batches of 100 to respect AWS API limits (DescribeTasks allows up to 100 tasks per call).
// When a batch fails, the first error is returned together with the tasks of
// the batches that succeeded.
func (d *ECSDiscovery) describeTasks(ctx context.Context, clusterARN string, taskARNs []string) ([]types.Task, error) {
	mu := sync.Mutex{}
	var tasks []types.Task
	errg, ectx := errgroup.WithContext(ctx)
	errg.SetLimit(d.cfg.RequestConcurrency)
	for batch := range slices.Chunk(taskARNs, 100) {
		errg.Go(func() error {
			resp, err := d.ecs.DescribeTasks(ectx, &ecs.DescribeTasksInput{
				Cluster: aws.String(clusterARN),
				Tasks:   batch,
				Include: []types.TaskField{"TAGS"},
			})
			if err != nil {
				d.logger.Error("Failed to describe tasks", "cluster", clusterARN, "batch", batch, "error", err)
				return fmt.Errorf("could not describe tasks in cluster %q: batch %v: %w", clusterARN, batch, err)
			}
			mu.Lock()
			tasks = append(tasks, resp.Tasks...)
			mu.Unlock()
			return nil
		})
	}
	// Wait before reading tasks. In "return tasks, errg.Wait()" the language
	// does not specify whether the slice variable is read before or after the
	// call, so the result could miss tasks appended by goroutines still running.
	err := errg.Wait()
	return tasks, err
}
// describeContainerInstances returns a map of container instance ARN to EC2 instance ID
// for the container instances that the given tasks run on. Tasks without a
// container instance ARN are ignored; when no task has one, an empty map is
// returned without calling the API.
// Uses concurrent requests limited by RequestConcurrency to respect AWS API throttling.
// Container instances are described in batches of 100 to respect AWS API limits (DescribeContainerInstances allows up to 100 container instances per call).
func (d *ECSDiscovery) describeContainerInstances(ctx context.Context, clusterARN string, tasks []types.Task) (map[string]string, error) {
	// Many tasks usually share one container instance. Each ARN is requested
	// only once so that batches, and therefore API calls, are not inflated by
	// duplicates. First-seen order is kept.
	seen := make(map[string]struct{}, len(tasks))
	containerInstanceARNs := make([]string, 0, len(tasks))
	for _, task := range tasks {
		if task.ContainerInstanceArn == nil {
			continue
		}
		arn := *task.ContainerInstanceArn
		if _, dup := seen[arn]; dup {
			continue
		}
		seen[arn] = struct{}{}
		containerInstanceARNs = append(containerInstanceARNs, arn)
	}
	if len(containerInstanceARNs) == 0 {
		return make(map[string]string), nil
	}
	mu := sync.Mutex{}
	containerInstToEC2 := make(map[string]string, len(containerInstanceARNs))
	errg, ectx := errgroup.WithContext(ctx)
	errg.SetLimit(d.cfg.RequestConcurrency)
	for batch := range slices.Chunk(containerInstanceARNs, 100) {
		errg.Go(func() error {
			resp, err := d.ecs.DescribeContainerInstances(ectx, &ecs.DescribeContainerInstancesInput{
				Cluster:            aws.String(clusterARN),
				ContainerInstances: batch,
			})
			if err != nil {
				return fmt.Errorf("could not describe container instances: %w", err)
			}
			for _, ci := range resp.ContainerInstances {
				// Entries missing either identifier cannot be mapped.
				if ci.ContainerInstanceArn != nil && ci.Ec2InstanceId != nil {
					mu.Lock()
					containerInstToEC2[*ci.ContainerInstanceArn] = *ci.Ec2InstanceId
					mu.Unlock()
				}
			}
			return nil
		})
	}
	err := errg.Wait()
	return containerInstToEC2, err
}
// ec2InstanceInfo holds information retrieved from EC2 DescribeInstances.
// String fields are empty when the API response did not carry the value.
type ec2InstanceInfo struct {
	privateIP string
	publicIP string
	subnetID string
	instanceType string
	// tags maps EC2 tag keys to tag values; tags with a nil key or value are
	// left out.
	tags map[string]string
}
// describeEC2Instances returns a map of EC2 instance ID to instance information.
// Instances without an ID or without a private IP address are left out.
// This method does not use concurrency: all IDs go into one DescribeInstances
// request, which is followed page by page until no continuation token is left.
func (d *ECSDiscovery) describeEC2Instances(ctx context.Context, instanceIDs []string) (map[string]ec2InstanceInfo, error) {
	byID := make(map[string]ec2InstanceInfo)
	if len(instanceIDs) == 0 {
		return byID, nil
	}

	var token *string
	for {
		page, err := d.ec2.DescribeInstances(ctx, &ec2.DescribeInstancesInput{
			InstanceIds: instanceIDs,
			NextToken:   token,
		})
		if err != nil {
			return nil, fmt.Errorf("could not describe EC2 instances: %w", err)
		}

		for _, reservation := range page.Reservations {
			for _, inst := range reservation.Instances {
				if inst.InstanceId == nil || inst.PrivateIpAddress == nil {
					continue
				}

				// Collect EC2 instance tags that have both a key and a value.
				tags := make(map[string]string)
				for _, tag := range inst.Tags {
					if tag.Key == nil || tag.Value == nil {
						continue
					}
					tags[*tag.Key] = *tag.Value
				}

				// aws.ToString yields "" for nil, matching an unset field.
				byID[*inst.InstanceId] = ec2InstanceInfo{
					privateIP:    *inst.PrivateIpAddress,
					publicIP:     aws.ToString(inst.PublicIpAddress),
					subnetID:     aws.ToString(inst.SubnetId),
					instanceType: string(inst.InstanceType),
					tags:         tags,
				}
			}
		}

		if page.NextToken == nil {
			return byID, nil
		}
		token = page.NextToken
	}
}
// describeNetworkInterfaces returns a map of ENI ID to public IP address.
// Tasks in awsvpc network mode get their public IP from their own ENI rather
// than from the EC2 instance, which is why the ENIs are looked up separately.
// For each task only the first ElasticNetworkInterface attachment is used, and
// from it the first "networkInterfaceId" detail that has a value. ENIs without
// an associated public IP are left out of the result.
// This method does not use concurrency as it's a simple paginated call.
func (d *ECSDiscovery) describeNetworkInterfaces(ctx context.Context, tasks []types.Task) (map[string]string, error) {
	isENI := func(a types.Attachment) bool {
		return a.Type != nil && *a.Type == "ElasticNetworkInterface"
	}
	isENIID := func(kv types.KeyValuePair) bool {
		return kv.Name != nil && *kv.Name == "networkInterfaceId" && kv.Value != nil
	}

	ids := make([]string, 0, len(tasks))
	for _, task := range tasks {
		attachmentIdx := slices.IndexFunc(task.Attachments, isENI)
		if attachmentIdx < 0 {
			continue
		}
		details := task.Attachments[attachmentIdx].Details
		if detailIdx := slices.IndexFunc(details, isENIID); detailIdx >= 0 {
			ids = append(ids, *details[detailIdx].Value)
		}
	}

	publicIPs := make(map[string]string)
	if len(ids) == 0 {
		return publicIPs, nil
	}

	var token *string
	for {
		page, err := d.ec2.DescribeNetworkInterfaces(ctx, &ec2.DescribeNetworkInterfacesInput{
			NetworkInterfaceIds: ids,
			NextToken:           token,
		})
		if err != nil {
			return nil, fmt.Errorf("could not describe network interfaces: %w", err)
		}
		for _, eni := range page.NetworkInterfaces {
			if eni.NetworkInterfaceId == nil || eni.Association == nil || eni.Association.PublicIp == nil {
				continue
			}
			publicIPs[*eni.NetworkInterfaceId] = *eni.Association.PublicIp
		}
		if page.NextToken == nil {
			return publicIPs, nil
		}
		token = page.NextToken
	}
}
// refresh runs one discovery pass and returns a single target group, whose
// Source is the configured region, with one target per ECS task for which an
// IP address could be determined.
//
// The pass works as follows:
//  1. Make sure the AWS clients exist.
//  2. Use the configured clusters, or list all clusters when none are configured.
//  3. Concurrently describe the clusters and list their service and task ARNs.
//  4. For every cluster that has tasks, describe its services and tasks, then
//     its container instances and network interfaces, then its EC2 instances.
//  5. Build one label set per task.
//
// Errors in steps 1 to 3 fail the whole refresh. Errors in step 4 are logged
// and only drop the targets of the affected cluster.
func (d *ECSDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
	// No function-wide err variable is declared here on purpose: the cluster
	// goroutines below must not share an error variable.
	if err := d.initEcsClient(ctx); err != nil {
		return nil, err
	}

	clusters := d.cfg.Clusters
	if len(clusters) == 0 {
		listed, err := d.listClusterARNs(ctx)
		if err != nil {
			return nil, err
		}
		clusters = listed
	}

	if len(clusters) == 0 {
		return []*targetgroup.Group{
			{
				Source: d.cfg.Region,
			},
		}, nil
	}

	tg := &targetgroup.Group{
		Source: d.cfg.Region,
	}

	// Fetch cluster details, service ARNs, and task ARNs in parallel
	var (
		clusterMap map[string]types.Cluster
		serviceMap map[string][]string
		taskMap    map[string][]string
	)

	clusterErrg, clusterCtx := errgroup.WithContext(ctx)
	clusterErrg.Go(func() error {
		var err error
		clusterMap, err = d.describeClusters(clusterCtx, clusters)
		return err
	})
	clusterErrg.Go(func() error {
		var err error
		serviceMap, err = d.listServiceARNs(clusterCtx, clusters)
		return err
	})
	clusterErrg.Go(func() error {
		var err error
		taskMap, err = d.listTaskARNs(clusterCtx, clusters)
		return err
	})
	if err := clusterErrg.Wait(); err != nil {
		return nil, err
	}

	// Use goroutines to process clusters in parallel
	var (
		clusterWg      sync.WaitGroup
		clusterMu      sync.Mutex
		clusterTargets []model.LabelSet
	)

	for clusterARN, taskARNs := range taskMap {
		if len(taskARNs) == 0 {
			continue
		}

		// taskMap is keyed by the identifier the cluster was requested with,
		// clusterMap by the ARN found in the DescribeClusters response. When
		// the two differ there is no description to work with; skip the
		// cluster instead of dereferencing a zero-value Cluster.
		cluster, ok := clusterMap[clusterARN]
		if !ok || cluster.ClusterArn == nil {
			d.logger.Warn("Cluster description not found, skipping its tasks", "cluster", clusterARN)
			continue
		}

		clusterWg.Add(1)
		go func(cluster types.Cluster, serviceARNs, taskARNs []string) {
			defer clusterWg.Done()

			// Fetch services and tasks in parallel (they're independent)
			var (
				services map[string]types.Service
				tasks    []types.Task
			)
			resourceErrg, resourceCtx := errgroup.WithContext(ctx)
			resourceErrg.Go(func() error {
				var err error
				services, err = d.describeServices(resourceCtx, *cluster.ClusterArn, serviceARNs)
				if err != nil {
					d.logger.Error("Failed to describe services for cluster", "cluster", *cluster.ClusterArn, "error", err)
				}
				return err
			})
			resourceErrg.Go(func() error {
				var err error
				tasks, err = d.describeTasks(resourceCtx, *cluster.ClusterArn, taskARNs)
				if err != nil {
					d.logger.Error("Failed to describe tasks for cluster", "cluster", *cluster.ClusterArn, "error", err)
				}
				return err
			})
			if err := resourceErrg.Wait(); err != nil {
				return
			}

			// Fetch container instances and network interfaces in parallel (both depend on tasks)
			var (
				containerInstances map[string]string
				eniToPublicIP      map[string]string
			)
			instanceErrg, instanceCtx := errgroup.WithContext(ctx)
			instanceErrg.Go(func() error {
				var err error
				containerInstances, err = d.describeContainerInstances(instanceCtx, *cluster.ClusterArn, tasks)
				if err != nil {
					d.logger.Error("Failed to describe container instances for cluster", "cluster", *cluster.ClusterArn, "error", err)
				}
				return err
			})
			instanceErrg.Go(func() error {
				var err error
				eniToPublicIP, err = d.describeNetworkInterfaces(instanceCtx, tasks)
				if err != nil {
					d.logger.Error("Failed to describe network interfaces for cluster", "cluster", *cluster.ClusterArn, "error", err)
				}
				return err
			})
			if err := instanceErrg.Wait(); err != nil {
				return
			}

			ec2Instances := make(map[string]ec2InstanceInfo)
			if len(containerInstances) > 0 {
				// Deduplicate EC2 instance IDs (multiple tasks can share the same instance)
				ec2InstanceIDSet := make(map[string]struct{})
				for _, ec2ID := range containerInstances {
					ec2InstanceIDSet[ec2ID] = struct{}{}
				}
				ec2InstanceIDs := make([]string, 0, len(ec2InstanceIDSet))
				for ec2ID := range ec2InstanceIDSet {
					ec2InstanceIDs = append(ec2InstanceIDs, ec2ID)
				}

				// err is local to this goroutine so that clusters processed
				// in parallel do not write to a shared variable.
				var err error
				ec2Instances, err = d.describeEC2Instances(ctx, ec2InstanceIDs)
				if err != nil {
					d.logger.Error("Failed to describe EC2 instances for cluster", "cluster", *cluster.ClusterArn, "error", err)
					return
				}
			}

			var (
				taskWg      sync.WaitGroup
				taskMu      sync.Mutex
				taskTargets []model.LabelSet
			)

			for _, task := range tasks {
				taskWg.Add(1)
				go func(cluster types.Cluster, services map[string]types.Service, task types.Task, containerInstances map[string]string, ec2Instances map[string]ec2InstanceInfo, eniToPublicIP map[string]string) {
					defer taskWg.Done()

					var (
						ipAddress, subnetID, publicIP                                             string
						networkMode                                                               string
						ec2InstanceID, ec2InstanceType, ec2InstancePrivateIP, ec2InstancePublicIP string
					)

					// Try to get IP from ENI attachment (awsvpc mode)
					var eniAttachment *types.Attachment
					for i := range task.Attachments {
						attachment := &task.Attachments[i]
						if attachment.Type != nil && *attachment.Type == "ElasticNetworkInterface" {
							eniAttachment = attachment
							break
						}
					}

					if eniAttachment != nil {
						// awsvpc networking mode - get IP from ENI
						networkMode = "awsvpc"
						var eniID string
						for _, detail := range eniAttachment.Details {
							// Name and Value are optional pointers in the SDK
							// type; an incomplete detail carries no information.
							if detail.Name == nil || detail.Value == nil {
								continue
							}
							switch *detail.Name {
							case "privateIPv4Address":
								ipAddress = *detail.Value
							case "subnetId":
								subnetID = *detail.Value
							case "networkInterfaceId":
								eniID = *detail.Value
							}
						}
						// Get public IP from ENI if available
						if eniID != "" {
							if pub, ok := eniToPublicIP[eniID]; ok {
								publicIP = pub
							}
						}
					} else if task.ContainerInstanceArn != nil {
						// bridge/host networking mode - need to get EC2 instance IP and subnet
						networkMode = "bridge"
						var ok bool
						ec2InstanceID, ok = containerInstances[*task.ContainerInstanceArn]
						if ok {
							info, ok := ec2Instances[ec2InstanceID]
							if ok {
								ipAddress = info.privateIP
								publicIP = info.publicIP
								subnetID = info.subnetID
								ec2InstanceType = info.instanceType
								ec2InstancePrivateIP = info.privateIP
								ec2InstancePublicIP = info.publicIP
							} else {
								d.logger.Debug("EC2 instance info not found", "instance", ec2InstanceID, "task", aws.ToString(task.TaskArn))
							}
						} else {
							d.logger.Debug("Container instance not found in map", "arn", *task.ContainerInstanceArn, "task", aws.ToString(task.TaskArn))
						}
					}

					// Get EC2 instance metadata for awsvpc tasks running on EC2
					// We want the instance type and the host IPs for advanced use cases
					if networkMode == "awsvpc" && task.ContainerInstanceArn != nil {
						var ok bool
						ec2InstanceID, ok = containerInstances[*task.ContainerInstanceArn]
						if ok {
							info, ok := ec2Instances[ec2InstanceID]
							if ok {
								ec2InstanceType = info.instanceType
								ec2InstancePrivateIP = info.privateIP
								ec2InstancePublicIP = info.publicIP
							}
						}
					}

					// Without an IP address there is nothing to scrape.
					if ipAddress == "" {
						return
					}

					// aws.ToString maps a nil pointer to "", so a field missing
					// from the API response yields an empty label value rather
					// than a nil pointer dereference.
					labels := model.LabelSet{
						ecsLabelClusterARN:       model.LabelValue(aws.ToString(cluster.ClusterArn)),
						ecsLabelCluster:          model.LabelValue(aws.ToString(cluster.ClusterName)),
						ecsLabelTaskGroup:        model.LabelValue(aws.ToString(task.Group)),
						ecsLabelTaskARN:          model.LabelValue(aws.ToString(task.TaskArn)),
						ecsLabelTaskDefinition:   model.LabelValue(aws.ToString(task.TaskDefinitionArn)),
						ecsLabelIPAddress:        model.LabelValue(ipAddress),
						ecsLabelRegion:           model.LabelValue(d.cfg.Region),
						ecsLabelLaunchType:       model.LabelValue(task.LaunchType),
						ecsLabelAvailabilityZone: model.LabelValue(aws.ToString(task.AvailabilityZone)),
						ecsLabelDesiredStatus:    model.LabelValue(aws.ToString(task.DesiredStatus)),
						ecsLabelLastStatus:       model.LabelValue(aws.ToString(task.LastStatus)),
						ecsLabelHealthStatus:     model.LabelValue(task.HealthStatus),
						ecsLabelNetworkMode:      model.LabelValue(networkMode),
					}

					// Add subnet ID when available (awsvpc mode from ENI, bridge/host from EC2 instance)
					if subnetID != "" {
						labels[ecsLabelSubnetID] = model.LabelValue(subnetID)
					}

					// Add container instance and EC2 instance info for EC2 launch type
					if task.ContainerInstanceArn != nil {
						labels[ecsLabelContainerInstanceARN] = model.LabelValue(*task.ContainerInstanceArn)
					}
					if ec2InstanceID != "" {
						labels[ecsLabelEC2InstanceID] = model.LabelValue(ec2InstanceID)
					}
					if ec2InstanceType != "" {
						labels[ecsLabelEC2InstanceType] = model.LabelValue(ec2InstanceType)
					}
					if ec2InstancePrivateIP != "" {
						labels[ecsLabelEC2InstancePrivateIP] = model.LabelValue(ec2InstancePrivateIP)
					}
					if ec2InstancePublicIP != "" {
						labels[ecsLabelEC2InstancePublicIP] = model.LabelValue(ec2InstancePublicIP)
					}
					if publicIP != "" {
						labels[ecsLabelPublicIP] = model.LabelValue(publicIP)
					}

					if task.PlatformFamily != nil {
						labels[ecsLabelPlatformFamily] = model.LabelValue(*task.PlatformFamily)
					}
					if task.PlatformVersion != nil {
						labels[ecsLabelPlatformVersion] = model.LabelValue(*task.PlatformVersion)
					}

					labels[model.AddressLabel] = model.LabelValue(net.JoinHostPort(ipAddress, strconv.Itoa(d.cfg.Port)))

					// Add cluster tags
					for _, clusterTag := range cluster.Tags {
						if clusterTag.Key != nil && clusterTag.Value != nil {
							labels[model.LabelName(ecsLabelTagCluster+strutil.SanitizeLabelName(*clusterTag.Key))] = model.LabelValue(*clusterTag.Value)
						}
					}

					// If this is not a standalone task, add service information and tags.
					// The service name is taken from the part of the group after
					// the ":", so a group without one (or no group at all)
					// cannot name a service.
					if task.Group != nil && strings.Contains(*task.Group, ":") && !isStandaloneTask(task) {
						serviceName := getServiceNameFromTaskGroup(task)
						service, ok := services[serviceName]
						if !ok {
							d.logger.Debug("Service not found for task", "task", aws.ToString(task.TaskArn), "service", serviceName)
						}
						// When the service is unknown, service is the zero
						// value and none of the checks below add a label.
						if service.ServiceName != nil {
							labels[ecsLabelService] = model.LabelValue(*service.ServiceName)
						}
						if service.ServiceArn != nil {
							labels[ecsLabelServiceARN] = model.LabelValue(*service.ServiceArn)
						}
						if service.Status != nil {
							labels[ecsLabelServiceStatus] = model.LabelValue(*service.Status)
						}

						// Add service tags
						for _, serviceTag := range service.Tags {
							if serviceTag.Key != nil && serviceTag.Value != nil {
								labels[model.LabelName(ecsLabelTagService+strutil.SanitizeLabelName(*serviceTag.Key))] = model.LabelValue(*serviceTag.Value)
							}
						}
					}

					// Add task tags
					for _, taskTag := range task.Tags {
						if taskTag.Key != nil && taskTag.Value != nil {
							labels[model.LabelName(ecsLabelTagTask+strutil.SanitizeLabelName(*taskTag.Key))] = model.LabelValue(*taskTag.Value)
						}
					}

					// Add EC2 instance tags (if running on EC2)
					if ec2InstanceID != "" {
						if info, ok := ec2Instances[ec2InstanceID]; ok {
							for tagKey, tagValue := range info.tags {
								labels[model.LabelName(ecsLabelTagEC2+strutil.SanitizeLabelName(tagKey))] = model.LabelValue(tagValue)
							}
						}
					}

					taskMu.Lock()
					taskTargets = append(taskTargets, labels)
					taskMu.Unlock()
				}(cluster, services, task, containerInstances, ec2Instances, eniToPublicIP)
			}

			taskWg.Wait()

			// Add this cluster's task targets to the overall collection
			clusterMu.Lock()
			clusterTargets = append(clusterTargets, taskTargets...)
			clusterMu.Unlock()
		}(cluster, serviceMap[clusterARN], taskARNs)
	}

	clusterWg.Wait()

	// Set all targets to the target group
	tg.Targets = clusterTargets

	return []*targetgroup.Group{tg}, nil
}
// isStandaloneTask reports whether the task was started outside of a service.
// Such a task has a group of the form "family:task-def-name". A task without
// a group is not considered standalone.
func isStandaloneTask(task types.Task) bool {
	if task.Group == nil {
		return false
	}
	return strings.HasPrefix(*task.Group, "family:")
}
// getServiceNameFromTaskGroup returns the second ":"-separated segment of the
// task's group, which for a group of the form "service:name" is the service
// name. It returns "" when the task has no group or the group contains no
// ":", instead of panicking on a nil pointer or an out-of-range index.
func getServiceNameFromTaskGroup(task types.Task) string {
	if task.Group == nil {
		return ""
	}
	segments := strings.Split(*task.Group, ":")
	if len(segments) < 2 {
		return ""
	}
	return segments[1]
}

1807
discovery/aws/ecs_test.go Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,907 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"context"
"errors"
"fmt"
"log/slog"
"maps"
"net"
"strconv"
"strings"
"sync"
"time"
"github.com/aws/aws-sdk-go-v2/aws"
awsConfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/credentials/stscreds"
"github.com/aws/aws-sdk-go-v2/service/elasticache"
"github.com/aws/aws-sdk-go-v2/service/elasticache/types"
"github.com/aws/aws-sdk-go-v2/service/sts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"golang.org/x/sync/errgroup"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)
// Meta labels attached to discovered Elasticache targets.
//
// Every constant ending in a trailing underscore is a prefix that the
// constants below it extend. Constants marked "slice" are used as
// "<label>_<index>" with one label per element.
const (
	elasticacheLabel = model.MetaLabelPrefix + "elasticache_"
	// Deployment option of the target ("serverless" or "node").
	elasticacheLabelDeploymentOption = elasticacheLabel + "deployment_option"

	// Cache cluster.
	elasticacheLabelCacheCluster = elasticacheLabel + "cache_cluster_"
	elasticacheLabelCacheClusterARN = elasticacheLabelCacheCluster + "arn"
	elasticacheLabelCacheClusterAtRestEncryptionEnabled = elasticacheLabelCacheCluster + "at_rest_encryption_enabled"
	elasticacheLabelCacheClusterAuthTokenEnabled = elasticacheLabelCacheCluster + "auth_token_enabled"
	elasticacheLabelCacheClusterAuthTokenLastModified = elasticacheLabelCacheCluster + "auth_token_last_modified"
	elasticacheLabelCacheClusterAutoMinorVersionUpgrade = elasticacheLabelCacheCluster + "auto_minor_version_upgrade"
	elasticacheLabelCacheClusterCreateTime = elasticacheLabelCacheCluster + "cache_cluster_create_time"
	elasticacheLabelCacheClusterID = elasticacheLabelCacheCluster + "cache_cluster_id"
	elasticacheLabelCacheClusterStatus = elasticacheLabelCacheCluster + "cache_cluster_status"
	elasticacheLabelCacheClusterNodeType = elasticacheLabelCacheCluster + "cache_node_type"
	elasticacheLabelCacheClusterParameterGroup = elasticacheLabelCacheCluster + "cache_parameter_group"
	elasticacheLabelCacheClusterSubnetGroupName = elasticacheLabelCacheCluster + "cache_subnet_group_name"
	elasticacheLabelCacheClusterClientDownloadLandingPage = elasticacheLabelCacheCluster + "client_download_landing_page"
	elasticacheLabelCacheClusterEngine = elasticacheLabelCacheCluster + "engine"
	elasticacheLabelCacheClusterEngineVersion = elasticacheLabelCacheCluster + "engine_version"
	elasticacheLabelCacheClusterIPDiscovery = elasticacheLabelCacheCluster + "ip_discovery"
	elasticacheLabelCacheClusterNetworkType = elasticacheLabelCacheCluster + "network_type"
	elasticacheLabelCacheClusterNumCacheNodes = elasticacheLabelCacheCluster + "num_cache_nodes"
	elasticacheLabelCacheClusterPreferredAvailabilityZone = elasticacheLabelCacheCluster + "preferred_availability_zone"
	elasticacheLabelCacheClusterPreferredMaintenanceWindow = elasticacheLabelCacheCluster + "preferred_maintenance_window"
	elasticacheLabelCacheClusterPreferredOutpostARN = elasticacheLabelCacheCluster + "preferred_outpost_arn"
	elasticacheLabelCacheClusterReplicationGroupID = elasticacheLabelCacheCluster + "replication_group_id"
	elasticacheLabelCacheClusterReplicationGroupLogDeliveryEnabled = elasticacheLabelCacheCluster + "replication_group_log_delivery_enabled"
	elasticacheLabelCacheClusterSnapshotRetentionLimit = elasticacheLabelCacheCluster + "snapshot_retention_limit"
	elasticacheLabelCacheClusterSnapshotWindow = elasticacheLabelCacheCluster + "snapshot_window"
	elasticacheLabelCacheClusterTransitEncryptionEnabled = elasticacheLabelCacheCluster + "transit_encryption_enabled"
	elasticacheLabelCacheClusterTransitEncryptionMode = elasticacheLabelCacheCluster + "transit_encryption_mode"

	// Cache cluster configuration endpoint.
	elasticacheLabelCacheClusterConfigurationEndpoint = elasticacheLabelCacheCluster + "configuration_endpoint_"
	elasticacheLabelCacheClusterConfigurationEndpointAddress = elasticacheLabelCacheClusterConfigurationEndpoint + "address"
	elasticacheLabelCacheClusterConfigurationEndpointPort = elasticacheLabelCacheClusterConfigurationEndpoint + "port"

	// Cache cluster notification configuration.
	elasticacheLabelCacheClusterNotification = elasticacheLabelCacheCluster + "notification_"
	elasticacheLabelCacheClusterNotificationTopicARN = elasticacheLabelCacheClusterNotification + "topic_arn"
	elasticacheLabelCacheClusterNotificationTopicStatus = elasticacheLabelCacheClusterNotification + "topic_status"

	// Cache cluster log delivery configuration (slice - use with index).
	elasticacheLabelCacheClusterLogDeliveryConfiguration = elasticacheLabelCacheCluster + "log_delivery_configuration_"
	elasticacheLabelCacheClusterLogDeliveryConfigurationDestinationType = elasticacheLabelCacheClusterLogDeliveryConfiguration + "destination_type"
	elasticacheLabelCacheClusterLogDeliveryConfigurationLogFormat = elasticacheLabelCacheClusterLogDeliveryConfiguration + "log_format"
	elasticacheLabelCacheClusterLogDeliveryConfigurationLogType = elasticacheLabelCacheClusterLogDeliveryConfiguration + "log_type"
	elasticacheLabelCacheClusterLogDeliveryConfigurationStatus = elasticacheLabelCacheClusterLogDeliveryConfiguration + "status"
	elasticacheLabelCacheClusterLogDeliveryConfigurationMessage = elasticacheLabelCacheClusterLogDeliveryConfiguration + "message"
	elasticacheLabelCacheClusterLogDeliveryConfigurationLogGroup = elasticacheLabelCacheClusterLogDeliveryConfiguration + "log_group"
	elasticacheLabelCacheClusterLogDeliveryConfigurationDeliveryStream = elasticacheLabelCacheClusterLogDeliveryConfiguration + "delivery_stream"

	// Cache cluster pending modified values.
	elasticacheLabelCacheClusterPendingModifiedValues = elasticacheLabelCacheCluster + "pending_modified_values_"
	elasticacheLabelCacheClusterPendingModifiedValuesAuthTokenStatus = elasticacheLabelCacheClusterPendingModifiedValues + "auth_token_status"
	elasticacheLabelCacheClusterPendingModifiedValuesCacheNodeType = elasticacheLabelCacheClusterPendingModifiedValues + "cache_node_type"
	elasticacheLabelCacheClusterPendingModifiedValuesEngineVersion = elasticacheLabelCacheClusterPendingModifiedValues + "engine_version"
	elasticacheLabelCacheClusterPendingModifiedValuesNumCacheNodes = elasticacheLabelCacheClusterPendingModifiedValues + "num_cache_nodes"
	elasticacheLabelCacheClusterPendingModifiedValuesTransitEncryptionEnabled = elasticacheLabelCacheClusterPendingModifiedValues + "transit_encryption_enabled"
	elasticacheLabelCacheClusterPendingModifiedValuesTransitEncryptionMode = elasticacheLabelCacheClusterPendingModifiedValues + "transit_encryption_mode"
	elasticacheLabelCacheClusterPendingModifiedValuesCacheNodeIDsToRemove = elasticacheLabelCacheClusterPendingModifiedValues + "cache_node_ids_to_remove"

	// Cache cluster security group membership (slice - use with index).
	elasticacheLabelCacheClusterSecurityGroupMembership = elasticacheLabelCacheCluster + "security_group_membership_"
	elasticacheLabelCacheClusterSecurityGroupMembershipID = elasticacheLabelCacheClusterSecurityGroupMembership + "id"
	elasticacheLabelCacheClusterSecurityGroupMembershipStatus = elasticacheLabelCacheClusterSecurityGroupMembership + "status"

	// Cache cluster tags - one label per tag key, with the format: elasticache_cache_cluster_tag_<tagkey>.
	elasticacheLabelCacheClusterTag = elasticacheLabelCacheCluster + "tag_"

	// Cache cluster node.
	elasticacheLabelCacheClusterNode = elasticacheLabelCacheCluster + "node_"
	elasticacheLabelCacheClusterNodeCreateTime = elasticacheLabelCacheClusterNode + "create_time"
	elasticacheLabelCacheClusterNodeID = elasticacheLabelCacheClusterNode + "id"
	elasticacheLabelCacheClusterNodeStatus = elasticacheLabelCacheClusterNode + "status"
	elasticacheLabelCacheClusterNodeAZ = elasticacheLabelCacheClusterNode + "availability_zone"
	elasticacheLabelCacheClusterNodeCustomerOutpostARN = elasticacheLabelCacheClusterNode + "customer_outpost_arn"
	elasticacheLabelCacheClusterNodeSourceCacheNodeID = elasticacheLabelCacheClusterNode + "source_cache_node_id"
	elasticacheLabelCacheClusterNodeParameterGroupStatus = elasticacheLabelCacheClusterNode + "parameter_group_status"

	// Cache cluster node endpoint.
	elasticacheLabelCacheClusterNodeEndpoint = elasticacheLabelCacheClusterNode + "endpoint_"
	elasticacheLabelCacheClusterNodeEndpointAddress = elasticacheLabelCacheClusterNodeEndpoint + "address"
	elasticacheLabelCacheClusterNodeEndpointPort = elasticacheLabelCacheClusterNodeEndpoint + "port"

	// Serverless cache.
	elasticacheLabelServerlessCache = elasticacheLabel + "serverless_cache_"
	elasticacheLabelServerlessCacheARN = elasticacheLabelServerlessCache + "arn"
	elasticacheLabelServerlessCacheName = elasticacheLabelServerlessCache + "name"
	elasticacheLabelServerlessCacheCreateTime = elasticacheLabelServerlessCache + "create_time"
	elasticacheLabelServerlessCacheDescription = elasticacheLabelServerlessCache + "description"
	elasticacheLabelServerlessCacheEngine = elasticacheLabelServerlessCache + "engine"
	elasticacheLabelServerlessCacheFullEngineVersion = elasticacheLabelServerlessCache + "full_engine_version"
	elasticacheLabelServerlessCacheMajorEngineVersion = elasticacheLabelServerlessCache + "major_engine_version"
	elasticacheLabelServerlessCacheStatus = elasticacheLabelServerlessCache + "status"
	elasticacheLabelServerlessCacheKmsKeyID = elasticacheLabelServerlessCache + "kms_key_id"
	elasticacheLabelServerlessCacheUserGroupID = elasticacheLabelServerlessCache + "user_group_id"
	elasticacheLabelServerlessCacheDailySnapshotTime = elasticacheLabelServerlessCache + "daily_snapshot_time"
	elasticacheLabelServerlessCacheSnapshotRetentionLimit = elasticacheLabelServerlessCache + "snapshot_retention_limit"

	// Serverless cache endpoint and reader endpoint.
	elasticacheLabelServerlessCacheEndpoint = elasticacheLabelServerlessCache + "endpoint_"
	elasticacheLabelServerlessCacheEndpointAddress = elasticacheLabelServerlessCacheEndpoint + "address"
	elasticacheLabelServerlessCacheEndpointPort = elasticacheLabelServerlessCacheEndpoint + "port"
	elasticacheLabelServerlessCacheReaderEndpointAddress = elasticacheLabelServerlessCacheEndpoint + "reader_address"
	elasticacheLabelServerlessCacheReaderEndpointPort = elasticacheLabelServerlessCacheEndpoint + "reader_port"

	// Serverless cache security group IDs (slice - use with index).
	elasticacheLabelServerlessCacheSecurityGroupID = elasticacheLabelServerlessCache + "security_group_id"

	// Serverless cache subnet IDs (slice - use with index).
	elasticacheLabelServerlessCacheSubnetID = elasticacheLabelServerlessCache + "subnet_id"

	// Serverless cache usage limits.
	// The data_storage and ecpu_per_second constants carry no trailing
	// underscore, so the labels derived from them add the separator explicitly
	// (e.g. "..._cache_usage_limit_data_storage_maximum").
	elasticacheLabelServerlessCacheCacheUsageLimit = elasticacheLabelServerlessCache + "cache_usage_limit_"
	elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorage = elasticacheLabelServerlessCacheCacheUsageLimit + "data_storage"
	elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorageMaximum = elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorage + "_maximum"
	elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorageMinimum = elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorage + "_minimum"
	elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorageUnit = elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorage + "_unit"
	elasticacheLabelServerlessCacheCacheUsageLimitECPUPerSecond = elasticacheLabelServerlessCacheCacheUsageLimit + "ecpu_per_second"
	elasticacheLabelServerlessCacheCacheUsageLimitECPUPerSecondMaximum = elasticacheLabelServerlessCacheCacheUsageLimitECPUPerSecond + "_maximum"
	elasticacheLabelServerlessCacheCacheUsageLimitECPUPerSecondMinimum = elasticacheLabelServerlessCacheCacheUsageLimitECPUPerSecond + "_minimum"

	// Serverless cache tags - one label per tag key, with the format: elasticache_serverless_cache_tag_<tagkey>.
	elasticacheLabelServerlessCacheTag = elasticacheLabelServerlessCache + "tag_"
)
// DefaultElasticacheSDConfig is the default Elasticache SD configuration.
// UnmarshalYAML copies it into the receiver before decoding, so these values
// apply to every field the YAML document leaves unset.
var DefaultElasticacheSDConfig = ElasticacheSDConfig{
Port: 80,
// Refresh once per minute.
RefreshInterval: model.Duration(60 * time.Second),
// Used as the errgroup limit for concurrent Elasticache API calls.
RequestConcurrency: 10,
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
// init registers ElasticacheSDConfig with the discovery package.
func init() {
discovery.RegisterConfig(&ElasticacheSDConfig{})
}
// ElasticacheSDConfig is the configuration for Elasticache based service discovery.
type ElasticacheSDConfig struct {
// Region is the AWS region to query. UnmarshalYAML resolves it through
// loadRegion, and it is also used as the Source of the produced target group.
Region string `yaml:"region"`
// Endpoint, when non-empty, overrides the base endpoint of the Elasticache client.
Endpoint string `yaml:"endpoint"`
// AccessKey and SecretKey are used as static credentials only when both
// are set; otherwise the AWS SDK default credential chain applies.
AccessKey string `yaml:"access_key,omitempty"`
SecretKey config.Secret `yaml:"secret_key,omitempty"`
// Profile, when non-empty, selects the shared AWS config profile.
Profile string `yaml:"profile,omitempty"`
// RoleARN, when non-empty, is assumed through STS to obtain credentials.
RoleARN string `yaml:"role_arn,omitempty"`
// Clusters lists the ARNs of the caches to discover; see
// splitCacheDeploymentOptions for the recognised ARN formats.
Clusters []string `yaml:"clusters,omitempty"`
Port int `yaml:"port"`
// RefreshInterval is the interval handed to the refresh-based discovery.
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
// RequestConcurrency controls the maximum number of concurrent Elasticache API requests.
RequestConcurrency int `yaml:"request_concurrency,omitempty"`
// HTTPClientConfig configures the HTTP client used for AWS API calls.
HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
}
// NewDiscovererMetrics implements discovery.Config. The returned metrics only
// carry the refresh metrics instantiator; the registerer is not used.
func (*ElasticacheSDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
	metrics := new(elasticacheMetrics)
	metrics.refreshMetrics = rmi
	return metrics
}
// Name returns the identifier under which the Elasticache Config is known.
func (*ElasticacheSDConfig) Name() string {
	const mechanism = "elasticache"
	return mechanism
}
// NewDiscoverer returns a Discoverer for the Elasticache Config.
//
// On failure it returns a literal nil Discoverer. Forwarding the
// (*ElasticacheDiscovery, error) pair directly would wrap a nil pointer in a
// non-nil interface value.
func (c *ElasticacheSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
	d, err := NewElasticacheDiscovery(c, opts)
	if err != nil {
		return nil, err
	}
	return d, nil
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the Elasticache Config.
//
// The receiver is first reset to DefaultElasticacheSDConfig so that omitted
// fields keep their defaults. After decoding, the region is resolved through
// loadRegion and the HTTP client configuration is validated.
//
// An explicit request_concurrency of zero is rejected: it is used as an
// errgroup limit, and a limit of zero prevents any goroutine from being
// started, which would block every refresh forever.
func (c *ElasticacheSDConfig) UnmarshalYAML(unmarshal func(any) error) error {
	*c = DefaultElasticacheSDConfig
	type plain ElasticacheSDConfig
	err := unmarshal((*plain)(c))
	if err != nil {
		return err
	}

	if c.RequestConcurrency == 0 {
		return errors.New("elasticache SD configuration requires request_concurrency to be non-zero")
	}

	c.Region, err = loadRegion(context.Background(), c.Region)
	if err != nil {
		return fmt.Errorf("could not determine AWS region: %w", err)
	}

	return c.HTTPClientConfig.Validate()
}
// elasticacheClient is the subset of the Elasticache API client that the
// discovery calls. The discovery stores its client behind this interface.
type elasticacheClient interface {
// DescribeServerlessCaches is used to list serverless caches, optionally by name.
DescribeServerlessCaches(ctx context.Context, params *elasticache.DescribeServerlessCachesInput, optFns ...func(*elasticache.Options)) (*elasticache.DescribeServerlessCachesOutput, error)
// DescribeCacheClusters is used to list cache clusters, optionally by ID.
DescribeCacheClusters(ctx context.Context, params *elasticache.DescribeCacheClustersInput, optFns ...func(*elasticache.Options)) (*elasticache.DescribeCacheClustersOutput, error)
// ListTagsForResource is used to fetch the tags of a resource identified by its ARN.
ListTagsForResource(ctx context.Context, params *elasticache.ListTagsForResourceInput, optFns ...func(*elasticache.Options)) (*elasticache.ListTagsForResourceOutput, error)
}
// ElasticacheDiscovery periodically performs Elasticache-SD requests.
// It implements the Discoverer interface.
type ElasticacheDiscovery struct {
// Discovery drives the periodic calls to refresh.
*refresh.Discovery
logger *slog.Logger
cfg *ElasticacheSDConfig
// elasticacheClient is created by initElasticacheClient and reused once set.
elasticacheClient elasticacheClient
}
// NewElasticacheDiscovery builds an ElasticacheDiscovery that refreshes its
// targets every conf.RefreshInterval.
//
// It fails when opts.Metrics was not created by
// ElasticacheSDConfig.NewDiscovererMetrics. A nil opts.Logger is replaced by a
// no-op logger.
func NewElasticacheDiscovery(conf *ElasticacheSDConfig, opts discovery.DiscovererOptions) (*ElasticacheDiscovery, error) {
	metrics, isElasticacheMetrics := opts.Metrics.(*elasticacheMetrics)
	if !isElasticacheMetrics {
		return nil, errors.New("invalid discovery metrics type")
	}

	logger := opts.Logger
	if logger == nil {
		logger = promslog.NewNopLogger()
	}

	disc := &ElasticacheDiscovery{logger: logger, cfg: conf}

	refreshOpts := refresh.Options{
		Logger:              logger,
		Mech:                "elasticache",
		Interval:            time.Duration(conf.RefreshInterval),
		RefreshF:            disc.refresh,
		MetricsInstantiator: metrics.refreshMetrics,
	}
	disc.Discovery = refresh.NewDiscovery(refreshOpts)

	return disc, nil
}
// initElasticacheClient creates the Elasticache API client on first use and
// is a no-op once a client is set.
//
// The client is built from the configured region, HTTP client settings,
// optional static credentials, shared profile, assumed role and endpoint
// override. After construction a DescribeCacheClusters call with a 10 second
// timeout is issued to check that the credentials work.
func (d *ElasticacheDiscovery) initElasticacheClient(ctx context.Context) error {
	if d.elasticacheClient != nil {
		return nil
	}

	region := d.cfg.Region
	if region == "" {
		return errors.New("region must be set for Elasticache service discovery")
	}

	// HTTP client derived from the user supplied HTTPClientConfig.
	httpClient, err := config.NewClientFromConfig(d.cfg.HTTPClientConfig, "elasticache_sd")
	if err != nil {
		return err
	}

	loadOpts := []func(*awsConfig.LoadOptions) error{
		awsConfig.WithRegion(region),
		awsConfig.WithHTTPClient(httpClient),
	}

	// Static credentials are used only when both halves are present;
	// otherwise the SDK falls back to its default credential chain.
	if d.cfg.AccessKey != "" && d.cfg.SecretKey != "" {
		static := credentials.NewStaticCredentialsProvider(d.cfg.AccessKey, string(d.cfg.SecretKey), "")
		loadOpts = append(loadOpts, awsConfig.WithCredentialsProvider(static))
	}

	if profile := d.cfg.Profile; profile != "" {
		loadOpts = append(loadOpts, awsConfig.WithSharedConfigProfile(profile))
	}

	awsCfg, err := awsConfig.LoadDefaultConfig(ctx, loadOpts...)
	if err != nil {
		d.logger.Error("Failed to create AWS config", "error", err)
		return fmt.Errorf("could not create aws config: %w", err)
	}

	// With a role ARN configured, credentials come from assuming that role.
	if roleARN := d.cfg.RoleARN; roleARN != "" {
		stsClient := sts.NewFromConfig(awsCfg)
		awsCfg.Credentials = aws.NewCredentialsCache(stscreds.NewAssumeRoleProvider(stsClient, roleARN))
	}

	d.elasticacheClient = elasticache.NewFromConfig(awsCfg, func(options *elasticache.Options) {
		if d.cfg.Endpoint != "" {
			options.BaseEndpoint = &d.cfg.Endpoint
		}
		options.HTTPClient = httpClient
	})

	// Probe the API once so that credential problems surface immediately.
	probeCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	if _, err = d.elasticacheClient.DescribeCacheClusters(probeCtx, &elasticache.DescribeCacheClustersInput{}); err != nil {
		d.logger.Error("Failed to test Elasticache credentials", "error", err)
		return fmt.Errorf("elasticache credential test failed: %w", err)
	}

	return nil
}
// describeServerlessCaches calls the DescribeServerlessCaches API and returns
// the serverless caches found.
//
// With no cache names it pages through all serverless caches. Otherwise it
// issues one request per name, at most d.cfg.RequestConcurrency at a time.
//
// The result is read only after errg.Wait has returned. The Go specification
// does not order a plain variable read against a function call in the same
// return statement, so "return serverlessCaches, errg.Wait()" is not
// guaranteed to observe the goroutines' appends. On error the result is nil.
func (d *ElasticacheDiscovery) describeServerlessCaches(ctx context.Context, caches []string) ([]types.ServerlessCache, error) {
	var (
		mu               sync.Mutex
		serverlessCaches []types.ServerlessCache
	)

	errg, ectx := errgroup.WithContext(ctx)
	errg.SetLimit(d.cfg.RequestConcurrency)

	if len(caches) == 0 {
		errg.Go(func() error {
			var nextToken *string
			for {
				output, err := d.elasticacheClient.DescribeServerlessCaches(ectx, &elasticache.DescribeServerlessCachesInput{
					MaxResults: aws.Int32(50),
					NextToken:  nextToken,
				})
				if err != nil {
					return fmt.Errorf("failed to describe serverless caches: %w", err)
				}

				mu.Lock()
				serverlessCaches = append(serverlessCaches, output.ServerlessCaches...)
				mu.Unlock()

				// A nil token marks the last page.
				if output.NextToken == nil {
					return nil
				}
				nextToken = output.NextToken
			}
		})
	} else {
		for _, cacheID := range caches {
			errg.Go(func() error {
				output, err := d.elasticacheClient.DescribeServerlessCaches(ectx, &elasticache.DescribeServerlessCachesInput{
					MaxResults:          aws.Int32(50),
					NextToken:           nil,
					ServerlessCacheName: aws.String(cacheID),
				})
				if err != nil {
					return fmt.Errorf("failed to describe serverless cache %s: %w", cacheID, err)
				}

				mu.Lock()
				serverlessCaches = append(serverlessCaches, output.ServerlessCaches...)
				mu.Unlock()
				return nil
			})
		}
	}

	if err := errg.Wait(); err != nil {
		return nil, err
	}
	return serverlessCaches, nil
}
// describeCacheClusters calls the DescribeCacheClusters API and returns the
// cache clusters found, including their node information.
//
// With no IDs it pages through all cache clusters. Otherwise it queries each
// ID individually. Every query runs once with
// ShowCacheClustersNotInReplicationGroups unset and once with it set, at most
// d.cfg.RequestConcurrency requests at a time.
//
// Per the AWS API reference the flag only restricts the result to clusters
// outside a replication group, so such clusters can appear in both result
// sets. The result is therefore de-duplicated by ARN. Clusters without an ARN
// are kept as they are.
//
// The result is read only after errg.Wait has returned, because a variable
// read is not ordered against a function call within one return statement.
// On error the result is nil.
func (d *ElasticacheDiscovery) describeCacheClusters(ctx context.Context, caches []string) ([]types.CacheCluster, error) {
	var (
		mu            sync.Mutex
		cacheClusters []types.CacheCluster
	)

	errg, ectx := errgroup.WithContext(ctx)
	errg.SetLimit(d.cfg.RequestConcurrency)

	showCacheClustersNotInReplicationGroupsBools := []bool{false, true}

	if len(caches) == 0 {
		for _, showCacheClustersNotInReplicationGroupsBool := range showCacheClustersNotInReplicationGroupsBools {
			errg.Go(func() error {
				var nextToken *string
				for {
					output, err := d.elasticacheClient.DescribeCacheClusters(ectx, &elasticache.DescribeCacheClustersInput{
						MaxRecords:                              aws.Int32(100),
						Marker:                                  nextToken,
						ShowCacheNodeInfo:                       aws.Bool(true),
						ShowCacheClustersNotInReplicationGroups: aws.Bool(showCacheClustersNotInReplicationGroupsBool),
					})
					if err != nil {
						return fmt.Errorf("failed to describe cache clusters: %w", err)
					}

					mu.Lock()
					cacheClusters = append(cacheClusters, output.CacheClusters...)
					mu.Unlock()

					// A nil marker marks the last page.
					if output.Marker == nil {
						return nil
					}
					nextToken = output.Marker
				}
			})
		}
	} else {
		for _, cacheID := range caches {
			for _, showCacheClustersNotInReplicationGroupsBool := range showCacheClustersNotInReplicationGroupsBools {
				errg.Go(func() error {
					output, err := d.elasticacheClient.DescribeCacheClusters(ectx, &elasticache.DescribeCacheClustersInput{
						MaxRecords:                              aws.Int32(100),
						Marker:                                  nil,
						ShowCacheNodeInfo:                       aws.Bool(true),
						ShowCacheClustersNotInReplicationGroups: aws.Bool(showCacheClustersNotInReplicationGroupsBool),
						CacheClusterId:                          aws.String(cacheID),
					})
					if err != nil {
						return fmt.Errorf("failed to describe cache cluster %s: %w", cacheID, err)
					}

					mu.Lock()
					cacheClusters = append(cacheClusters, output.CacheClusters...)
					mu.Unlock()
					return nil
				})
			}
		}
	}

	if err := errg.Wait(); err != nil {
		return nil, err
	}

	// Drop repeated clusters in place, keeping the first occurrence of each ARN.
	seen := make(map[string]struct{}, len(cacheClusters))
	unique := cacheClusters[:0]
	for _, cluster := range cacheClusters {
		if cluster.ARN != nil {
			if _, dup := seen[*cluster.ARN]; dup {
				continue
			}
			seen[*cluster.ARN] = struct{}{}
		}
		unique = append(unique, cluster)
	}
	return unique, nil
}
// listTagsForResource calls the ListTagsForResource API once per resource ARN,
// at most d.cfg.RequestConcurrency requests at a time, and returns a map of
// resource ARN to its list of tags.
//
// The map is read only after errg.Wait has returned, because a variable read
// is not ordered against a function call within one return statement. On
// error the result is nil.
func (d *ElasticacheDiscovery) listTagsForResource(ctx context.Context, resourceARNs []string) (map[string][]types.Tag, error) {
	var mu sync.Mutex
	tagsByResourceARN := make(map[string][]types.Tag, len(resourceARNs))

	errg, ectx := errgroup.WithContext(ctx)
	errg.SetLimit(d.cfg.RequestConcurrency)

	for _, resourceARN := range resourceARNs {
		errg.Go(func() error {
			output, err := d.elasticacheClient.ListTagsForResource(ectx, &elasticache.ListTagsForResourceInput{
				ResourceName: aws.String(resourceARN),
			})
			if err != nil {
				return fmt.Errorf("failed to list tags for resource %s: %w", resourceARN, err)
			}

			mu.Lock()
			tagsByResourceARN[resourceARN] = output.TagList
			mu.Unlock()
			return nil
		})
	}

	if err := errg.Wait(); err != nil {
		return nil, err
	}
	return tagsByResourceARN, nil
}
// refresh discovers the configured Elasticache resources and returns them as
// a single target group whose Source is the configured region.
//
// Serverless caches and cache clusters are described concurrently, exactly
// once each. Their tags are then fetched by ARN, and the targets are built
// sequentially. Building them sequentially matters because
// addServerlessCacheTargets and addCacheClusterTargets both take the same
// target group, which must not be mutated from two goroutines at once.
//
// When d.cfg.Clusters is empty everything is discovered. When it is set, a
// deployment option with no matching ARN in the list is skipped entirely
// instead of falling back to "describe all".
func (d *ElasticacheDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
	if err := d.initElasticacheClient(ctx); err != nil {
		return nil, err
	}

	serverlessCacheIDs, cacheClusterIDs := splitCacheDeploymentOptions(d.cfg.Clusters)
	filtered := len(d.cfg.Clusters) > 0

	var (
		serverlessCaches []types.ServerlessCache
		cacheClusters    []types.CacheCluster
	)

	// Each goroutine writes only its own result variable, so no lock is needed.
	errg, ectx := errgroup.WithContext(ctx)
	if !filtered || len(serverlessCacheIDs) > 0 {
		errg.Go(func() error {
			caches, err := d.describeServerlessCaches(ectx, serverlessCacheIDs)
			if err != nil {
				return fmt.Errorf("failed to describe serverless caches: %w", err)
			}
			serverlessCaches = caches
			return nil
		})
	}
	if !filtered || len(cacheClusterIDs) > 0 {
		errg.Go(func() error {
			clusters, err := d.describeCacheClusters(ectx, cacheClusterIDs)
			if err != nil {
				return fmt.Errorf("failed to describe cache clusters: %w", err)
			}
			cacheClusters = clusters
			return nil
		})
	}
	if err := errg.Wait(); err != nil {
		return nil, err
	}

	// Collect the ARNs whose tags need to be looked up.
	resourceARNs := make([]string, 0, len(serverlessCaches)+len(cacheClusters))
	for i := range serverlessCaches {
		if arn := serverlessCaches[i].ARN; arn != nil {
			resourceARNs = append(resourceARNs, *arn)
		}
	}
	for i := range cacheClusters {
		if arn := cacheClusters[i].ARN; arn != nil {
			resourceARNs = append(resourceARNs, *arn)
		}
	}

	tagsByResourceARN, err := d.listTagsForResource(ctx, resourceARNs)
	if err != nil {
		return nil, fmt.Errorf("failed to list tags for resources: %w", err)
	}

	tg := &targetgroup.Group{
		Source: d.cfg.Region,
	}
	for i := range serverlessCaches {
		cache := &serverlessCaches[i]
		addServerlessCacheTargets(tg, cache, tagsByResourceARN[aws.ToString(cache.ARN)])
	}
	for i := range cacheClusters {
		cluster := &cacheClusters[i]
		addCacheClusterTargets(tg, cluster, tagsByResourceARN[aws.ToString(cluster.ARN)])
	}

	return []*targetgroup.Group{tg}, nil
}
// splitCacheDeploymentOptions takes a list of cache ARNs and splits them into
// serverless cache IDs and cache cluster IDs based on their resource type.
// Serverless caches are in the format arn:aws:elasticache:<REGION>:<ACCOUNT_ID>:serverlesscache:<CACHE_NAME>
// Cache clusters are in the format arn:aws:elasticache:<REGION>:<ACCOUNT_ID>:replicationgroup:<CACHE_CLUSTER_ID>.
//
// Entries that are empty, have fewer than seven colon-separated fields, or
// carry any other resource type are ignored.
func splitCacheDeploymentOptions(caches []string) (serverlessCacheIDs, cacheClusterIDs []string) {
	for _, cacheARN := range caches {
		parts := strings.Split(cacheARN, ":")
		// Both the resource type (index 5) and the resource ID (index 6) are
		// needed, so anything shorter than seven fields is skipped.
		if len(parts) < 7 {
			continue
		}

		resourceType, resourceID := parts[5], parts[6]
		switch resourceType {
		case "serverlesscache":
			serverlessCacheIDs = append(serverlessCacheIDs, resourceID)
		case "replicationgroup":
			cacheClusterIDs = append(cacheClusterIDs, resourceID)
		}
	}
	return serverlessCacheIDs, cacheClusterIDs
}
// addServerlessCacheTargets appends one target describing the given serverless
// cache to the target group.
//
// Every attribute the API returned as a non-nil value becomes a meta label.
// Attributes that are nil are left out instead of being dereferenced, which
// previously panicked for ARN, name, status, engine and the engine versions.
// Security group and subnet IDs become one "<label>_<index>" label per
// element, and every tag with both key and value becomes a tag label with a
// sanitized key. The address label is set only when the endpoint has both an
// address and a port.
func addServerlessCacheTargets(tg *targetgroup.Group, cache *types.ServerlessCache, tags []types.Tag) {
	labels := model.LabelSet{
		elasticacheLabelDeploymentOption: model.LabelValue("serverless"),
	}

	// setString records a string attribute when the API provided it.
	setString := func(name model.LabelName, value *string) {
		if value != nil {
			labels[name] = model.LabelValue(*value)
		}
	}

	setString(elasticacheLabelServerlessCacheARN, cache.ARN)
	setString(elasticacheLabelServerlessCacheName, cache.ServerlessCacheName)
	setString(elasticacheLabelServerlessCacheStatus, cache.Status)
	setString(elasticacheLabelServerlessCacheEngine, cache.Engine)
	setString(elasticacheLabelServerlessCacheFullEngineVersion, cache.FullEngineVersion)
	setString(elasticacheLabelServerlessCacheMajorEngineVersion, cache.MajorEngineVersion)
	setString(elasticacheLabelServerlessCacheDescription, cache.Description)
	setString(elasticacheLabelServerlessCacheKmsKeyID, cache.KmsKeyId)
	setString(elasticacheLabelServerlessCacheUserGroupID, cache.UserGroupId)
	setString(elasticacheLabelServerlessCacheDailySnapshotTime, cache.DailySnapshotTime)

	if cache.CreateTime != nil {
		labels[elasticacheLabelServerlessCacheCreateTime] = model.LabelValue(cache.CreateTime.Format(time.RFC3339))
	}
	if cache.SnapshotRetentionLimit != nil {
		labels[elasticacheLabelServerlessCacheSnapshotRetentionLimit] = model.LabelValue(strconv.Itoa(int(*cache.SnapshotRetentionLimit)))
	}

	if endpoint := cache.Endpoint; endpoint != nil {
		setString(elasticacheLabelServerlessCacheEndpointAddress, endpoint.Address)
		if endpoint.Port != nil {
			labels[elasticacheLabelServerlessCacheEndpointPort] = model.LabelValue(strconv.Itoa(int(*endpoint.Port)))
		}
	}

	if reader := cache.ReaderEndpoint; reader != nil {
		setString(elasticacheLabelServerlessCacheReaderEndpointAddress, reader.Address)
		if reader.Port != nil {
			labels[elasticacheLabelServerlessCacheReaderEndpointPort] = model.LabelValue(strconv.Itoa(int(*reader.Port)))
		}
	}

	for i, sgID := range cache.SecurityGroupIds {
		labels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelServerlessCacheSecurityGroupID, i))] = model.LabelValue(sgID)
	}
	for i, subnetID := range cache.SubnetIds {
		labels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelServerlessCacheSubnetID, i))] = model.LabelValue(subnetID)
	}

	if limits := cache.CacheUsageLimits; limits != nil {
		if storage := limits.DataStorage; storage != nil {
			if storage.Maximum != nil {
				labels[elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorageMaximum] = model.LabelValue(strconv.Itoa(int(*storage.Maximum)))
			}
			if storage.Minimum != nil {
				labels[elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorageMinimum] = model.LabelValue(strconv.Itoa(int(*storage.Minimum)))
			}
			labels[elasticacheLabelServerlessCacheCacheUsageLimitCacheDataStorageUnit] = model.LabelValue(storage.Unit)
		}
		if ecpu := limits.ECPUPerSecond; ecpu != nil {
			if ecpu.Maximum != nil {
				labels[elasticacheLabelServerlessCacheCacheUsageLimitECPUPerSecondMaximum] = model.LabelValue(strconv.Itoa(int(*ecpu.Maximum)))
			}
			if ecpu.Minimum != nil {
				labels[elasticacheLabelServerlessCacheCacheUsageLimitECPUPerSecondMinimum] = model.LabelValue(strconv.Itoa(int(*ecpu.Minimum)))
			}
		}
	}

	for _, tag := range tags {
		if tag.Key != nil && tag.Value != nil {
			labels[model.LabelName(elasticacheLabelServerlessCacheTag+strutil.SanitizeLabelName(*tag.Key))] = model.LabelValue(*tag.Value)
		}
	}

	// Set the address label using the endpoint.
	if cache.Endpoint != nil && cache.Endpoint.Address != nil && cache.Endpoint.Port != nil {
		labels[model.AddressLabel] = model.LabelValue(net.JoinHostPort(*cache.Endpoint.Address, strconv.Itoa(int(*cache.Endpoint.Port))))
	}

	tg.Targets = append(tg.Targets, labels)
}
// addCacheClusterTargets adds targets for a cache cluster to the target group.
// Creates one target per cache node for individual scraping.
func addCacheClusterTargets(tg *targetgroup.Group, cluster *types.CacheCluster, tags []types.Tag) {
// Build common labels that apply to all nodes in this cluster
commonLabels := model.LabelSet{
elasticacheLabelDeploymentOption: model.LabelValue("node"),
elasticacheLabelCacheClusterARN: model.LabelValue(*cluster.ARN),
elasticacheLabelCacheClusterID: model.LabelValue(*cluster.CacheClusterId),
elasticacheLabelCacheClusterStatus: model.LabelValue(*cluster.CacheClusterStatus),
}
if cluster.AtRestEncryptionEnabled != nil {
commonLabels[elasticacheLabelCacheClusterAtRestEncryptionEnabled] = model.LabelValue(strconv.FormatBool(*cluster.AtRestEncryptionEnabled))
}
if cluster.AuthTokenEnabled != nil {
commonLabels[elasticacheLabelCacheClusterAuthTokenEnabled] = model.LabelValue(strconv.FormatBool(*cluster.AuthTokenEnabled))
}
if cluster.AuthTokenLastModifiedDate != nil {
commonLabels[elasticacheLabelCacheClusterAuthTokenLastModified] = model.LabelValue(cluster.AuthTokenLastModifiedDate.Format(time.RFC3339))
}
if cluster.AutoMinorVersionUpgrade != nil {
commonLabels[elasticacheLabelCacheClusterAutoMinorVersionUpgrade] = model.LabelValue(strconv.FormatBool(*cluster.AutoMinorVersionUpgrade))
}
if cluster.CacheClusterCreateTime != nil {
commonLabels[elasticacheLabelCacheClusterCreateTime] = model.LabelValue(cluster.CacheClusterCreateTime.Format(time.RFC3339))
}
if cluster.CacheNodeType != nil {
commonLabels[elasticacheLabelCacheClusterNodeType] = model.LabelValue(*cluster.CacheNodeType)
}
if cluster.CacheParameterGroup != nil && cluster.CacheParameterGroup.CacheParameterGroupName != nil {
commonLabels[elasticacheLabelCacheClusterParameterGroup] = model.LabelValue(*cluster.CacheParameterGroup.CacheParameterGroupName)
}
if cluster.CacheSubnetGroupName != nil {
commonLabels[elasticacheLabelCacheClusterSubnetGroupName] = model.LabelValue(*cluster.CacheSubnetGroupName)
}
if cluster.ClientDownloadLandingPage != nil {
commonLabels[elasticacheLabelCacheClusterClientDownloadLandingPage] = model.LabelValue(*cluster.ClientDownloadLandingPage)
}
if cluster.ConfigurationEndpoint != nil {
if cluster.ConfigurationEndpoint.Address != nil {
commonLabels[elasticacheLabelCacheClusterConfigurationEndpointAddress] = model.LabelValue(*cluster.ConfigurationEndpoint.Address)
}
if cluster.ConfigurationEndpoint.Port != nil {
commonLabels[elasticacheLabelCacheClusterConfigurationEndpointPort] = model.LabelValue(strconv.Itoa(int(*cluster.ConfigurationEndpoint.Port)))
}
}
if cluster.Engine != nil {
commonLabels[elasticacheLabelCacheClusterEngine] = model.LabelValue(*cluster.Engine)
}
if cluster.EngineVersion != nil {
commonLabels[elasticacheLabelCacheClusterEngineVersion] = model.LabelValue(*cluster.EngineVersion)
}
if len(cluster.IpDiscovery) > 0 {
commonLabels[elasticacheLabelCacheClusterIPDiscovery] = model.LabelValue(cluster.IpDiscovery)
}
if len(cluster.NetworkType) > 0 {
commonLabels[elasticacheLabelCacheClusterNetworkType] = model.LabelValue(cluster.NetworkType)
}
if cluster.NotificationConfiguration != nil {
if cluster.NotificationConfiguration.TopicArn != nil {
commonLabels[elasticacheLabelCacheClusterNotificationTopicARN] = model.LabelValue(*cluster.NotificationConfiguration.TopicArn)
}
if cluster.NotificationConfiguration.TopicStatus != nil {
commonLabels[elasticacheLabelCacheClusterNotificationTopicStatus] = model.LabelValue(*cluster.NotificationConfiguration.TopicStatus)
}
}
if cluster.NumCacheNodes != nil {
commonLabels[elasticacheLabelCacheClusterNumCacheNodes] = model.LabelValue(strconv.Itoa(int(*cluster.NumCacheNodes)))
}
if cluster.PreferredAvailabilityZone != nil {
commonLabels[elasticacheLabelCacheClusterPreferredAvailabilityZone] = model.LabelValue(*cluster.PreferredAvailabilityZone)
}
if cluster.PreferredMaintenanceWindow != nil {
commonLabels[elasticacheLabelCacheClusterPreferredMaintenanceWindow] = model.LabelValue(*cluster.PreferredMaintenanceWindow)
}
if cluster.PreferredOutpostArn != nil {
commonLabels[elasticacheLabelCacheClusterPreferredOutpostARN] = model.LabelValue(*cluster.PreferredOutpostArn)
}
if cluster.ReplicationGroupId != nil {
commonLabels[elasticacheLabelCacheClusterReplicationGroupID] = model.LabelValue(*cluster.ReplicationGroupId)
}
if cluster.ReplicationGroupLogDeliveryEnabled != nil {
commonLabels[elasticacheLabelCacheClusterReplicationGroupLogDeliveryEnabled] = model.LabelValue(strconv.FormatBool(*cluster.ReplicationGroupLogDeliveryEnabled))
}
if cluster.SnapshotRetentionLimit != nil {
commonLabels[elasticacheLabelCacheClusterSnapshotRetentionLimit] = model.LabelValue(strconv.Itoa(int(*cluster.SnapshotRetentionLimit)))
}
if cluster.SnapshotWindow != nil {
commonLabels[elasticacheLabelCacheClusterSnapshotWindow] = model.LabelValue(*cluster.SnapshotWindow)
}
if cluster.TransitEncryptionEnabled != nil {
commonLabels[elasticacheLabelCacheClusterTransitEncryptionEnabled] = model.LabelValue(strconv.FormatBool(*cluster.TransitEncryptionEnabled))
}
if len(cluster.TransitEncryptionMode) > 0 {
commonLabels[elasticacheLabelCacheClusterTransitEncryptionMode] = model.LabelValue(cluster.TransitEncryptionMode)
}
// Log delivery configurations (slice)
for i, logDelivery := range cluster.LogDeliveryConfigurations {
if len(logDelivery.DestinationType) > 0 {
commonLabels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelCacheClusterLogDeliveryConfigurationDestinationType, i))] = model.LabelValue(logDelivery.DestinationType)
}
if len(logDelivery.LogFormat) > 0 {
commonLabels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelCacheClusterLogDeliveryConfigurationLogFormat, i))] = model.LabelValue(logDelivery.LogFormat)
}
if len(logDelivery.LogType) > 0 {
commonLabels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelCacheClusterLogDeliveryConfigurationLogType, i))] = model.LabelValue(logDelivery.LogType)
}
if len(logDelivery.Status) > 0 {
commonLabels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelCacheClusterLogDeliveryConfigurationStatus, i))] = model.LabelValue(logDelivery.Status)
}
if logDelivery.Message != nil {
commonLabels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelCacheClusterLogDeliveryConfigurationMessage, i))] = model.LabelValue(*logDelivery.Message)
}
if logDelivery.DestinationDetails != nil {
if logDelivery.DestinationDetails.CloudWatchLogsDetails != nil && logDelivery.DestinationDetails.CloudWatchLogsDetails.LogGroup != nil {
commonLabels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelCacheClusterLogDeliveryConfigurationLogGroup, i))] = model.LabelValue(*logDelivery.DestinationDetails.CloudWatchLogsDetails.LogGroup)
}
if logDelivery.DestinationDetails.KinesisFirehoseDetails != nil && logDelivery.DestinationDetails.KinesisFirehoseDetails.DeliveryStream != nil {
commonLabels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelCacheClusterLogDeliveryConfigurationDeliveryStream, i))] = model.LabelValue(*logDelivery.DestinationDetails.KinesisFirehoseDetails.DeliveryStream)
}
}
}
// Pending modified values
if cluster.PendingModifiedValues != nil {
if len(cluster.PendingModifiedValues.AuthTokenStatus) > 0 {
commonLabels[elasticacheLabelCacheClusterPendingModifiedValuesAuthTokenStatus] = model.LabelValue(cluster.PendingModifiedValues.AuthTokenStatus)
}
if cluster.PendingModifiedValues.CacheNodeType != nil {
commonLabels[elasticacheLabelCacheClusterPendingModifiedValuesCacheNodeType] = model.LabelValue(*cluster.PendingModifiedValues.CacheNodeType)
}
if cluster.PendingModifiedValues.EngineVersion != nil {
commonLabels[elasticacheLabelCacheClusterPendingModifiedValuesEngineVersion] = model.LabelValue(*cluster.PendingModifiedValues.EngineVersion)
}
if cluster.PendingModifiedValues.NumCacheNodes != nil {
commonLabels[elasticacheLabelCacheClusterPendingModifiedValuesNumCacheNodes] = model.LabelValue(strconv.Itoa(int(*cluster.PendingModifiedValues.NumCacheNodes)))
}
if cluster.PendingModifiedValues.TransitEncryptionEnabled != nil {
commonLabels[elasticacheLabelCacheClusterPendingModifiedValuesTransitEncryptionEnabled] = model.LabelValue(strconv.FormatBool(*cluster.PendingModifiedValues.TransitEncryptionEnabled))
}
if len(cluster.PendingModifiedValues.TransitEncryptionMode) > 0 {
commonLabels[elasticacheLabelCacheClusterPendingModifiedValuesTransitEncryptionMode] = model.LabelValue(cluster.PendingModifiedValues.TransitEncryptionMode)
}
if len(cluster.PendingModifiedValues.CacheNodeIdsToRemove) > 0 {
commonLabels[elasticacheLabelCacheClusterPendingModifiedValuesCacheNodeIDsToRemove] = model.LabelValue(strings.Join(cluster.PendingModifiedValues.CacheNodeIdsToRemove, ","))
}
}
// Security group membership (slice)
for i, sg := range cluster.SecurityGroups {
if sg.SecurityGroupId != nil {
commonLabels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelCacheClusterSecurityGroupMembershipID, i))] = model.LabelValue(*sg.SecurityGroupId)
}
if sg.Status != nil {
commonLabels[model.LabelName(fmt.Sprintf("%s_%d", elasticacheLabelCacheClusterSecurityGroupMembershipStatus, i))] = model.LabelValue(*sg.Status)
}
}
// Tags
for _, tag := range tags {
if tag.Key != nil && tag.Value != nil {
commonLabels[model.LabelName(elasticacheLabelCacheClusterTag+strutil.SanitizeLabelName(*tag.Key))] = model.LabelValue(*tag.Value)
}
}
// Create one target per cache node
for _, node := range cluster.CacheNodes {
// Clone common labels for this node
labels := make(model.LabelSet, len(commonLabels))
maps.Copy(labels, commonLabels)
// Add node-specific labels
if node.CacheNodeId != nil {
labels[elasticacheLabelCacheClusterNodeID] = model.LabelValue(*node.CacheNodeId)
}
if node.CacheNodeStatus != nil {
labels[elasticacheLabelCacheClusterNodeStatus] = model.LabelValue(*node.CacheNodeStatus)
}
if node.CacheNodeCreateTime != nil {
labels[elasticacheLabelCacheClusterNodeCreateTime] = model.LabelValue(node.CacheNodeCreateTime.Format(time.RFC3339))
}
if node.CustomerAvailabilityZone != nil {
labels[elasticacheLabelCacheClusterNodeAZ] = model.LabelValue(*node.CustomerAvailabilityZone)
}
if node.CustomerOutpostArn != nil {
labels[elasticacheLabelCacheClusterNodeCustomerOutpostARN] = model.LabelValue(*node.CustomerOutpostArn)
}
if node.SourceCacheNodeId != nil {
labels[elasticacheLabelCacheClusterNodeSourceCacheNodeID] = model.LabelValue(*node.SourceCacheNodeId)
}
if node.ParameterGroupStatus != nil {
labels[elasticacheLabelCacheClusterNodeParameterGroupStatus] = model.LabelValue(*node.ParameterGroupStatus)
}
if node.Endpoint != nil {
if node.Endpoint.Address != nil {
labels[elasticacheLabelCacheClusterNodeEndpointAddress] = model.LabelValue(*node.Endpoint.Address)
}
if node.Endpoint.Port != nil {
labels[elasticacheLabelCacheClusterNodeEndpointPort] = model.LabelValue(strconv.Itoa(int(*node.Endpoint.Port)))
}
// Set the address label to this node's endpoint
if node.Endpoint.Address != nil && node.Endpoint.Port != nil {
labels[model.AddressLabel] = model.LabelValue(net.JoinHostPort(*node.Endpoint.Address, strconv.Itoa(int(*node.Endpoint.Port))))
}
}
tg.Targets = append(tg.Targets, labels)
}
}

View file

@ -0,0 +1,615 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"context"
"testing"
"time"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/elasticache"
"github.com/aws/aws-sdk-go-v2/service/elasticache/types"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
// elasticacheDataStore holds the canned ElastiCache data that the mock client
// in this file serves back to the code under test.
type elasticacheDataStore struct {
// region is copied into the SD config's Region by the tests.
region string
// serverlessCaches is what the mock returns from DescribeServerlessCaches.
serverlessCaches []types.ServerlessCache
// cacheClusters is what the mock returns from DescribeCacheClusters.
cacheClusters []types.CacheCluster
// tags is looked up by the mock's ListTagsForResource using the request's ResourceName.
tags map[string][]types.Tag // keyed by cache ARN
}
// TestElasticacheDiscoveryDescribeServerlessCaches checks that
// describeServerlessCaches, backed by the mock client, returns the expected
// number of caches with and without an explicit list of cache names.
func TestElasticacheDiscoveryDescribeServerlessCaches(t *testing.T) {
	type testCase struct {
		name          string
		ecData        *elasticacheDataStore
		cacheNames    []string
		expectedCount int
	}

	testCases := []testCase{
		{
			name: "MultipleCaches",
			ecData: &elasticacheDataStore{
				region: "us-west-2",
				serverlessCaches: []types.ServerlessCache{
					{
						ServerlessCacheName: strptr("test-cache"),
						ARN:                 strptr("arn:aws:elasticache:us-west-2:123456789012:serverlesscache:test-cache"),
						Status:              strptr("available"),
						Engine:              strptr("redis"),
						FullEngineVersion:   strptr("7.1"),
						CreateTime:          aws.Time(time.Now()),
						Endpoint: &types.Endpoint{
							Address: strptr("test-cache.serverless.use1.cache.amazonaws.com"),
							Port:    aws.Int32(6379),
						},
					},
					{
						ServerlessCacheName: strptr("prod-cache"),
						ARN:                 strptr("arn:aws:elasticache:us-west-2:123456789012:serverlesscache:prod-cache"),
						Status:              strptr("available"),
						Engine:              strptr("valkey"),
						FullEngineVersion:   strptr("7.2"),
						CreateTime:          aws.Time(time.Now()),
						Endpoint: &types.Endpoint{
							Address: strptr("prod-cache.serverless.use1.cache.amazonaws.com"),
							Port:    aws.Int32(6379),
						},
					},
				},
			},
			cacheNames:    []string{},
			expectedCount: 2,
		},
		{
			name: "SingleCache",
			ecData: &elasticacheDataStore{
				region: "us-east-1",
				serverlessCaches: []types.ServerlessCache{
					{
						ServerlessCacheName: strptr("single-cache"),
						ARN:                 strptr("arn:aws:elasticache:us-east-1:123456789012:serverlesscache:single-cache"),
						Status:              strptr("available"),
						Engine:              strptr("redis"),
						FullEngineVersion:   strptr("7.1"),
						CreateTime:          aws.Time(time.Now()),
					},
				},
			},
			cacheNames:    []string{"single-cache"},
			expectedCount: 1,
		},
		{
			name: "NoCaches",
			ecData: &elasticacheDataStore{
				region:           "us-east-1",
				serverlessCaches: []types.ServerlessCache{},
			},
			cacheNames:    []string{},
			expectedCount: 0,
		},
	}

	ctx := context.Background()
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Wire the discovery straight to the in-memory mock; only the
			// fields set here are populated.
			sd := &ElasticacheDiscovery{
				elasticacheClient: newMockElasticacheClient(tc.ecData),
				cfg: &ElasticacheSDConfig{
					Region:             tc.ecData.region,
					RequestConcurrency: 10,
				},
			}

			got, err := sd.describeServerlessCaches(ctx, tc.cacheNames)
			require.NoError(t, err)
			require.Len(t, got, tc.expectedCount)
		})
	}
}
// TestElasticacheDiscoveryDescribeCacheClusters validates the mock client's
// DescribeCacheClusters implementation: called without a cluster ID filter it
// must return every cluster held in the data store. The test talks to the
// mock directly rather than going through ElasticacheDiscovery.
func TestElasticacheDiscoveryDescribeCacheClusters(t *testing.T) {
	ctx := context.Background()

	for _, tt := range []struct {
		name          string
		ecData        *elasticacheDataStore
		expectedCount int
	}{
		{
			name: "MockValidation",
			ecData: &elasticacheDataStore{
				region: "us-west-2",
				cacheClusters: []types.CacheCluster{
					{
						CacheClusterId:     strptr("test-cluster-001"),
						ARN:                strptr("arn:aws:elasticache:us-west-2:123456789012:cluster:test-cluster-001"),
						CacheClusterStatus: strptr("available"),
						Engine:             strptr("redis"),
						EngineVersion:      strptr("7.1"),
						CacheNodeType:      strptr("cache.t3.micro"),
						NumCacheNodes:      aws.Int32(1),
						ConfigurationEndpoint: &types.Endpoint{
							Address: strptr("test-cluster.abc123.cfg.use1.cache.amazonaws.com"),
							Port:    aws.Int32(6379),
						},
					},
				},
			},
			expectedCount: 1,
		},
		{
			name: "NoClusters",
			ecData: &elasticacheDataStore{
				region:        "us-east-1",
				cacheClusters: []types.CacheCluster{},
			},
			expectedCount: 0,
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			client := newMockElasticacheClient(tt.ecData)

			// An empty input carries no CacheClusterId, so the mock takes its
			// "return everything" path.
			output, err := client.DescribeCacheClusters(ctx, &elasticache.DescribeCacheClustersInput{})
			require.NoError(t, err)
			require.Len(t, output.CacheClusters, tt.expectedCount)
		})
	}
}
// TestAddServerlessCacheTargets checks that addServerlessCacheTargets emits
// exactly one target for a serverless cache and that the target carries every
// expected label. Labels beyond the expected set are not checked.
func TestAddServerlessCacheTargets(t *testing.T) {
	// A fixed timestamp keeps the formatted create-time label deterministic.
	createdAt := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)

	type testCase struct {
		name           string
		cache          *types.ServerlessCache
		tags           []types.Tag
		expectedLabels model.LabelSet
	}

	testCases := []testCase{
		{
			name: "ServerlessCacheWithEndpoint",
			cache: &types.ServerlessCache{
				ServerlessCacheName: strptr("my-cache"),
				ARN:                 strptr("arn:aws:elasticache:us-east-1:123456789012:serverlesscache:my-cache"),
				Status:              strptr("available"),
				Engine:              strptr("redis"),
				FullEngineVersion:   strptr("7.1"),
				MajorEngineVersion:  strptr("7"),
				CreateTime:          aws.Time(createdAt),
				Endpoint: &types.Endpoint{
					Address: strptr("my-cache.serverless.use1.cache.amazonaws.com"),
					Port:    aws.Int32(6379),
				},
				ReaderEndpoint: &types.Endpoint{
					Address: strptr("my-cache-ro.serverless.use1.cache.amazonaws.com"),
					Port:    aws.Int32(6379),
				},
				SecurityGroupIds: []string{"sg-12345"},
				SubnetIds:        []string{"subnet-abcdef"},
				CacheUsageLimits: &types.CacheUsageLimits{
					DataStorage: &types.DataStorage{
						Maximum: aws.Int32(10),
						Minimum: aws.Int32(1),
						Unit:    types.DataStorageUnitGb,
					},
					ECPUPerSecond: &types.ECPUPerSecond{
						Maximum: aws.Int32(5000),
						Minimum: aws.Int32(1000),
					},
				},
			},
			tags: []types.Tag{
				{Key: strptr("Environment"), Value: strptr("test")},
			},
			expectedLabels: model.LabelSet{
				"__meta_elasticache_deployment_option":                     "serverless",
				"__meta_elasticache_serverless_cache_arn":                  "arn:aws:elasticache:us-east-1:123456789012:serverlesscache:my-cache",
				"__meta_elasticache_serverless_cache_name":                 "my-cache",
				"__meta_elasticache_serverless_cache_status":               "available",
				"__meta_elasticache_serverless_cache_engine":               "redis",
				"__meta_elasticache_serverless_cache_full_engine_version":  "7.1",
				"__meta_elasticache_serverless_cache_major_engine_version": "7",
				"__meta_elasticache_serverless_cache_create_time":          "2024-01-01T00:00:00Z",
				"__meta_elasticache_serverless_cache_endpoint_address":     "my-cache.serverless.use1.cache.amazonaws.com",
				"__meta_elasticache_serverless_cache_endpoint_port":        "6379",
				"__meta_elasticache_serverless_cache_security_group_id_0":  "sg-12345",
				"__meta_elasticache_serverless_cache_subnet_id_0":          "subnet-abcdef",
				"__address__":                                              "my-cache.serverless.use1.cache.amazonaws.com:6379",
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			group := &targetgroup.Group{Source: "test"}
			addServerlessCacheTargets(group, tc.cache, tc.tags)
			require.Len(t, group.Targets, 1)

			// Subset check: each expected label must be present with the
			// expected value.
			target := group.Targets[0]
			for name, want := range tc.expectedLabels {
				got, ok := target[name]
				require.True(t, ok, "label %s should exist", name)
				require.Equal(t, want, got, "label %s mismatch", name)
			}
		})
	}
}
// TestAddCacheClusterTargets checks that addCacheClusterTargets emits one
// target per cache node and that each target carries the expected cluster-,
// tag- and node-level labels. Labels beyond the expected set are not checked.
func TestAddCacheClusterTargets(t *testing.T) {
	// A fixed timestamp keeps the formatted create-time labels deterministic.
	createdAt := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)

	type testCase struct {
		name                string
		cluster             *types.CacheCluster
		tags                []types.Tag
		expectedTargetCount int
		expectedLabels      []model.LabelSet // One per node
	}

	testCases := []testCase{
		{
			name: "CacheClusterWithMultipleNodes",
			cluster: &types.CacheCluster{
				CacheClusterId:         strptr("my-cluster-001"),
				ARN:                    strptr("arn:aws:elasticache:us-east-1:123456789012:cluster:my-cluster-001"),
				CacheClusterStatus:     strptr("available"),
				Engine:                 strptr("redis"),
				EngineVersion:          strptr("7.1"),
				CacheNodeType:          strptr("cache.t3.micro"),
				NumCacheNodes:          aws.Int32(2),
				CacheClusterCreateTime: aws.Time(createdAt),
				ConfigurationEndpoint: &types.Endpoint{
					Address: strptr("my-cluster.abc123.cfg.use1.cache.amazonaws.com"),
					Port:    aws.Int32(6379),
				},
				AtRestEncryptionEnabled:   aws.Bool(true),
				TransitEncryptionEnabled:  aws.Bool(true),
				AuthTokenEnabled:          aws.Bool(true),
				AutoMinorVersionUpgrade:   aws.Bool(true),
				CacheSubnetGroupName:      strptr("my-subnet-group"),
				PreferredAvailabilityZone: strptr("us-east-1a"),
				SecurityGroups: []types.SecurityGroupMembership{
					{
						SecurityGroupId: strptr("sg-12345"),
						Status:          strptr("active"),
					},
				},
				CacheNodes: []types.CacheNode{
					{
						CacheNodeId:              strptr("0001"),
						CacheNodeStatus:          strptr("available"),
						CacheNodeCreateTime:      aws.Time(createdAt),
						CustomerAvailabilityZone: strptr("us-east-1a"),
						Endpoint: &types.Endpoint{
							Address: strptr("my-cluster-001.abc123.0001.use1.cache.amazonaws.com"),
							Port:    aws.Int32(6379),
						},
					},
					{
						CacheNodeId:              strptr("0002"),
						CacheNodeStatus:          strptr("available"),
						CacheNodeCreateTime:      aws.Time(createdAt),
						CustomerAvailabilityZone: strptr("us-east-1b"),
						Endpoint: &types.Endpoint{
							Address: strptr("my-cluster-001.abc123.0002.use1.cache.amazonaws.com"),
							Port:    aws.Int32(6379),
						},
					},
				},
			},
			tags: []types.Tag{
				{Key: strptr("Environment"), Value: strptr("production")},
				{Key: strptr("Application"), Value: strptr("web-app")},
			},
			expectedTargetCount: 2,
			expectedLabels: []model.LabelSet{
				{
					"__meta_elasticache_deployment_option":                                "node",
					"__meta_elasticache_cache_cluster_arn":                                "arn:aws:elasticache:us-east-1:123456789012:cluster:my-cluster-001",
					"__meta_elasticache_cache_cluster_cache_cluster_id":                   "my-cluster-001",
					"__meta_elasticache_cache_cluster_cache_cluster_status":               "available",
					"__meta_elasticache_cache_cluster_engine":                             "redis",
					"__meta_elasticache_cache_cluster_engine_version":                     "7.1",
					"__meta_elasticache_cache_cluster_cache_node_type":                    "cache.t3.micro",
					"__meta_elasticache_cache_cluster_num_cache_nodes":                    "2",
					"__meta_elasticache_cache_cluster_cache_cluster_create_time":          "2024-01-01T00:00:00Z",
					"__meta_elasticache_cache_cluster_configuration_endpoint_address":     "my-cluster.abc123.cfg.use1.cache.amazonaws.com",
					"__meta_elasticache_cache_cluster_configuration_endpoint_port":        "6379",
					"__meta_elasticache_cache_cluster_at_rest_encryption_enabled":         "true",
					"__meta_elasticache_cache_cluster_transit_encryption_enabled":         "true",
					"__meta_elasticache_cache_cluster_auth_token_enabled":                 "true",
					"__meta_elasticache_cache_cluster_auto_minor_version_upgrade":         "true",
					"__meta_elasticache_cache_cluster_cache_subnet_group_name":            "my-subnet-group",
					"__meta_elasticache_cache_cluster_preferred_availability_zone":        "us-east-1a",
					"__meta_elasticache_cache_cluster_security_group_membership_id_0":     "sg-12345",
					"__meta_elasticache_cache_cluster_security_group_membership_status_0": "active",
					"__meta_elasticache_cache_cluster_tag_Environment":                    "production",
					"__meta_elasticache_cache_cluster_tag_Application":                    "web-app",
					"__meta_elasticache_cache_cluster_node_id":                            "0001",
					"__meta_elasticache_cache_cluster_node_status":                        "available",
					"__meta_elasticache_cache_cluster_node_create_time":                   "2024-01-01T00:00:00Z",
					"__meta_elasticache_cache_cluster_node_availability_zone":             "us-east-1a",
					"__meta_elasticache_cache_cluster_node_endpoint_address":              "my-cluster-001.abc123.0001.use1.cache.amazonaws.com",
					"__meta_elasticache_cache_cluster_node_endpoint_port":                 "6379",
					"__address__":                                                         "my-cluster-001.abc123.0001.use1.cache.amazonaws.com:6379",
				},
				{
					"__meta_elasticache_deployment_option":                                "node",
					"__meta_elasticache_cache_cluster_arn":                                "arn:aws:elasticache:us-east-1:123456789012:cluster:my-cluster-001",
					"__meta_elasticache_cache_cluster_cache_cluster_id":                   "my-cluster-001",
					"__meta_elasticache_cache_cluster_cache_cluster_status":               "available",
					"__meta_elasticache_cache_cluster_engine":                             "redis",
					"__meta_elasticache_cache_cluster_engine_version":                     "7.1",
					"__meta_elasticache_cache_cluster_cache_node_type":                    "cache.t3.micro",
					"__meta_elasticache_cache_cluster_num_cache_nodes":                    "2",
					"__meta_elasticache_cache_cluster_cache_cluster_create_time":          "2024-01-01T00:00:00Z",
					"__meta_elasticache_cache_cluster_configuration_endpoint_address":     "my-cluster.abc123.cfg.use1.cache.amazonaws.com",
					"__meta_elasticache_cache_cluster_configuration_endpoint_port":        "6379",
					"__meta_elasticache_cache_cluster_at_rest_encryption_enabled":         "true",
					"__meta_elasticache_cache_cluster_transit_encryption_enabled":         "true",
					"__meta_elasticache_cache_cluster_auth_token_enabled":                 "true",
					"__meta_elasticache_cache_cluster_auto_minor_version_upgrade":         "true",
					"__meta_elasticache_cache_cluster_cache_subnet_group_name":            "my-subnet-group",
					"__meta_elasticache_cache_cluster_preferred_availability_zone":        "us-east-1a",
					"__meta_elasticache_cache_cluster_security_group_membership_id_0":     "sg-12345",
					"__meta_elasticache_cache_cluster_security_group_membership_status_0": "active",
					"__meta_elasticache_cache_cluster_tag_Environment":                    "production",
					"__meta_elasticache_cache_cluster_tag_Application":                    "web-app",
					"__meta_elasticache_cache_cluster_node_id":                            "0002",
					"__meta_elasticache_cache_cluster_node_status":                        "available",
					"__meta_elasticache_cache_cluster_node_create_time":                   "2024-01-01T00:00:00Z",
					"__meta_elasticache_cache_cluster_node_availability_zone":             "us-east-1b",
					"__meta_elasticache_cache_cluster_node_endpoint_address":              "my-cluster-001.abc123.0002.use1.cache.amazonaws.com",
					"__meta_elasticache_cache_cluster_node_endpoint_port":                 "6379",
					"__address__":                                                         "my-cluster-001.abc123.0002.use1.cache.amazonaws.com:6379",
				},
			},
		},
		{
			name: "CacheClusterWithSingleNode",
			cluster: &types.CacheCluster{
				CacheClusterId:     strptr("node-cluster-001"),
				ARN:                strptr("arn:aws:elasticache:us-east-1:123456789012:cluster:node-cluster-001"),
				CacheClusterStatus: strptr("available"),
				Engine:             strptr("redis"),
				EngineVersion:      strptr("6.2"),
				CacheNodeType:      strptr("cache.r6g.large"),
				NumCacheNodes:      aws.Int32(1),
				CacheNodes: []types.CacheNode{
					{
						CacheNodeId:              strptr("0001"),
						CacheNodeStatus:          strptr("available"),
						CacheNodeCreateTime:      aws.Time(createdAt),
						CustomerAvailabilityZone: strptr("us-east-1a"),
						Endpoint: &types.Endpoint{
							Address: strptr("node-cluster-001.abc123.0001.use1.cache.amazonaws.com"),
							Port:    aws.Int32(6379),
						},
					},
				},
			},
			tags:                []types.Tag{},
			expectedTargetCount: 1,
			expectedLabels: []model.LabelSet{
				{
					"__meta_elasticache_deployment_option":                    "node",
					"__meta_elasticache_cache_cluster_arn":                    "arn:aws:elasticache:us-east-1:123456789012:cluster:node-cluster-001",
					"__meta_elasticache_cache_cluster_cache_cluster_id":       "node-cluster-001",
					"__meta_elasticache_cache_cluster_cache_cluster_status":   "available",
					"__meta_elasticache_cache_cluster_engine":                 "redis",
					"__meta_elasticache_cache_cluster_engine_version":         "6.2",
					"__meta_elasticache_cache_cluster_cache_node_type":        "cache.r6g.large",
					"__meta_elasticache_cache_cluster_num_cache_nodes":        "1",
					"__meta_elasticache_cache_cluster_node_id":                "0001",
					"__meta_elasticache_cache_cluster_node_status":            "available",
					"__meta_elasticache_cache_cluster_node_create_time":       "2024-01-01T00:00:00Z",
					"__meta_elasticache_cache_cluster_node_availability_zone": "us-east-1a",
					"__meta_elasticache_cache_cluster_node_endpoint_address":  "node-cluster-001.abc123.0001.use1.cache.amazonaws.com",
					"__meta_elasticache_cache_cluster_node_endpoint_port":     "6379",
					"__address__":                                             "node-cluster-001.abc123.0001.use1.cache.amazonaws.com:6379",
				},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			group := &targetgroup.Group{Source: "test"}
			addCacheClusterTargets(group, tc.cluster, tc.tags)
			require.Len(t, group.Targets, tc.expectedTargetCount)

			// Subset check per target: each expected label must be present
			// with the expected value.
			for idx, wantLabels := range tc.expectedLabels {
				target := group.Targets[idx]
				for name, want := range wantLabels {
					got, ok := target[name]
					require.True(t, ok, "label %s should exist in target %d", name, idx)
					require.Equal(t, want, got, "label %s mismatch in target %d", name, idx)
				}
			}
		})
	}
}
// mockElasticacheClient is an in-memory stand-in for the ElastiCache API
// client; it answers every request from a fixed elasticacheDataStore.
type mockElasticacheClient struct {
	data *elasticacheDataStore
}

// newMockElasticacheClient returns a mock client backed by data.
func newMockElasticacheClient(data *elasticacheDataStore) *mockElasticacheClient {
	mock := new(mockElasticacheClient)
	mock.data = data
	return mock
}
// DescribeServerlessCaches returns the serverless caches held in the data
// store. When input.ServerlessCacheName is set, the result contains only the
// first cache with that name (or is empty if none matches); otherwise every
// cache is returned. It never returns an error.
func (m *mockElasticacheClient) DescribeServerlessCaches(_ context.Context, input *elasticache.DescribeServerlessCachesInput, _ ...func(*elasticache.Options)) (*elasticache.DescribeServerlessCachesOutput, error) {
	// No name filter: hand back the whole data set.
	if input.ServerlessCacheName == nil {
		return &elasticache.DescribeServerlessCachesOutput{
			ServerlessCaches: m.data.serverlessCaches,
		}, nil
	}

	wanted := *input.ServerlessCacheName
	matched := []types.ServerlessCache{}
	for _, candidate := range m.data.serverlessCaches {
		if candidate.ServerlessCacheName != nil && *candidate.ServerlessCacheName == wanted {
			// Only the first match is reported.
			matched = append(matched, candidate)
			break
		}
	}
	return &elasticache.DescribeServerlessCachesOutput{
		ServerlessCaches: matched,
	}, nil
}
// DescribeCacheClusters returns the cache clusters held in the data store.
// When input.CacheClusterId is set, the result contains only the first
// cluster with that ID (or is empty if none matches); otherwise every cluster
// is returned. It never returns an error.
func (m *mockElasticacheClient) DescribeCacheClusters(_ context.Context, input *elasticache.DescribeCacheClustersInput, _ ...func(*elasticache.Options)) (*elasticache.DescribeCacheClustersOutput, error) {
	// No ID filter: hand back the whole data set.
	if input.CacheClusterId == nil {
		return &elasticache.DescribeCacheClustersOutput{
			CacheClusters: m.data.cacheClusters,
		}, nil
	}

	wanted := *input.CacheClusterId
	matched := []types.CacheCluster{}
	for _, candidate := range m.data.cacheClusters {
		if candidate.CacheClusterId != nil && *candidate.CacheClusterId == wanted {
			// Only the first match is reported.
			matched = append(matched, candidate)
			break
		}
	}
	return &elasticache.DescribeCacheClustersOutput{
		CacheClusters: matched,
	}, nil
}
// ListTagsForResource returns the tags stored under input.ResourceName in the
// data store, or an empty tag list when the name is unset or unknown. It
// never returns an error.
func (m *mockElasticacheClient) ListTagsForResource(_ context.Context, input *elasticache.ListTagsForResourceInput, _ ...func(*elasticache.Options)) (*elasticache.ListTagsForResourceOutput, error) {
	// Default to an empty, non-nil list; overwritten only on a map hit.
	found := []types.Tag{}
	if input.ResourceName != nil {
		if stored, ok := m.data.tags[*input.ResourceName]; ok {
			found = stored
		}
	}
	return &elasticache.ListTagsForResourceOutput{
		TagList: found,
	}, nil
}
// TestSplitCacheDeploymentOptions checks that splitCacheDeploymentOptions
// sorts cache ARNs into serverless cache IDs and cache cluster IDs, and that
// malformed ARNs and unknown resource types end up in neither result.
func TestSplitCacheDeploymentOptions(t *testing.T) {
	type testCase struct {
		name                       string
		caches                     []string
		expectedServerlessCacheIDs []string
		expectedCacheClusterIDs    []string
	}

	testCases := []testCase{
		{
			name: "MixedARNs",
			caches: []string{
				"arn:aws:elasticache:us-east-1:123456789012:serverlesscache:my-serverless-cache",
				"arn:aws:elasticache:us-east-1:123456789012:replicationgroup:my-replication-group",
				"arn:aws:elasticache:us-west-2:123456789012:serverlesscache:prod-cache",
			},
			expectedServerlessCacheIDs: []string{"my-serverless-cache", "prod-cache"},
			expectedCacheClusterIDs:    []string{"my-replication-group"},
		},
		{
			name: "OnlyServerlessCaches",
			caches: []string{
				"arn:aws:elasticache:us-east-1:123456789012:serverlesscache:cache-1",
				"arn:aws:elasticache:us-east-1:123456789012:serverlesscache:cache-2",
			},
			expectedServerlessCacheIDs: []string{"cache-1", "cache-2"},
			expectedCacheClusterIDs:    nil,
		},
		{
			name: "OnlyReplicationGroups",
			caches: []string{
				"arn:aws:elasticache:us-east-1:123456789012:replicationgroup:cluster-1",
				"arn:aws:elasticache:us-east-1:123456789012:replicationgroup:cluster-2",
			},
			expectedServerlessCacheIDs: nil,
			expectedCacheClusterIDs:    []string{"cluster-1", "cluster-2"},
		},
		{
			name:                       "EmptyInput",
			caches:                     []string{},
			expectedServerlessCacheIDs: nil,
			expectedCacheClusterIDs:    nil,
		},
		{
			name: "InvalidARNs",
			caches: []string{
				"not-an-arn",
				"arn:aws:elasticache:us-east-1",
				"",
			},
			expectedServerlessCacheIDs: nil,
			expectedCacheClusterIDs:    nil,
		},
		{
			name: "UnknownResourceType",
			caches: []string{
				"arn:aws:elasticache:us-east-1:123456789012:unknown:resource-id",
			},
			expectedServerlessCacheIDs: nil,
			expectedCacheClusterIDs:    nil,
		},
		{
			name: "MixedWithInvalidARNs",
			caches: []string{
				"arn:aws:elasticache:us-east-1:123456789012:serverlesscache:valid-cache",
				"invalid-arn",
				"arn:aws:elasticache:us-east-1:123456789012:replicationgroup:valid-cluster",
				"",
				"arn:aws:elasticache:us-east-1:123456789012:unknown:ignored",
			},
			expectedServerlessCacheIDs: []string{"valid-cache"},
			expectedCacheClusterIDs:    []string{"valid-cluster"},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			gotServerless, gotClusters := splitCacheDeploymentOptions(tc.caches)
			// require.Equal distinguishes nil from empty slices, so the nil
			// expectations above are significant.
			require.Equal(t, tc.expectedServerlessCacheIDs, gotServerless, "serverless cache IDs mismatch")
			require.Equal(t, tc.expectedCacheClusterIDs, gotClusters, "cache cluster IDs mismatch")
		})
	}
}

View file

@ -1,4 +1,4 @@
// Copyright 2021 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -17,7 +17,6 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
@ -27,7 +26,6 @@ import (
awsConfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/credentials/stscreds"
"github.com/aws/aws-sdk-go-v2/feature/ec2/imds"
"github.com/aws/aws-sdk-go-v2/service/lightsail"
"github.com/aws/aws-sdk-go-v2/service/sts"
"github.com/aws/smithy-go"
@ -95,7 +93,7 @@ func (*LightsailSDConfig) Name() string { return "lightsail" }
// NewDiscoverer returns a Discoverer for the Lightsail Config.
func (c *LightsailSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewLightsailDiscovery(c, opts.Logger, opts.Metrics)
return NewLightsailDiscovery(c, opts)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the Lightsail Config.
@ -107,30 +105,9 @@ func (c *LightsailSDConfig) UnmarshalYAML(unmarshal func(any) error) error {
return err
}
if c.Region == "" {
cfg, err := awsConfig.LoadDefaultConfig(context.Background())
if err != nil {
return err
}
if cfg.Region != "" {
// Use the region from the AWS config. It will load environment variables and shared config files.
c.Region = cfg.Region
}
if c.Region == "" {
// Try to get the region from the instance metadata service (IMDS).
imdsClient := imds.NewFromConfig(cfg)
region, err := imdsClient.GetRegion(context.Background(), &imds.GetRegionInput{})
if err != nil {
return err
}
c.Region = region.Region
}
}
if c.Region == "" {
return errors.New("lightsail SD configuration requires a region")
c.Region, err = loadRegion(context.Background(), c.Region)
if err != nil {
return fmt.Errorf("could not determine AWS region: %w", err)
}
return c.HTTPClientConfig.Validate()
@ -145,14 +122,14 @@ type LightsailDiscovery struct {
}
// NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets.
func NewLightsailDiscovery(conf *LightsailSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) {
m, ok := metrics.(*lightsailMetrics)
func NewLightsailDiscovery(conf *LightsailSDConfig, opts discovery.DiscovererOptions) (*LightsailDiscovery, error) {
m, ok := opts.Metrics.(*lightsailMetrics)
if !ok {
return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
logger = promslog.NewNopLogger()
if opts.Logger == nil {
opts.Logger = promslog.NewNopLogger()
}
d := &LightsailDiscovery{
@ -160,8 +137,9 @@ func NewLightsailDiscovery(conf *LightsailSDConfig, logger *slog.Logger, metrics
}
d.Discovery = refresh.NewDiscovery(
refresh.Options{
Logger: logger,
Logger: opts.Logger,
Mech: "lightsail",
SetName: opts.SetName,
Interval: time.Duration(d.cfg.RefreshInterval),
RefreshF: d.refresh,
MetricsInstantiator: m.refreshMetrics,
@ -210,7 +188,12 @@ func (d *LightsailDiscovery) lightsailClient(ctx context.Context) (*lightsail.Cl
cfg.Credentials = aws.NewCredentialsCache(assumeProvider)
}
d.lightsail = lightsail.NewFromConfig(cfg)
d.lightsail = lightsail.NewFromConfig(cfg, func(options *lightsail.Options) {
if d.cfg.Endpoint != "" {
options.BaseEndpoint = &d.cfg.Endpoint
}
options.HTTPClient = httpClient
})
return d.lightsail, nil
}

View file

@ -0,0 +1,32 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"github.com/prometheus/prometheus/discovery"
)
// Compile-time check that *awsMetrics satisfies discovery.DiscovererMetrics.
var _ discovery.DiscovererMetrics = (*awsMetrics)(nil)

// awsMetrics holds the refresh metrics instantiator. Its Register and
// Unregister methods are no-ops.
type awsMetrics struct {
	refreshMetrics discovery.RefreshMetricsInstantiator
}

// Register implements discovery.DiscovererMetrics. It does nothing and
// always returns nil.
func (*awsMetrics) Register() error { return nil }

// Unregister implements discovery.DiscovererMetrics. It does nothing.
func (*awsMetrics) Unregister() {}

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -0,0 +1,32 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"github.com/prometheus/prometheus/discovery"
)
// Compile-time check that *ecsMetrics satisfies discovery.DiscovererMetrics.
var _ discovery.DiscovererMetrics = (*ecsMetrics)(nil)

// ecsMetrics holds the refresh metrics instantiator. Its Register and
// Unregister methods are no-ops.
type ecsMetrics struct {
	refreshMetrics discovery.RefreshMetricsInstantiator
}

// Register implements discovery.DiscovererMetrics. It does nothing and
// always returns nil.
func (*ecsMetrics) Register() error { return nil }

// Unregister implements discovery.DiscovererMetrics. It does nothing.
func (*ecsMetrics) Unregister() {}

View file

@ -0,0 +1,32 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"github.com/prometheus/prometheus/discovery"
)
// elasticacheMetrics is a discovery.DiscovererMetrics implementation that owns
// no collectors of its own; it only carries the refresh metrics instantiator.
type elasticacheMetrics struct {
	refreshMetrics discovery.RefreshMetricsInstantiator
}

// Compile-time proof that *elasticacheMetrics satisfies discovery.DiscovererMetrics.
var _ discovery.DiscovererMetrics = (*elasticacheMetrics)(nil)

// Register implements discovery.DiscovererMetrics. There is nothing to
// register, so it always succeeds.
func (*elasticacheMetrics) Register() error { return nil }

// Unregister implements discovery.DiscovererMetrics.
func (*elasticacheMetrics) Unregister() {
	// Register does not register anything, so there is nothing to undo.
}

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

View file

@ -0,0 +1,32 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"github.com/prometheus/prometheus/discovery"
)
// mskMetrics is the discovery.DiscovererMetrics implementation for MSK service
// discovery. It owns no collectors of its own; it only carries the refresh
// metrics instantiator.
type mskMetrics struct {
	refreshMetrics discovery.RefreshMetricsInstantiator
}

// Compile-time proof that *mskMetrics satisfies discovery.DiscovererMetrics.
var _ discovery.DiscovererMetrics = (*mskMetrics)(nil)

// Register implements discovery.DiscovererMetrics. There is nothing to
// register, so it always succeeds.
func (*mskMetrics) Register() error { return nil }

// Unregister implements discovery.DiscovererMetrics.
func (*mskMetrics) Unregister() {
	// Register does not register anything, so there is nothing to undo.
}

451
discovery/aws/msk.go Normal file
View file

@ -0,0 +1,451 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"sync"
"time"
"github.com/aws/aws-sdk-go-v2/aws"
awsConfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/credentials/stscreds"
"github.com/aws/aws-sdk-go-v2/service/kafka"
"github.com/aws/aws-sdk-go-v2/service/kafka/types"
"github.com/aws/aws-sdk-go-v2/service/sts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"golang.org/x/sync/errgroup"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)
// NodeType is the kind of an MSK cluster node as classified by nodeType. Its
// string value is what gets written into the node type meta label.
type NodeType string
const (
// NodeTypeBroker marks a node that carries broker node information.
NodeTypeBroker NodeType = "BROKER"
// NodeTypeController marks a node that carries controller node information.
NodeTypeController NodeType = "CONTROLLER"
)
// Meta label names set by refresh on discovered MSK targets. Every name is
// built from the mskLabel prefix.
const (
mskLabel = model.MetaLabelPrefix + "msk_"
// Cluster labels.
mskLabelCluster = mskLabel + "cluster_"
mskLabelClusterName = mskLabelCluster + "name"
mskLabelClusterARN = mskLabelCluster + "arn"
mskLabelClusterState = mskLabelCluster + "state"
mskLabelClusterType = mskLabelCluster + "type"
mskLabelClusterVersion = mskLabelCluster + "version"
mskLabelClusterJmxExporterEnabled = mskLabelCluster + "jmx_exporter_enabled"
mskLabelClusterConfigurationARN = mskLabelCluster + "configuration_arn"
mskLabelClusterConfigurationRevision = mskLabelCluster + "configuration_revision"
mskLabelClusterKafkaVersion = mskLabelCluster + "kafka_version"
// mskLabelClusterTags is a prefix, not a complete name: refresh appends the
// sanitized tag key to it, producing one label per cluster tag.
mskLabelClusterTags = mskLabelCluster + "tag_"
// Node labels.
mskLabelNode = mskLabel + "node_"
mskLabelNodeType = mskLabelNode + "type"
mskLabelNodeARN = mskLabelNode + "arn"
mskLabelNodeAddedTime = mskLabelNode + "added_time"
mskLabelNodeInstanceType = mskLabelNode + "instance_type"
// mskLabelNodeAttachedENI is only set by refresh for broker nodes.
mskLabelNodeAttachedENI = mskLabelNode + "attached_eni"
// Broker labels. These are only set on targets derived from broker nodes.
mskLabelBroker = mskLabel + "broker_"
// mskLabelBrokerEndpointIndex holds the position of the endpoint within the
// broker's endpoint list.
mskLabelBrokerEndpointIndex = mskLabelBroker + "endpoint_index"
mskLabelBrokerID = mskLabelBroker + "id"
mskLabelBrokerClientSubnet = mskLabelBroker + "client_subnet"
mskLabelBrokerClientVPCIP = mskLabelBroker + "client_vpc_ip"
mskLabelBrokerNodeExporterEnabled = mskLabelBroker + "node_exporter_enabled"
// Controller labels. These are only set on targets derived from controller nodes.
mskLabelController = mskLabel + "controller_"
// mskLabelControllerEndpointIndex holds the position of the endpoint within
// the controller's endpoint list.
mskLabelControllerEndpointIndex = mskLabelController + "endpoint_index"
)
// DefaultMSKSDConfig is the default MSK SD configuration.
// UnmarshalYAML copies it into the receiver before decoding, so any field the
// YAML document does not set keeps the value given here.
var DefaultMSKSDConfig = MSKSDConfig{
Port: 80,
RefreshInterval: model.Duration(60 * time.Second),
RequestConcurrency: 10,
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
// init registers MSKSDConfig with the discovery package at program start.
func init() {
discovery.RegisterConfig(&MSKSDConfig{})
}
// MSKSDConfig is the configuration for MSK based service discovery.
type MSKSDConfig struct {
// Region is the AWS region to query. UnmarshalYAML resolves it through
// loadRegion, and client initialisation fails if it is still empty. It is also
// used as the Source of the returned target group.
Region string `yaml:"region"`
// Endpoint, when non-empty, overrides the base endpoint of the MSK API client.
Endpoint string `yaml:"endpoint"`
// AccessKey and SecretKey are used as static credentials only when both are
// set; otherwise the AWS SDK default credential chain applies.
AccessKey string `yaml:"access_key,omitempty"`
SecretKey config.Secret `yaml:"secret_key,omitempty"`
// Profile, when non-empty, selects the shared AWS config profile to load.
Profile string `yaml:"profile,omitempty"`
// RoleARN, when non-empty, is assumed via STS and its credentials replace
// the ones loaded from the configuration.
RoleARN string `yaml:"role_arn,omitempty"`
// Clusters lists cluster ARNs to describe. When empty, all provisioned
// clusters returned by the list API are discovered instead.
Clusters []string `yaml:"clusters,omitempty"`
// Port is joined with every node endpoint to form the target address.
Port int `yaml:"port"`
// RefreshInterval is the interval between two discovery refreshes.
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
// RequestConcurrency is passed to errgroup's SetLimit for the concurrent
// describe-cluster and list-nodes requests.
RequestConcurrency int `yaml:"request_concurrency,omitempty"`
// HTTPClientConfig configures the HTTP client used for the AWS API calls.
HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
}
// NewDiscovererMetrics implements discovery.Config. The registerer is not
// needed because MSK discovery only uses the shared refresh metrics, which are
// handed over through rmi.
func (*MSKSDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
	metrics := new(mskMetrics)
	metrics.refreshMetrics = rmi
	return metrics
}
// Name reports the name under which the MSK configuration is known: "msk".
func (*MSKSDConfig) Name() string {
	return "msk"
}
// NewDiscoverer returns a Discoverer for the MSK Config.
//
// On failure it returns a literal nil Discoverer together with the error.
// Forwarding the (*MSKDiscovery, error) pair directly would wrap a nil pointer
// in a non-nil interface value, which compares unequal to nil.
func (c *MSKSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
	d, err := NewMSKDiscovery(c, opts)
	if err != nil {
		return nil, err
	}
	return d, nil
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the MSK Config.
//
// The receiver is first reset to DefaultMSKSDConfig so that unset fields keep
// their defaults, then decoded, validated, and finally has its region resolved
// through loadRegion.
//
// It returns an error if decoding fails, if request_concurrency is zero, if the
// region cannot be determined, or if the HTTP client configuration is invalid.
func (c *MSKSDConfig) UnmarshalYAML(unmarshal func(any) error) error {
	*c = DefaultMSKSDConfig
	// The local type has the same fields but no UnmarshalYAML method, which
	// prevents infinite recursion while decoding.
	type plain MSKSDConfig
	err := unmarshal((*plain)(c))
	if err != nil {
		return err
	}

	// The value is passed to errgroup's SetLimit; a limit of zero prevents any
	// goroutine from being started, so every refresh would block forever.
	// Negative values are left alone: errgroup treats them as "no limit".
	if c.RequestConcurrency == 0 {
		return errors.New("MSK SD configuration requires request_concurrency to be non-zero")
	}

	c.Region, err = loadRegion(context.Background(), c.Region)
	if err != nil {
		return fmt.Errorf("could not determine AWS region: %w", err)
	}
	return c.HTTPClientConfig.Validate()
}
// mskClient is the subset of the AWS Kafka (MSK) API client that the
// discoverer calls. MSKDiscovery stores its client behind this interface;
// initMskClient fills it with the client returned by kafka.NewFromConfig.
type mskClient interface {
// DescribeClusterV2 is used to fetch the details of one cluster by ARN.
DescribeClusterV2(context.Context, *kafka.DescribeClusterV2Input, ...func(*kafka.Options)) (*kafka.DescribeClusterV2Output, error)
// ListClustersV2 is used to page through clusters and to probe credentials.
ListClustersV2(context.Context, *kafka.ListClustersV2Input, ...func(*kafka.Options)) (*kafka.ListClustersV2Output, error)
// ListNodes is used to page through the nodes of one cluster.
ListNodes(context.Context, *kafka.ListNodesInput, ...func(*kafka.Options)) (*kafka.ListNodesOutput, error)
}
// MSKDiscovery periodically performs MSK-SD requests. It implements
// the Discoverer interface.
type MSKDiscovery struct {
// Discovery is the embedded refresh-based discoverer that invokes refresh.
*refresh.Discovery
logger *slog.Logger
cfg *MSKSDConfig
// msk is the API client. It stays nil until initMskClient creates it during
// a refresh.
msk mskClient
}
// NewMSKDiscovery returns a new MSKDiscovery which periodically refreshes its targets.
//
// opts.Metrics must be the *mskMetrics created by MSKSDConfig.NewDiscovererMetrics;
// any other type yields an error. A nil opts.Logger is replaced by a no-op
// logger. The AWS client is not created here; it is initialised during refresh.
func NewMSKDiscovery(conf *MSKSDConfig, opts discovery.DiscovererOptions) (*MSKDiscovery, error) {
	m, ok := opts.Metrics.(*mskMetrics)
	if !ok {
		return nil, errors.New("invalid discovery metrics type")
	}

	if opts.Logger == nil {
		opts.Logger = promslog.NewNopLogger()
	}

	d := &MSKDiscovery{
		logger: opts.Logger,
		cfg:    conf,
	}
	d.Discovery = refresh.NewDiscovery(
		refresh.Options{
			Logger: opts.Logger,
			Mech:   "msk",
			// Forward the set name like the other refresh-based discoverers do,
			// so the refresh machinery knows which discovery set this is.
			SetName:             opts.SetName,
			Interval:            time.Duration(d.cfg.RefreshInterval),
			RefreshF:            d.refresh,
			MetricsInstantiator: m.refreshMetrics,
		},
	)
	return d, nil
}
// initMskClient creates the MSK API client on first use and stores it in
// d.msk; later calls return immediately once d.msk is set.
//
// It fails if no region is configured, if the HTTP client or AWS config cannot
// be built, or if the ListClustersV2 probe call (bounded to 10 seconds) fails.
//
// NOTE(review): d.msk is assigned before the probe runs, so after a failed
// probe the next call sees a non-nil client and skips the probe. Confirm this
// is intended.
func (d *MSKDiscovery) initMskClient(ctx context.Context) error {
	if d.msk != nil {
		return nil
	}
	if d.cfg.Region == "" {
		return errors.New("region must be set for MSK service discovery")
	}

	// HTTP client derived from the user-supplied HTTPClientConfig.
	httpClient, err := config.NewClientFromConfig(d.cfg.HTTPClientConfig, "msk_sd")
	if err != nil {
		return err
	}

	// Region and HTTP client are always applied; credentials and profile are
	// added only when configured.
	loadOpts := []func(*awsConfig.LoadOptions) error{
		awsConfig.WithRegion(d.cfg.Region),
		awsConfig.WithHTTPClient(httpClient),
	}
	// Static credentials need both halves; with anything less the AWS SDK
	// default credential chain is used.
	if d.cfg.AccessKey != "" && d.cfg.SecretKey != "" {
		static := credentials.NewStaticCredentialsProvider(d.cfg.AccessKey, string(d.cfg.SecretKey), "")
		loadOpts = append(loadOpts, awsConfig.WithCredentialsProvider(static))
	}
	if d.cfg.Profile != "" {
		loadOpts = append(loadOpts, awsConfig.WithSharedConfigProfile(d.cfg.Profile))
	}

	awsCfg, err := awsConfig.LoadDefaultConfig(ctx, loadOpts...)
	if err != nil {
		d.logger.Error("Failed to create AWS config", "error", err)
		return fmt.Errorf("could not create aws config: %w", err)
	}

	// With a role ARN configured, swap in cached assume-role credentials
	// obtained through STS using the config loaded above.
	if d.cfg.RoleARN != "" {
		stsClient := sts.NewFromConfig(awsCfg)
		awsCfg.Credentials = aws.NewCredentialsCache(stscreds.NewAssumeRoleProvider(stsClient, d.cfg.RoleARN))
	}

	d.msk = kafka.NewFromConfig(awsCfg, func(o *kafka.Options) {
		if d.cfg.Endpoint != "" {
			o.BaseEndpoint = &d.cfg.Endpoint
		}
		o.HTTPClient = httpClient
	})

	// Probe the credentials with a cheap API call before reporting success.
	probeCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()
	if _, err := d.msk.ListClustersV2(probeCtx, &kafka.ListClustersV2Input{}); err != nil {
		d.logger.Error("Failed to test MSK credentials", "error", err)
		return fmt.Errorf("MSK credential test failed: %w", err)
	}
	return nil
}
// describeClusters describes the clusters with the given ARNs and returns their details.
//
// Requests run concurrently, bounded by the configured request concurrency.
// The result has the same order as clusterARNs. If any request fails, or a
// response carries no cluster information, the first such error is returned
// and the result is nil.
func (d *MSKDiscovery) describeClusters(ctx context.Context, clusterARNs []string) ([]types.Cluster, error) {
	// Each goroutine writes only its own slot, so no lock is needed and the
	// output order does not depend on scheduling.
	clusters := make([]types.Cluster, len(clusterARNs))

	errg, ectx := errgroup.WithContext(ctx)
	errg.SetLimit(d.cfg.RequestConcurrency)
	for i, clusterARN := range clusterARNs {
		errg.Go(func() error {
			resp, err := d.msk.DescribeClusterV2(ectx, &kafka.DescribeClusterV2Input{
				ClusterArn: aws.String(clusterARN),
			})
			if err != nil {
				return fmt.Errorf("could not describe cluster %v: %w", clusterARN, err)
			}
			if resp == nil || resp.ClusterInfo == nil {
				return fmt.Errorf("could not describe cluster %v: response contains no cluster info", clusterARN)
			}
			clusters[i] = *resp.ClusterInfo
			return nil
		})
	}

	// Wait in its own statement: in `return clusters, errg.Wait()` the language
	// does not specify whether the variable is read before or after the call.
	if err := errg.Wait(); err != nil {
		return nil, err
	}
	return clusters, nil
}
// listClusters pages through ListClustersV2, restricted to the "PROVISIONED"
// cluster type and 100 results per page, and returns every cluster found. The
// first failing page aborts the listing with a nil result.
func (d *MSKDiscovery) listClusters(ctx context.Context) ([]types.Cluster, error) {
	var (
		found []types.Cluster
		token *string
	)
	for {
		page, err := d.msk.ListClustersV2(ctx, &kafka.ListClustersV2Input{
			ClusterTypeFilter: aws.String("PROVISIONED"),
			MaxResults:        aws.Int32(100),
			NextToken:         token,
		})
		if err != nil {
			return nil, fmt.Errorf("could not list clusters: %w", err)
		}
		found = append(found, page.ClusterInfoList...)

		// A missing continuation token marks the last page.
		if token = page.NextToken; token == nil {
			return found, nil
		}
	}
}
// listNodes fetches the nodes of every given cluster and returns them keyed by
// cluster ARN. Clusters are processed concurrently, bounded by the configured
// request concurrency; within a cluster, ListNodes is paged 100 nodes at a
// time. The map is returned together with the first error, if any.
func (d *MSKDiscovery) listNodes(ctx context.Context, clusters []types.Cluster) (map[string][]types.NodeInfo, error) {
	var (
		guard      sync.Mutex
		nodesByARN = make(map[string][]types.NodeInfo)
	)

	group, gctx := errgroup.WithContext(ctx)
	group.SetLimit(d.cfg.RequestConcurrency)
	for i := range clusters {
		arn := aws.ToString(clusters[i].ClusterArn)
		group.Go(func() error {
			var (
				nodes []types.NodeInfo
				token *string
			)
			for {
				page, err := d.msk.ListNodes(gctx, &kafka.ListNodesInput{
					ClusterArn: aws.String(arn),
					MaxResults: aws.Int32(100),
					NextToken:  token,
				})
				if err != nil {
					return fmt.Errorf("could not list nodes for cluster %v: %w", arn, err)
				}
				nodes = append(nodes, page.NodeInfoList...)

				// A missing continuation token marks the last page.
				if token = page.NextToken; token == nil {
					break
				}
			}

			// The map is shared between all workers, so guard the write.
			guard.Lock()
			nodesByARN[arn] = nodes
			guard.Unlock()
			return nil
		})
	}

	err := group.Wait()
	return nodesByARN, err
}
// refresh discovers MSK clusters and their nodes and returns them as a single
// target group whose Source is the configured region.
//
// If cluster ARNs are configured they are described individually; otherwise
// all provisioned clusters are listed. Each endpoint of each broker or
// controller node becomes one target, addressed with the configured port.
// Nodes that are neither broker nor controller produce no target.
//
// Optional parts of the API response are read nil-safely: a cluster without
// provisioned settings, open monitoring or broker software information gets
// "false" or empty label values instead of causing a nil pointer dereference.
//
// Any error from client initialisation or from the API calls is returned and
// no target group is produced.
func (d *MSKDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
	if err := d.initMskClient(ctx); err != nil {
		return nil, err
	}

	var (
		clusters []types.Cluster
		err      error
	)
	if len(d.cfg.Clusters) > 0 {
		clusters, err = d.describeClusters(ctx, d.cfg.Clusters)
	} else {
		clusters, err = d.listClusters(ctx)
	}
	if err != nil {
		return nil, err
	}

	clusterNodeMap, err := d.listNodes(ctx, clusters)
	if err != nil {
		return nil, err
	}

	tg := &targetgroup.Group{
		Source: d.cfg.Region,
	}
	port := strconv.Itoa(d.cfg.Port)

	// Building labels is pure in-memory work, so it is done sequentially. This
	// keeps the target order deterministic and needs no locking.
	for i := range clusters {
		cluster := &clusters[i]
		clusterLabels := mskClusterLabels(cluster)
		_, nodeExporterEnabled := mskExporterFlags(cluster)

		for _, node := range clusterNodeMap[aws.ToString(cluster.ClusterArn)] {
			labels := clusterLabels.Clone()
			labels[mskLabelNodeARN] = model.LabelValue(aws.ToString(node.NodeARN))
			labels[mskLabelNodeAddedTime] = model.LabelValue(aws.ToString(node.AddedToClusterTime))
			labels[mskLabelNodeInstanceType] = model.LabelValue(aws.ToString(node.InstanceType))

			var (
				endpoints     []string
				endpointLabel model.LabelName
			)
			switch nodeType(node) {
			case NodeTypeBroker:
				broker := node.BrokerNodeInfo
				labels[mskLabelNodeType] = model.LabelValue(NodeTypeBroker)
				labels[mskLabelNodeAttachedENI] = model.LabelValue(aws.ToString(broker.AttachedENIId))
				labels[mskLabelBrokerID] = model.LabelValue(fmt.Sprintf("%.0f", aws.ToFloat64(broker.BrokerId)))
				labels[mskLabelBrokerClientSubnet] = model.LabelValue(aws.ToString(broker.ClientSubnet))
				labels[mskLabelBrokerClientVPCIP] = model.LabelValue(aws.ToString(broker.ClientVpcIpAddress))
				labels[mskLabelBrokerNodeExporterEnabled] = model.LabelValue(strconv.FormatBool(nodeExporterEnabled))
				endpoints, endpointLabel = broker.Endpoints, mskLabelBrokerEndpointIndex
			case NodeTypeController:
				labels[mskLabelNodeType] = model.LabelValue(NodeTypeController)
				endpoints, endpointLabel = node.ControllerNodeInfo.Endpoints, mskLabelControllerEndpointIndex
			default:
				// Neither broker nor controller details are present, so there
				// is no endpoint to turn into a target.
				continue
			}

			for idx, endpoint := range endpoints {
				target := labels.Clone()
				target[endpointLabel] = model.LabelValue(strconv.Itoa(idx))
				target[model.AddressLabel] = model.LabelValue(net.JoinHostPort(endpoint, port))
				tg.Targets = append(tg.Targets, target)
			}
		}
	}

	return []*targetgroup.Group{tg}, nil
}

// mskClusterLabels builds the labels shared by every target of one cluster,
// including one label per cluster tag.
func mskClusterLabels(cluster *types.Cluster) model.LabelSet {
	// The broker software information is optional in the API response; absent
	// values are reported as empty label values.
	var configARN, configRevision, kafkaVersion string
	if p := cluster.Provisioned; p != nil && p.CurrentBrokerSoftwareInfo != nil {
		info := p.CurrentBrokerSoftwareInfo
		configARN = aws.ToString(info.ConfigurationArn)
		kafkaVersion = aws.ToString(info.KafkaVersion)
		if info.ConfigurationRevision != nil {
			configRevision = strconv.FormatInt(*info.ConfigurationRevision, 10)
		}
	}
	jmxEnabled, _ := mskExporterFlags(cluster)

	labels := model.LabelSet{
		mskLabelClusterName:                  model.LabelValue(aws.ToString(cluster.ClusterName)),
		mskLabelClusterARN:                   model.LabelValue(aws.ToString(cluster.ClusterArn)),
		mskLabelClusterState:                 model.LabelValue(string(cluster.State)),
		mskLabelClusterType:                  model.LabelValue(string(cluster.ClusterType)),
		mskLabelClusterVersion:               model.LabelValue(aws.ToString(cluster.CurrentVersion)),
		mskLabelClusterJmxExporterEnabled:    model.LabelValue(strconv.FormatBool(jmxEnabled)),
		mskLabelClusterConfigurationARN:      model.LabelValue(configARN),
		mskLabelClusterConfigurationRevision: model.LabelValue(configRevision),
		mskLabelClusterKafkaVersion:          model.LabelValue(kafkaVersion),
	}
	for key, value := range cluster.Tags {
		labels[model.LabelName(mskLabelClusterTags+strutil.SanitizeLabelName(key))] = model.LabelValue(value)
	}
	return labels
}

// mskExporterFlags reports whether the JMX exporter and the node exporter are
// enabled for the cluster, treating every missing level of the open monitoring
// settings as "not enabled".
func mskExporterFlags(cluster *types.Cluster) (jmx, node bool) {
	p := cluster.Provisioned
	if p == nil || p.OpenMonitoring == nil || p.OpenMonitoring.Prometheus == nil {
		return false, false
	}
	prom := p.OpenMonitoring.Prometheus
	if prom.JmxExporter != nil {
		jmx = aws.ToBool(prom.JmxExporter.EnabledInBroker)
	}
	if prom.NodeExporter != nil {
		node = aws.ToBool(prom.NodeExporter.EnabledInBroker)
	}
	return jmx, node
}
// nodeType classifies a node by which detail record it carries: broker
// information takes precedence over controller information. The empty
// NodeType is returned when neither is present.
func nodeType(node types.NodeInfo) NodeType {
	switch {
	case node.BrokerNodeInfo != nil:
		return NodeTypeBroker
	case node.ControllerNodeInfo != nil:
		return NodeTypeController
	default:
		return ""
	}
}

1131
discovery/aws/msk_test.go Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -127,7 +127,7 @@ func (*SDConfig) Name() string { return "azure" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger, opts.Metrics)
return NewDiscovery(c, opts)
}
func validateAuthParam(param, name string) error {
@ -178,28 +178,29 @@ type Discovery struct {
}
// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets.
func NewDiscovery(cfg *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*azureMetrics)
func NewDiscovery(cfg *SDConfig, opts discovery.DiscovererOptions) (*Discovery, error) {
m, ok := opts.Metrics.(*azureMetrics)
if !ok {
return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
logger = promslog.NewNopLogger()
if opts.Logger == nil {
opts.Logger = promslog.NewNopLogger()
}
l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000)))
d := &Discovery{
cfg: cfg,
port: cfg.Port,
logger: logger,
logger: opts.Logger,
cache: l,
metrics: m,
}
d.Discovery = refresh.NewDiscovery(
refresh.Options{
Logger: logger,
Logger: opts.Logger,
Mech: "azure",
SetName: opts.SetName,
Interval: time.Duration(cfg.RefreshInterval),
RefreshF: d.refresh,
MetricsInstantiator: m.refreshMetrics,

View file

@ -1,4 +1,4 @@
// Copyright 2015 The Prometheus Authors
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -659,7 +659,11 @@ func TestAzureRefresh(t *testing.T) {
refreshMetrics := discovery.NewRefreshMetrics(reg)
metrics := azureSDConfig.NewDiscovererMetrics(reg, refreshMetrics)
sd, err := NewDiscovery(azureSDConfig, nil, metrics)
sd, err := NewDiscovery(azureSDConfig, discovery.DiscovererOptions{
Logger: nil,
Metrics: metrics,
SetName: "azure",
})
require.NoError(t, err)
tg, err := sd.refreshAzureClient(context.Background(), azureClient)

Some files were not shown because too many files have changed in this diff Show more