From 7f99d2930d46047a1b6e0f6ad1a0962d20a6e380 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Thu, 26 Sep 2024 11:07:15 +0100 Subject: [PATCH 1/6] [BUGFIX] PromQL: make sort_by_label stable Go's sorting functions can re-order equal elements, so the strategy of sorting by the fallback ordering first does not always work. Pulling the fallback into the main comparison function is more reliable and more efficient. Signed-off-by: Bryan Boreham --- CHANGELOG.md | 2 +- promql/functions.go | 34 ++++++++-------------------------- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64618d552e..c17eb8cf2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ * [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379 * [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450 * [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477 -* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655 +* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655, #14985 * [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621 * [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413 * [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816 diff --git a/promql/functions.go b/promql/functions.go index 182b69b080..04b6848b43 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -415,22 +415,12 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(a.Metric, b.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -442,7 +432,8 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode return +1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil @@ -450,22 +441,12 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(b.Metric, a.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -477,7 +458,8 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval return -1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return -labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil From 6b247c50d236c936e3499ccbb164749268537030 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 30 Sep 2024 17:09:01 -0400 Subject: [PATCH 2/6] Revert "Merge pull request #14769 from roidelapluie/autoreload" This reverts commit 50f5327f83de448354a5873b7934b7f6bb662ba1, reversing changes made to eb4004c344bf78b9e6f7e62a464a33db39a147cb. Signed-off-by: Bryan Boreham --- CHANGELOG.md | 1 - cmd/prometheus/main.go | 55 +------- config/reload.go | 92 ------------- config/reload_test.go | 222 -------------------------------- docs/command-line/prometheus.md | 3 +- docs/feature_flags.md | 13 -- 6 files changed, 2 insertions(+), 384 deletions(-) delete mode 100644 config/reload.go delete mode 100644 config/reload_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index c17eb8cf2e..88c27741b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,6 @@ ## 2.55.0-rc.0 / 2024-09-20 * [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727 -* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769 * [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817 * [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815 * [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index ee6cd0c2eb..b3bcb78b78 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -154,9 +154,6 @@ type flagConfig struct { RemoteFlushDeadline model.Duration nameEscapingScheme string - enableAutoReload bool - autoReloadInterval model.Duration - featureList []string memlimitRatio float64 // These options are extracted from featureList @@ -215,12 +212,6 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { case "auto-gomaxprocs": c.enableAutoGOMAXPROCS = true level.Info(logger).Log("msg", "Automatically set GOMAXPROCS to match Linux container CPU quota") - case "auto-reload-config": - c.enableAutoReload = true - if s := time.Duration(c.autoReloadInterval).Seconds(); s > 0 && s < 1 { - c.autoReloadInterval, _ = model.ParseDuration("1s") - } - level.Info(logger).Log("msg", fmt.Sprintf("Enabled automatic configuration file reloading. Checking for configuration changes every %s.", c.autoReloadInterval)) case "auto-gomemlimit": c.enableAutoGOMEMLIMIT = true level.Info(logger).Log("msg", "Automatically set GOMEMLIMIT to match Linux container or system memory limit") @@ -311,9 +302,6 @@ func main() { a.Flag("config.file", "Prometheus configuration file path."). Default("prometheus.yml").StringVar(&cfg.configFile) - a.Flag("config.auto-reload-interval", "Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes."). - Default("30s").SetValue(&cfg.autoReloadInterval) - a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry. Can be repeated."). Default("0.0.0.0:9090").StringsVar(&cfg.web.ListenAddresses) @@ -504,7 +492,7 @@ func main() { a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme) - a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, auto-reload-config, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). + a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). Default("").StringsVar(&cfg.featureList) promlogflag.AddFlags(a, &cfg.promlogConfig) @@ -1142,15 +1130,6 @@ func main() { hup := make(chan os.Signal, 1) signal.Notify(hup, syscall.SIGHUP) cancel := make(chan struct{}) - - var checksum string - if cfg.enableAutoReload { - checksum, err = config.GenerateChecksum(cfg.configFile) - if err != nil { - level.Error(logger).Log("msg", "Failed to generate initial checksum for configuration file", "err", err) - } - } - g.Add( func() error { <-reloadReady.C @@ -1160,12 +1139,6 @@ func main() { case <-hup: if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { level.Error(logger).Log("msg", "Error reloading config", "err", err) - } else if cfg.enableAutoReload { - if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil { - checksum = currentChecksum - } else { - level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err) - } } case rc := <-webHandler.Reload(): if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { @@ -1173,32 +1146,6 @@ func main() { rc <- err } else { rc <- nil - if cfg.enableAutoReload { - if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil { - checksum = currentChecksum - } else { - level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err) - } - } - } - case <-time.Tick(time.Duration(cfg.autoReloadInterval)): - if !cfg.enableAutoReload { - continue - } - currentChecksum, err := config.GenerateChecksum(cfg.configFile) - if err != nil { - level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err) - continue - } - if currentChecksum == checksum { - continue - } - level.Info(logger).Log("msg", "Configuration file change detected, reloading the configuration.") - - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { - level.Error(logger).Log("msg", "Error reloading config", "err", err) - } else { - checksum = currentChecksum } case <-cancel: return nil diff --git a/config/reload.go b/config/reload.go deleted file mode 100644 index 8be1b28d8a..0000000000 --- a/config/reload.go +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2024 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package config - -import ( - "crypto/sha256" - "encoding/hex" - "fmt" - "os" - "path/filepath" - - "gopkg.in/yaml.v2" -) - -type ExternalFilesConfig struct { - RuleFiles []string `yaml:"rule_files"` - ScrapeConfigFiles []string `yaml:"scrape_config_files"` -} - -// GenerateChecksum generates a checksum of the YAML file and the files it references. -func GenerateChecksum(yamlFilePath string) (string, error) { - hash := sha256.New() - - yamlContent, err := os.ReadFile(yamlFilePath) - if err != nil { - return "", fmt.Errorf("error reading YAML file: %w", err) - } - _, err = hash.Write(yamlContent) - if err != nil { - return "", fmt.Errorf("error writing YAML file to hash: %w", err) - } - - var config ExternalFilesConfig - if err := yaml.Unmarshal(yamlContent, &config); err != nil { - return "", fmt.Errorf("error unmarshalling YAML: %w", err) - } - - dir := filepath.Dir(yamlFilePath) - - for i, file := range config.RuleFiles { - config.RuleFiles[i] = filepath.Join(dir, file) - } - for i, file := range config.ScrapeConfigFiles { - config.ScrapeConfigFiles[i] = filepath.Join(dir, file) - } - - files := map[string][]string{ - "r": config.RuleFiles, // "r" for rule files - "s": config.ScrapeConfigFiles, // "s" for scrape config files - } - - for _, prefix := range []string{"r", "s"} { - for _, pattern := range files[prefix] { - matchingFiles, err := filepath.Glob(pattern) - if err != nil { - return "", fmt.Errorf("error finding files with pattern %q: %w", pattern, err) - } - - for _, file := range matchingFiles { - // Write prefix to the hash ("r" or "s") followed by \0, then - // the file path. - _, err = hash.Write([]byte(prefix + "\x00" + file + "\x00")) - if err != nil { - return "", fmt.Errorf("error writing %q path to hash: %w", file, err) - } - - // Read and hash the content of the file. - content, err := os.ReadFile(file) - if err != nil { - return "", fmt.Errorf("error reading file %s: %w", file, err) - } - _, err = hash.Write(append(content, []byte("\x00")...)) - if err != nil { - return "", fmt.Errorf("error writing %q content to hash: %w", file, err) - } - } - } - } - - return hex.EncodeToString(hash.Sum(nil)), nil -} diff --git a/config/reload_test.go b/config/reload_test.go deleted file mode 100644 index f0f44f3588..0000000000 --- a/config/reload_test.go +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright 2024 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package config - -import ( - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestGenerateChecksum(t *testing.T) { - tmpDir := t.TempDir() - - // Define paths for the temporary files. - yamlFilePath := filepath.Join(tmpDir, "test.yml") - ruleFilePath := filepath.Join(tmpDir, "rule_file.yml") - scrapeConfigFilePath := filepath.Join(tmpDir, "scrape_config.yml") - - // Define initial and modified content for the files. - originalRuleContent := "groups:\n- name: example\n rules:\n - alert: ExampleAlert" - modifiedRuleContent := "groups:\n- name: example\n rules:\n - alert: ModifiedAlert" - - originalScrapeConfigContent := "scrape_configs:\n- job_name: example" - modifiedScrapeConfigContent := "scrape_configs:\n- job_name: modified_example" - - // Define YAML content referencing the rule and scrape config files. - yamlContent := ` -rule_files: - - rule_file.yml -scrape_config_files: - - scrape_config.yml -` - - // Write initial content to files. - require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644)) - require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644)) - require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644)) - - // Generate the original checksum. - originalChecksum := calculateChecksum(t, yamlFilePath) - - t.Run("Rule File Change", func(t *testing.T) { - // Modify the rule file. - require.NoError(t, os.WriteFile(ruleFilePath, []byte(modifiedRuleContent), 0o644)) - - // Checksum should change. - modifiedChecksum := calculateChecksum(t, yamlFilePath) - require.NotEqual(t, originalChecksum, modifiedChecksum) - - // Revert the rule file. - require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644)) - - // Checksum should return to the original. - revertedChecksum := calculateChecksum(t, yamlFilePath) - require.Equal(t, originalChecksum, revertedChecksum) - }) - - t.Run("Scrape Config Change", func(t *testing.T) { - // Modify the scrape config file. - require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(modifiedScrapeConfigContent), 0o644)) - - // Checksum should change. - modifiedChecksum := calculateChecksum(t, yamlFilePath) - require.NotEqual(t, originalChecksum, modifiedChecksum) - - // Revert the scrape config file. - require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644)) - - // Checksum should return to the original. - revertedChecksum := calculateChecksum(t, yamlFilePath) - require.Equal(t, originalChecksum, revertedChecksum) - }) - - t.Run("Rule File Deletion", func(t *testing.T) { - // Delete the rule file. - require.NoError(t, os.Remove(ruleFilePath)) - - // Checksum should change. - deletedChecksum := calculateChecksum(t, yamlFilePath) - require.NotEqual(t, originalChecksum, deletedChecksum) - - // Restore the rule file. - require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644)) - - // Checksum should return to the original. - revertedChecksum := calculateChecksum(t, yamlFilePath) - require.Equal(t, originalChecksum, revertedChecksum) - }) - - t.Run("Scrape Config Deletion", func(t *testing.T) { - // Delete the scrape config file. - require.NoError(t, os.Remove(scrapeConfigFilePath)) - - // Checksum should change. - deletedChecksum := calculateChecksum(t, yamlFilePath) - require.NotEqual(t, originalChecksum, deletedChecksum) - - // Restore the scrape config file. - require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644)) - - // Checksum should return to the original. - revertedChecksum := calculateChecksum(t, yamlFilePath) - require.Equal(t, originalChecksum, revertedChecksum) - }) - - t.Run("Main File Change", func(t *testing.T) { - // Modify the main YAML file. - modifiedYamlContent := ` -global: - scrape_interval: 3s -rule_files: - - rule_file.yml -scrape_config_files: - - scrape_config.yml -` - require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644)) - - // Checksum should change. - modifiedChecksum := calculateChecksum(t, yamlFilePath) - require.NotEqual(t, originalChecksum, modifiedChecksum) - - // Revert the main YAML file. - require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644)) - - // Checksum should return to the original. - revertedChecksum := calculateChecksum(t, yamlFilePath) - require.Equal(t, originalChecksum, revertedChecksum) - }) - - t.Run("Rule File Removed from YAML Config", func(t *testing.T) { - // Modify the YAML content to remove the rule file. - modifiedYamlContent := ` -scrape_config_files: - - scrape_config.yml -` - require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644)) - - // Checksum should change. - modifiedChecksum := calculateChecksum(t, yamlFilePath) - require.NotEqual(t, originalChecksum, modifiedChecksum) - - // Revert the YAML content. - require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644)) - - // Checksum should return to the original. - revertedChecksum := calculateChecksum(t, yamlFilePath) - require.Equal(t, originalChecksum, revertedChecksum) - }) - - t.Run("Scrape Config Removed from YAML Config", func(t *testing.T) { - // Modify the YAML content to remove the scrape config file. - modifiedYamlContent := ` -rule_files: - - rule_file.yml -` - require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644)) - - // Checksum should change. - modifiedChecksum := calculateChecksum(t, yamlFilePath) - require.NotEqual(t, originalChecksum, modifiedChecksum) - - // Revert the YAML content. - require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644)) - - // Checksum should return to the original. - revertedChecksum := calculateChecksum(t, yamlFilePath) - require.Equal(t, originalChecksum, revertedChecksum) - }) - - t.Run("Empty Rule File", func(t *testing.T) { - // Write an empty rule file. - require.NoError(t, os.WriteFile(ruleFilePath, []byte(""), 0o644)) - - // Checksum should change. - emptyChecksum := calculateChecksum(t, yamlFilePath) - require.NotEqual(t, originalChecksum, emptyChecksum) - - // Restore the rule file. - require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644)) - - // Checksum should return to the original. - revertedChecksum := calculateChecksum(t, yamlFilePath) - require.Equal(t, originalChecksum, revertedChecksum) - }) - - t.Run("Empty Scrape Config File", func(t *testing.T) { - // Write an empty scrape config file. - require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(""), 0o644)) - - // Checksum should change. - emptyChecksum := calculateChecksum(t, yamlFilePath) - require.NotEqual(t, originalChecksum, emptyChecksum) - - // Restore the scrape config file. - require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644)) - - // Checksum should return to the original. - revertedChecksum := calculateChecksum(t, yamlFilePath) - require.Equal(t, originalChecksum, revertedChecksum) - }) -} - -// calculateChecksum generates a checksum for the given YAML file path. -func calculateChecksum(t *testing.T, yamlFilePath string) string { - checksum, err := GenerateChecksum(yamlFilePath) - require.NoError(t, err) - require.NotEmpty(t, checksum) - return checksum -} diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index 8fefa8ecc9..7d9e5a3c80 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -15,7 +15,6 @@ The Prometheus monitoring server | -h, --help | Show context-sensitive help (also try --help-long and --help-man). | | | --version | Show application version. | | | --config.file | Prometheus configuration file path. | `prometheus.yml` | -| --config.auto-reload-interval | Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes. | `30s` | | --web.listen-address ... | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` | | --auto-gomemlimit.ratio | The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` | | --web.config.file | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | | @@ -58,7 +57,7 @@ The Prometheus monitoring server | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | | --scrape.name-escaping-scheme | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` | -| --enable-feature ... | Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, auto-reload-config, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --enable-feature ... | Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | diff --git a/docs/feature_flags.md b/docs/feature_flags.md index 51c2a9b314..7b07a04d0e 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -265,16 +265,3 @@ This allows optionally preserving the `__name__` label via the `label_replace` a When enabled, changes the metric and label name validation scheme inside Prometheus to allow the full UTF-8 character set. By itself, this flag does not enable the request of UTF-8 names via content negotiation. Users will also have to set `metric_name_validation_scheme` in scrape configs to enable the feature either on the global config or on a per-scrape config basis. - -## Auto Reload Config - -`--enable-feature=auto-reload-config` - -When enabled, Prometheus will automatically reload its configuration file at a -specified interval. The interval is defined by the -`--config.auto-reload-interval` flag, which defaults to `30s`. - -Configuration reloads are triggered by detecting changes in the checksum of the -main configuration file or any referenced files, such as rule and scrape -configurations. To ensure consistency and avoid issues during reloads, it's -recommended to update these files atomically. From 105c692f77db299c0a64f293460643e35b22d75b Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 7 Oct 2024 13:50:01 +0100 Subject: [PATCH 3/6] [BUGFIX] TSDB: Don't read in-order chunks from before head MinTime Because we are reimplementing the `IndexReader` to fetch in-order and out-of-order chunks together, we must reproduce the behaviour of `Head.indexRange()`, which floors the minimum time queried at `head.MinTime()`. Signed-off-by: Bryan Boreham --- tsdb/db.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsdb/db.go b/tsdb/db.go index 2d2759b61c..a339414c7b 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -2043,7 +2043,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) var headQuerier storage.Querier - inoMint := mint + inoMint := max(db.head.MinTime(), mint) if maxt >= db.head.MinTime() || overlapsOOO { rh := NewRangeHead(db.head, mint, maxt) var err error @@ -2121,7 +2121,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) var headQuerier storage.ChunkQuerier - inoMint := mint + inoMint := max(db.head.MinTime(), mint) if maxt >= db.head.MinTime() || overlapsOOO { rh := NewRangeHead(db.head, mint, maxt) headQuerier, err = db.blockChunkQuerierFunc(rh, mint, maxt) From f7b396a1dc6c417ee5dae6457d58b114ecb65a64 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 24 Sep 2024 12:03:56 +0200 Subject: [PATCH 4/6] promql.Engine: Refactor vector selector evaluation into a method (#14900) New method is named `evalVectorSelector`. --------- Signed-off-by: Arve Knudsen --- promql/engine.go | 117 +++++++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 50 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index b583e12d57..86e76bb70d 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -998,6 +998,8 @@ func extractGroupsFromPath(p []parser.Node) (bool, []string) { return false, nil } +// checkAndExpandSeriesSet expands expr's UnexpandedSeriesSet into expr's Series. +// If the Series field is already non-nil, it's a no-op. func checkAndExpandSeriesSet(ctx context.Context, expr parser.Expr) (annotations.Annotations, error) { switch e := expr.(type) { case *parser.MatrixSelector: @@ -1449,6 +1451,70 @@ func (ev *evaluator) rangeEvalAgg(ctx context.Context, aggExpr *parser.Aggregate return result, warnings } +// evalVectorSelector generates a Matrix between ev.startTimestamp and ev.endTimestamp (inclusive), each point spaced ev.interval apart, from vs. +// vs.Series has to be expanded before calling this method. +// For every series iterator in vs.Series, the method iterates in ev.interval sized steps from ev.startTimestamp until and including ev.endTimestamp, +// collecting every corresponding sample (obtained via ev.vectorSelectorSingle) into a Series. +// All of the generated Series are collected into a Matrix, that gets returned. +func (ev *evaluator) evalVectorSelector(ctx context.Context, vs *parser.VectorSelector) Matrix { + numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 + + mat := make(Matrix, 0, len(vs.Series)) + var prevSS *Series + it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta)) + var chkIter chunkenc.Iterator + for _, s := range vs.Series { + if err := contextDone(ctx, "expression evaluation"); err != nil { + ev.error(err) + } + + chkIter = s.Iterator(chkIter) + it.Reset(chkIter) + ss := Series{ + Metric: s.Labels(), + } + + for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { + step++ + _, f, h, ok := ev.vectorSelectorSingle(it, vs, ts) + if !ok { + continue + } + + if h == nil { + ev.currentSamples++ + ev.samplesStats.IncrementSamplesAtStep(step, 1) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + if ss.Floats == nil { + ss.Floats = reuseOrGetFPointSlices(prevSS, numSteps) + } + ss.Floats = append(ss.Floats, FPoint{F: f, T: ts}) + } else { + point := HPoint{H: h, T: ts} + histSize := point.size() + ev.currentSamples += histSize + ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize)) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + if ss.Histograms == nil { + ss.Histograms = reuseOrGetHPointSlices(prevSS, numSteps) + } + ss.Histograms = append(ss.Histograms, point) + } + } + + if len(ss.Floats)+len(ss.Histograms) > 0 { + mat = append(mat, ss) + prevSS = &mat[len(mat)-1] + } + } + ev.samplesStats.UpdatePeak(ev.currentSamples) + return mat +} + // evalSubquery evaluates given SubqueryExpr and returns an equivalent // evaluated MatrixSelector in its place. Note that the Name and LabelMatchers are not set. func (ev *evaluator) evalSubquery(ctx context.Context, subq *parser.SubqueryExpr) (*parser.MatrixSelector, int, annotations.Annotations) { @@ -1887,56 +1953,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } - mat := make(Matrix, 0, len(e.Series)) - var prevSS *Series - it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta)) - var chkIter chunkenc.Iterator - for i, s := range e.Series { - if err := contextDone(ctx, "expression evaluation"); err != nil { - ev.error(err) - } - chkIter = s.Iterator(chkIter) - it.Reset(chkIter) - ss := Series{ - Metric: e.Series[i].Labels(), - } - - for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { - step++ - _, f, h, ok := ev.vectorSelectorSingle(it, e, ts) - if ok { - if h == nil { - ev.currentSamples++ - ev.samplesStats.IncrementSamplesAtStep(step, 1) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } - if ss.Floats == nil { - ss.Floats = reuseOrGetFPointSlices(prevSS, numSteps) - } - ss.Floats = append(ss.Floats, FPoint{F: f, T: ts}) - } else { - point := HPoint{H: h, T: ts} - histSize := point.size() - ev.currentSamples += histSize - ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize)) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } - if ss.Histograms == nil { - ss.Histograms = reuseOrGetHPointSlices(prevSS, numSteps) - } - ss.Histograms = append(ss.Histograms, point) - } - } - } - - if len(ss.Floats)+len(ss.Histograms) > 0 { - mat = append(mat, ss) - prevSS = &mat[len(mat)-1] - } - } - ev.samplesStats.UpdatePeak(ev.currentSamples) + mat := ev.evalVectorSelector(ctx, e) return mat, ws case *parser.MatrixSelector: From 90cc7e572327b194af39abcb1b9456f66e8eeae5 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 15 Oct 2024 16:37:58 +0200 Subject: [PATCH 5/6] Upgrade github.com/googleapis/enterprise-certificate-proxy to v0.3.4 Signed-off-by: Arve Knudsen --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 4a2dd1c779..0631611234 100644 --- a/go.mod +++ b/go.mod @@ -140,7 +140,7 @@ require ( github.com/google/go-querystring v1.1.0 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/s2a-go v0.1.8 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.3.3 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect diff --git a/go.sum b/go.sum index 4fc4f93bd8..0246a377d2 100644 --- a/go.sum +++ b/go.sum @@ -328,8 +328,8 @@ github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.3.3 h1:QRje2j5GZimBzlbhGA2V2QlGNgL8G6e+wGo/+/2bWI0= -github.com/googleapis/enterprise-certificate-proxy v0.3.3/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= +github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= +github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= From e05e97cdd7fc684442c2d8e03e6967641933de51 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 15 Oct 2024 17:41:20 +0200 Subject: [PATCH 6/6] evaluator.rangeEval: Split out gatherVector method Signed-off-by: Arve Knudsen --- promql/engine.go | 75 +++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 86e76bb70d..ef316e088d 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -1219,38 +1219,17 @@ func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Label ev.currentSamples = tempNumSamples // Gather input vectors for this timestamp. for i := range exprs { - vectors[i] = vectors[i][:0] - + var bh []EvalSeriesHelper + var sh []EvalSeriesHelper if prepSeries != nil { - bufHelpers[i] = bufHelpers[i][:0] - } - - for si, series := range matrixes[i] { - switch { - case len(series.Floats) > 0 && series.Floats[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, F: series.Floats[0].F, T: ts, DropName: series.DropName}) - // Move input vectors forward so we don't have to re-scan the same - // past points at the next step. - matrixes[i][si].Floats = series.Floats[1:] - case len(series.Histograms) > 0 && series.Histograms[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, H: series.Histograms[0].H, T: ts, DropName: series.DropName}) - matrixes[i][si].Histograms = series.Histograms[1:] - default: - continue - } - if prepSeries != nil { - bufHelpers[i] = append(bufHelpers[i], seriesHelpers[i][si]) - } - // Don't add histogram size here because we only - // copy the pointer above, not the whole - // histogram. - ev.currentSamples++ - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } + bh = bufHelpers[i][:0] + sh = seriesHelpers[i] } + vectors[i], bh = ev.gatherVector(ts, matrixes[i], vectors[i], bh, sh) args[i] = vectors[i] - ev.samplesStats.UpdatePeak(ev.currentSamples) + if prepSeries != nil { + bufHelpers[i] = bh + } } // Make the function call. @@ -3682,3 +3661,41 @@ func newHistogramStatsSeries(series storage.Series) *histogramStatsSeries { func (s histogramStatsSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator { return NewHistogramStatsIterator(s.Series.Iterator(it)) } + +// gatherVector gathers a Vector for ts from the series in input. +// output is used as a buffer. +// If bufHelpers and seriesHelpers are provided, seriesHelpers[i] is appended to bufHelpers for every input index i. +// The gathered Vector and bufHelper are returned. +func (ev *evaluator) gatherVector(ts int64, input Matrix, output Vector, bufHelpers, seriesHelpers []EvalSeriesHelper) (Vector, []EvalSeriesHelper) { + output = output[:0] + for i, series := range input { + switch { + case len(series.Floats) > 0 && series.Floats[0].T == ts: + s := series.Floats[0] + output = append(output, Sample{Metric: series.Metric, F: s.F, T: ts, DropName: series.DropName}) + // Move input vectors forward so we don't have to re-scan the same + // past points at the next step. + input[i].Floats = series.Floats[1:] + case len(series.Histograms) > 0 && series.Histograms[0].T == ts: + s := series.Histograms[0] + output = append(output, Sample{Metric: series.Metric, H: s.H, T: ts, DropName: series.DropName}) + input[i].Histograms = series.Histograms[1:] + default: + continue + } + if len(seriesHelpers) > 0 { + bufHelpers = append(bufHelpers, seriesHelpers[i]) + } + + // Don't add histogram size here because we only + // copy the pointer above, not the whole + // histogram. + ev.currentSamples++ + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + } + ev.samplesStats.UpdatePeak(ev.currentSamples) + + return output, bufHelpers +}