diff --git a/build/semconv/templates/go/weaver.yaml b/build/semconv/templates/go/weaver.yaml new file mode 100644 index 0000000000..76666ba1fa --- /dev/null +++ b/build/semconv/templates/go/weaver.yaml @@ -0,0 +1,65 @@ +# Copyright The Prometheus Authors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Weaver configuration for generating Prometheus instrumentation code +# from semantic convention registries. +# +# Usage: +# weaver registry generate \ +# --registry REGISTRY_DIR \ +# --templates TEMPLATES_DIR \ +# go \ +# --skip-policies + +params: + included_namespaces: [] + +templates: + - template: metrics.go.j2 + application_mode: single + file_name: "metrics.go" + filter: > + semconv_grouped_metrics({"exclude_deprecated": true}) + + - template: metrics.md.j2 + application_mode: single + file_name: "README.md" + filter: > + semconv_grouped_metrics({"exclude_deprecated": false}) + +acronyms: [TLS, RTT, DNS, SHA, TSDB, HTTP, API, SD, WAL, WBL, GC, OOO] + +comment_formats: + go: + format: markdown + prefix: "// " + indent_first_level_list_items: true + shortcut_reference_link: true + trim: true + remove_trailing_dots: false + go_1tab: + format: markdown + prefix: " // " + indent_first_level_list_items: true + shortcut_reference_link: true + trim: true + word_wrap: + line_length: 80 +default_comment_format: go + +text_maps: + go_instrument_type: + counter: Counter + histogram: Histogram + updowncounter: Gauge + gauge: Gauge diff --git a/config/semconv/registry.yaml b/config/semconv/registry.yaml 
new file mode 100644 index 0000000000..53b2998e1c --- /dev/null +++ b/config/semconv/registry.yaml @@ -0,0 +1,21 @@ +# Semantic convention registry for Prometheus configuration metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. + +groups: + - id: metric.prometheus_config_last_reload_success_timestamp_seconds + type: metric + stability: development + brief: Timestamp of the last successful configuration reload. + metric_name: prometheus_config_last_reload_success_timestamp_seconds + instrument: gauge + unit: s + + - id: metric.prometheus_config_last_reload_successful + type: metric + stability: development + brief: Whether the last configuration reload attempt was successful. + metric_name: prometheus_config_last_reload_successful + instrument: gauge + unit: "1" diff --git a/discovery/semconv/registry.yaml b/discovery/semconv/registry.yaml new file mode 100644 index 0000000000..3488f697c7 --- /dev/null +++ b/discovery/semconv/registry.yaml @@ -0,0 +1,328 @@ +# Semantic convention registry for Prometheus service discovery metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. + +groups: + # General SD metrics + - id: metric.prometheus_sd_discovered_targets + type: metric + stability: development + brief: Current number of discovered targets. + metric_name: prometheus_sd_discovered_targets + instrument: gauge + unit: "{target}" + attributes: + - id: config + type: string + stability: development + brief: The scrape config name. + examples: + - prometheus + - node_exporter + - id: name + type: string + stability: development + brief: The discovery manager name. + examples: + - scrape + - notify + + - id: metric.prometheus_sd_failed_configs + type: metric + stability: development + brief: Current number of service discovery configurations that failed to load. 
+ metric_name: prometheus_sd_failed_configs + instrument: gauge + unit: "{config}" + attributes: + - id: name + type: string + stability: development + brief: The discovery manager name. + examples: + - scrape + - notify + + - id: metric.prometheus_sd_received_updates_total + type: metric + stability: development + brief: Total number of update events received from the SD providers. + metric_name: prometheus_sd_received_updates_total + instrument: counter + unit: "{update}" + attributes: + - id: name + type: string + stability: development + brief: The discovery manager name. + examples: + - scrape + - notify + + - id: metric.prometheus_sd_updates_delayed_total + type: metric + stability: development + brief: Total number of update events that couldn't be sent immediately. + metric_name: prometheus_sd_updates_delayed_total + instrument: counter + unit: "{update}" + attributes: + - id: name + type: string + stability: development + brief: The discovery manager name. + examples: + - scrape + - notify + + - id: metric.prometheus_sd_updates_total + type: metric + stability: development + brief: Total number of update events sent to the SD consumers. + metric_name: prometheus_sd_updates_total + instrument: counter + unit: "{update}" + attributes: + - id: name + type: string + stability: development + brief: The discovery manager name. + examples: + - scrape + - notify + + # Refresh metrics + - id: metric.prometheus_sd_refresh_duration_seconds + type: metric + stability: development + brief: The duration of a SD refresh cycle. + metric_name: prometheus_sd_refresh_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_sd_refresh_duration_histogram_seconds + type: metric + stability: development + brief: The duration of a SD refresh cycle as a histogram. 
+ metric_name: prometheus_sd_refresh_duration_histogram_seconds + instrument: histogram + unit: s + attributes: + - id: mechanism + type: string + stability: development + brief: The service discovery mechanism. + examples: + - dns + - kubernetes + - consul + + - id: metric.prometheus_sd_refresh_failures_total + type: metric + stability: development + brief: Number of SD refresh failures. + metric_name: prometheus_sd_refresh_failures_total + instrument: counter + unit: "{failure}" + attributes: + - id: mechanism + type: string + stability: development + brief: The service discovery mechanism. + examples: + - dns + - kubernetes + - id: config + type: string + stability: development + brief: The scrape config name. + examples: + - prometheus + + # Provider-specific metrics + - id: metric.prometheus_sd_azure_cache_hit_total + type: metric + stability: development + brief: Number of cache hits during Azure SD. + metric_name: prometheus_sd_azure_cache_hit_total + instrument: counter + unit: "{hit}" + + - id: metric.prometheus_sd_azure_failures_total + type: metric + stability: development + brief: Number of Azure SD failures. + metric_name: prometheus_sd_azure_failures_total + instrument: counter + unit: "{failure}" + + - id: metric.prometheus_sd_consul_rpc_duration_seconds + type: metric + stability: development + brief: The duration of a Consul RPC call. + metric_name: prometheus_sd_consul_rpc_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_sd_consul_rpc_failures_total + type: metric + stability: development + brief: Number of Consul RPC call failures. + metric_name: prometheus_sd_consul_rpc_failures_total + instrument: counter + unit: "{failure}" + + - id: metric.prometheus_sd_dns_lookup_failures_total + type: metric + stability: development + brief: Number of DNS SD lookup failures. 
+ metric_name: prometheus_sd_dns_lookup_failures_total + instrument: counter + unit: "{failure}" + + - id: metric.prometheus_sd_dns_lookups_total + type: metric + stability: development + brief: Number of DNS SD lookups. + metric_name: prometheus_sd_dns_lookups_total + instrument: counter + unit: "{lookup}" + + - id: metric.prometheus_sd_file_mtime_seconds + type: metric + stability: development + brief: The modification time of the SD file. + metric_name: prometheus_sd_file_mtime_seconds + instrument: gauge + unit: s + attributes: + - id: filename + type: string + stability: development + brief: The file path. + examples: + - /etc/prometheus/file_sd/targets.json + + - id: metric.prometheus_sd_file_read_errors_total + type: metric + stability: development + brief: Number of file SD read errors. + metric_name: prometheus_sd_file_read_errors_total + instrument: counter + unit: "{error}" + + - id: metric.prometheus_sd_file_scan_duration_seconds + type: metric + stability: development + brief: The duration of the file SD scan. + metric_name: prometheus_sd_file_scan_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_sd_file_watcher_errors_total + type: metric + stability: development + brief: Number of file SD watcher errors. + metric_name: prometheus_sd_file_watcher_errors_total + instrument: counter + unit: "{error}" + + - id: metric.prometheus_sd_http_failures_total + type: metric + stability: development + brief: Number of HTTP SD failures. + metric_name: prometheus_sd_http_failures_total + instrument: counter + unit: "{failure}" + + - id: metric.prometheus_sd_kubernetes_events_total + type: metric + stability: development + brief: Number of Kubernetes events processed. + metric_name: prometheus_sd_kubernetes_events_total + instrument: counter + unit: "{event}" + attributes: + - id: event + type: string + stability: development + brief: The event type. 
+ examples: + - add + - update + - delete + - id: role + type: string + stability: development + brief: The Kubernetes role. + examples: + - pod + - node + - service + - endpoints + + - id: metric.prometheus_sd_kubernetes_failures_total + type: metric + stability: development + brief: Number of Kubernetes SD failures. + metric_name: prometheus_sd_kubernetes_failures_total + instrument: counter + unit: "{failure}" + + - id: metric.prometheus_sd_kuma_fetch_duration_seconds + type: metric + stability: development + brief: The duration of a Kuma MADS fetch call. + metric_name: prometheus_sd_kuma_fetch_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_sd_kuma_fetch_failures_total + type: metric + stability: development + brief: Number of Kuma SD fetch failures. + metric_name: prometheus_sd_kuma_fetch_failures_total + instrument: counter + unit: "{failure}" + + - id: metric.prometheus_sd_kuma_fetch_skipped_updates_total + type: metric + stability: development + brief: Number of Kuma SD updates skipped due to no changes. + metric_name: prometheus_sd_kuma_fetch_skipped_updates_total + instrument: counter + unit: "{update}" + + - id: metric.prometheus_sd_linode_failures_total + type: metric + stability: development + brief: Number of Linode SD failures. + metric_name: prometheus_sd_linode_failures_total + instrument: counter + unit: "{failure}" + + - id: metric.prometheus_sd_nomad_failures_total + type: metric + stability: development + brief: Number of Nomad SD failures. + metric_name: prometheus_sd_nomad_failures_total + instrument: counter + unit: "{failure}" + + # Treecache (ZooKeeper) metrics + - id: metric.prometheus_treecache_watcher_goroutines + type: metric + stability: development + brief: The current number of treecache watcher goroutines. 
+ metric_name: prometheus_treecache_watcher_goroutines + instrument: gauge + unit: "{goroutine}" + + - id: metric.prometheus_treecache_zookeeper_failures_total + type: metric + stability: development + brief: Total number of ZooKeeper failures. + metric_name: prometheus_treecache_zookeeper_failures_total + instrument: counter + unit: "{failure}" diff --git a/notifier/semconv/registry.yaml b/notifier/semconv/registry.yaml new file mode 100644 index 0000000000..4083454c22 --- /dev/null +++ b/notifier/semconv/registry.yaml @@ -0,0 +1,37 @@ +# Semantic convention registry for Prometheus notifier metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. + +groups: + - id: metric.prometheus_notifications_alertmanagers_discovered + type: metric + stability: development + brief: The number of alertmanagers discovered and active. + metric_name: prometheus_notifications_alertmanagers_discovered + instrument: gauge + unit: "{alertmanager}" + + - id: metric.prometheus_notifications_dropped_total + type: metric + stability: development + brief: Total number of alerts dropped due to errors when sending to Alertmanager. + metric_name: prometheus_notifications_dropped_total + instrument: counter + unit: "{notification}" + + - id: metric.prometheus_notifications_queue_capacity + type: metric + stability: development + brief: The capacity of the alert notifications queue. + metric_name: prometheus_notifications_queue_capacity + instrument: gauge + unit: "{notification}" + + - id: metric.prometheus_notifications_queue_length + type: metric + stability: development + brief: The number of alert notifications in the queue. 
+ metric_name: prometheus_notifications_queue_length + instrument: gauge + unit: "{notification}" diff --git a/promql/semconv/registry.yaml b/promql/semconv/registry.yaml new file mode 100644 index 0000000000..0880dfe5c3 --- /dev/null +++ b/promql/semconv/registry.yaml @@ -0,0 +1,71 @@ +# Semantic convention registry for Prometheus PromQL engine metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. + +groups: + - id: metric.prometheus_engine_queries + type: metric + stability: development + brief: The current number of queries being executed or waiting. + metric_name: prometheus_engine_queries + instrument: gauge + unit: "{query}" + + - id: metric.prometheus_engine_queries_concurrent_max + type: metric + stability: development + brief: The max number of concurrent queries. + metric_name: prometheus_engine_queries_concurrent_max + instrument: gauge + unit: "{query}" + + - id: metric.prometheus_engine_query_log_enabled + type: metric + stability: development + brief: State of the query log. + metric_name: prometheus_engine_query_log_enabled + instrument: gauge + unit: "1" + + - id: metric.prometheus_engine_query_log_failures_total + type: metric + stability: development + brief: The number of query log failures. + metric_name: prometheus_engine_query_log_failures_total + instrument: counter + unit: "{failure}" + + - id: metric.prometheus_engine_query_samples_total + type: metric + stability: development + brief: The total number of samples loaded by all queries. + metric_name: prometheus_engine_query_samples_total + instrument: counter + unit: "{sample}" + + - id: metric.prometheus_engine_query_duration_seconds + type: metric + stability: development + brief: Query timings. 
+ metric_name: prometheus_engine_query_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_engine_query_duration_histogram_seconds + type: metric + stability: development + brief: Histogram of query timings. + metric_name: prometheus_engine_query_duration_histogram_seconds + instrument: histogram + unit: s + attributes: + - id: slice + type: string + stability: development + brief: The query execution phase. + examples: + - inner_eval + - prepare_time + - queue_time + - result_sort diff --git a/rules/semconv/registry.yaml b/rules/semconv/registry.yaml new file mode 100644 index 0000000000..91c1565730 --- /dev/null +++ b/rules/semconv/registry.yaml @@ -0,0 +1,202 @@ +# Semantic convention registry for Prometheus rule evaluation metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. + +groups: + - id: metric.prometheus_rule_evaluation_duration_seconds + type: metric + stability: development + brief: The duration of rule evaluations. + metric_name: prometheus_rule_evaluation_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_rule_evaluation_duration_histogram_seconds + type: metric + stability: development + brief: The duration of rule evaluations as a histogram. + metric_name: prometheus_rule_evaluation_duration_histogram_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_rule_evaluation_failures_total + type: metric + stability: development + brief: The total number of rule evaluation failures. + metric_name: prometheus_rule_evaluation_failures_total + instrument: counter + unit: "{failure}" + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_evaluations_total + type: metric + stability: development + brief: The total number of rule evaluations. 
+ metric_name: prometheus_rule_evaluations_total + instrument: counter + unit: "{evaluation}" + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_group_duration_seconds + type: metric + stability: development + brief: The duration of rule group evaluations. + metric_name: prometheus_rule_group_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_rule_group_duration_histogram_seconds + type: metric + stability: development + brief: The duration of rule group evaluations as a histogram. + metric_name: prometheus_rule_group_duration_histogram_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_rule_group_interval_seconds + type: metric + stability: development + brief: The interval of a rule group. + metric_name: prometheus_rule_group_interval_seconds + instrument: gauge + unit: s + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_group_iterations_missed_total + type: metric + stability: development + brief: The total number of rule group evaluations missed due to slow rule group evaluation. + metric_name: prometheus_rule_group_iterations_missed_total + instrument: counter + unit: "{iteration}" + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_group_iterations_total + type: metric + stability: development + brief: The total number of scheduled rule group evaluations. + metric_name: prometheus_rule_group_iterations_total + instrument: counter + unit: "{iteration}" + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. 
+ examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_group_last_duration_seconds + type: metric + stability: development + brief: The duration of the last rule group evaluation. + metric_name: prometheus_rule_group_last_duration_seconds + instrument: gauge + unit: s + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_group_last_evaluation_samples + type: metric + stability: development + brief: The number of samples returned during the last rule group evaluation. + metric_name: prometheus_rule_group_last_evaluation_samples + instrument: gauge + unit: "{sample}" + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_group_last_evaluation_timestamp_seconds + type: metric + stability: development + brief: The timestamp of the last rule group evaluation. + metric_name: prometheus_rule_group_last_evaluation_timestamp_seconds + instrument: gauge + unit: s + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_group_last_restore_duration_seconds + type: metric + stability: development + brief: The duration of the last alert restoration from the ALERTS_FOR_STATE series. + metric_name: prometheus_rule_group_last_restore_duration_seconds + instrument: gauge + unit: s + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_group_last_rule_duration_sum_seconds + type: metric + stability: development + brief: The sum of the durations of all rules in the last rule group evaluation. 
+ metric_name: prometheus_rule_group_last_rule_duration_sum_seconds + instrument: gauge + unit: s + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group + + - id: metric.prometheus_rule_group_rules + type: metric + stability: development + brief: The number of rules in a rule group. + metric_name: prometheus_rule_group_rules + instrument: gauge + unit: "{rule}" + attributes: + - id: rule_group + type: string + stability: development + brief: The rule group name. + examples: + - alerting_rules.yml;my_group diff --git a/scrape/semconv/registry.yaml b/scrape/semconv/registry.yaml new file mode 100644 index 0000000000..453cbde356 --- /dev/null +++ b/scrape/semconv/registry.yaml @@ -0,0 +1,285 @@ +# Semantic convention registry for Prometheus scrape/target metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. + +groups: + - id: metric.prometheus_target_interval_length_seconds + type: metric + stability: development + brief: Actual intervals between scrapes. + metric_name: prometheus_target_interval_length_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_target_interval_length_histogram_seconds + type: metric + stability: development + brief: Actual intervals between scrapes as a histogram. + metric_name: prometheus_target_interval_length_histogram_seconds + instrument: histogram + unit: s + attributes: + - id: interval + type: string + stability: development + brief: The configured scrape interval. + examples: + - 15s + - 30s + + - id: metric.prometheus_target_metadata_cache_bytes + type: metric + stability: development + brief: The number of bytes that are currently used for storing metric metadata in the cache. 
+ metric_name: prometheus_target_metadata_cache_bytes + instrument: gauge + unit: By + attributes: + - id: scrape_job + type: string + stability: development + brief: The scrape job name. + examples: + - prometheus + - node_exporter + + - id: metric.prometheus_target_metadata_cache_entries + type: metric + stability: development + brief: Total number of metric metadata entries in the cache. + metric_name: prometheus_target_metadata_cache_entries + instrument: gauge + unit: "{entry}" + attributes: + - id: scrape_job + type: string + stability: development + brief: The scrape job name. + examples: + - prometheus + - node_exporter + + - id: metric.prometheus_target_scrape_duration_seconds + type: metric + stability: development + brief: Scrape request latency histogram. + metric_name: prometheus_target_scrape_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_target_scrape_pool_exceeded_label_limits_total + type: metric + stability: development + brief: Total number of times scrape pools hit the label limits. + metric_name: prometheus_target_scrape_pool_exceeded_label_limits_total + instrument: counter + unit: "{occurrence}" + + - id: metric.prometheus_target_scrape_pool_exceeded_target_limit_total + type: metric + stability: development + brief: Total number of times scrape pools hit the target limit. + metric_name: prometheus_target_scrape_pool_exceeded_target_limit_total + instrument: counter + unit: "{occurrence}" + + - id: metric.prometheus_target_scrape_pool_reloads_failed_total + type: metric + stability: development + brief: Total number of failed scrape pool reloads. + metric_name: prometheus_target_scrape_pool_reloads_failed_total + instrument: counter + unit: "{reload}" + + - id: metric.prometheus_target_scrape_pool_reloads_total + type: metric + stability: development + brief: Total number of scrape pool reloads. 
+ metric_name: prometheus_target_scrape_pool_reloads_total + instrument: counter + unit: "{reload}" + + - id: metric.prometheus_target_scrape_pool_symboltable_items + type: metric + stability: development + brief: Current number of symbols in the scrape pool symbol table. + metric_name: prometheus_target_scrape_pool_symboltable_items + instrument: gauge + unit: "{symbol}" + attributes: + - id: scrape_job + type: string + stability: development + brief: The scrape job name. + examples: + - prometheus + - node_exporter + + - id: metric.prometheus_target_scrape_pool_sync_total + type: metric + stability: development + brief: Total number of syncs that were executed on a scrape pool. + metric_name: prometheus_target_scrape_pool_sync_total + instrument: counter + unit: "{sync}" + attributes: + - id: scrape_job + type: string + stability: development + brief: The scrape job name. + examples: + - prometheus + - node_exporter + + - id: metric.prometheus_target_scrape_pool_target_limit + type: metric + stability: development + brief: Maximum number of targets allowed in this scrape pool. + metric_name: prometheus_target_scrape_pool_target_limit + instrument: gauge + unit: "{target}" + attributes: + - id: scrape_job + type: string + stability: development + brief: The scrape job name. + examples: + - prometheus + - node_exporter + + - id: metric.prometheus_target_scrape_pool_targets + type: metric + stability: development + brief: Current number of targets in this scrape pool. + metric_name: prometheus_target_scrape_pool_targets + instrument: gauge + unit: "{target}" + attributes: + - id: scrape_job + type: string + stability: development + brief: The scrape job name. + examples: + - prometheus + - node_exporter + + - id: metric.prometheus_target_scrape_pools_failed_total + type: metric + stability: development + brief: Total number of scrape pool creations that failed. 
+ metric_name: prometheus_target_scrape_pools_failed_total + instrument: counter + unit: "{pool}" + + - id: metric.prometheus_target_scrape_pools_total + type: metric + stability: development + brief: Total number of scrape pool creation attempts. + metric_name: prometheus_target_scrape_pools_total + instrument: counter + unit: "{pool}" + + - id: metric.prometheus_target_scrapes_cache_flush_forced_total + type: metric + stability: development + brief: Total number of scrapes that forced a complete label cache flush. + metric_name: prometheus_target_scrapes_cache_flush_forced_total + instrument: counter + unit: "{scrape}" + + - id: metric.prometheus_target_scrapes_exceeded_body_size_limit_total + type: metric + stability: development + brief: Total number of scrapes that hit the body size limit. + metric_name: prometheus_target_scrapes_exceeded_body_size_limit_total + instrument: counter + unit: "{scrape}" + + - id: metric.prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total + type: metric + stability: development + brief: Total number of scrapes that hit the native histogram bucket limit. + metric_name: prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total + instrument: counter + unit: "{scrape}" + + - id: metric.prometheus_target_scrapes_exceeded_sample_limit_total + type: metric + stability: development + brief: Total number of scrapes that hit the sample limit. + metric_name: prometheus_target_scrapes_exceeded_sample_limit_total + instrument: counter + unit: "{scrape}" + + - id: metric.prometheus_target_scrapes_exemplar_out_of_order_total + type: metric + stability: development + brief: Total number of exemplars rejected due to being out of the expected order. 
+ metric_name: prometheus_target_scrapes_exemplar_out_of_order_total + instrument: counter + unit: "{exemplar}" + + - id: metric.prometheus_target_scrapes_sample_duplicate_timestamp_total + type: metric + stability: development + brief: Total number of samples rejected due to duplicate timestamps but different values. + metric_name: prometheus_target_scrapes_sample_duplicate_timestamp_total + instrument: counter + unit: "{sample}" + + - id: metric.prometheus_target_scrapes_sample_out_of_bounds_total + type: metric + stability: development + brief: Total number of samples rejected due to timestamp falling outside of the time bounds. + metric_name: prometheus_target_scrapes_sample_out_of_bounds_total + instrument: counter + unit: "{sample}" + + - id: metric.prometheus_target_scrapes_sample_out_of_order_total + type: metric + stability: development + brief: Total number of samples rejected due to being out of the expected order. + metric_name: prometheus_target_scrapes_sample_out_of_order_total + instrument: counter + unit: "{sample}" + + - id: metric.prometheus_target_sync_failed_total + type: metric + stability: development + brief: Total number of target sync failures. + metric_name: prometheus_target_sync_failed_total + instrument: counter + unit: "{sync}" + attributes: + - id: scrape_job + type: string + stability: development + brief: The scrape job name. + examples: + - prometheus + - node_exporter + + - id: metric.prometheus_target_sync_length_seconds + type: metric + stability: development + brief: Actual interval to sync the scrape pool. + metric_name: prometheus_target_sync_length_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_target_sync_length_histogram_seconds + type: metric + stability: development + brief: Actual interval to sync the scrape pool as a histogram. 
+ metric_name: prometheus_target_sync_length_histogram_seconds + instrument: histogram + unit: s + attributes: + - id: scrape_job + type: string + stability: development + brief: The scrape job name. + examples: + - prometheus + - node_exporter diff --git a/storage/remote/semconv/registry.yaml b/storage/remote/semconv/registry.yaml new file mode 100644 index 0000000000..2c539270f3 --- /dev/null +++ b/storage/remote/semconv/registry.yaml @@ -0,0 +1,53 @@ +# Semantic convention registry for Prometheus remote storage metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. + +groups: + - id: metric.prometheus_remote_storage_exemplars_in_total + type: metric + stability: development + brief: Exemplars in to remote storage, compare to determine dropped exemplars. + metric_name: prometheus_remote_storage_exemplars_in_total + instrument: counter + unit: "{exemplar}" + + - id: metric.prometheus_remote_storage_highest_timestamp_in_seconds + type: metric + stability: development + brief: Highest timestamp that has come into the remote storage via the Appender interface. + metric_name: prometheus_remote_storage_highest_timestamp_in_seconds + instrument: gauge + unit: s + + - id: metric.prometheus_remote_storage_histograms_in_total + type: metric + stability: development + brief: Histograms in to remote storage, compare to determine dropped histograms. + metric_name: prometheus_remote_storage_histograms_in_total + instrument: counter + unit: "{histogram}" + + - id: metric.prometheus_remote_storage_samples_in_total + type: metric + stability: development + brief: Samples in to remote storage, compare to determine dropped samples. 
+ metric_name: prometheus_remote_storage_samples_in_total + instrument: counter + unit: "{sample}" + + - id: metric.prometheus_remote_storage_string_interner_zero_reference_releases_total + type: metric + stability: development + brief: The number of times release has been called for strings that are not interned. + metric_name: prometheus_remote_storage_string_interner_zero_reference_releases_total + instrument: counter + unit: "{release}" + + - id: metric.prometheus_remote_read_handler_queries + type: metric + stability: development + brief: The number of in-flight remote read queries. + metric_name: prometheus_remote_read_handler_queries + instrument: gauge + unit: "{query}" diff --git a/template/semconv/registry.yaml b/template/semconv/registry.yaml new file mode 100644 index 0000000000..bb77368e34 --- /dev/null +++ b/template/semconv/registry.yaml @@ -0,0 +1,21 @@ +# Semantic convention registry for Prometheus template metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. + +groups: + - id: metric.prometheus_template_text_expansion_failures_total + type: metric + stability: development + brief: The total number of template text expansion failures. + metric_name: prometheus_template_text_expansion_failures_total + instrument: counter + unit: "{failure}" + + - id: metric.prometheus_template_text_expansions_total + type: metric + stability: development + brief: The total number of template text expansions. + metric_name: prometheus_template_text_expansions_total + instrument: counter + unit: "{expansion}" diff --git a/tsdb/semconv/registry.yaml b/tsdb/semconv/registry.yaml new file mode 100644 index 0000000000..0dde4de51d --- /dev/null +++ b/tsdb/semconv/registry.yaml @@ -0,0 +1,746 @@ +# Semantic convention registry for Prometheus TSDB metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. 
+ +groups: + # Block metrics + - id: metric.prometheus_tsdb_blocks_loaded + type: metric + stability: development + brief: Number of currently loaded data blocks. + metric_name: prometheus_tsdb_blocks_loaded + instrument: gauge + unit: "{block}" + + - id: metric.prometheus_tsdb_storage_blocks_bytes + type: metric + stability: development + brief: The number of bytes that are currently used for local storage by all blocks. + metric_name: prometheus_tsdb_storage_blocks_bytes + instrument: gauge + unit: By + + # Checkpoint metrics + - id: metric.prometheus_tsdb_checkpoint_creations_failed_total + type: metric + stability: development + brief: Total number of checkpoint creations that failed. + metric_name: prometheus_tsdb_checkpoint_creations_failed_total + instrument: counter + unit: "{checkpoint}" + + - id: metric.prometheus_tsdb_checkpoint_creations_total + type: metric + stability: development + brief: Total number of checkpoint creations attempted. + metric_name: prometheus_tsdb_checkpoint_creations_total + instrument: counter + unit: "{checkpoint}" + + - id: metric.prometheus_tsdb_checkpoint_deletions_failed_total + type: metric + stability: development + brief: Total number of checkpoint deletions that failed. + metric_name: prometheus_tsdb_checkpoint_deletions_failed_total + instrument: counter + unit: "{checkpoint}" + + - id: metric.prometheus_tsdb_checkpoint_deletions_total + type: metric + stability: development + brief: Total number of checkpoint deletions attempted. + metric_name: prometheus_tsdb_checkpoint_deletions_total + instrument: counter + unit: "{checkpoint}" + + # Compaction metrics + - id: metric.prometheus_tsdb_compaction_chunk_range_seconds + type: metric + stability: development + brief: Final time range of chunks on their first compaction. 
+ metric_name: prometheus_tsdb_compaction_chunk_range_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_tsdb_compaction_chunk_samples + type: metric + stability: development + brief: Final number of samples on their first compaction. + metric_name: prometheus_tsdb_compaction_chunk_samples + instrument: histogram + unit: "{sample}" + + - id: metric.prometheus_tsdb_compaction_chunk_size_bytes + type: metric + stability: development + brief: Final size of chunks on their first compaction. + metric_name: prometheus_tsdb_compaction_chunk_size_bytes + instrument: histogram + unit: By + + - id: metric.prometheus_tsdb_compaction_duration_seconds + type: metric + stability: development + brief: Duration of compaction runs. + metric_name: prometheus_tsdb_compaction_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_tsdb_compaction_populating_block + type: metric + stability: development + brief: Set to 1 when a block is being written to the disk. + metric_name: prometheus_tsdb_compaction_populating_block + instrument: gauge + unit: "1" + + - id: metric.prometheus_tsdb_compactions_failed_total + type: metric + stability: development + brief: Total number of compactions that failed. + metric_name: prometheus_tsdb_compactions_failed_total + instrument: counter + unit: "{compaction}" + + - id: metric.prometheus_tsdb_compactions_skipped_total + type: metric + stability: development + brief: Total number of skipped compactions due to overlap. + metric_name: prometheus_tsdb_compactions_skipped_total + instrument: counter + unit: "{compaction}" + + - id: metric.prometheus_tsdb_compactions_total + type: metric + stability: development + brief: Total number of compactions that were executed. + metric_name: prometheus_tsdb_compactions_total + instrument: counter + unit: "{compaction}" + + - id: metric.prometheus_tsdb_compactions_triggered_total + type: metric + stability: development + brief: Total number of triggered compactions. 
+ metric_name: prometheus_tsdb_compactions_triggered_total + instrument: counter + unit: "{compaction}" + + - id: metric.prometheus_tsdb_vertical_compactions_total + type: metric + stability: development + brief: Total number of compactions done on overlapping blocks. + metric_name: prometheus_tsdb_vertical_compactions_total + instrument: counter + unit: "{compaction}" + + # Data metrics + - id: metric.prometheus_tsdb_clean_start + type: metric + stability: development + brief: Set to 1 if the TSDB was clean at startup, 0 otherwise. + metric_name: prometheus_tsdb_clean_start + instrument: gauge + unit: "1" + + - id: metric.prometheus_tsdb_data_replay_duration_seconds + type: metric + stability: development + brief: Time taken to replay the data on disk. + metric_name: prometheus_tsdb_data_replay_duration_seconds + instrument: gauge + unit: s + + - id: metric.prometheus_tsdb_lowest_timestamp + type: metric + stability: development + brief: Lowest timestamp value stored in the database. + metric_name: prometheus_tsdb_lowest_timestamp + instrument: gauge + unit: "1" + + - id: metric.prometheus_tsdb_lowest_timestamp_seconds + type: metric + stability: development + brief: Lowest timestamp value stored in the database in seconds. + metric_name: prometheus_tsdb_lowest_timestamp_seconds + instrument: gauge + unit: s + + # Exemplar metrics + - id: metric.prometheus_tsdb_exemplar_exemplars_appended_total + type: metric + stability: development + brief: Total number of appended exemplars. + metric_name: prometheus_tsdb_exemplar_exemplars_appended_total + instrument: counter + unit: "{exemplar}" + + - id: metric.prometheus_tsdb_exemplar_exemplars_in_storage + type: metric + stability: development + brief: Number of exemplars currently in circular storage. 
+ metric_name: prometheus_tsdb_exemplar_exemplars_in_storage + instrument: gauge + unit: "{exemplar}" + + - id: metric.prometheus_tsdb_exemplar_last_exemplars_timestamp_seconds + type: metric + stability: development + brief: The timestamp of the oldest exemplar stored in circular storage. + metric_name: prometheus_tsdb_exemplar_last_exemplars_timestamp_seconds + instrument: gauge + unit: s + + - id: metric.prometheus_tsdb_exemplar_max_exemplars + type: metric + stability: development + brief: Total number of exemplars the exemplar storage can store. + metric_name: prometheus_tsdb_exemplar_max_exemplars + instrument: gauge + unit: "{exemplar}" + + - id: metric.prometheus_tsdb_exemplar_out_of_order_exemplars_total + type: metric + stability: development + brief: Total number of out-of-order exemplar ingestion failed attempts. + metric_name: prometheus_tsdb_exemplar_out_of_order_exemplars_total + instrument: counter + unit: "{exemplar}" + + - id: metric.prometheus_tsdb_exemplar_series_with_exemplars_in_storage + type: metric + stability: development + brief: Number of series with exemplars currently in circular storage. + metric_name: prometheus_tsdb_exemplar_series_with_exemplars_in_storage + instrument: gauge + unit: "{series}" + + # Head metrics + - id: metric.prometheus_tsdb_head_active_appenders + type: metric + stability: development + brief: Number of currently active appender transactions. + metric_name: prometheus_tsdb_head_active_appenders + instrument: gauge + unit: "{appender}" + + - id: metric.prometheus_tsdb_head_chunks + type: metric + stability: development + brief: Total number of chunks in the head block. + metric_name: prometheus_tsdb_head_chunks + instrument: gauge + unit: "{chunk}" + + - id: metric.prometheus_tsdb_head_chunks_created_total + type: metric + stability: development + brief: Total number of chunks created in the head block. 
+ metric_name: prometheus_tsdb_head_chunks_created_total + instrument: counter + unit: "{chunk}" + + - id: metric.prometheus_tsdb_head_chunks_removed_total + type: metric + stability: development + brief: Total number of chunks removed from the head block. + metric_name: prometheus_tsdb_head_chunks_removed_total + instrument: counter + unit: "{chunk}" + + - id: metric.prometheus_tsdb_head_chunks_storage_size_bytes + type: metric + stability: development + brief: Size of the chunks_head directory. + metric_name: prometheus_tsdb_head_chunks_storage_size_bytes + instrument: gauge + unit: By + + - id: metric.prometheus_tsdb_head_gc_duration_seconds + type: metric + stability: development + brief: Runtime of garbage collection in the head block. + metric_name: prometheus_tsdb_head_gc_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_tsdb_head_max_time + type: metric + stability: development + brief: Maximum timestamp of the head block. + metric_name: prometheus_tsdb_head_max_time + instrument: gauge + unit: "1" + + - id: metric.prometheus_tsdb_head_max_time_seconds + type: metric + stability: development + brief: Maximum timestamp of the head block in seconds. + metric_name: prometheus_tsdb_head_max_time_seconds + instrument: gauge + unit: s + + - id: metric.prometheus_tsdb_head_min_time + type: metric + stability: development + brief: Minimum timestamp of the head block. + metric_name: prometheus_tsdb_head_min_time + instrument: gauge + unit: "1" + + - id: metric.prometheus_tsdb_head_min_time_seconds + type: metric + stability: development + brief: Minimum timestamp of the head block in seconds. + metric_name: prometheus_tsdb_head_min_time_seconds + instrument: gauge + unit: s + + - id: metric.prometheus_tsdb_head_out_of_order_samples_appended_total + type: metric + stability: development + brief: Total number of appended out-of-order samples. 
+ metric_name: prometheus_tsdb_head_out_of_order_samples_appended_total + instrument: counter + unit: "{sample}" + attributes: + - id: type + type: string + stability: development + brief: The sample type. + examples: + - float + - histogram + + - id: metric.prometheus_tsdb_head_samples_appended_total + type: metric + stability: development + brief: Total number of appended samples. + metric_name: prometheus_tsdb_head_samples_appended_total + instrument: counter + unit: "{sample}" + attributes: + - id: type + type: string + stability: development + brief: The sample type. + examples: + - float + - histogram + + - id: metric.prometheus_tsdb_head_series + type: metric + stability: development + brief: Total number of series in the head block. + metric_name: prometheus_tsdb_head_series + instrument: gauge + unit: "{series}" + + - id: metric.prometheus_tsdb_head_series_created_total + type: metric + stability: development + brief: Total number of series created in the head block. + metric_name: prometheus_tsdb_head_series_created_total + instrument: counter + unit: "{series}" + + - id: metric.prometheus_tsdb_head_series_not_found_total + type: metric + stability: development + brief: Total number of requests for series that were not found. + metric_name: prometheus_tsdb_head_series_not_found_total + instrument: counter + unit: "{request}" + + - id: metric.prometheus_tsdb_head_series_removed_total + type: metric + stability: development + brief: Total number of series removed from the head block. + metric_name: prometheus_tsdb_head_series_removed_total + instrument: counter + unit: "{series}" + + - id: metric.prometheus_tsdb_head_stale_series + type: metric + stability: development + brief: Number of stale series in the head block. 
+ metric_name: prometheus_tsdb_head_stale_series + instrument: gauge + unit: "{series}" + + - id: metric.prometheus_tsdb_head_truncations_failed_total + type: metric + stability: development + brief: Total number of head truncations that failed. + metric_name: prometheus_tsdb_head_truncations_failed_total + instrument: counter + unit: "{truncation}" + + - id: metric.prometheus_tsdb_head_truncations_total + type: metric + stability: development + brief: Total number of head truncations attempted. + metric_name: prometheus_tsdb_head_truncations_total + instrument: counter + unit: "{truncation}" + + # Isolation metrics + - id: metric.prometheus_tsdb_isolation_high_watermark + type: metric + stability: development + brief: The isolation high watermark. + metric_name: prometheus_tsdb_isolation_high_watermark + instrument: gauge + unit: "1" + + - id: metric.prometheus_tsdb_isolation_low_watermark + type: metric + stability: development + brief: The isolation low watermark. + metric_name: prometheus_tsdb_isolation_low_watermark + instrument: gauge + unit: "1" + + # Mmap metrics + - id: metric.prometheus_tsdb_mmap_chunk_corruptions_total + type: metric + stability: development + brief: Total number of memory-mapped chunk corruptions. + metric_name: prometheus_tsdb_mmap_chunk_corruptions_total + instrument: counter + unit: "{corruption}" + + - id: metric.prometheus_tsdb_mmap_chunks_total + type: metric + stability: development + brief: Total number of memory-mapped chunks. + metric_name: prometheus_tsdb_mmap_chunks_total + instrument: counter + unit: "{chunk}" + + # Out-of-order sample metrics + - id: metric.prometheus_tsdb_out_of_bound_samples_total + type: metric + stability: development + brief: Total number of out-of-bound samples ingestion failed attempts. + metric_name: prometheus_tsdb_out_of_bound_samples_total + instrument: counter + unit: "{sample}" + attributes: + - id: type + type: string + stability: development + brief: The sample type. 
+ examples: + - float + + - id: metric.prometheus_tsdb_out_of_order_samples_total + type: metric + stability: development + brief: Total number of out-of-order samples ingestion failed attempts. + metric_name: prometheus_tsdb_out_of_order_samples_total + instrument: counter + unit: "{sample}" + attributes: + - id: type + type: string + stability: development + brief: The sample type. + examples: + - float + - histogram + + - id: metric.prometheus_tsdb_sample_ooo_delta + type: metric + stability: development + brief: Delta in seconds between the time when an out-of-order sample was ingested and the latest sample in the chunk. + metric_name: prometheus_tsdb_sample_ooo_delta + instrument: histogram + unit: s + + - id: metric.prometheus_tsdb_too_old_samples_total + type: metric + stability: development + brief: Total number of samples that were too old to be ingested. + metric_name: prometheus_tsdb_too_old_samples_total + instrument: counter + unit: "{sample}" + attributes: + - id: type + type: string + stability: development + brief: The sample type. + examples: + - float + + # Out-of-order WBL metrics + - id: metric.prometheus_tsdb_out_of_order_wbl_completed_pages_total + type: metric + stability: development + brief: Total number of completed WBL pages for out-of-order samples. + metric_name: prometheus_tsdb_out_of_order_wbl_completed_pages_total + instrument: counter + unit: "{page}" + + - id: metric.prometheus_tsdb_out_of_order_wbl_fsync_duration_seconds + type: metric + stability: development + brief: Duration of WBL fsync for out-of-order samples. + metric_name: prometheus_tsdb_out_of_order_wbl_fsync_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_tsdb_out_of_order_wbl_page_flushes_total + type: metric + stability: development + brief: Total number of WBL page flushes for out-of-order samples. 
+ metric_name: prometheus_tsdb_out_of_order_wbl_page_flushes_total + instrument: counter + unit: "{flush}" + + - id: metric.prometheus_tsdb_out_of_order_wbl_record_part_writes_total + type: metric + stability: development + brief: Total number of WBL record part writes for out-of-order samples. + metric_name: prometheus_tsdb_out_of_order_wbl_record_part_writes_total + instrument: counter + unit: "{write}" + + - id: metric.prometheus_tsdb_out_of_order_wbl_record_parts_bytes_written_total + type: metric + stability: development + brief: Total bytes written to WBL record parts for out-of-order samples. + metric_name: prometheus_tsdb_out_of_order_wbl_record_parts_bytes_written_total + instrument: counter + unit: By + + - id: metric.prometheus_tsdb_out_of_order_wbl_segment_current + type: metric + stability: development + brief: Current out-of-order WBL segment. + metric_name: prometheus_tsdb_out_of_order_wbl_segment_current + instrument: gauge + unit: "{segment}" + + - id: metric.prometheus_tsdb_out_of_order_wbl_storage_size_bytes + type: metric + stability: development + brief: Size of the out-of-order WBL storage. + metric_name: prometheus_tsdb_out_of_order_wbl_storage_size_bytes + instrument: gauge + unit: By + + - id: metric.prometheus_tsdb_out_of_order_wbl_truncations_failed_total + type: metric + stability: development + brief: Total number of out-of-order WBL truncations that failed. + metric_name: prometheus_tsdb_out_of_order_wbl_truncations_failed_total + instrument: counter + unit: "{truncation}" + + - id: metric.prometheus_tsdb_out_of_order_wbl_truncations_total + type: metric + stability: development + brief: Total number of out-of-order WBL truncations. + metric_name: prometheus_tsdb_out_of_order_wbl_truncations_total + instrument: counter + unit: "{truncation}" + + - id: metric.prometheus_tsdb_out_of_order_wbl_writes_failed_total + type: metric + stability: development + brief: Total number of out-of-order WBL writes that failed. 
+ metric_name: prometheus_tsdb_out_of_order_wbl_writes_failed_total + instrument: counter + unit: "{write}" + + # Reload metrics + - id: metric.prometheus_tsdb_reloads_failures_total + type: metric + stability: development + brief: Number of times the database reloads failed. + metric_name: prometheus_tsdb_reloads_failures_total + instrument: counter + unit: "{reload}" + + - id: metric.prometheus_tsdb_reloads_total + type: metric + stability: development + brief: Number of times the database reloads. + metric_name: prometheus_tsdb_reloads_total + instrument: counter + unit: "{reload}" + + # Retention metrics + - id: metric.prometheus_tsdb_retention_limit_bytes + type: metric + stability: development + brief: Maximum number of bytes to be retained in the TSDB. + metric_name: prometheus_tsdb_retention_limit_bytes + instrument: gauge + unit: By + + - id: metric.prometheus_tsdb_retention_limit_seconds + type: metric + stability: development + brief: Maximum age in seconds for samples to be retained in the TSDB. + metric_name: prometheus_tsdb_retention_limit_seconds + instrument: gauge + unit: s + + - id: metric.prometheus_tsdb_size_retentions_total + type: metric + stability: development + brief: Number of times that blocks were deleted because the maximum number of bytes was exceeded. + metric_name: prometheus_tsdb_size_retentions_total + instrument: counter + unit: "{retention}" + + - id: metric.prometheus_tsdb_time_retentions_total + type: metric + stability: development + brief: Number of times that blocks were deleted because the maximum time limit was exceeded. + metric_name: prometheus_tsdb_time_retentions_total + instrument: counter + unit: "{retention}" + + # Snapshot metrics + - id: metric.prometheus_tsdb_snapshot_replay_error_total + type: metric + stability: development + brief: Total number of snapshot replay errors. 
+ metric_name: prometheus_tsdb_snapshot_replay_error_total + instrument: counter + unit: "{error}" + + # Symbol table metrics + - id: metric.prometheus_tsdb_symbol_table_size_bytes + type: metric + stability: development + brief: Size of the symbol table in bytes. + metric_name: prometheus_tsdb_symbol_table_size_bytes + instrument: gauge + unit: By + + # Tombstone metrics + - id: metric.prometheus_tsdb_tombstone_cleanup_seconds + type: metric + stability: development + brief: Time taken to clean up tombstones. + metric_name: prometheus_tsdb_tombstone_cleanup_seconds + instrument: histogram + unit: s + + # WAL metrics + - id: metric.prometheus_tsdb_wal_completed_pages_total + type: metric + stability: development + brief: Total number of completed WAL pages. + metric_name: prometheus_tsdb_wal_completed_pages_total + instrument: counter + unit: "{page}" + + - id: metric.prometheus_tsdb_wal_corruptions_total + type: metric + stability: development + brief: Total number of WAL corruptions. + metric_name: prometheus_tsdb_wal_corruptions_total + instrument: counter + unit: "{corruption}" + + - id: metric.prometheus_tsdb_wal_fsync_duration_seconds + type: metric + stability: development + brief: Duration of WAL fsync. + metric_name: prometheus_tsdb_wal_fsync_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_tsdb_wal_page_flushes_total + type: metric + stability: development + brief: Total number of WAL page flushes. + metric_name: prometheus_tsdb_wal_page_flushes_total + instrument: counter + unit: "{flush}" + + - id: metric.prometheus_tsdb_wal_record_bytes_saved_total + type: metric + stability: development + brief: Total bytes saved by WAL record compression. + metric_name: prometheus_tsdb_wal_record_bytes_saved_total + instrument: counter + unit: By + attributes: + - id: compression + type: string + stability: development + brief: The compression algorithm. 
+ examples: + - snappy + + - id: metric.prometheus_tsdb_wal_record_part_writes_total + type: metric + stability: development + brief: Total number of WAL record part writes. + metric_name: prometheus_tsdb_wal_record_part_writes_total + instrument: counter + unit: "{write}" + + - id: metric.prometheus_tsdb_wal_record_parts_bytes_written_total + type: metric + stability: development + brief: Total bytes written to WAL record parts. + metric_name: prometheus_tsdb_wal_record_parts_bytes_written_total + instrument: counter + unit: By + + - id: metric.prometheus_tsdb_wal_segment_current + type: metric + stability: development + brief: Current WAL segment. + metric_name: prometheus_tsdb_wal_segment_current + instrument: gauge + unit: "{segment}" + + - id: metric.prometheus_tsdb_wal_storage_size_bytes + type: metric + stability: development + brief: Size of the WAL storage. + metric_name: prometheus_tsdb_wal_storage_size_bytes + instrument: gauge + unit: By + + - id: metric.prometheus_tsdb_wal_truncate_duration_seconds + type: metric + stability: development + brief: Duration of WAL truncation. + metric_name: prometheus_tsdb_wal_truncate_duration_seconds + instrument: histogram + unit: s + + - id: metric.prometheus_tsdb_wal_truncations_failed_total + type: metric + stability: development + brief: Total number of WAL truncations that failed. + metric_name: prometheus_tsdb_wal_truncations_failed_total + instrument: counter + unit: "{truncation}" + + - id: metric.prometheus_tsdb_wal_truncations_total + type: metric + stability: development + brief: Total number of WAL truncations. + metric_name: prometheus_tsdb_wal_truncations_total + instrument: counter + unit: "{truncation}" + + - id: metric.prometheus_tsdb_wal_writes_failed_total + type: metric + stability: development + brief: Total number of WAL writes that failed. 
+ metric_name: prometheus_tsdb_wal_writes_failed_total + instrument: counter + unit: "{write}" diff --git a/web/semconv/registry.yaml b/web/semconv/registry.yaml new file mode 100644 index 0000000000..34ea15f853 --- /dev/null +++ b/web/semconv/registry.yaml @@ -0,0 +1,120 @@ +# Semantic convention registry for Prometheus web/HTTP metrics. +# +# This file is the source of truth for these metrics. +# Run `make generate-semconv` to regenerate the Go code. + +groups: + # HTTP metrics + - id: metric.prometheus_http_request_duration_seconds + type: metric + stability: development + brief: Histogram of latencies for HTTP requests. + metric_name: prometheus_http_request_duration_seconds + instrument: histogram + unit: s + attributes: + - id: handler + type: string + stability: development + brief: The HTTP handler. + examples: + - / + - /-/healthy + - /-/ready + - /api/v1/query + + - id: metric.prometheus_http_requests_total + type: metric + stability: development + brief: Counter of HTTP requests. + metric_name: prometheus_http_requests_total + instrument: counter + unit: "{request}" + attributes: + - id: code + type: string + stability: development + brief: The HTTP response status code. + examples: + - "200" + - "400" + - "404" + - "500" + - id: handler + type: string + stability: development + brief: The HTTP handler. + examples: + - / + - /-/healthy + - /-/ready + - /api/v1/query + + - id: metric.prometheus_http_response_size_bytes + type: metric + stability: development + brief: Histogram of response size for HTTP requests. + metric_name: prometheus_http_response_size_bytes + instrument: histogram + unit: By + attributes: + - id: handler + type: string + stability: development + brief: The HTTP handler. + examples: + - / + - /-/healthy + - /-/ready + - /api/v1/query + + # API notification metrics + - id: metric.prometheus_api_notification_active_subscribers + type: metric + stability: development + brief: The current number of active notification subscribers. 
+ metric_name: prometheus_api_notification_active_subscribers + instrument: gauge + unit: "{subscriber}" + + - id: metric.prometheus_api_notification_updates_dropped_total + type: metric + stability: development + brief: Total number of API notification updates that were dropped. + metric_name: prometheus_api_notification_updates_dropped_total + instrument: counter + unit: "{update}" + + - id: metric.prometheus_api_notification_updates_sent_total + type: metric + stability: development + brief: Total number of API notification updates sent. + metric_name: prometheus_api_notification_updates_sent_total + instrument: counter + unit: "{update}" + + # Federation metrics + - id: metric.prometheus_web_federation_errors_total + type: metric + stability: development + brief: Total number of errors that occurred while sending federation responses. + metric_name: prometheus_web_federation_errors_total + instrument: counter + unit: "{error}" + + - id: metric.prometheus_web_federation_warnings_total + type: metric + stability: development + brief: Total number of warnings that occurred while sending federation responses. + metric_name: prometheus_web_federation_warnings_total + instrument: counter + unit: "{warning}" + + # Readiness metrics + - id: metric.prometheus_ready + type: metric + stability: development + brief: Whether Prometheus startup was fully completed and the server is ready for normal operation. + metric_name: prometheus_ready + instrument: gauge + unit: "1"