diff --git a/hack/tools/instrumentation/documentation/documentation-list.yaml b/hack/tools/instrumentation/documentation/documentation-list.yaml index 70bb8000aa7..5908f3affda 100644 --- a/hack/tools/instrumentation/documentation/documentation-list.yaml +++ b/hack/tools/instrumentation/documentation/documentation-list.yaml @@ -1,22 +1,2499 @@ -- name: version_info - namespace: etcd - help: Etcd server's binary version +- name: aggregation_count_total + subsystem: aggregator_discovery + help: Counter of number of times discovery was aggregated + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: nopeer_requests_total + subsystem: aggregator_discovery + help: Counter of number of times no-peer (non peer-aggregated) discovery was requested + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: peer_aggregated_cache_hits_total + subsystem: aggregator_discovery + help: Counter of number of times discovery was served from peer-aggregated cache + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: peer_aggregated_cache_misses_total + subsystem: aggregator_discovery + help: Counter of number of times discovery was aggregated across all API servers + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: aggregator_openapi_v2_regeneration_count + help: Counter of OpenAPI v2 spec regeneration count broken down by causing APIService + name and reason. + type: Counter + stabilityLevel: ALPHA + labels: + - apiservice + - reason + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: aggregator_openapi_v2_regeneration_duration + help: Gauge of OpenAPI v2 spec regeneration duration in seconds. 
type: Gauge stabilityLevel: ALPHA labels: - - binary_version + - reason componentEndpoints: - - component: etcd-version-monitor + - component: kube-apiserver endpoint: /metrics -- name: certificate_manager_client_ttl_seconds - subsystem: kubelet - help: Gauge of the TTL (time-to-live) of the Kubelet's client certificate. The value - is in seconds until certificate expiry (negative if already expired). If client - certificate is invalid or unused, the value will be +INF. +- name: aggregator_unavailable_apiservice + help: Gauge of APIServices which are marked as unavailable broken down by APIService + name. + type: Custom + stabilityLevel: ALPHA + labels: + - name + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: aggregator_unavailable_apiservice_total + help: Counter of APIServices which are marked as unavailable broken down by APIService + name and reason. + type: Counter + stabilityLevel: ALPHA + labels: + - name + - reason + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: ratcheting_seconds + subsystem: validation + namespace: apiextensions_apiserver + help: Time for comparison of old to new for the purposes of CRDValidationRatcheting + during an UPDATE in seconds. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 1e-05 + - 4e-05 + - 0.00016 + - 0.00064 + - 0.00256 + - 0.01024 + - 0.04096 + - 0.16384 + - 0.65536 + - 2.62144 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiextensions_openapi_v2_regeneration_count + help: Counter of OpenAPI v2 spec regeneration count broken down by causing CRD name + and reason. + type: Counter + stabilityLevel: ALPHA + labels: + - crd + - reason + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiextensions_openapi_v3_regeneration_count + help: Counter of OpenAPI v3 spec regeneration count broken down by group, version, + causing CRD and reason. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - crd + - group + - reason + - version + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: match_condition_evaluation_errors_total + subsystem: admission + namespace: apiserver + help: Admission match condition evaluation errors count, identified by name of resource + containing the match condition and broken out for each kind containing matchConditions + (webhook or policy), operation and admission type (validate or admit). + type: Counter + stabilityLevel: ALPHA + labels: + - kind + - name + - operation + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: match_condition_evaluation_seconds + subsystem: admission + namespace: apiserver + help: Admission match condition evaluation time in seconds, identified by name and + broken out for each kind containing matchConditions (webhook or policy), operation + and type (validate or admit). + type: Histogram + stabilityLevel: ALPHA + labels: + - kind + - name + - operation + - type + buckets: + - 0.001 + - 0.005 + - 0.01 + - 0.025 + - 0.1 + - 0.2 + - 0.25 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: match_condition_exclusions_total + subsystem: admission + namespace: apiserver + help: Admission match condition evaluation exclusions count, identified by name + of resource containing the match condition and broken out for each kind containing + matchConditions (webhook or policy), operation and admission type (validate or + admit). + type: Counter + stabilityLevel: ALPHA + labels: + - kind + - name + - operation + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: step_admission_duration_seconds_summary + subsystem: admission + namespace: apiserver + help: Admission sub-step latency summary in seconds, broken out for each operation + and API resource and step type (validate or admit). 
+ type: Summary + stabilityLevel: ALPHA + labels: + - operation + - rejected + - type + maxAge: 18000000000000 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: webhook_fail_open_count + subsystem: admission + namespace: apiserver + help: Admission webhook fail open count, identified by name and broken out for each + admission type (validating or admit). + type: Counter + stabilityLevel: ALPHA + labels: + - name + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: webhook_rejection_count + subsystem: admission + namespace: apiserver + help: Admission webhook rejection count, identified by name and broken out for each + admission type (validating or admit) and operation. Additional labels specify + an error type (calling_webhook_error or apiserver_internal_error if an error occurred; + no_error otherwise) and optionally a non-zero rejection code if the webhook rejects + the request with an HTTP status code (honored by the apiserver when the code is + greater or equal to 400). Codes greater than 600 are truncated to 600, to keep + the metrics cardinality bounded. + type: Counter + stabilityLevel: ALPHA + labels: + - error_type + - name + - operation + - rejection_code + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: webhook_request_total + subsystem: admission + namespace: apiserver + help: Admission webhook request total, identified by name and broken out for each + admission type (validating or admit) and operation. Additional labels specify + whether the request was rejected or not and an HTTP status code. Codes greater + than 600 are truncated to 600, to keep the metrics cardinality bounded. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - code + - name + - operation + - rejected + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: error_total + subsystem: apiserver_audit + help: Counter of audit events that failed to be audited properly. Plugin identifies + the plugin affected by the error. + type: Counter + stabilityLevel: ALPHA + labels: + - plugin + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: event_total + subsystem: apiserver_audit + help: Counter of audit events generated and sent to the audit backend. + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: level_total + subsystem: apiserver_audit + help: Counter of policy levels for audit events (1 per request). + type: Counter + stabilityLevel: ALPHA + labels: + - level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: requests_rejected_total + subsystem: apiserver_audit + help: Counter of apiserver requests rejected due to an error in audit logging backend. + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_authentication_config_controller_last_config_info + help: Information about the last applied authentication configuration with hash + as label, split by apiserver identity. + type: Custom + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - hash + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_authentication_jwt_authenticator_jwks_fetch_last_key_set_info + help: Information about the last JWKS fetched by the JWT authenticator with hash + as label, split by api server identity and jwt issuer. 
+ type: Custom + stabilityLevel: ALPHA + labels: + - jwt_issuer_hash + - apiserver_id_hash + - hash + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: jwt_authenticator_jwks_fetch_last_timestamp_seconds + subsystem: authentication + namespace: apiserver + help: Timestamp of the last successful or failed JWKS fetch split by result, api + server identity and jwt issuer for the JWT authenticator. + type: Gauge + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - jwt_issuer_hash + - result + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: jwt_authenticator_latency_seconds + subsystem: authentication + namespace: apiserver + help: Latency of jwt authentication operations in seconds. This is the time spent + authenticating a token for cache miss only (i.e. when the token is not found in + the cache). + type: Histogram + stabilityLevel: ALPHA + labels: + - jwt_issuer_hash + - result + buckets: + - 0.001 + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_authorization_config_controller_last_config_info + help: Information about the last applied authorization configuration with hash as + label, split by apiserver identity. + type: Custom + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - hash + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: decisions_total + subsystem: authorization + namespace: apiserver + help: Total number of terminal decisions made by an authorizer split by authorizer + type, name, and decision. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - decision + - name + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: match_condition_evaluation_errors_total + subsystem: authorization + namespace: apiserver + help: Total number of errors when an authorization webhook encounters a match condition + error split by authorizer type and name. + type: Counter + stabilityLevel: ALPHA + labels: + - name + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: match_condition_evaluation_seconds + subsystem: authorization + namespace: apiserver + help: Authorization match condition evaluation time in seconds, split by authorizer + type and name. + type: Histogram + stabilityLevel: ALPHA + labels: + - name + - type + buckets: + - 0.001 + - 0.005 + - 0.01 + - 0.025 + - 0.1 + - 0.2 + - 0.25 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: match_condition_exclusions_total + subsystem: authorization + namespace: apiserver + help: Total number of exclusions when an authorization webhook is skipped because + match conditions exclude it. + type: Counter + stabilityLevel: ALPHA + labels: + - name + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: webhook_duration_seconds + subsystem: authorization + namespace: apiserver + help: Request latency in seconds. + type: Histogram + stabilityLevel: ALPHA + labels: + - name + - result + buckets: + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: webhook_evaluations_fail_open_total + subsystem: authorization + namespace: apiserver + help: NoOpinion results due to webhook timeout or error. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - name + - result + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: webhook_evaluations_total + subsystem: authorization + namespace: apiserver + help: Round-trips to authorization webhooks. + type: Counter + stabilityLevel: ALPHA + labels: + - name + - result + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: cache_list_fetched_objects_total + namespace: apiserver + help: Number of objects read from watch cache in the course of serving a LIST request + type: Counter + stabilityLevel: ALPHA + labels: + - group + - index + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: cache_list_returned_objects_total + namespace: apiserver + help: Number of objects returned for a LIST request from watch cache + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: cache_list_total + namespace: apiserver + help: Number of LIST requests served from watch cache + type: Counter + stabilityLevel: ALPHA + labels: + - group + - index + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: csr_honored_duration_total + subsystem: certificates_registry + namespace: apiserver + help: Total number of issued CSRs with a requested duration that was honored, sliced + by signer (only kubernetes.io signer names are specifically identified) + type: Counter + stabilityLevel: ALPHA + labels: + - signerName + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: csr_requested_duration_total + subsystem: certificates_registry + namespace: apiserver + help: Total number of issued CSRs with a requested duration, sliced by signer (only + kubernetes.io signer names are specifically identified) + type: Counter + stabilityLevel: ALPHA + labels: + - signerName + componentEndpoints: + - 
component: kube-apiserver + endpoint: /metrics +- name: certificate_expiration_seconds + subsystem: client + namespace: apiserver + help: Distribution of the remaining lifetime on the certificate used to authenticate + a request. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 0 + - 1800 + - 3600 + - 7200 + - 21600 + - 43200 + - 86400 + - 172800 + - 345600 + - 604800 + - 2.592e+06 + - 7.776e+06 + - 1.5552e+07 + - 3.1104e+07 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: ip_errors_total + subsystem: clusterip_repair + namespace: apiserver + help: 'Number of errors detected on clusterips by the repair loop broken down by + type of error: leak, repair, full, outOfRange, duplicate, unknown, invalid' + type: Counter + stabilityLevel: ALPHA + labels: + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: reconcile_errors_total + subsystem: clusterip_repair + namespace: apiserver + help: Number of reconciliation failures on the clusterip repair reconcile loop + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: conversion_webhook_duration_seconds + namespace: apiserver + help: Conversion webhook request latency + type: Histogram + stabilityLevel: ALPHA + labels: + - failure_type + - result + buckets: + - 0.005 + - 0.01 + - 0.02 + - 0.05 + - 0.1 + - 0.2 + - 0.5 + - 1 + - 2 + - 5 + - 10 + - 20 + - 30 + - 45 + - 60 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: conversion_webhook_request_total + namespace: apiserver + help: Counter for conversion webhook requests with success/failure and failure error + type + type: Counter + stabilityLevel: ALPHA + labels: + - failure_type + - result + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_crd_conversion_webhook_duration_seconds + help: CRD webhook conversion duration in seconds + type: Histogram + 
stabilityLevel: ALPHA + labels: + - crd_name + - from_version + - succeeded + - to_version + buckets: + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: current_inqueue_requests + subsystem: apiserver + help: Maximal number of queued requests in this apiserver per request kind in last + second. + type: Gauge + stabilityLevel: ALPHA + labels: + - request_kind + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_delegated_authn_request_duration_seconds + help: Request latency in seconds. Broken down by status code. + type: Histogram + stabilityLevel: ALPHA + labels: + - code + buckets: + - 0.25 + - 0.5 + - 0.7 + - 1 + - 1.5 + - 3 + - 5 + - 10 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_delegated_authn_request_total + help: Number of HTTP requests partitioned by status code. + type: Counter + stabilityLevel: ALPHA + labels: + - code + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_delegated_authz_request_duration_seconds + help: Request latency in seconds. Broken down by status code. + type: Histogram + stabilityLevel: ALPHA + labels: + - code + buckets: + - 0.25 + - 0.5 + - 0.7 + - 1 + - 1.5 + - 3 + - 5 + - 10 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_delegated_authz_request_total + help: Number of HTTP requests partitioned by status code. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - code + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: dial_duration_seconds + subsystem: egress_dialer + namespace: apiserver + help: Dial latency histogram in seconds, labeled by the protocol (http-connect or + grpc), transport (tcp or uds) + type: Histogram + stabilityLevel: ALPHA + labels: + - protocol + - transport + buckets: + - 0.005 + - 0.025 + - 0.1 + - 0.5 + - 2.5 + - 12.5 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: dial_failure_count + subsystem: egress_dialer + namespace: apiserver + help: Dial failure count, labeled by the protocol (http-connect or grpc), transport + (tcp or uds), and stage (connect or proxy). The stage indicates at which stage + the dial failed + type: Counter + stabilityLevel: ALPHA + labels: + - protocol + - stage + - transport + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: dial_start_total + subsystem: egress_dialer + namespace: apiserver + help: Dial starts, labeled by the protocol (http-connect or grpc) and transport + (tcp or uds). + type: Counter + stabilityLevel: ALPHA + labels: + - protocol + - transport + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: automatic_reload_last_timestamp_seconds + subsystem: encryption_config_controller + namespace: apiserver + help: Timestamp of the last successful or failed automatic reload of encryption + configuration split by apiserver identity. + type: Gauge + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: automatic_reloads_total + subsystem: encryption_config_controller + namespace: apiserver + help: Total number of reload successes and failures of encryption configuration + split by apiserver identity. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_encryption_config_controller_last_config_info + help: Information about the last applied encryption configuration with hash as label, + split by apiserver identity. + type: Custom + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - hash + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: dek_cache_fill_percent + subsystem: envelope_encryption + namespace: apiserver + help: Percent of the cache slots currently occupied by cached DEKs. type: Gauge stabilityLevel: ALPHA componentEndpoints: - - component: kubelet + - component: kube-apiserver + endpoint: /metrics +- name: dek_cache_inter_arrival_time_seconds + subsystem: envelope_encryption + namespace: apiserver + help: Time (in seconds) of inter arrival of transformation requests. + type: Histogram + stabilityLevel: ALPHA + labels: + - transformation_type + buckets: + - 60 + - 120 + - 240 + - 480 + - 960 + - 1920 + - 3840 + - 7680 + - 15360 + - 30720 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: dek_source_cache_size + subsystem: envelope_encryption + namespace: apiserver + help: Number of records in data encryption key (DEK) source cache. On a restart, + this value is an approximation of the number of decrypt RPC calls the server will + make to the KMS plugin. + type: Gauge + stabilityLevel: ALPHA + labels: + - provider_name + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: invalid_key_id_from_status_total + subsystem: envelope_encryption + namespace: apiserver + help: Number of times an invalid keyID is returned by the Status RPC call split + by error. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - error + - provider_name + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: key_id_hash_last_timestamp_seconds + subsystem: envelope_encryption + namespace: apiserver + help: The last time in seconds when a keyID was used. + type: Gauge + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - key_id_hash + - provider_name + - transformation_type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: key_id_hash_status_last_timestamp_seconds + subsystem: envelope_encryption + namespace: apiserver + help: The last time in seconds when a keyID was returned by the Status RPC call. + type: Gauge + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - key_id_hash + - provider_name + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: key_id_hash_total + subsystem: envelope_encryption + namespace: apiserver + help: Number of times a keyID is used split by transformation type, provider, and + apiserver identity. + type: Counter + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - key_id_hash + - provider_name + - transformation_type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: kms_operations_latency_seconds + subsystem: envelope_encryption + namespace: apiserver + help: KMS operation duration with gRPC error code status total. 
+ type: Histogram + stabilityLevel: ALPHA + labels: + - grpc_status_code + - method_name + - provider_name + buckets: + - 0.0001 + - 0.0002 + - 0.0004 + - 0.0008 + - 0.0016 + - 0.0032 + - 0.0064 + - 0.0128 + - 0.0256 + - 0.0512 + - 0.1024 + - 0.2048 + - 0.4096 + - 0.8192 + - 1.6384 + - 3.2768 + - 6.5536 + - 13.1072 + - 26.2144 + - 52.4288 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: fetch_keys_data_timestamp + subsystem: externaljwt + namespace: apiserver + help: Unix Timestamp in seconds of the last successful FetchKeys data_timestamp + value returned by the external signer + type: Gauge + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: fetch_keys_request_total + subsystem: externaljwt + namespace: apiserver + help: Total attempts at syncing supported JWKs + type: Counter + stabilityLevel: ALPHA + labels: + - code + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: fetch_keys_success_timestamp + subsystem: externaljwt + namespace: apiserver + help: Unix Timestamp in seconds of the last successful FetchKeys request + type: Gauge + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_duration_seconds + subsystem: externaljwt + namespace: apiserver + help: Request duration and time for calls to external-jwt-signer + type: Histogram + stabilityLevel: ALPHA + labels: + - code + - method + buckets: + - 0.001 + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + - 30 + - 60 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: sign_request_total + subsystem: externaljwt + namespace: apiserver + help: Total attempts at signing JWT + type: Counter + stabilityLevel: ALPHA + labels: + - code + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: current_inqueue_seats + subsystem: flowcontrol + namespace: 
apiserver + help: Number of seats currently pending in queues of the API Priority and Fairness + subsystem + type: Gauge + stabilityLevel: ALPHA + labels: + - flow_schema + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: current_limit_seats + subsystem: flowcontrol + namespace: apiserver + help: current derived number of execution seats available to each priority level + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: current_r + subsystem: flowcontrol + namespace: apiserver + help: R(time of last change) + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: demand_seats + subsystem: flowcontrol + namespace: apiserver + help: Observations, at the end of every nanosecond, of (the number of seats each + priority level could use) / (nominal number of seats for that level) + type: TimingRatioHistogram + stabilityLevel: ALPHA + labels: + - priority_level + buckets: + - 0.2 + - 0.4 + - 0.6 + - 0.8 + - 1 + - 1.2 + - 1.4 + - 1.7 + - 2 + - 2.8 + - 4 + - 6 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: demand_seats_average + subsystem: flowcontrol + namespace: apiserver + help: Time-weighted average, over last adjustment period, of demand_seats + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: demand_seats_high_watermark + subsystem: flowcontrol + namespace: apiserver + help: High watermark, over last adjustment period, of demand_seats + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: demand_seats_smoothed + subsystem: flowcontrol + namespace: apiserver + help: Smoothed seat demands + type: Gauge + 
stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: demand_seats_stdev + subsystem: flowcontrol + namespace: apiserver + help: Time-weighted standard deviation, over last adjustment period, of demand_seats + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: dispatch_r + subsystem: flowcontrol + namespace: apiserver + help: R(time of last dispatch) + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: epoch_advance_total + subsystem: flowcontrol + namespace: apiserver + help: Number of times the queueset's progress meter jumped backward + type: Counter + stabilityLevel: ALPHA + labels: + - priority_level + - success + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: latest_s + subsystem: flowcontrol + namespace: apiserver + help: S(most recently dispatched request) + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: lower_limit_seats + subsystem: flowcontrol + namespace: apiserver + help: Configured lower bound on number of execution seats available to each priority + level + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: next_discounted_s_bounds + subsystem: flowcontrol + namespace: apiserver + help: min and max, over queues, of S(oldest waiting request in queue) - estimated + work in progress + type: Gauge + stabilityLevel: ALPHA + labels: + - bound + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: next_s_bounds + subsystem: flowcontrol + namespace: apiserver + help: min and max, over queues, of S(oldest waiting request in 
queue) + type: Gauge + stabilityLevel: ALPHA + labels: + - bound + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: priority_level_request_utilization + subsystem: flowcontrol + namespace: apiserver + help: Observations, at the end of every nanosecond, of number of requests (as a + fraction of the relevant limit) waiting or in any stage of execution (but only + initial stage for WATCHes) + type: TimingRatioHistogram + stabilityLevel: ALPHA + labels: + - phase + - priority_level + buckets: + - 0 + - 0.001 + - 0.003 + - 0.01 + - 0.03 + - 0.1 + - 0.25 + - 0.5 + - 0.75 + - 1 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: priority_level_seat_utilization + subsystem: flowcontrol + namespace: apiserver + help: Observations, at the end of every nanosecond, of utilization of seats for + any stage of execution (but only initial stage for WATCHes) + type: TimingRatioHistogram + stabilityLevel: ALPHA + labels: + - priority_level + buckets: + - 0 + - 0.1 + - 0.2 + - 0.3 + - 0.4 + - 0.5 + - 0.6 + - 0.7 + - 0.8 + - 0.9 + - 0.95 + - 0.99 + - 1 + constLabels: + phase: executing + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: read_vs_write_current_requests + subsystem: flowcontrol + namespace: apiserver + help: Observations, at the end of every nanosecond, of the number of requests (as + a fraction of the relevant limit) waiting or in regular stage of execution + type: TimingRatioHistogram + stabilityLevel: ALPHA + labels: + - phase + - request_kind + buckets: + - 0 + - 0.001 + - 0.01 + - 0.1 + - 0.2 + - 0.3 + - 0.4 + - 0.5 + - 0.6 + - 0.7 + - 0.8 + - 0.9 + - 0.95 + - 0.99 + - 1 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_concurrency_in_use + subsystem: flowcontrol + namespace: apiserver + help: Concurrency (number of seats) occupied by the currently executing (initial + stage for a WATCH, any stage otherwise) requests 
in the API Priority and Fairness + subsystem + type: Gauge + deprecatedVersion: 1.31.0 + stabilityLevel: ALPHA + labels: + - flow_schema + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_concurrency_limit + subsystem: flowcontrol + namespace: apiserver + help: Nominal number of execution seats configured for each priority level + type: Gauge + deprecatedVersion: 1.30.0 + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_dispatch_no_accommodation_total + subsystem: flowcontrol + namespace: apiserver + help: Number of times a dispatch attempt resulted in a non accommodation due to + lack of available seats + type: Counter + stabilityLevel: ALPHA + labels: + - flow_schema + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_execution_seconds + subsystem: flowcontrol + namespace: apiserver + help: Duration of initial stage (for a WATCH) or any (for a non-WATCH) stage of + request execution in the API Priority and Fairness subsystem + type: Histogram + stabilityLevel: ALPHA + labels: + - flow_schema + - priority_level + - type + buckets: + - 0 + - 0.005 + - 0.02 + - 0.05 + - 0.1 + - 0.2 + - 0.5 + - 1 + - 2 + - 5 + - 10 + - 15 + - 30 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_queue_length_after_enqueue + subsystem: flowcontrol + namespace: apiserver + help: Length of queue in the API Priority and Fairness subsystem, as seen by each + request after it is enqueued + type: Histogram + stabilityLevel: ALPHA + labels: + - flow_schema + - priority_level + buckets: + - 0 + - 10 + - 25 + - 50 + - 100 + - 250 + - 500 + - 1000 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: seat_fair_frac + subsystem: flowcontrol + namespace: apiserver + help: Fair fraction of server's concurrency to allocate to 
each priority level that + can use it + type: Gauge + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: target_seats + subsystem: flowcontrol + namespace: apiserver + help: Seat allocation targets + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: upper_limit_seats + subsystem: flowcontrol + namespace: apiserver + help: Configured upper bound on number of execution seats available to each priority + level + type: Gauge + stabilityLevel: ALPHA + labels: + - priority_level + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: watch_count_samples + subsystem: flowcontrol + namespace: apiserver + help: count of watchers for mutating requests in API Priority and Fairness + type: Histogram + stabilityLevel: ALPHA + labels: + - flow_schema + - priority_level + buckets: + - 0 + - 1 + - 10 + - 100 + - 1000 + - 10000 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: work_estimated_seats + subsystem: flowcontrol + namespace: apiserver + help: Number of estimated seats (maximum of initial and final seats) associated + with requests in API Priority and Fairness + type: Histogram + stabilityLevel: ALPHA + labels: + - flow_schema + - priority_level + buckets: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 100 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: attempts_duration_seconds + subsystem: impersonation + namespace: apiserver + help: Latency of impersonation attempts in seconds split by mode and decision. 
+ type: Histogram + stabilityLevel: ALPHA + labels: + - decision + - mode + buckets: + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: attempts_total + subsystem: impersonation + namespace: apiserver + help: Total number of impersonation attempts split by mode and decision. + type: Counter + stabilityLevel: ALPHA + labels: + - decision + - mode + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: authorization_attempts_duration_seconds + subsystem: impersonation + namespace: apiserver + help: Latency of authorization checks made by the impersonation handler in seconds + split by mode and decision. + type: Histogram + stabilityLevel: ALPHA + labels: + - decision + - mode + buckets: + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: authorization_attempts_total + subsystem: impersonation + namespace: apiserver + help: Total number of authorization checks made by the impersonation handler split + by mode and decision. + type: Counter + stabilityLevel: ALPHA + labels: + - decision + - mode + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: init_events_total + namespace: apiserver + help: Counter of init events processed in watch cache broken by resource type. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: x509_insecure_sha1_total + subsystem: kube_aggregator + namespace: apiserver + help: Counts the number of requests to servers with insecure SHA1 signatures in + their serving certificate OR the number of connection failures due to the insecure + SHA1 signatures (either/or, based on the runtime environment) + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: x509_missing_san_total + subsystem: kube_aggregator + namespace: apiserver + help: Counts the number of requests to servers missing SAN extension in their serving + certificate OR the number of connection failures due to the lack of x509 certificate + SAN extension missing (either/or, based on the runtime environment) + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: automatic_reload_last_timestamp_seconds + subsystem: manifest_admission_config_controller + namespace: apiserver + help: Timestamp of the last automatic reload of admission manifest configuration + split by status, plugin, and apiserver identity. + type: Gauge + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - plugin + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: automatic_reloads_total + subsystem: manifest_admission_config_controller + namespace: apiserver + help: Total number of automatic reloads of admission manifest configuration split + by status, plugin, and apiserver identity. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - apiserver_id_hash + - plugin + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_manifest_admission_config_controller_last_config_info + help: Information about the last applied admission manifest configuration with hash + as label, split by plugin and apiserver identity. + type: Custom + stabilityLevel: ALPHA + labels: + - plugin + - apiserver_id_hash + - hash + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: check_duration_seconds + subsystem: mutating_admission_policy + namespace: apiserver + help: Mutation admission latency for individual mutation expressions in seconds, + labeled by policy and binding. + type: Histogram + stabilityLevel: ALPHA + labels: + - error_type + - policy + - policy_binding + buckets: + - 5e-07 + - 0.001 + - 0.01 + - 0.1 + - 1 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: check_total + subsystem: mutating_admission_policy + namespace: apiserver + help: Mutation admission policy check total, labeled by policy and further identified + by binding. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - error_type + - policy + - policy_binding + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: port_errors_total + subsystem: nodeport_repair + namespace: apiserver + help: 'Number of errors detected on ports by the repair loop broken down by type + of error: leak, repair, full, outOfRange, duplicate, unknown' + type: Counter + stabilityLevel: ALPHA + labels: + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: reconcile_errors_total + subsystem: nodeport_repair + namespace: apiserver + help: Number of reconciliation failures on the nodeport repair reconcile loop + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: peer_discovery_sync_errors_total + subsystem: apiserver + help: Total number of errors encountered while syncing discovery information from + a peer kube-apiserver + type: Counter + stabilityLevel: ALPHA + labels: + - type + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: peer_proxy_errors_total + subsystem: apiserver + help: Total number of errors encountered while proxying requests to a peer kube + apiserver + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + - type + - version + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_aborts_total + subsystem: apiserver + help: Number of requests which apiserver aborted possibly due to a timeout, for + each group, version, verb, resource, subresource and scope + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + - scope + - subresource + - verb + - version + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_body_size_bytes + subsystem: apiserver + help: Apiserver request body size in bytes broken out by resource and verb. 
+ type: Histogram + stabilityLevel: ALPHA + labels: + - group + - resource + - verb + buckets: + - 50000 + - 150000 + - 250000 + - 350000 + - 450000 + - 550000 + - 650000 + - 750000 + - 850000 + - 950000 + - 1.05e+06 + - 1.15e+06 + - 1.25e+06 + - 1.35e+06 + - 1.45e+06 + - 1.55e+06 + - 1.65e+06 + - 1.75e+06 + - 1.85e+06 + - 1.95e+06 + - 2.05e+06 + - 2.15e+06 + - 2.25e+06 + - 2.35e+06 + - 2.45e+06 + - 2.55e+06 + - 2.65e+06 + - 2.75e+06 + - 2.85e+06 + - 2.95e+06 + - 3.05e+06 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_filter_duration_seconds + subsystem: apiserver + help: Request filter latency distribution in seconds, for each filter type + type: Histogram + stabilityLevel: ALPHA + labels: + - filter + buckets: + - 0.0001 + - 0.0003 + - 0.001 + - 0.003 + - 0.01 + - 0.03 + - 0.1 + - 0.3 + - 1 + - 5 + - 10 + - 15 + - 30 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_post_timeout_total + subsystem: apiserver + help: Tracks the activity of the request handlers after the associated requests + have been timed out by the apiserver + type: Counter + stabilityLevel: ALPHA + labels: + - source + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_sli_duration_seconds + subsystem: apiserver + help: Response latency distribution (not counting webhook duration and priority + & fairness queue wait times) in seconds for each verb, group, version, resource, + subresource, scope and component. 
+ type: Histogram + stabilityLevel: ALPHA + labels: + - component + - group + - resource + - scope + - subresource + - verb + - version + buckets: + - 0.05 + - 0.1 + - 0.2 + - 0.4 + - 0.6 + - 0.8 + - 1 + - 1.25 + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 8 + - 10 + - 15 + - 20 + - 30 + - 45 + - 60 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_slo_duration_seconds + subsystem: apiserver + help: Response latency distribution (not counting webhook duration and priority + & fairness queue wait times) in seconds for each verb, group, version, resource, + subresource, scope and component. + type: Histogram + deprecatedVersion: 1.27.0 + stabilityLevel: ALPHA + labels: + - component + - group + - resource + - scope + - subresource + - verb + - version + buckets: + - 0.05 + - 0.1 + - 0.2 + - 0.4 + - 0.6 + - 0.8 + - 1 + - 1.25 + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 8 + - 10 + - 15 + - 20 + - 30 + - 45 + - 60 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_terminations_total + subsystem: apiserver + help: Number of requests which apiserver terminated in self-defense. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - code + - component + - group + - resource + - scope + - subresource + - verb + - version + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_timestamp_comparison_time + subsystem: apiserver + help: Time taken for comparison of old vs new objects in UPDATE or PATCH requests + type: Histogram + stabilityLevel: ALPHA + labels: + - code_path + buckets: + - 0.0001 + - 0.0003 + - 0.001 + - 0.003 + - 0.01 + - 0.03 + - 0.1 + - 0.3 + - 1 + - 5 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: rerouted_request_total + subsystem: apiserver + help: '`Total number of requests that were proxied to a peer kube-apiserver because + the local apiserver was not capable of serving it, broken down by ''group'', ''version'', + and ''resource'' indicating the GVR of the request. If all three are empty (""), + the request is a discovery request.`' + type: Counter + stabilityLevel: ALPHA + labels: + - code + - group + - resource + - version + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_resource_objects + help: Number of stored objects at the time of last check split by kind. In case + of a fetching error, the value will be -1. + type: Gauge + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_resource_size_estimate_bytes + help: Estimated size of stored objects in database. Estimate is based on sum of + last observed sizes of serialized objects. In case of a fetching error, the value + will be -1. + type: Gauge + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: selfrequest_total + subsystem: apiserver + help: Counter of apiserver self-requests broken out for each verb, API resource + and subresource. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + - subresource + - verb + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: storage_consistency_checks_total + namespace: apiserver + help: Counter for status of consistency checks between etcd and watch cache + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: data_key_generation_duration_seconds + subsystem: storage + namespace: apiserver + help: Latencies in seconds of data encryption key(DEK) generation operations. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 5e-06 + - 1e-05 + - 2e-05 + - 4e-05 + - 8e-05 + - 0.00016 + - 0.00032 + - 0.00064 + - 0.00128 + - 0.00256 + - 0.00512 + - 0.01024 + - 0.02048 + - 0.04096 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: data_key_generation_failures_total + subsystem: storage + namespace: apiserver + help: Total number of failed data encryption key(DEK) generation operations. + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: storage_db_total_size_in_bytes + subsystem: apiserver + help: Total size of the storage database file physically allocated in bytes. + type: Gauge + deprecatedVersion: 1.28.0 + stabilityLevel: ALPHA + labels: + - endpoint + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: storage_decode_errors_total + namespace: apiserver + help: Number of stored object decode errors split by object type + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: envelope_transformation_cache_misses_total + subsystem: storage + namespace: apiserver + help: Total number of cache misses while accessing key decryption key(KEK). 
+ type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_storage_list_evaluated_objects_total + help: Number of objects tested in the course of serving a LIST request from storage + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_storage_list_fetched_objects_total + help: Number of objects read from storage in the course of serving a LIST request + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_storage_list_returned_objects_total + help: Number of objects returned for a LIST request from storage + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_storage_list_total + help: Number of LIST requests served from storage + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: transformation_duration_seconds + subsystem: storage + namespace: apiserver + help: Latencies in seconds of value transformation operations. + type: Histogram + stabilityLevel: ALPHA + labels: + - transformation_type + - transformer_prefix + buckets: + - 5e-06 + - 1e-05 + - 2e-05 + - 4e-05 + - 8e-05 + - 0.00016 + - 0.00032 + - 0.00064 + - 0.00128 + - 0.00256 + - 0.00512 + - 0.01024 + - 0.02048 + - 0.04096 + - 0.08192 + - 0.16384 + - 0.32768 + - 0.65536 + - 1.31072 + - 2.62144 + - 5.24288 + - 10.48576 + - 20.97152 + - 41.94304 + - 83.88608 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: transformation_operations_total + subsystem: storage + namespace: apiserver + help: Total number of transformations. 
Successful transformation will have a status + 'OK' and a varied status string when the transformation fails. The status, resource, + and transformation_type fields can be used for alerting purposes. For example, + you can monitor for encryption/decryption failures using the transformation_type + (e.g., from_storage for decryption and to_storage for encryption). Additionally, + these fields can be used to ensure that the correct transformers are applied to + each resource. + type: Counter + stabilityLevel: ALPHA + labels: + - resource + - status + - transformation_type + - transformer_prefix + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: stream_translator_requests_total + subsystem: apiserver + help: Total number of requests that were handled by the StreamTranslatorProxy, which + processes streaming RemoteCommand/V5 + type: Counter + stabilityLevel: ALPHA + labels: + - code + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: stream_tunnel_requests_total + subsystem: apiserver + help: Total number of requests that were handled by the StreamTunnelProxy, which + processes streaming PortForward/V2 + type: Counter + stabilityLevel: ALPHA + labels: + - code + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: terminated_watchers_total + namespace: apiserver + help: Counter of watchers closed due to unresponsiveness broken by resource type. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: tls_handshake_errors_total + subsystem: apiserver + help: Number of requests dropped with 'TLS handshake error from' error + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: declarative_validation_panics_total + subsystem: validation + namespace: apiserver + help: Number of panics in declarative validation, broken down by validation identifier. + type: Counter + stabilityLevel: ALPHA + labels: + - validation_identifier + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: declarative_validation_parity_discrepancies_total + subsystem: validation + namespace: apiserver + help: Number of discrepancies between declarative and handwritten validation, broken + down by validation identifier. + type: Counter + stabilityLevel: ALPHA + labels: + - validation_identifier + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: consistent_read_total + subsystem: watch_cache + namespace: apiserver + help: Counter for consistent reads from cache. + type: Counter + stabilityLevel: ALPHA + labels: + - fallback + - group + - resource + - success + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: events_dispatched_total + subsystem: watch_cache + namespace: apiserver + help: Counter of events dispatched in watch cache broken by resource type. + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: events_received_total + subsystem: watch_cache + namespace: apiserver + help: Counter of events received in watch cache broken by resource type. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: initializations_total + subsystem: watch_cache + namespace: apiserver + help: Counter of watch cache initializations broken by resource type. + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: read_wait_seconds + subsystem: watch_cache + namespace: apiserver + help: Histogram of time spent waiting for a watch cache to become fresh. + type: Histogram + stabilityLevel: ALPHA + labels: + - group + - resource + buckets: + - 0.005 + - 0.025 + - 0.05 + - 0.1 + - 0.2 + - 0.4 + - 0.6 + - 0.8 + - 1 + - 1.25 + - 1.5 + - 2 + - 3 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: resource_version + subsystem: watch_cache + namespace: apiserver + help: Current resource version of watch cache broken by resource type. This is truncated + to the 15 least significant digits. + type: Gauge + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: watch_events_sizes + subsystem: apiserver + help: Watch event size distribution in bytes + type: Histogram + stabilityLevel: ALPHA + labels: + - group + - resource + - version + buckets: + - 1024 + - 2048 + - 4096 + - 8192 + - 16384 + - 32768 + - 65536 + - 131072 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: watch_events_total + subsystem: apiserver + help: Number of events sent in watch clients + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + - version + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: watch_filtered_events_total + namespace: apiserver + help: Counter of events filtered out by shard selector during watch dispatch, broken + by resource type. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: watch_shards_total + namespace: apiserver + help: Number of active sharded watch connections broken by resource type. + type: Gauge + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: x509_insecure_sha1_total + subsystem: webhooks + namespace: apiserver + help: Counts the number of requests to servers with insecure SHA1 signatures in + their serving certificate OR the number of connection failures due to the insecure + SHA1 signatures (either/or, based on the runtime environment) + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: x509_missing_san_total + subsystem: webhooks + namespace: apiserver + help: Counts the number of requests to servers missing SAN extension in their serving + certificate OR the number of connection failures due to the lack of x509 certificate + SAN extension missing (either/or, based on the runtime environment) + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: websocket_streaming_requests_total + subsystem: apiserver + help: Total number of WebSocket streaming requests (exec/attach/portforward) routed + by the API server, labeled by subresource and proxy_type. proxy_type is proxied_to_kubelet + when the kubelet handles the request directly; otherwise translated_at_apiserver. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - proxy_type + - subresource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: attachdetach_controller_forced_detaches + subsystem: attach_detach_controller + help: Number of times the A/D Controller performed a forced detach + type: Counter + stabilityLevel: ALPHA + labels: + - reason + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: attachdetach_controller_total_volumes + help: Number of volumes in A/D Controller + type: Custom + stabilityLevel: ALPHA + labels: + - plugin_name + - state + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: authenticated_user_requests + help: Counter of authenticated requests broken out by username. + type: Counter + stabilityLevel: ALPHA + labels: + - username + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: authentication_attempts + help: Counter of authenticated attempts. + type: Counter + stabilityLevel: ALPHA + labels: + - result + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: authentication_duration_seconds + help: Authentication duration in seconds broken out by result. 
+ type: Histogram + stabilityLevel: ALPHA + labels: + - result + buckets: + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: active_fetch_count + subsystem: token_cache + namespace: authentication + type: Gauge + stabilityLevel: ALPHA + labels: + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: fetch_total + subsystem: token_cache + namespace: authentication + type: Counter + stabilityLevel: ALPHA + labels: + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_duration_seconds + subsystem: token_cache + namespace: authentication + type: Histogram + stabilityLevel: ALPHA + labels: + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_total + subsystem: token_cache + namespace: authentication + type: Counter + stabilityLevel: ALPHA + labels: + - status + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: authorization_attempts_total + help: Counter of authorization attempts broken down by result. It can be either + 'allowed', 'denied', 'no-opinion' or 'error'. + type: Counter + stabilityLevel: ALPHA + labels: + - result + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: authorization_duration_seconds + help: Authorization duration in seconds broken out by result. + type: Histogram + stabilityLevel: ALPHA + labels: + - result + buckets: + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_duration_seconds + subsystem: cloud_provider_webhook + help: Request latency in seconds. Broken down by status code. 
+ type: Histogram + stabilityLevel: ALPHA + labels: + - code + - webhook + buckets: + - 0.25 + - 0.5 + - 0.7 + - 1 + - 1.5 + - 3 + - 5 + - 10 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics +- name: request_total + subsystem: cloud_provider_webhook + help: Number of HTTP requests partitioned by status code. + type: Counter + stabilityLevel: ALPHA + labels: + - code + - webhook + componentEndpoints: + - component: cloud-controller-manager endpoint: /metrics - name: sync_duration_seconds subsystem: clustertrustbundle_publisher @@ -54,61 +2531,57 @@ componentEndpoints: - component: kube-controller-manager endpoint: /metrics -- name: sync_duration_seconds - subsystem: root_ca_cert_publisher - help: Number of namespace syncs happened in root ca cert publisher. +- name: container_swap_limit_bytes + help: Current amount of the container swap limit in bytes. Reported only on non-windows + systems + type: Custom + stabilityLevel: ALPHA + labels: + - container + - pod + - namespace + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource +- name: container_swap_usage_bytes + help: Current amount of the container swap usage in bytes. Reported only on non-windows + systems + type: Custom + stabilityLevel: ALPHA + labels: + - container + - pod + - namespace + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource +- name: operations_seconds + subsystem: csi + help: Container Storage Interface operation duration with gRPC error code status + total type: Histogram stabilityLevel: ALPHA labels: - - code - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: sync_total - subsystem: root_ca_cert_publisher - help: Number of namespace syncs happened in root ca cert publisher. 
- type: Counter - stabilityLevel: ALPHA - labels: - - code - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: job_creation_skew_duration_seconds - subsystem: cronjob_controller - help: Time between when a cronjob is scheduled to be run, and when the corresponding - job is created - type: Histogram - stabilityLevel: STABLE + - driver_name + - grpc_status_code + - method_name + - migrated buckets: + - 0.1 + - 0.25 + - 0.5 - 1 - - 2 - - 4 - - 8 - - 16 - - 32 - - 64 - - 128 - - 256 - - 512 + - 2.5 + - 5 + - 10 + - 15 + - 25 + - 50 + - 120 + - 300 + - 600 componentEndpoints: - - component: kube-controller-manager + - component: kubelet endpoint: /metrics - name: stale_sync_skips_total subsystem: daemonset_controller @@ -151,6 +2624,109 @@ componentEndpoints: - component: kube-controller-manager endpoint: /metrics +- name: grpc_operations_duration_seconds + subsystem: dra + help: Duration in seconds of the DRA gRPC operations + type: Histogram + stabilityLevel: ALPHA + labels: + - driver_name + - grpc_status_code + - method_name + buckets: + - 0.1 + - 0.1534127404634391 + - 0.23535468936502524 + - 0.36106407876409946 + - 0.5539182980610752 + - 0.849781240983936 + - 1.303672689737678 + - 1.9999999999999993 + - 3.0682548092687805 + - 4.7070937873005025 + - 7.221281575281985 + - 11.078365961221499 + - 16.995624819678714 + - 26.07345379475354 + - 39.99999999999997 + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: operations_duration_seconds + subsystem: dra + help: Latency histogram in seconds for the duration of handling all ResourceClaims + referenced by a pod when the pod starts or stops. Identified by the name of the + operation (PrepareResources or UnprepareResources) and separated by the success + of the operation. The number of failed operations is provided through the histogram's + overall count. 
+ type: Histogram + stabilityLevel: ALPHA + labels: + - is_error + - operation_name + buckets: + - 0.1 + - 0.1534127404634391 + - 0.23535468936502524 + - 0.36106407876409946 + - 0.5539182980610752 + - 0.849781240983936 + - 1.303672689737678 + - 1.9999999999999993 + - 3.0682548092687805 + - 4.7070937873005025 + - 7.221281575281985 + - 11.078365961221499 + - 16.995624819678714 + - 26.07345379475354 + - 39.99999999999997 + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: dra_resource_claims_in_use + help: The number of ResourceClaims that are currently in use on the node, by driver + name (driver_name label value) and across all drivers (special value for + driver_name). Note that the sum of all by-driver counts is not the total number + of in-use ResourceClaims because the same ResourceClaim might use devices from + different drivers. Instead, use the count for the driver_name. + type: Custom + stabilityLevel: ALPHA + labels: + - driver_name + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: changes + subsystem: endpoint_slice_controller + help: Number of EndpointSlice changes + type: Counter + stabilityLevel: ALPHA + labels: + - operation + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: endpointslices_changed_per_sync + subsystem: endpoint_slice_controller + help: Number of EndpointSlices changed on each Service sync + type: Histogram + stabilityLevel: ALPHA + labels: + - topology + - traffic_distribution + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: syncs + subsystem: endpoint_slice_controller + help: Number of EndpointSlice syncs + type: Counter + stabilityLevel: ALPHA + labels: + - result + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics - name: addresses_skipped_per_sync subsystem: endpoint_slice_mirroring_controller help: Number of addresses skipped on each Endpoints sync due to being invalid or 
@@ -306,6 +2882,172 @@ componentEndpoints: - component: kube-controller-manager endpoint: /metrics +- name: create_failures_total + subsystem: ephemeral_volume_controller + help: Number of PersistentVolumeClaim creation requests + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: create_total + subsystem: ephemeral_volume_controller + help: Number of PersistentVolumeClaim creation requests + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: etcd_bookmark_counts + help: Number of etcd bookmarks (progress notify events) split by kind. + type: Gauge + deprecatedVersion: 1.36.0 + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: etcd_bookmark_total + help: Number of etcd bookmarks (progress notify events) split by kind. + type: Counter + stabilityLevel: ALPHA + labels: + - group + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: etcd_lease_object_counts + help: Number of objects attached to a single etcd lease. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 10 + - 50 + - 100 + - 500 + - 1000 + - 2500 + - 5000 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: etcd_request_duration_seconds + help: Etcd request latency in seconds for each operation and object type. + type: Histogram + stabilityLevel: ALPHA + labels: + - group + - operation + - resource + buckets: + - 0.005 + - 0.025 + - 0.05 + - 0.1 + - 0.2 + - 0.4 + - 0.6 + - 0.8 + - 1 + - 1.25 + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 8 + - 10 + - 15 + - 20 + - 30 + - 45 + - 60 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: etcd_request_errors_total + help: Etcd failed request counts for each operation and object type. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - group + - operation + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: etcd_requests_total + help: Etcd request counts for each operation and object type. + type: Counter + stabilityLevel: ALPHA + labels: + - group + - operation + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: version_info + namespace: etcd + help: Etcd server's binary version + type: Gauge + stabilityLevel: ALPHA + labels: + - binary_version + componentEndpoints: + - component: etcd-version-monitor + endpoint: /metrics +- name: field_validation_request_duration_seconds + help: Response latency distribution in seconds for each field validation value + type: Histogram + stabilityLevel: ALPHA + labels: + - field_validation + buckets: + - 0.05 + - 0.1 + - 0.2 + - 0.4 + - 0.6 + - 0.8 + - 1 + - 1.25 + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 8 + - 10 + - 15 + - 20 + - 30 + - 45 + - 60 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: force_cleaned_failed_volume_operation_errors_total + help: The number of volumes that failed force cleanup after their reconstruction + failed during kubelet startup. + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: force_cleaned_failed_volume_operations_total + help: The number of volumes that were force cleaned after their reconstruction failed + during kubelet startup. This includes both successful and failed cleanups. 
+ type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kubelet + endpoint: /metrics - name: resources_sync_error_total subsystem: garbagecollector_controller help: Number of garbage collector resources sync errors @@ -333,6 +3075,88 @@ componentEndpoints: - component: kube-controller-manager endpoint: /metrics +- name: processing_latency_seconds + subsystem: informer + help: Time taken to process events after popping from the queue. + type: Histogram + stabilityLevel: ALPHA + labels: + - group + - name + - resource + - version + buckets: + - 0.001 + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: queued_items + subsystem: informer + help: Number of items currently queued in the FIFO. + type: Gauge + stabilityLevel: ALPHA + labels: + - group + - name + - resource + - version + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: store_resource_version + subsystem: informer + help: The 15 least significant digits of the resource version of the store. 
+ type: Gauge + stabilityLevel: ALPHA + labels: + - group + - name + - resource + - version + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics - name: job_finished_indexes_total subsystem: job_controller help: "`The number of finished indexes. Possible values for the\n\t\t\tstatus label @@ -384,677 +3208,181 @@ componentEndpoints: - component: kube-controller-manager endpoint: /metrics -- name: unhealthy_nodes_in_zone - subsystem: node_collector - help: Gauge measuring number of not Ready Nodes per zones. +- name: allocated_ips + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Gauge measuring the number of allocated IPs for Services type: Gauge stabilityLevel: ALPHA labels: - - zone + - cidr componentEndpoints: - - component: kube-controller-manager + - component: kube-apiserver endpoint: /metrics -- name: update_all_nodes_health_duration_seconds - subsystem: node_collector - help: Duration in seconds for NodeController to update the health of all nodes. +- name: allocation_duration_seconds + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Duration in seconds to allocate a Cluster IP by ServiceCIDR type: Histogram stabilityLevel: ALPHA + labels: + - cidr buckets: + - 0.005 - 0.01 - - 0.04 - - 0.16 - - 0.64 - - 2.56 - - 10.24 - - 40.96 - - 163.84 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: update_node_health_duration_seconds - subsystem: node_collector - help: Duration in seconds for NodeController to update the health of a single node. 
- type: Histogram - stabilityLevel: ALPHA - buckets: - - 0.001 - - 0.004 - - 0.016 - - 0.064 - - 0.256 - - 1.024 - - 4.096 - - 16.384 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: zone_health - subsystem: node_collector - help: Gauge measuring percentage of healthy nodes per zone. - type: Gauge - stabilityLevel: ALPHA - labels: - - zone - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: zone_size - subsystem: node_collector - help: Gauge measuring number of registered Nodes per zones. - type: Gauge - stabilityLevel: ALPHA - labels: - - zone - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: cidrset_allocation_tries_per_request - subsystem: node_ipam_controller - help: Number of endpoints added on each Service sync - type: Histogram - stabilityLevel: ALPHA - labels: - - clusterCIDR - buckets: - - 1 - - 5 - - 25 - - 125 - - 625 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: cidrset_cidrs_allocations_total - subsystem: node_ipam_controller - help: Counter measuring total number of CIDR allocations. - type: Counter - stabilityLevel: ALPHA - labels: - - clusterCIDR - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: cidrset_cidrs_releases_total - subsystem: node_ipam_controller - help: Counter measuring total number of CIDR releases. - type: Counter - stabilityLevel: ALPHA - labels: - - clusterCIDR - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: cidrset_usage_cidrs - subsystem: node_ipam_controller - help: Gauge measuring percentage of allocated CIDRs. - type: Gauge - stabilityLevel: ALPHA - labels: - - clusterCIDR - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: cirdset_max_cidrs - subsystem: node_ipam_controller - help: Maximum number of CIDRs that can be allocated. 
- type: Gauge - stabilityLevel: ALPHA - labels: - - clusterCIDR - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: force_delete_pod_errors_total - subsystem: pod_gc_collector - help: Number of errors encountered when forcefully deleting the pods since the Pod - GC Controller started. - type: Counter - stabilityLevel: ALPHA - labels: - - namespace - - reason - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: force_delete_pods_total - subsystem: pod_gc_collector - help: Number of pods that are being forcefully deleted since the Pod GC Controller - started. - type: Counter - stabilityLevel: ALPHA - labels: - - namespace - - reason - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: metric_computation_duration_seconds - subsystem: horizontal_pod_autoscaler_controller - help: The time(seconds) that the HPA controller takes to calculate one metric. The - label 'action' should be either 'scale_down', 'scale_up', or 'none'. The label - 'error' should be either 'spec', 'internal', or 'none'. The label 'metric_type' - corresponds to HPA.spec.metrics[*].type - type: Histogram - stabilityLevel: BETA - labels: - - action - - error - - metric_type - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: metric_computation_total - subsystem: horizontal_pod_autoscaler_controller - help: Number of metric computations. The label 'action' should be either 'scale_down', - 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', - or 'none'. 
The label 'metric_type' corresponds to HPA.spec.metrics[*].type - type: Counter - stabilityLevel: BETA - labels: - - action - - error - - metric_type - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: reconciliation_duration_seconds - subsystem: horizontal_pod_autoscaler_controller - help: The time(seconds) that the HPA controller takes to reconcile once. The label - 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label - 'error' should be either 'spec', 'internal', or 'none'. Note that if both spec - and internal errors happen during a reconciliation, the first one to occur is - reported in `error` label. - type: Histogram - stabilityLevel: BETA - labels: - - action - - error - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: reconciliations_total - subsystem: horizontal_pod_autoscaler_controller - help: Number of reconciliations of HPA controller. The label 'action' should be - either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be - either 'spec', 'internal', or 'none'. Note that if both spec and internal errors - happen during a reconciliation, the first one to occur is reported in `error` - label. - type: Counter - stabilityLevel: BETA - labels: - - action - - error - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: pod_failures_handled_by_failure_policy_total - subsystem: job_controller - help: "`The number of failed Pods handled by failure policy with\n\t\t\trespect - to the failure policy action applied based on the matched\n\t\t\trule. 
Possible - values of the action label correspond to the\n\t\t\tpossible values for the failure - policy rule action, which are:\n\t\t\t\"FailJob\", \"Ignore\" and \"Count\".`" - type: Counter - stabilityLevel: BETA - labels: - - action - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: terminated_pods_tracking_finalizer_total - subsystem: job_controller - help: |- - `The number of terminated pods (phase=Failed|Succeeded) - that have the finalizer batch.kubernetes.io/job-tracking - The event label can be "add" or "delete".` - type: Counter - stabilityLevel: BETA - labels: - - event - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: job_pods_finished_total - subsystem: job_controller - help: The number of finished Pods that are fully tracked - type: Counter - stabilityLevel: STABLE - labels: - - completion_mode - - result - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: job_sync_duration_seconds - subsystem: job_controller - help: The time it took to sync a job - type: Histogram - stabilityLevel: STABLE - labels: - - action - - completion_mode - - result - buckets: - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - - 32.768 - - 65.536 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: job_syncs_total - subsystem: job_controller - help: The number of job syncs - type: Counter - stabilityLevel: STABLE - labels: - - action - - completion_mode - - result - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: jobs_finished_total - subsystem: job_controller - help: The number of finished jobs - type: Counter - stabilityLevel: STABLE - labels: - - completion_mode - - reason - - result - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: evictions_total - 
subsystem: node_collector - help: Number of Node evictions that happened since current instance of NodeController - started. - type: Counter - stabilityLevel: STABLE - labels: - - zone - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: attachdetach_controller_forced_detaches - subsystem: attach_detach_controller - help: Number of times the A/D Controller performed a forced detach - type: Counter - stabilityLevel: ALPHA - labels: - - reason - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: attachdetach_controller_total_volumes - help: Number of volumes in A/D Controller - type: Custom - stabilityLevel: ALPHA - labels: - - plugin_name - - state - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: create_failures_total - subsystem: ephemeral_volume_controller - help: Number of PersistentVolumeClaim creation requests - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: create_total - subsystem: ephemeral_volume_controller - help: Number of PersistentVolumeClaim creation requests - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: pv_collector_bound_pv_count - help: Gauge measuring number of persistent volume currently bound - type: Custom - stabilityLevel: ALPHA - labels: - - storage_class - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: pv_collector_bound_pvc_count - help: Gauge measuring number of persistent volume claim currently bound - type: Custom - stabilityLevel: ALPHA - labels: - - namespace - - storage_class - - volume_attributes_class - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: pv_collector_total_pv_count - help: Gauge measuring total number of persistent volumes - type: Custom - stabilityLevel: 
ALPHA - labels: - - plugin_name - - volume_mode - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: pv_collector_unbound_pv_count - help: Gauge measuring number of persistent volume currently unbound - type: Custom - stabilityLevel: ALPHA - labels: - - storage_class - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: pv_collector_unbound_pvc_count - help: Gauge measuring number of persistent volume claim currently unbound - type: Custom - stabilityLevel: ALPHA - labels: - - namespace - - storage_class - - volume_attributes_class - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: sorting_deletion_age_ratio - subsystem: replicaset_controller - help: The ratio of chosen deleted pod's ages to the current youngest pod's age (at - the time). Should be <2. The intent of this metric is to measure the rough efficacy - of the LogarithmicScaleDown feature gate's effect on the sorting (and deletion) - of pods when a replicaset scales down. This only considers Ready pods when calculating - and reporting. - type: Histogram - stabilityLevel: ALPHA - buckets: + - 0.025 + - 0.05 + - 0.1 - 0.25 - 0.5 - 1 - - 2 - - 4 - - 8 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: stale_sync_skips_total - subsystem: replicaset_controller - help: Total number of ReplicaSet syncs skipped due to a stale watch cache. 
- type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: creates_total - subsystem: resourceclaim_controller - help: Number of ResourceClaims creation requests, categorized by creation status - and admin access - type: Counter - stabilityLevel: ALPHA - labels: - - admin_access - - status - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: resourceclaim_controller_resource_claims - help: Number of ResourceClaims, categorized by allocation status, admin access, - and source. Source can be 'resource_claim_template' (created from a template), - 'extended_resource' (extended resources), or empty (manually created by a user). - type: Custom - stabilityLevel: ALPHA - labels: - - allocated - - admin_access - - source - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: request_processing_duration_seconds - subsystem: resourcepoolstatusrequest_controller - help: Time taken to process a ResourcePoolStatusRequest - type: Histogram - stabilityLevel: ALPHA - labels: - - driver_name - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: request_processing_errors_total - subsystem: resourcepoolstatusrequest_controller - help: Total number of errors encountered while processing ResourcePoolStatusRequests - type: Counter - stabilityLevel: ALPHA - labels: - - driver_name - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: requests_processed_total - subsystem: resourcepoolstatusrequest_controller - help: Total number of ResourcePoolStatusRequests processed - type: Counter - stabilityLevel: ALPHA - labels: - - driver_name - componentEndpoints: - - component: 
kube-controller-manager - endpoint: /metrics -- name: retroactive_storageclass_errors_total - help: Total number of failed retroactive StorageClass assignments to persistent - volume claim - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: retroactive_storageclass_total - help: Total number of retroactive StorageClass assignments to persistent volume - claim - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: selinux_warning_controller_selinux_volume_conflict - help: Conflict between two Pods using the same volume - type: Custom - stabilityLevel: ALPHA - labels: - - property - - pod1_namespace - - pod1_name - - pod1_value - - pod2_namespace - - pod2_name - - pod2_value - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: stale_sync_skips_total - subsystem: statefulset_controller - help: Total number of StatefulSet syncs skipped due to a stale watch cache. 
- type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: statefulset_max_unavailable - subsystem: statefulset_controller - help: Maximum number of unavailable pods allowed during StatefulSet rolling updates - type: Gauge - stabilityLevel: ALPHA - labels: - - pod_management_policy - - statefulset_name - - statefulset_namespace - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: statefulset_unavailable_replicas - subsystem: statefulset_controller - help: Current number of unavailable pods in StatefulSet - type: Gauge - stabilityLevel: ALPHA - labels: - - pod_management_policy - - statefulset_name - - statefulset_namespace - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: storage_count_attachable_volumes_in_use - help: Measure number of volumes in use - type: Custom - stabilityLevel: ALPHA - labels: - - node - - volume_plugin - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: pod_deletion_duration_seconds - subsystem: taint_eviction_controller - help: Latency, in seconds, between the time when a taint effect has been activated - for the Pod and its deletion via TaintEvictionController. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 0.005 - - 0.025 - - 0.1 - - 0.5 - - 1 - 2.5 + - 5 - 10 - - 30 - - 60 - - 120 - - 180 - - 240 componentEndpoints: - - component: kube-controller-manager + - component: kube-apiserver endpoint: /metrics -- name: pod_deletions_total - subsystem: taint_eviction_controller - help: Total number of Pods deleted by TaintEvictionController since its start. 
- type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: job_deletion_duration_seconds - subsystem: ttl_after_finished_controller - help: The time it took to delete the job since it became eligible for deletion - type: Histogram - stabilityLevel: ALPHA - buckets: - - 0.1 - - 0.2 - - 0.4 - - 0.8 - - 1.6 - - 3.2 - - 6.4 - - 12.8 - - 25.6 - - 51.2 - - 102.4 - - 204.8 - - 409.6 - - 819.2 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: volume_operation_errors_total - help: Total volume operation errors +- name: allocation_errors_total + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Number of errors trying to allocate Cluster IPs type: Counter stabilityLevel: ALPHA labels: - - operation_name - - plugin_name + - cidr + - scope componentEndpoints: - - component: kube-controller-manager + - component: kube-apiserver endpoint: /metrics -- name: volume_operation_total_errors - help: Total volume operation errors +- name: allocation_total + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Number of Cluster IPs allocations type: Counter - deprecatedVersion: 1.36.0 stabilityLevel: ALPHA labels: - - operation_name - - plugin_name + - cidr + - scope componentEndpoints: - - component: kube-controller-manager + - component: kube-apiserver + endpoint: /metrics +- name: available_ips + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Gauge measuring the number of available IPs for Services + type: Gauge + stabilityLevel: ALPHA + labels: + - cidr + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: allocated_ports + subsystem: nodeport_allocator + namespace: kube_apiserver + help: Gauge measuring the number of allocated NodePorts for Services + type: Gauge + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: allocation_errors_total + 
subsystem: nodeport_allocator + namespace: kube_apiserver + help: Number of errors trying to allocate NodePort + type: Counter + stabilityLevel: ALPHA + labels: + - scope + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: allocation_total + subsystem: nodeport_allocator + namespace: kube_apiserver + help: Number of NodePort allocations + type: Counter + stabilityLevel: ALPHA + labels: + - scope + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: available_ports + subsystem: nodeport_allocator + namespace: kube_apiserver + help: Gauge measuring the number of available NodePorts for Services + type: Gauge + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: backend_tls_failure_total + subsystem: pod_logs + namespace: kube_apiserver + help: Total number of requests for pods/logs that failed due to kubelet server TLS + verification + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: insecure_backend_total + subsystem: pod_logs + namespace: kube_apiserver + help: 'Total number of requests for pods/logs sliced by usage type: enforce_tls, + skip_tls_allowed, skip_tls_denied' + type: Counter + stabilityLevel: ALPHA + labels: + - usage + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: pods_logs_backend_tls_failure_total + subsystem: pod_logs + namespace: kube_apiserver + help: Total number of requests for pods/logs that failed due to kubelet server TLS + verification + type: Counter + deprecatedVersion: 1.27.0 + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: pods_logs_insecure_backend_total + subsystem: pod_logs + namespace: kube_apiserver + help: 'Total number of requests for pods/logs sliced by usage type: enforce_tls, + skip_tls_allowed, skip_tls_denied' + type: Counter + deprecatedVersion: 1.27.0 + 
stabilityLevel: ALPHA + labels: + - usage + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: active_pods + subsystem: kubelet + help: The number of pods the kubelet considers active and which are being considered + when admitting new pods. static is true if the pod is not from the apiserver. + type: Gauge + stabilityLevel: ALPHA + labels: + - static + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: admission_rejections_total + subsystem: kubelet + help: Cumulative number pod admission rejections by the Kubelet. + type: Counter + stabilityLevel: ALPHA + labels: + - reason + componentEndpoints: + - component: kubelet endpoint: /metrics - name: client_expiration_renew_errors subsystem: certificate_manager @@ -1065,6 +3393,16 @@ componentEndpoints: - component: kubelet endpoint: /metrics +- name: certificate_manager_client_ttl_seconds + subsystem: kubelet + help: Gauge of the TTL (time-to-live) of the Kubelet's client certificate. The value + is in seconds until certificate expiry (negative if already expired). If client + certificate is invalid or unused, the value will be +INF. 
+ type: Gauge + stabilityLevel: ALPHA + componentEndpoints: + - component: kubelet + endpoint: /metrics - name: certificate_manager_server_rotation_seconds subsystem: kubelet help: Histogram of the number of seconds the previous certificate lived before being @@ -1095,173 +3433,6 @@ componentEndpoints: - component: kubelet endpoint: /metrics -- name: kubelet_credential_provider_config_info - help: Information about the last applied credential provider configuration with - hash as label - type: Custom - stabilityLevel: ALPHA - labels: - - hash - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: credential_provider_plugin_duration - subsystem: kubelet - help: Duration of execution in seconds for credential provider plugin - type: Histogram - stabilityLevel: ALPHA - labels: - - plugin_name - buckets: - - 0.005 - - 0.01 - - 0.025 - - 0.05 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: credential_provider_plugin_errors_total - subsystem: kubelet - help: Number of errors from credential provider plugin - type: Counter - stabilityLevel: ALPHA - labels: - - plugin_name - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: server_expiration_renew_errors - subsystem: kubelet - help: Counter of certificate renewal errors. - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: container_swap_limit_bytes - help: Current amount of the container swap limit in bytes. Reported only on non-windows - systems - type: Custom - stabilityLevel: ALPHA - labels: - - container - - pod - - namespace - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: container_swap_usage_bytes - help: Current amount of the container swap usage in bytes. 
Reported only on non-windows - systems - type: Custom - stabilityLevel: ALPHA - labels: - - container - - pod - - namespace - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: grpc_operations_duration_seconds - subsystem: dra - help: Duration in seconds of the DRA gRPC operations - type: Histogram - stabilityLevel: ALPHA - labels: - - driver_name - - grpc_status_code - - method_name - buckets: - - 0.1 - - 0.1534127404634391 - - 0.23535468936502524 - - 0.36106407876409946 - - 0.5539182980610752 - - 0.849781240983936 - - 1.303672689737678 - - 1.9999999999999993 - - 3.0682548092687805 - - 4.7070937873005025 - - 7.221281575281985 - - 11.078365961221499 - - 16.995624819678714 - - 26.07345379475354 - - 39.99999999999997 - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: operations_duration_seconds - subsystem: dra - help: Latency histogram in seconds for the duration of handling all ResourceClaims - referenced by a pod when the pod starts or stops. Identified by the name of the - operation (PrepareResources or UnprepareResources) and separated by the success - of the operation. The number of failed operations is provided through the histogram's - overall count. - type: Histogram - stabilityLevel: ALPHA - labels: - - is_error - - operation_name - buckets: - - 0.1 - - 0.1534127404634391 - - 0.23535468936502524 - - 0.36106407876409946 - - 0.5539182980610752 - - 0.849781240983936 - - 1.303672689737678 - - 1.9999999999999993 - - 3.0682548092687805 - - 4.7070937873005025 - - 7.221281575281985 - - 11.078365961221499 - - 16.995624819678714 - - 26.07345379475354 - - 39.99999999999997 - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: dra_resource_claims_in_use - help: The number of ResourceClaims that are currently in use on the node, by driver - name (driver_name label value) and across all drivers (special value for - driver_name). 
Note that the sum of all by-driver counts is not the total number - of in-use ResourceClaims because the same ResourceClaim might use devices from - different drivers. Instead, use the count for the driver_name. - type: Custom - stabilityLevel: ALPHA - labels: - - driver_name - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: active_pods - subsystem: kubelet - help: The number of pods the kubelet considers active and which are being considered - when admitting new pods. static is true if the pod is not from the apiserver. - type: Gauge - stabilityLevel: ALPHA - labels: - - static - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: admission_rejections_total - subsystem: kubelet - help: Cumulative number pod admission rejections by the Kubelet. - type: Counter - stabilityLevel: ALPHA - labels: - - reason - componentEndpoints: - - component: kubelet - endpoint: /metrics - name: cgroup_manager_duration_seconds subsystem: kubelet help: Duration in seconds for cgroup manager operations. Broken down by method. 
@@ -1400,6 +3571,48 @@ componentEndpoints: - component: kubelet endpoint: /metrics +- name: kubelet_credential_provider_config_info + help: Information about the last applied credential provider configuration with + hash as label + type: Custom + stabilityLevel: ALPHA + labels: + - hash + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: credential_provider_plugin_duration + subsystem: kubelet + help: Duration of execution in seconds for credential provider plugin + type: Histogram + stabilityLevel: ALPHA + labels: + - plugin_name + buckets: + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: credential_provider_plugin_errors_total + subsystem: kubelet + help: Number of errors from credential provider plugin + type: Counter + stabilityLevel: ALPHA + labels: + - plugin_name + componentEndpoints: + - component: kubelet + endpoint: /metrics - name: cri_losing_support subsystem: kubelet help: the Kubernetes version that the currently running CRI implementation will @@ -1542,6 +3755,57 @@ componentEndpoints: - component: kubelet endpoint: /metrics +- name: http_inflight_requests + subsystem: kubelet + help: Number of the inflight http requests + type: Gauge + stabilityLevel: ALPHA + labels: + - long_running + - method + - path + - server_type + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: http_requests_duration_seconds + subsystem: kubelet + help: Duration in seconds to serve http requests + type: Histogram + stabilityLevel: ALPHA + labels: + - long_running + - method + - path + - server_type + buckets: + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: http_requests_total + subsystem: kubelet + help: Number of the http requests received since the server started + type: Counter + stabilityLevel: 
ALPHA + labels: + - long_running + - method + - path + - server_type + componentEndpoints: + - component: kubelet + endpoint: /metrics - name: image_garbage_collected_total subsystem: kubelet help: Total number of images garbage collected by the kubelet, whether through disk @@ -1688,6 +3952,17 @@ componentEndpoints: - component: kubelet endpoint: /metrics +- name: metrics_provider + subsystem: kubelet + help: Metrics provider used by kubelet to collect container stats. Values can be + 'cadvisor' and 'cri' + type: Gauge + stabilityLevel: ALPHA + labels: + - provider + componentEndpoints: + - component: kubelet + endpoint: /metrics - name: mirror_pods subsystem: kubelet help: The number of mirror pods the kubelet will try to create (one per admitted @@ -2300,6 +4575,14 @@ componentEndpoints: - component: kubelet endpoint: /metrics +- name: server_expiration_renew_errors + subsystem: kubelet + help: Counter of certificate renewal errors. + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kubelet + endpoint: /metrics - name: sleep_action_terminated_early_total subsystem: kubelet help: The number of times lifecycle sleep handler got terminated before it finishes @@ -2438,6 +4721,28 @@ componentEndpoints: - component: kubelet endpoint: /metrics +- name: volume_metric_collection_duration_seconds + subsystem: kubelet + help: Duration in seconds to calculate volume stats + type: Histogram + stabilityLevel: ALPHA + labels: + - metric_source + buckets: + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + componentEndpoints: + - component: kubelet + endpoint: /metrics - name: kubelet_volume_stats_available_bytes help: Number of available bytes in the volume type: Custom @@ -2509,6 +4814,17 @@ componentEndpoints: - component: kubelet endpoint: /metrics +- name: websocket_streaming_requests_total + subsystem: kubelet + help: Total number of WebSocket streaming requests (exec/attach/portforward) received + by the 
kubelet. + type: Counter + stabilityLevel: ALPHA + labels: + - subresource + componentEndpoints: + - component: kubelet + endpoint: /metrics - name: working_pods subsystem: kubelet help: Number of pods the kubelet is actually running, broken down by lifecycle phase, @@ -2524,281 +4840,6 @@ componentEndpoints: - component: kubelet endpoint: /metrics -- name: node_swap_usage_bytes - help: Current swap usage of the node in bytes. Reported only on non-windows systems - type: Custom - stabilityLevel: ALPHA - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: plugin_manager_total_plugins - help: Number of plugins in Plugin Manager - type: Custom - stabilityLevel: ALPHA - labels: - - socket_path - - state - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: pod_swap_usage_bytes - help: Current amount of the pod swap usage in bytes. Reported only on non-windows - systems - type: Custom - stabilityLevel: ALPHA - labels: - - pod - - namespace - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: resource_manager_allocation_errors_total - help: Number of errors encountered during exclusive resource allocation. - type: Counter - stabilityLevel: ALPHA - labels: - - resource_name - - source - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: resource_manager_allocations_total - help: Number of exclusive resource allocations performed by a resource manager. - type: Counter - stabilityLevel: ALPHA - labels: - - resource_name - - source - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: resource_manager_container_assignments - help: Number of containers with a specific type of resource assignment. 
- type: Counter - stabilityLevel: ALPHA - labels: - - assignment_type - - resource_name - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: scrape_error - help: 1 if there was an error while getting container metrics, 0 otherwise - type: Custom - deprecatedVersion: 1.29.0 - stabilityLevel: ALPHA - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: image_volume_mounted_errors_total - subsystem: kubelet - help: Number of failed image volume mounts. - type: Counter - stabilityLevel: BETA - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: image_volume_mounted_succeed_total - subsystem: kubelet - help: Number of successful image volume mounts. - type: Counter - stabilityLevel: BETA - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: image_volume_requested_total - subsystem: kubelet - help: Number of requested image volumes. - type: Counter - stabilityLevel: BETA - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: container_cpu_usage_seconds_total - help: Cumulative cpu time consumed by the container in core-seconds - type: Custom - stabilityLevel: STABLE - labels: - - container - - pod - - namespace - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: container_memory_working_set_bytes - help: Current working set of the container in bytes - type: Custom - stabilityLevel: STABLE - labels: - - container - - pod - - namespace - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: container_start_time_seconds - help: Start time of the container since unix epoch in seconds - type: Custom - stabilityLevel: STABLE - labels: - - container - - pod - - namespace - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: node_cpu_usage_seconds_total - help: Cumulative cpu time consumed by the node in core-seconds - type: Custom - stabilityLevel: STABLE - componentEndpoints: - 
- component: kubelet - endpoint: /metrics/resource -- name: node_memory_working_set_bytes - help: Current working set of the node in bytes - type: Custom - stabilityLevel: STABLE - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: pod_cpu_usage_seconds_total - help: Cumulative cpu time consumed by the pod in core-seconds - type: Custom - stabilityLevel: STABLE - labels: - - pod - - namespace - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: pod_memory_working_set_bytes - help: Current working set of the pod in bytes - type: Custom - stabilityLevel: STABLE - labels: - - pod - - namespace - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: resource_scrape_error - help: 1 if there was an error while getting container metrics, 0 otherwise - type: Custom - stabilityLevel: STABLE - componentEndpoints: - - component: kubelet - endpoint: /metrics/resource -- name: force_cleaned_failed_volume_operation_errors_total - help: The number of volumes that failed force cleanup after their reconstruction - failed during kubelet startup. - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: force_cleaned_failed_volume_operations_total - help: The number of volumes that were force cleaned after their reconstruction failed - during kubelet startup. This includes both successful and failed cleanups. 
- type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: http_inflight_requests - subsystem: kubelet - help: Number of the inflight http requests - type: Gauge - stabilityLevel: ALPHA - labels: - - long_running - - method - - path - - server_type - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: http_requests_duration_seconds - subsystem: kubelet - help: Duration in seconds to serve http requests - type: Histogram - stabilityLevel: ALPHA - labels: - - long_running - - method - - path - - server_type - buckets: - - 0.005 - - 0.01 - - 0.025 - - 0.05 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: http_requests_total - subsystem: kubelet - help: Number of the http requests received since the server started - type: Counter - stabilityLevel: ALPHA - labels: - - long_running - - method - - path - - server_type - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: metrics_provider - subsystem: kubelet - help: Metrics provider used by kubelet to collect container stats. Values can be - 'cadvisor' and 'cri' - type: Gauge - stabilityLevel: ALPHA - labels: - - provider - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: volume_metric_collection_duration_seconds - subsystem: kubelet - help: Duration in seconds to calculate volume stats - type: Histogram - stabilityLevel: ALPHA - labels: - - metric_source - buckets: - - 0.005 - - 0.01 - - 0.025 - - 0.05 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: websocket_streaming_requests_total - subsystem: kubelet - help: Total number of WebSocket streaming requests (exec/attach/portforward) received - by the kubelet. 
- type: Counter - stabilityLevel: ALPHA - labels: - - subresource - componentEndpoints: - - component: kubelet - endpoint: /metrics - name: conntrack_reconciler_deleted_entries_total subsystem: kubeproxy help: Cumulative conntrack flows deleted by conntrack reconciler @@ -3163,6 +5204,314 @@ componentEndpoints: - component: kube-proxy endpoint: /metrics +- name: leader_election_master_status + help: Gauge of if the reporting system is master of the relevant lease, 0 indicates + backup, 1 indicates master. 'name' is the string used to identify the lease. Please + make sure to group by name. + type: Gauge + stabilityLevel: ALPHA + labels: + - name + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: leader_election_slowpath_total + help: Total number of slow path exercised in renewing leader leases. 'name' is the + string used to identify the lease. Please make sure to group by name. + type: Counter + stabilityLevel: ALPHA + labels: + - name + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: graph_actions_duration_seconds + subsystem: node_authorizer + help: Histogram of duration of graph actions in node authorizer. 
+ type: Histogram + stabilityLevel: ALPHA + labels: + - operation + buckets: + - 0.0001 + - 0.0002 + - 0.0004 + - 0.0008 + - 0.0016 + - 0.0032 + - 0.0064 + - 0.0128 + - 0.0256 + - 0.0512 + - 0.1024 + - 0.2048 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: unhealthy_nodes_in_zone + subsystem: node_collector + help: Gauge measuring number of not Ready Nodes per zones. + type: Gauge + stabilityLevel: ALPHA + labels: + - zone + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: update_all_nodes_health_duration_seconds + subsystem: node_collector + help: Duration in seconds for NodeController to update the health of all nodes. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 0.01 + - 0.04 + - 0.16 + - 0.64 + - 2.56 + - 10.24 + - 40.96 + - 163.84 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: update_node_health_duration_seconds + subsystem: node_collector + help: Duration in seconds for NodeController to update the health of a single node. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 0.001 + - 0.004 + - 0.016 + - 0.064 + - 0.256 + - 1.024 + - 4.096 + - 16.384 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: zone_health + subsystem: node_collector + help: Gauge measuring percentage of healthy nodes per zone. + type: Gauge + stabilityLevel: ALPHA + labels: + - zone + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: zone_size + subsystem: node_collector + help: Gauge measuring number of registered Nodes per zones. + type: Gauge + stabilityLevel: ALPHA + labels: + - zone + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: cloud_provider_taint_removal_delay_seconds + subsystem: node_controller + help: Number of seconds after node creation when NodeController removed the cloud-provider + taint of a single node. 
+ type: Histogram + stabilityLevel: ALPHA + buckets: + - 1 + - 4 + - 16 + - 64 + - 256 + - 1024 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics +- name: initial_node_sync_delay_seconds + subsystem: node_controller + help: Number of seconds after node creation when NodeController finished the initial + synchronization of a single node. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 1 + - 4 + - 16 + - 64 + - 256 + - 1024 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics +- name: cidrset_allocation_tries_per_request + subsystem: node_ipam_controller + help: Number of endpoints added on each Service sync + type: Histogram + stabilityLevel: ALPHA + labels: + - clusterCIDR + buckets: + - 1 + - 5 + - 25 + - 125 + - 625 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: cidrset_cidrs_allocations_total + subsystem: node_ipam_controller + help: Counter measuring total number of CIDR allocations. + type: Counter + stabilityLevel: ALPHA + labels: + - clusterCIDR + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: cidrset_cidrs_releases_total + subsystem: node_ipam_controller + help: Counter measuring total number of CIDR releases. + type: Counter + stabilityLevel: ALPHA + labels: + - clusterCIDR + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: cidrset_usage_cidrs + subsystem: node_ipam_controller + help: Gauge measuring percentage of allocated CIDRs. + type: Gauge + stabilityLevel: ALPHA + labels: + - clusterCIDR + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: cirdset_max_cidrs + subsystem: node_ipam_controller + help: Maximum number of CIDRs that can be allocated. 
+ type: Gauge + stabilityLevel: ALPHA + labels: + - clusterCIDR + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: node_swap_usage_bytes + help: Current swap usage of the node in bytes. Reported only on non-windows systems + type: Custom + stabilityLevel: ALPHA + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource +- name: plugin_manager_total_plugins + help: Number of plugins in Plugin Manager + type: Custom + stabilityLevel: ALPHA + labels: + - socket_path + - state + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: force_delete_pod_errors_total + subsystem: pod_gc_collector + help: Number of errors encountered when forcefully deleting the pods since the Pod + GC Controller started. + type: Counter + stabilityLevel: ALPHA + labels: + - namespace + - reason + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: force_delete_pods_total + subsystem: pod_gc_collector + help: Number of pods that are being forcefully deleted since the Pod GC Controller + started. + type: Counter + stabilityLevel: ALPHA + labels: + - namespace + - reason + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: pod_security_errors_total + help: Number of errors preventing normal evaluation. Non-fatal errors may result + in the latest restricted profile being used for evaluation. + type: Counter + stabilityLevel: ALPHA + labels: + - fatal + - request_operation + - resource + - subresource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: pod_security_evaluations_total + help: Number of policy evaluations that occurred, not counting ignored or exempt + requests. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - decision + - mode + - policy_level + - policy_version + - request_operation + - resource + - subresource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: pod_security_exemptions_total + help: Number of exempt requests, not counting ignored or out of scope requests. + type: Counter + stabilityLevel: ALPHA + labels: + - request_operation + - resource + - subresource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: pod_swap_usage_bytes + help: Current amount of the pod swap usage in bytes. Reported only on non-windows + systems + type: Custom + stabilityLevel: ALPHA + labels: + - pod + - namespace + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource - name: probe_duration_seconds subsystem: prober help: Duration in seconds for a probe response. @@ -3176,6 +5525,56 @@ componentEndpoints: - component: kubelet endpoint: /metrics/probes +- name: pv_collector_bound_pv_count + help: Gauge measuring number of persistent volume currently bound + type: Custom + stabilityLevel: ALPHA + labels: + - storage_class + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: pv_collector_bound_pvc_count + help: Gauge measuring number of persistent volume claim currently bound + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - storage_class + - volume_attributes_class + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: pv_collector_total_pv_count + help: Gauge measuring total number of persistent volumes + type: Custom + stabilityLevel: ALPHA + labels: + - plugin_name + - volume_mode + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: pv_collector_unbound_pv_count + help: Gauge measuring number of persistent volume currently unbound + type: Custom + stabilityLevel: ALPHA + labels: + - storage_class + componentEndpoints: + 
- component: kube-controller-manager + endpoint: /metrics +- name: pv_collector_unbound_pvc_count + help: Gauge measuring number of persistent volume claim currently unbound + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - storage_class + - volume_attributes_class + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics - name: reconstruct_volume_operations_errors_total help: The number of volumes that failed reconstruction from the operating system during kubelet startup. @@ -3192,401 +5591,593 @@ componentEndpoints: - component: kubelet endpoint: /metrics -- name: volume_manager_selinux_container_errors_total - help: Number of errors when kubelet cannot compute SELinux context for a container. - Kubelet can't start such a Pod then and it will retry, therefore value of this - metric may not represent the actual nr. of containers. - type: Gauge +- name: sorting_deletion_age_ratio + subsystem: replicaset_controller + help: The ratio of chosen deleted pod's ages to the current youngest pod's age (at + the time). Should be <2. The intent of this metric is to measure the rough efficacy + of the LogarithmicScaleDown feature gate's effect on the sorting (and deletion) + of pods when a replicaset scales down. This only considers Ready pods when calculating + and reporting. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 0.25 + - 0.5 + - 1 + - 2 + - 4 + - 8 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: stale_sync_skips_total + subsystem: replicaset_controller + help: Total number of ReplicaSet syncs skipped due to a stale watch cache. + type: Counter stabilityLevel: ALPHA labels: - - access_mode + - group + - resource + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: resource_manager_allocation_errors_total + help: Number of errors encountered during exclusive resource allocation. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - resource_name + - source componentEndpoints: - component: kubelet endpoint: /metrics -- name: volume_manager_selinux_container_warnings_total - help: Number of errors when kubelet cannot compute SELinux context for a container - that are ignored. They will become real errors when SELinuxMountReadWriteOncePod - feature is expanded to all volume access modes. - type: Gauge +- name: resource_manager_allocations_total + help: Number of exclusive resource allocations performed by a resource manager. + type: Counter stabilityLevel: ALPHA labels: - - access_mode + - resource_name + - source componentEndpoints: - component: kubelet endpoint: /metrics -- name: volume_manager_selinux_pod_context_mismatch_errors_total - help: Number of errors when a Pod defines different SELinux contexts for its containers - that use the same volume. Kubelet can't start such a Pod then and it will retry, - therefore value of this metric may not represent the actual nr. of Pods. - type: Gauge +- name: resource_manager_container_assignments + help: Number of containers with a specific type of resource assignment. + type: Counter stabilityLevel: ALPHA labels: - - access_mode + - assignment_type + - resource_name componentEndpoints: - component: kubelet endpoint: /metrics -- name: volume_manager_selinux_pod_context_mismatch_warnings_total - help: Number of errors when a Pod defines different SELinux contexts for its containers - that use the same volume. They are not errors yet, but they will become real errors - when SELinuxMountReadWriteOncePod feature is expanded to all volume access modes. 
- type: Gauge +- name: creates_total + subsystem: resourceclaim_controller + help: Number of ResourceClaims creation requests, categorized by creation status + and admin access + type: Counter stabilityLevel: ALPHA labels: - - access_mode + - admin_access + - status componentEndpoints: - - component: kubelet + - component: kube-controller-manager endpoint: /metrics -- name: volume_manager_selinux_volume_context_mismatch_errors_total - help: Number of errors when a Pod uses a volume that is already mounted with a different - SELinux context than the Pod needs. Kubelet can't start such a Pod then and it - will retry, therefore value of this metric may not represent the actual nr. of - Pods. - type: Gauge - stabilityLevel: ALPHA - labels: - - access_mode - - volume_plugin - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: volume_manager_selinux_volume_context_mismatch_warnings_total - help: Number of errors when a Pod uses a volume that is already mounted with a different - SELinux context than the Pod needs. They are not errors yet, but they will become - real errors when SELinuxMountReadWriteOncePod feature is expanded to all volume - access modes. - type: Gauge - stabilityLevel: ALPHA - labels: - - access_mode - - volume_plugin - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: volume_manager_selinux_volumes_admitted_total - help: Number of volumes whose SELinux context was fine and will be mounted with - mount -o context option. - type: Gauge - stabilityLevel: ALPHA - labels: - - access_mode - - volume_plugin - componentEndpoints: - - component: kubelet - endpoint: /metrics -- name: volume_manager_total_volumes - help: Number of volumes in Volume Manager +- name: resourceclaim_controller_resource_claims + help: Number of ResourceClaims, categorized by allocation status, admin access, + and source. 
Source can be 'resource_claim_template' (created from a template), + 'extended_resource' (extended resources), or empty (manually created by a user). type: Custom stabilityLevel: ALPHA labels: - - plugin_name - - state + - allocated + - admin_access + - source componentEndpoints: - - component: kubelet + - component: kube-controller-manager endpoint: /metrics -- name: probe_total - subsystem: prober - help: Cumulative number of a liveness, readiness or startup probe for a container - by result. - type: Counter - stabilityLevel: BETA - labels: - - container - - namespace - - pod - - pod_uid - - probe_type - - result - componentEndpoints: - - component: kubelet - endpoint: /metrics/probes -- name: csr_honored_duration_total - subsystem: certificates_registry - namespace: apiserver - help: Total number of issued CSRs with a requested duration that was honored, sliced - by signer (only kubernetes.io signer names are specifically identified) - type: Counter - stabilityLevel: ALPHA - labels: - - signerName - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: csr_requested_duration_total - subsystem: certificates_registry - namespace: apiserver - help: Total number of issued CSRs with a requested duration, sliced by signer (only - kubernetes.io signer names are specifically identified) - type: Counter - stabilityLevel: ALPHA - labels: - - signerName - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: ip_errors_total - subsystem: clusterip_repair - namespace: apiserver - help: 'Number of errors detected on clusterips by the repair loop broken down by - type of error: leak, repair, full, outOfRange, duplicate, unknown, invalid' - type: Counter - stabilityLevel: ALPHA - labels: - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: reconcile_errors_total - subsystem: clusterip_repair - namespace: apiserver - help: Number of reconciliation failures on the clusterip repair reconcile 
loop - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: port_errors_total - subsystem: nodeport_repair - namespace: apiserver - help: 'Number of errors detected on ports by the repair loop broken down by type - of error: leak, repair, full, outOfRange, duplicate, unknown' - type: Counter - stabilityLevel: ALPHA - labels: - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: reconcile_errors_total - subsystem: nodeport_repair - namespace: apiserver - help: Number of reconciliation failures on the nodeport repair reconcile loop - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: allocated_ips - subsystem: clusterip_allocator - namespace: kube_apiserver - help: Gauge measuring the number of allocated IPs for Services - type: Gauge - stabilityLevel: ALPHA - labels: - - cidr - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: allocation_duration_seconds - subsystem: clusterip_allocator - namespace: kube_apiserver - help: Duration in seconds to allocate a Cluster IP by ServiceCIDR +- name: request_processing_duration_seconds + subsystem: resourcepoolstatusrequest_controller + help: Time taken to process a ResourcePoolStatusRequest type: Histogram stabilityLevel: ALPHA labels: - - cidr - buckets: - - 0.005 - - 0.01 - - 0.025 - - 0.05 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: allocation_errors_total - subsystem: clusterip_allocator - namespace: kube_apiserver - help: Number of errors trying to allocate Cluster IPs - type: Counter - stabilityLevel: ALPHA - labels: - - cidr - - scope - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: allocation_total - subsystem: clusterip_allocator - namespace: kube_apiserver - help: Number of Cluster IPs 
allocations - type: Counter - stabilityLevel: ALPHA - labels: - - cidr - - scope - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: available_ips - subsystem: clusterip_allocator - namespace: kube_apiserver - help: Gauge measuring the number of available IPs for Services - type: Gauge - stabilityLevel: ALPHA - labels: - - cidr - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: allocated_ports - subsystem: nodeport_allocator - namespace: kube_apiserver - help: Gauge measuring the number of allocated NodePorts for Services - type: Gauge - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: allocation_errors_total - subsystem: nodeport_allocator - namespace: kube_apiserver - help: Number of errors trying to allocate NodePort - type: Counter - stabilityLevel: ALPHA - labels: - - scope - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: allocation_total - subsystem: nodeport_allocator - namespace: kube_apiserver - help: Number of NodePort allocations - type: Counter - stabilityLevel: ALPHA - labels: - - scope - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: available_ports - subsystem: nodeport_allocator - namespace: kube_apiserver - help: Gauge measuring the number of available NodePorts for Services - type: Gauge - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: backend_tls_failure_total - subsystem: pod_logs - namespace: kube_apiserver - help: Total number of requests for pods/logs that failed due to kubelet server TLS - verification - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: insecure_backend_total - subsystem: pod_logs - namespace: kube_apiserver - help: 'Total number of requests for pods/logs sliced by usage type: enforce_tls, - skip_tls_allowed, 
skip_tls_denied' - type: Counter - stabilityLevel: ALPHA - labels: - - usage - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: pods_logs_backend_tls_failure_total - subsystem: pod_logs - namespace: kube_apiserver - help: Total number of requests for pods/logs that failed due to kubelet server TLS - verification - type: Counter - deprecatedVersion: 1.27.0 - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: pods_logs_insecure_backend_total - subsystem: pod_logs - namespace: kube_apiserver - help: 'Total number of requests for pods/logs sliced by usage type: enforce_tls, - skip_tls_allowed, skip_tls_denied' - type: Counter - deprecatedVersion: 1.27.0 - stabilityLevel: ALPHA - labels: - - usage - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: fetch_keys_data_timestamp - subsystem: externaljwt - namespace: apiserver - help: Unix Timestamp in seconds of the last successful FetchKeys data_timestamp - value returned by the external signer - type: Gauge - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: fetch_keys_request_total - subsystem: externaljwt - namespace: apiserver - help: Total attempts at syncing supported JWKs - type: Counter - stabilityLevel: ALPHA - labels: - - code - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: fetch_keys_success_timestamp - subsystem: externaljwt - namespace: apiserver - help: Unix Timestamp in seconds of the last successful FetchKeys request - type: Gauge - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_duration_seconds - subsystem: externaljwt - namespace: apiserver - help: Request duration and time for calls to external-jwt-signer - type: Histogram - stabilityLevel: ALPHA - labels: - - code - - method + - driver_name buckets: - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 
0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: request_processing_errors_total + subsystem: resourcepoolstatusrequest_controller + help: Total number of errors encountered while processing ResourcePoolStatusRequests + type: Counter + stabilityLevel: ALPHA + labels: + - driver_name + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: requests_processed_total + subsystem: resourcepoolstatusrequest_controller + help: Total number of ResourcePoolStatusRequests processed + type: Counter + stabilityLevel: ALPHA + labels: + - driver_name + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: rest_client_dns_resolution_duration_seconds + help: DNS resolver latency in seconds. Broken down by host. + type: Histogram + stabilityLevel: ALPHA + labels: + - host + buckets: - 0.005 - - 0.01 - 0.025 - - 0.05 - 0.1 - 0.25 - 0.5 - 1 - - 2.5 - - 5 - - 10 + - 2 + - 4 + - 8 + - 15 + - 30 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_exec_plugin_call_total + help: Number of calls to an exec plugin, partitioned by the type of event encountered + (no_error, plugin_execution_error, plugin_not_found_error, client_internal_error) + and an optional exit code. The exit code will be set to 0 if and only if the plugin + call was successful. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - call_status + - code + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_exec_plugin_certificate_rotation_age + help: Histogram of the number of seconds the last auth exec plugin client certificate + lived before being rotated. If auth exec plugin client certificates are unused, + histogram will contain no data. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 600 + - 1800 + - 3600 + - 14400 + - 86400 + - 604800 + - 2.592e+06 + - 7.776e+06 + - 1.5552e+07 + - 3.1104e+07 + - 1.24416e+08 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_exec_plugin_policy_call_total + help: Number of comparisons of an exec plugin to the plugin policy and allowlist + (if any), partitioned by whether or not the policy permits the plugin + type: Counter + stabilityLevel: ALPHA + labels: + - allowed + - denied + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_exec_plugin_ttl_seconds + help: Gauge of the shortest TTL (time-to-live) of the client certificate(s) managed + by the auth exec plugin. 
The value is in seconds until certificate expiry (negative + if already expired). If auth exec plugins are unused or manage no TLS certificates, + the value will be +INF. + type: Gauge + stabilityLevel: ALPHA + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_rate_limiter_duration_seconds + help: Client side rate limiter latency in seconds. Broken down by verb, and host. + type: Histogram + stabilityLevel: ALPHA + labels: + - host + - verb + buckets: + - 0.005 + - 0.025 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2 + - 4 + - 8 + - 15 - 30 - 60 componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics - component: kube-apiserver endpoint: /metrics -- name: sign_request_total - subsystem: externaljwt - namespace: apiserver - help: Total attempts at signing JWT + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_request_duration_seconds + help: Request latency in seconds. Broken down by verb, and host. 
+ type: Histogram + stabilityLevel: ALPHA + labels: + - host + - verb + buckets: + - 0.005 + - 0.025 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2 + - 4 + - 8 + - 15 + - 30 + - 60 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_request_retries_total + help: Number of request retries, partitioned by status code, verb, and host. + type: Counter + stabilityLevel: ALPHA + labels: + - code + - host + - verb + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_request_size_bytes + help: Request size in bytes. Broken down by verb and host. + type: Histogram + stabilityLevel: ALPHA + labels: + - host + - verb + buckets: + - 64 + - 256 + - 512 + - 1024 + - 4096 + - 16384 + - 65536 + - 262144 + - 1.048576e+06 + - 4.194304e+06 + - 1.6777216e+07 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_requests_total + help: Number of HTTP requests, partitioned by status code, method, and host. 
+ type: Counter + stabilityLevel: ALPHA + labels: + - code + - host + - method + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_response_size_bytes + help: Response size in bytes. Broken down by verb and host. + type: Histogram + stabilityLevel: ALPHA + labels: + - host + - verb + buckets: + - 64 + - 256 + - 512 + - 1024 + - 4096 + - 16384 + - 65536 + - 262144 + - 1.048576e+06 + - 4.194304e+06 + - 1.6777216e+07 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_transport_ca_reload_total + help: Number of times a CA reload is attempted, partitioned by the result and reason + for the reload attempt + type: Counter + stabilityLevel: ALPHA + labels: + - reason + - result + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_transport_cache_entries + help: Number of transport entries in the internal cache. 
+ type: Gauge + stabilityLevel: ALPHA + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_transport_cache_gc_calls_total + help: 'Number of times a GC cleanup attempts to delete a transport cache entry, + partitioned by the result: deleted, skipped' + type: Counter + stabilityLevel: ALPHA + labels: + - result + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_transport_cert_rotation_gc_calls_total + help: Number of times a cert rotation goroutine cancel func is called via GC cleanup + of the associated transport + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: rest_client_transport_create_calls_total + help: 'Number of calls to get a new transport, partitioned by the result of the + operation hit: obtained from the cache, miss: created and added to the cache, + miss-gc: recreated and added back to the cache after being garbage collected, + uncacheable: created and not cached' + type: Counter + stabilityLevel: ALPHA + labels: + - result + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: 
kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: retroactive_storageclass_errors_total + help: Total number of failed retroactive StorageClass assignments to persistent + volume claim + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: retroactive_storageclass_total + help: Total number of retroactive StorageClass assignments to persistent volume + claim + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: sync_duration_seconds + subsystem: root_ca_cert_publisher + help: Number of namespace syncs happened in root ca cert publisher. + type: Histogram + stabilityLevel: ALPHA + labels: + - code + buckets: + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: sync_total + subsystem: root_ca_cert_publisher + help: Number of namespace syncs happened in root ca cert publisher. type: Counter stabilityLevel: ALPHA labels: - code componentEndpoints: - - component: kube-apiserver + - component: kube-controller-manager + endpoint: /metrics +- name: route_sync_total + subsystem: route_controller + help: A metric counting the amount of times routes have been synced with the cloud + provider. 
+ type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: cloud-controller-manager endpoint: /metrics - name: async_api_call_execution_duration_seconds subsystem: scheduler @@ -4024,6 +6615,96 @@ componentEndpoints: - component: kube-scheduler endpoint: /metrics +- name: scrape_error + help: 1 if there was an error while getting container metrics, 0 otherwise + type: Custom + deprecatedVersion: 1.29.0 + stabilityLevel: ALPHA + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource +- name: selinux_warning_controller_selinux_volume_conflict + help: Conflict between two Pods using the same volume + type: Custom + stabilityLevel: ALPHA + labels: + - property + - pod1_namespace + - pod1_name + - pod1_value + - pod2_namespace + - pod2_name + - pod2_value + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: loadbalancer_sync_total + subsystem: service_controller + help: A metric counting the amount of times any load balancer has been configured, + as an effect of service/node changes on the cluster + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics +- name: nodesync_error_total + subsystem: service_controller + help: A metric counting the amount of times any load balancer has been configured + and errored, as an effect of node changes on the cluster + type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics +- name: nodesync_latency_seconds + subsystem: service_controller + help: A metric measuring the latency for nodesync which updates loadbalancer hosts + on cluster node updates. 
+ type: Histogram + stabilityLevel: ALPHA + buckets: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + - 16384 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics +- name: update_loadbalancer_host_latency_seconds + subsystem: service_controller + help: A metric measuring the latency for updating each load balancer hosts. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + - 16384 + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics - name: invalid_legacy_auto_token_uses_total subsystem: serviceaccount help: Cumulative invalid auto-generated legacy tokens used @@ -4072,311 +6753,50 @@ componentEndpoints: - component: kube-apiserver endpoint: /metrics -- name: goroutines - subsystem: scheduler - help: Number of running goroutines split by the work they do such as binding. - type: Gauge - stabilityLevel: BETA - labels: - - operation - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: permit_wait_duration_seconds - subsystem: scheduler - help: Duration of waiting on permit. - type: Histogram - stabilityLevel: BETA - labels: - - result - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: plugin_evaluation_total - subsystem: scheduler - help: Number of attempts to schedule pods by each plugin and the extension point - (available only in PreFilter, Filter, PreScore, and Score). +- name: stale_sync_skips_total + subsystem: statefulset_controller + help: Total number of StatefulSet syncs skipped due to a stale watch cache. 
type: Counter - stabilityLevel: BETA - labels: - - extension_point - - plugin - - profile - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: pod_scheduling_sli_duration_seconds - subsystem: scheduler - help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling - queue and might involve multiple scheduling attempts. - type: Histogram - stabilityLevel: BETA - labels: - - attempts - buckets: - - 0.01 - - 0.02 - - 0.04 - - 0.08 - - 0.16 - - 0.32 - - 0.64 - - 1.28 - - 2.56 - - 5.12 - - 10.24 - - 20.48 - - 40.96 - - 81.92 - - 163.84 - - 327.68 - - 655.36 - - 1310.72 - - 2621.44 - - 5242.88 - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: unschedulable_pods - subsystem: scheduler - help: The number of unschedulable pods broken down by plugin name. A pod will increment - the gauge for all plugins that caused it to not schedule and so this metric have - meaning only when broken down by plugin. - type: Gauge - stabilityLevel: BETA - labels: - - plugin - - profile - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: kube_pod_resource_limit - help: Resources limit for workloads on the cluster, broken down by pod. This shows - the resource usage the scheduler and kubelet expect per pod for resources along - with the unit for the resource if any. - type: Custom - stabilityLevel: STABLE - labels: - - namespace - - pod - - node - - scheduler - - priority - - resource - - unit - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: kube_pod_resource_request - help: Resources requested by workloads on the cluster, broken down by pod. This - shows the resource usage the scheduler and kubelet expect per pod for resources - along with the unit for the resource if any. 
- type: Custom - stabilityLevel: STABLE - labels: - - namespace - - pod - - node - - scheduler - - priority - - resource - - unit - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: framework_extension_point_duration_seconds - subsystem: scheduler - help: Latency for running all plugins of a specific extension point. - type: Histogram - stabilityLevel: STABLE - labels: - - extension_point - - profile - - status - buckets: - - 0.0001 - - 0.0002 - - 0.0004 - - 0.0008 - - 0.0016 - - 0.0032 - - 0.0064 - - 0.0128 - - 0.0256 - - 0.0512 - - 0.1024 - - 0.2048 - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: pending_pods - subsystem: scheduler - help: Number of pending pods, by the queue type. 'active' means number of pods in - activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number - of pods in unschedulablePods that the scheduler attempted to schedule and failed; - 'gated' is the number of unschedulable pods that the scheduler never attempted - to schedule because they are gated. - type: Gauge - stabilityLevel: STABLE - labels: - - queue - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: pod_scheduling_attempts - subsystem: scheduler - help: Number of attempts to successfully schedule a pod. 
- type: Histogram - stabilityLevel: STABLE - buckets: - - 1 - - 2 - - 4 - - 8 - - 16 - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: preemption_attempts_total - subsystem: scheduler - help: Total preemption attempts in the cluster till now - type: Counter - stabilityLevel: STABLE - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: preemption_victims - subsystem: scheduler - help: Number of selected preemption victims - type: Histogram - stabilityLevel: STABLE - buckets: - - 1 - - 2 - - 4 - - 8 - - 16 - - 32 - - 64 - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: queue_incoming_pods_total - subsystem: scheduler - help: Number of pods added to scheduling queues by event and queue type. - type: Counter - stabilityLevel: STABLE - labels: - - event - - queue - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: schedule_attempts_total - subsystem: scheduler - help: Number of attempts to schedule pods, by the result. 'unschedulable' means - a pod could not be scheduled, while 'error' means an internal scheduler problem. 
- type: Counter - stabilityLevel: STABLE - labels: - - profile - - result - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: scheduling_attempt_duration_seconds - subsystem: scheduler - help: Scheduling attempt latency in seconds (scheduling algorithm + binding) - type: Histogram - stabilityLevel: STABLE - labels: - - profile - - result - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-scheduler - endpoint: /metrics -- name: operations_seconds - subsystem: csi - help: Container Storage Interface operation duration with gRPC error code status - total - type: Histogram stabilityLevel: ALPHA labels: - - driver_name - - grpc_status_code - - method_name - - migrated - buckets: - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 - - 15 - - 25 - - 50 - - 120 - - 300 - - 600 + - group + - resource componentEndpoints: - - component: kubelet + - component: kube-controller-manager endpoint: /metrics -- name: graph_actions_duration_seconds - subsystem: node_authorizer - help: Histogram of duration of graph actions in node authorizer. 
- type: Histogram +- name: statefulset_max_unavailable + subsystem: statefulset_controller + help: Maximum number of unavailable pods allowed during StatefulSet rolling updates + type: Gauge stabilityLevel: ALPHA labels: - - operation - buckets: - - 0.0001 - - 0.0002 - - 0.0004 - - 0.0008 - - 0.0016 - - 0.0032 - - 0.0064 - - 0.0128 - - 0.0256 - - 0.0512 - - 0.1024 - - 0.2048 + - pod_management_policy + - statefulset_name + - statefulset_namespace componentEndpoints: - - component: kube-apiserver + - component: kube-controller-manager + endpoint: /metrics +- name: statefulset_unavailable_replicas + subsystem: statefulset_controller + help: Current number of unavailable pods in StatefulSet + type: Gauge + stabilityLevel: ALPHA + labels: + - pod_management_policy + - statefulset_name + - statefulset_namespace + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: storage_count_attachable_volumes_in_use + help: Measure number of volumes in use + type: Custom + stabilityLevel: ALPHA + labels: + - node + - volume_plugin + componentEndpoints: + - component: kube-controller-manager endpoint: /metrics - name: storage_operation_duration_seconds help: Storage operation duration @@ -4404,6 +6824,194 @@ componentEndpoints: - component: kubelet endpoint: /metrics +- name: pod_deletion_duration_seconds + subsystem: taint_eviction_controller + help: Latency, in seconds, between the time when a taint effect has been activated + for the Pod and its deletion via TaintEvictionController. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 0.005 + - 0.025 + - 0.1 + - 0.5 + - 1 + - 2.5 + - 10 + - 30 + - 60 + - 120 + - 180 + - 240 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: pod_deletions_total + subsystem: taint_eviction_controller + help: Total number of Pods deleted by TaintEvictionController since its start. 
+ type: Counter + stabilityLevel: ALPHA + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: job_deletion_duration_seconds + subsystem: ttl_after_finished_controller + help: The time it took to delete the job since it became eligible for deletion + type: Histogram + stabilityLevel: ALPHA + buckets: + - 0.1 + - 0.2 + - 0.4 + - 0.8 + - 1.6 + - 3.2 + - 6.4 + - 12.8 + - 25.6 + - 51.2 + - 102.4 + - 204.8 + - 409.6 + - 819.2 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: version_info + help: Provides the compatibility version info of the component. The component label + is the name of the component, usually kube, but is relevant for aggregated-apiservers. + type: Gauge + stabilityLevel: ALPHA + labels: + - binary + - component + - emulation + - min_compat + componentEndpoints: + - component: cloud-controller-manager + endpoint: /metrics + - component: kube-apiserver + endpoint: /metrics + - component: kube-controller-manager + endpoint: /metrics + - component: kube-proxy + endpoint: /metrics + - component: kube-scheduler + endpoint: /metrics + - component: kubelet + endpoint: /metrics +- name: volume_manager_selinux_container_errors_total + help: Number of errors when kubelet cannot compute SELinux context for a container. + Kubelet can't start such a Pod then and it will retry, therefore value of this + metric may not represent the actual nr. of containers. + type: Gauge + stabilityLevel: ALPHA + labels: + - access_mode + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: volume_manager_selinux_container_warnings_total + help: Number of errors when kubelet cannot compute SELinux context for a container + that are ignored. They will become real errors when SELinuxMountReadWriteOncePod + feature is expanded to all volume access modes. 
+ type: Gauge + stabilityLevel: ALPHA + labels: + - access_mode + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: volume_manager_selinux_pod_context_mismatch_errors_total + help: Number of errors when a Pod defines different SELinux contexts for its containers + that use the same volume. Kubelet can't start such a Pod then and it will retry, + therefore value of this metric may not represent the actual nr. of Pods. + type: Gauge + stabilityLevel: ALPHA + labels: + - access_mode + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: volume_manager_selinux_pod_context_mismatch_warnings_total + help: Number of errors when a Pod defines different SELinux contexts for its containers + that use the same volume. They are not errors yet, but they will become real errors + when SELinuxMountReadWriteOncePod feature is expanded to all volume access modes. + type: Gauge + stabilityLevel: ALPHA + labels: + - access_mode + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: volume_manager_selinux_volume_context_mismatch_errors_total + help: Number of errors when a Pod uses a volume that is already mounted with a different + SELinux context than the Pod needs. Kubelet can't start such a Pod then and it + will retry, therefore value of this metric may not represent the actual nr. of + Pods. + type: Gauge + stabilityLevel: ALPHA + labels: + - access_mode + - volume_plugin + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: volume_manager_selinux_volume_context_mismatch_warnings_total + help: Number of errors when a Pod uses a volume that is already mounted with a different + SELinux context than the Pod needs. They are not errors yet, but they will become + real errors when SELinuxMountReadWriteOncePod feature is expanded to all volume + access modes. 
+ type: Gauge + stabilityLevel: ALPHA + labels: + - access_mode + - volume_plugin + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: volume_manager_selinux_volumes_admitted_total + help: Number of volumes whose SELinux context was fine and will be mounted with + mount -o context option. + type: Gauge + stabilityLevel: ALPHA + labels: + - access_mode + - volume_plugin + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: volume_manager_total_volumes + help: Number of volumes in Volume Manager + type: Custom + stabilityLevel: ALPHA + labels: + - plugin_name + - state + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: volume_operation_errors_total + help: Total volume operation errors + type: Counter + stabilityLevel: ALPHA + labels: + - operation_name + - plugin_name + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: volume_operation_total_errors + help: Total volume operation errors + type: Counter + deprecatedVersion: 1.36.0 + stabilityLevel: ALPHA + labels: + - operation_name + - plugin_name + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics - name: volume_operation_total_seconds help: Storage operation end to end duration in seconds type: Histogram @@ -4428,2439 +7036,6 @@ componentEndpoints: - component: kubelet endpoint: /metrics -- name: ratcheting_seconds - subsystem: validation - namespace: apiextensions_apiserver - help: Time for comparison of old to new for the purposes of CRDValidationRatcheting - during an UPDATE in seconds. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 1e-05 - - 4e-05 - - 0.00016 - - 0.00064 - - 0.00256 - - 0.01024 - - 0.04096 - - 0.16384 - - 0.65536 - - 2.62144 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiextensions_openapi_v2_regeneration_count - help: Counter of OpenAPI v2 spec regeneration count broken down by causing CRD name - and reason. 
- type: Counter - stabilityLevel: ALPHA - labels: - - crd - - reason - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiextensions_openapi_v3_regeneration_count - help: Counter of OpenAPI v3 spec regeneration count broken down by group, version, - causing CRD and reason. - type: Counter - stabilityLevel: ALPHA - labels: - - crd - - group - - reason - - version - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: conversion_webhook_duration_seconds - namespace: apiserver - help: Conversion webhook request latency - type: Histogram - stabilityLevel: ALPHA - labels: - - failure_type - - result - buckets: - - 0.005 - - 0.01 - - 0.02 - - 0.05 - - 0.1 - - 0.2 - - 0.5 - - 1 - - 2 - - 5 - - 10 - - 20 - - 30 - - 45 - - 60 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: conversion_webhook_request_total - namespace: apiserver - help: Counter for conversion webhook requests with success/failure and failure error - type - type: Counter - stabilityLevel: ALPHA - labels: - - failure_type - - result - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_crd_conversion_webhook_duration_seconds - help: CRD webhook conversion duration in seconds - type: Histogram - stabilityLevel: ALPHA - labels: - - crd_name - - from_version - - succeeded - - to_version - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: match_condition_evaluation_errors_total - subsystem: admission - namespace: apiserver - help: Admission match condition evaluation errors count, identified by name of resource - containing the match condition and broken out for each kind containing matchConditions - (webhook or policy), operation and admission type (validate or admit). 
- type: Counter - stabilityLevel: ALPHA - labels: - - kind - - name - - operation - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: match_condition_evaluation_seconds - subsystem: admission - namespace: apiserver - help: Admission match condition evaluation time in seconds, identified by name and - broken out for each kind containing matchConditions (webhook or policy), operation - and type (validate or admit). - type: Histogram - stabilityLevel: ALPHA - labels: - - kind - - name - - operation - - type - buckets: - - 0.001 - - 0.005 - - 0.01 - - 0.025 - - 0.1 - - 0.2 - - 0.25 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: match_condition_exclusions_total - subsystem: admission - namespace: apiserver - help: Admission match condition evaluation exclusions count, identified by name - of resource containing the match condition and broken out for each kind containing - matchConditions (webhook or policy), operation and admission type (validate or - admit). - type: Counter - stabilityLevel: ALPHA - labels: - - kind - - name - - operation - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: step_admission_duration_seconds_summary - subsystem: admission - namespace: apiserver - help: Admission sub-step latency summary in seconds, broken out for each operation - and API resource and step type (validate or admit). - type: Summary - stabilityLevel: ALPHA - labels: - - operation - - rejected - - type - maxAge: 18000000000000 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: webhook_fail_open_count - subsystem: admission - namespace: apiserver - help: Admission webhook fail open count, identified by name and broken out for each - admission type (validating or admit). 
- type: Counter - stabilityLevel: ALPHA - labels: - - name - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: webhook_rejection_count - subsystem: admission - namespace: apiserver - help: Admission webhook rejection count, identified by name and broken out for each - admission type (validating or admit) and operation. Additional labels specify - an error type (calling_webhook_error or apiserver_internal_error if an error occurred; - no_error otherwise) and optionally a non-zero rejection code if the webhook rejects - the request with an HTTP status code (honored by the apiserver when the code is - greater or equal to 400). Codes greater than 600 are truncated to 600, to keep - the metrics cardinality bounded. - type: Counter - stabilityLevel: ALPHA - labels: - - error_type - - name - - operation - - rejection_code - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: webhook_request_total - subsystem: admission - namespace: apiserver - help: Admission webhook request total, identified by name and broken out for each - admission type (validating or admit) and operation. Additional labels specify - whether the request was rejected or not and an HTTP status code. Codes greater - than 600 are truncated to 600, to keep the metrics cardinality bounded. - type: Counter - stabilityLevel: ALPHA - labels: - - code - - name - - operation - - rejected - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: automatic_reload_last_timestamp_seconds - subsystem: manifest_admission_config_controller - namespace: apiserver - help: Timestamp of the last automatic reload of admission manifest configuration - split by status, plugin, and apiserver identity. 
- type: Gauge - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - plugin - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: automatic_reloads_total - subsystem: manifest_admission_config_controller - namespace: apiserver - help: Total number of automatic reloads of admission manifest configuration split - by status, plugin, and apiserver identity. - type: Counter - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - plugin - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_manifest_admission_config_controller_last_config_info - help: Information about the last applied admission manifest configuration with hash - as label, split by plugin and apiserver identity. - type: Custom - stabilityLevel: ALPHA - labels: - - plugin - - apiserver_id_hash - - hash - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: check_duration_seconds - subsystem: mutating_admission_policy - namespace: apiserver - help: Mutation admission latency for individual mutation expressions in seconds, - labeled by policy and binding. - type: Histogram - stabilityLevel: ALPHA - labels: - - error_type - - policy - - policy_binding - buckets: - - 5e-07 - - 0.001 - - 0.01 - - 0.1 - - 1 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: check_total - subsystem: mutating_admission_policy - namespace: apiserver - help: Mutation admission policy check total, labeled by policy and further identified - by binding. - type: Counter - stabilityLevel: ALPHA - labels: - - error_type - - policy - - policy_binding - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: check_duration_seconds - subsystem: validating_admission_policy - namespace: apiserver - help: Validation admission latency for individual validation expressions in seconds, - labeled by policy and further including binding and enforcement action taken. 
- type: Histogram - stabilityLevel: BETA - labels: - - enforcement_action - - error_type - - policy - - policy_binding - buckets: - - 5e-07 - - 0.001 - - 0.01 - - 0.1 - - 1 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: check_total - subsystem: validating_admission_policy - namespace: apiserver - help: Validation admission policy check total, labeled by policy and further identified - by binding and enforcement action taken. - type: Counter - stabilityLevel: BETA - labels: - - enforcement_action - - error_type - - policy - - policy_binding - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: controller_admission_duration_seconds - subsystem: admission - namespace: apiserver - help: Admission controller latency histogram in seconds, identified by name and - broken out for each operation and API resource and type (validate or admit). - type: Histogram - stabilityLevel: STABLE - labels: - - name - - operation - - rejected - - type - buckets: - - 0.005 - - 0.025 - - 0.1 - - 0.5 - - 1 - - 2.5 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: step_admission_duration_seconds - subsystem: admission - namespace: apiserver - help: Admission sub-step latency histogram in seconds, broken out for each operation - and API resource and step type (validate or admit). - type: Histogram - stabilityLevel: STABLE - labels: - - operation - - rejected - - type - buckets: - - 0.005 - - 0.025 - - 0.1 - - 0.5 - - 1 - - 2.5 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: webhook_admission_duration_seconds - subsystem: admission - namespace: apiserver - help: Admission webhook latency histogram in seconds, identified by name and broken - out for each operation and API resource and type (validate or admit). 
- type: Histogram - stabilityLevel: STABLE - labels: - - name - - operation - - rejected - - type - buckets: - - 0.005 - - 0.025 - - 0.1 - - 0.5 - - 1 - - 2.5 - - 10 - - 25 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: error_total - subsystem: apiserver_audit - help: Counter of audit events that failed to be audited properly. Plugin identifies - the plugin affected by the error. - type: Counter - stabilityLevel: ALPHA - labels: - - plugin - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: event_total - subsystem: apiserver_audit - help: Counter of audit events generated and sent to the audit backend. - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: level_total - subsystem: apiserver_audit - help: Counter of policy levels for audit events (1 per request). - type: Counter - stabilityLevel: ALPHA - labels: - - level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: requests_rejected_total - subsystem: apiserver_audit - help: Counter of apiserver requests rejected due to an error in audit logging backend. - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: certificate_expiration_seconds - subsystem: client - namespace: apiserver - help: Distribution of the remaining lifetime on the certificate used to authenticate - a request. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 0 - - 1800 - - 3600 - - 7200 - - 21600 - - 43200 - - 86400 - - 172800 - - 345600 - - 604800 - - 2.592e+06 - - 7.776e+06 - - 1.5552e+07 - - 3.1104e+07 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_delegated_authn_request_duration_seconds - help: Request latency in seconds. Broken down by status code. 
- type: Histogram - stabilityLevel: ALPHA - labels: - - code - buckets: - - 0.25 - - 0.5 - - 0.7 - - 1 - - 1.5 - - 3 - - 5 - - 10 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_delegated_authn_request_total - help: Number of HTTP requests partitioned by status code. - type: Counter - stabilityLevel: ALPHA - labels: - - code - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: active_fetch_count - subsystem: token_cache - namespace: authentication - type: Gauge - stabilityLevel: ALPHA - labels: - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: fetch_total - subsystem: token_cache - namespace: authentication - type: Counter - stabilityLevel: ALPHA - labels: - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_duration_seconds - subsystem: token_cache - namespace: authentication - type: Histogram - stabilityLevel: ALPHA - labels: - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_total - subsystem: token_cache - namespace: authentication - type: Counter - stabilityLevel: ALPHA - labels: - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: aggregation_count_total - subsystem: aggregator_discovery - help: Counter of number of times discovery was aggregated - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: nopeer_requests_total - subsystem: aggregator_discovery - help: Counter of number of times no-peer (non peer-aggregated) discovery was requested - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: peer_aggregated_cache_hits_total - subsystem: aggregator_discovery - help: Counter of number of times discovery was served from peer-aggregated cache - type: Counter - stabilityLevel: 
ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: peer_aggregated_cache_misses_total - subsystem: aggregator_discovery - help: Counter of number of times discovery was aggregated across all API servers - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: decisions_total - subsystem: authorization - namespace: apiserver - help: Total number of terminal decisions made by an authorizer split by authorizer - type, name, and decision. - type: Counter - stabilityLevel: ALPHA - labels: - - decision - - name - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: match_condition_evaluation_errors_total - subsystem: authorization - namespace: apiserver - help: Total number of errors when an authorization webhook encounters a match condition - error split by authorizer type and name. - type: Counter - stabilityLevel: ALPHA - labels: - - name - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: match_condition_evaluation_seconds - subsystem: authorization - namespace: apiserver - help: Authorization match condition evaluation time in seconds, split by authorizer - type and name. - type: Histogram - stabilityLevel: ALPHA - labels: - - name - - type - buckets: - - 0.001 - - 0.005 - - 0.01 - - 0.025 - - 0.1 - - 0.2 - - 0.25 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: match_condition_exclusions_total - subsystem: authorization - namespace: apiserver - help: Total number of exclusions when an authorization webhook is skipped because - match conditions exclude it. - type: Counter - stabilityLevel: ALPHA - labels: - - name - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: current_inqueue_requests - subsystem: apiserver - help: Maximal number of queued requests in this apiserver per request kind in last - second. 
- type: Gauge - stabilityLevel: ALPHA - labels: - - request_kind - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_delegated_authz_request_duration_seconds - help: Request latency in seconds. Broken down by status code. - type: Histogram - stabilityLevel: ALPHA - labels: - - code - buckets: - - 0.25 - - 0.5 - - 0.7 - - 1 - - 1.5 - - 3 - - 5 - - 10 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_delegated_authz_request_total - help: Number of HTTP requests partitioned by status code. - type: Counter - stabilityLevel: ALPHA - labels: - - code - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: dial_duration_seconds - subsystem: egress_dialer - namespace: apiserver - help: Dial latency histogram in seconds, labeled by the protocol (http-connect or - grpc), transport (tcp or uds) - type: Histogram - stabilityLevel: ALPHA - labels: - - protocol - - transport - buckets: - - 0.005 - - 0.025 - - 0.1 - - 0.5 - - 2.5 - - 12.5 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: dial_failure_count - subsystem: egress_dialer - namespace: apiserver - help: Dial failure count, labeled by the protocol (http-connect or grpc), transport - (tcp or uds), and stage (connect or proxy). The stage indicates at which stage - the dial failed - type: Counter - stabilityLevel: ALPHA - labels: - - protocol - - stage - - transport - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: dial_start_total - subsystem: egress_dialer - namespace: apiserver - help: Dial starts, labeled by the protocol (http-connect or grpc) and transport - (tcp or uds). 
- type: Counter - stabilityLevel: ALPHA - labels: - - protocol - - transport - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: attempts_duration_seconds - subsystem: impersonation - namespace: apiserver - help: Latency of impersonation attempts in seconds split by mode and decision. - type: Histogram - stabilityLevel: ALPHA - labels: - - decision - - mode - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: attempts_total - subsystem: impersonation - namespace: apiserver - help: Total number of impersonation attempts split by mode and decision. - type: Counter - stabilityLevel: ALPHA - labels: - - decision - - mode - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: authorization_attempts_duration_seconds - subsystem: impersonation - namespace: apiserver - help: Latency of authorization checks made by the impersonation handler in seconds - split by mode and decision. - type: Histogram - stabilityLevel: ALPHA - labels: - - decision - - mode - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: authorization_attempts_total - subsystem: impersonation - namespace: apiserver - help: Total number of authorization checks made by the impersonation handler split - by mode and decision. 
- type: Counter - stabilityLevel: ALPHA - labels: - - decision - - mode - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_aborts_total - subsystem: apiserver - help: Number of requests which apiserver aborted possibly due to a timeout, for - each group, version, verb, resource, subresource and scope - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - - scope - - subresource - - verb - - version - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_body_size_bytes - subsystem: apiserver - help: Apiserver request body size in bytes broken out by resource and verb. - type: Histogram - stabilityLevel: ALPHA - labels: - - group - - resource - - verb - buckets: - - 50000 - - 150000 - - 250000 - - 350000 - - 450000 - - 550000 - - 650000 - - 750000 - - 850000 - - 950000 - - 1.05e+06 - - 1.15e+06 - - 1.25e+06 - - 1.35e+06 - - 1.45e+06 - - 1.55e+06 - - 1.65e+06 - - 1.75e+06 - - 1.85e+06 - - 1.95e+06 - - 2.05e+06 - - 2.15e+06 - - 2.25e+06 - - 2.35e+06 - - 2.45e+06 - - 2.55e+06 - - 2.65e+06 - - 2.75e+06 - - 2.85e+06 - - 2.95e+06 - - 3.05e+06 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_filter_duration_seconds - subsystem: apiserver - help: Request filter latency distribution in seconds, for each filter type - type: Histogram - stabilityLevel: ALPHA - labels: - - filter - buckets: - - 0.0001 - - 0.0003 - - 0.001 - - 0.003 - - 0.01 - - 0.03 - - 0.1 - - 0.3 - - 1 - - 5 - - 10 - - 15 - - 30 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_post_timeout_total - subsystem: apiserver - help: Tracks the activity of the request handlers after the associated requests - have been timed out by the apiserver - type: Counter - stabilityLevel: ALPHA - labels: - - source - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_sli_duration_seconds - subsystem: 
apiserver - help: Response latency distribution (not counting webhook duration and priority - & fairness queue wait times) in seconds for each verb, group, version, resource, - subresource, scope and component. - type: Histogram - stabilityLevel: ALPHA - labels: - - component - - group - - resource - - scope - - subresource - - verb - - version - buckets: - - 0.05 - - 0.1 - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 1.25 - - 1.5 - - 2 - - 3 - - 4 - - 5 - - 6 - - 8 - - 10 - - 15 - - 20 - - 30 - - 45 - - 60 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_slo_duration_seconds - subsystem: apiserver - help: Response latency distribution (not counting webhook duration and priority - & fairness queue wait times) in seconds for each verb, group, version, resource, - subresource, scope and component. - type: Histogram - deprecatedVersion: 1.27.0 - stabilityLevel: ALPHA - labels: - - component - - group - - resource - - scope - - subresource - - verb - - version - buckets: - - 0.05 - - 0.1 - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 1.25 - - 1.5 - - 2 - - 3 - - 4 - - 5 - - 6 - - 8 - - 10 - - 15 - - 20 - - 30 - - 45 - - 60 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_terminations_total - subsystem: apiserver - help: Number of requests which apiserver terminated in self-defense. 
- type: Counter - stabilityLevel: ALPHA - labels: - - code - - component - - group - - resource - - scope - - subresource - - verb - - version - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_timestamp_comparison_time - subsystem: apiserver - help: Time taken for comparison of old vs new objects in UPDATE or PATCH requests - type: Histogram - stabilityLevel: ALPHA - labels: - - code_path - buckets: - - 0.0001 - - 0.0003 - - 0.001 - - 0.003 - - 0.01 - - 0.03 - - 0.1 - - 0.3 - - 1 - - 5 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: selfrequest_total - subsystem: apiserver - help: Counter of apiserver self-requests broken out for each verb, API resource - and subresource. - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - - subresource - - verb - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: tls_handshake_errors_total - subsystem: apiserver - help: Number of requests dropped with 'TLS handshake error from' error - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: watch_events_sizes - subsystem: apiserver - help: Watch event size distribution in bytes - type: Histogram - stabilityLevel: ALPHA - labels: - - group - - resource - - version - buckets: - - 1024 - - 2048 - - 4096 - - 8192 - - 16384 - - 32768 - - 65536 - - 131072 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: watch_events_total - subsystem: apiserver - help: Number of events sent in watch clients - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - - version - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: authenticated_user_requests - help: Counter of authenticated requests broken out by username. 
- type: Counter - stabilityLevel: ALPHA - labels: - - username - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: authentication_attempts - help: Counter of authenticated attempts. - type: Counter - stabilityLevel: ALPHA - labels: - - result - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: authentication_duration_seconds - help: Authentication duration in seconds broken out by result. - type: Histogram - stabilityLevel: ALPHA - labels: - - result - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: authorization_attempts_total - help: Counter of authorization attempts broken down by result. It can be either - 'allowed', 'denied', 'no-opinion' or 'error'. - type: Counter - stabilityLevel: ALPHA - labels: - - result - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: authorization_duration_seconds - help: Authorization duration in seconds broken out by result. 
- type: Histogram - stabilityLevel: ALPHA - labels: - - result - buckets: - - 0.001 - - 0.002 - - 0.004 - - 0.008 - - 0.016 - - 0.032 - - 0.064 - - 0.128 - - 0.256 - - 0.512 - - 1.024 - - 2.048 - - 4.096 - - 8.192 - - 16.384 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: field_validation_request_duration_seconds - help: Response latency distribution in seconds for each field validation value - type: Histogram - stabilityLevel: ALPHA - labels: - - field_validation - buckets: - - 0.05 - - 0.1 - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 1.25 - - 1.5 - - 2 - - 3 - - 4 - - 5 - - 6 - - 8 - - 10 - - 15 - - 20 - - 30 - - 45 - - 60 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: compilation_duration_seconds - subsystem: cel - namespace: apiserver - help: CEL compilation time in seconds. - type: Histogram - stabilityLevel: BETA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: evaluation_duration_seconds - subsystem: cel - namespace: apiserver - help: CEL evaluation time in seconds. - type: Histogram - stabilityLevel: BETA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: watch_list_duration_seconds - subsystem: apiserver - help: Response latency distribution in seconds for watch list requests broken by - group, version, resource and scope. - type: Histogram - stabilityLevel: BETA - labels: - - group - - resource - - scope - - version - buckets: - - 0.05 - - 0.1 - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 2 - - 4 - - 6 - - 8 - - 10 - - 15 - - 20 - - 30 - - 45 - - 60 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: current_inflight_requests - subsystem: apiserver - help: Maximal number of currently used inflight request limit of this apiserver - per request kind in last second. 
- type: Gauge - stabilityLevel: STABLE - labels: - - request_kind - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: longrunning_requests - subsystem: apiserver - help: Gauge of all active long-running apiserver requests broken out by verb, group, - version, resource, scope and component. Not all requests are tracked this way. - type: Gauge - stabilityLevel: STABLE - labels: - - component - - group - - resource - - scope - - subresource - - verb - - version - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_duration_seconds - subsystem: apiserver - help: Response latency distribution in seconds for each verb, dry run value, group, - version, resource, subresource, scope and component. - type: Histogram - stabilityLevel: STABLE - labels: - - component - - dry_run - - group - - resource - - scope - - subresource - - verb - - version - buckets: - - 0.005 - - 0.025 - - 0.05 - - 0.1 - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 1.25 - - 1.5 - - 2 - - 3 - - 4 - - 5 - - 6 - - 8 - - 10 - - 15 - - 20 - - 30 - - 45 - - 60 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_total - subsystem: apiserver - help: Counter of apiserver requests broken out for each verb, dry run value, group, - version, resource, scope, component, and HTTP response code. - type: Counter - stabilityLevel: STABLE - labels: - - code - - component - - dry_run - - group - - resource - - scope - - subresource - - verb - - version - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: requested_deprecated_apis - subsystem: apiserver - help: Gauge of deprecated APIs that have been requested, broken out by API group, - version, resource, subresource, and removed_release. 
- type: Gauge - stabilityLevel: STABLE - labels: - - group - - removed_release - - resource - - subresource - - version - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: response_sizes - subsystem: apiserver - help: Response size distribution in bytes for each group, version, verb, resource, - subresource, scope and component. - type: Histogram - stabilityLevel: STABLE - labels: - - component - - group - - resource - - scope - - subresource - - verb - - version - buckets: - - 1000 - - 10000 - - 100000 - - 1e+06 - - 1e+07 - - 1e+08 - - 1e+09 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_authentication_config_controller_last_config_info - help: Information about the last applied authentication configuration with hash - as label, split by apiserver identity. - type: Custom - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - hash - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_authorization_config_controller_last_config_info - help: Information about the last applied authorization configuration with hash as - label, split by apiserver identity. 
- type: Custom - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - hash - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: cache_list_fetched_objects_total - namespace: apiserver - help: Number of objects read from watch cache in the course of serving a LIST request - type: Counter - stabilityLevel: ALPHA - labels: - - group - - index - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: cache_list_returned_objects_total - namespace: apiserver - help: Number of objects returned for a LIST request from watch cache - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: cache_list_total - namespace: apiserver - help: Number of LIST requests served from watch cache - type: Counter - stabilityLevel: ALPHA - labels: - - group - - index - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: automatic_reload_last_timestamp_seconds - subsystem: encryption_config_controller - namespace: apiserver - help: Timestamp of the last successful or failed automatic reload of encryption - configuration split by apiserver identity. - type: Gauge - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: automatic_reloads_total - subsystem: encryption_config_controller - namespace: apiserver - help: Total number of reload successes and failures of encryption configuration - split by apiserver identity. - type: Counter - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_encryption_config_controller_last_config_info - help: Information about the last applied encryption configuration with hash as label, - split by apiserver identity. 
- type: Custom - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - hash - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: dek_cache_fill_percent - subsystem: envelope_encryption - namespace: apiserver - help: Percent of the cache slots currently occupied by cached DEKs. - type: Gauge - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: dek_cache_inter_arrival_time_seconds - subsystem: envelope_encryption - namespace: apiserver - help: Time (in seconds) of inter arrival of transformation requests. - type: Histogram - stabilityLevel: ALPHA - labels: - - transformation_type - buckets: - - 60 - - 120 - - 240 - - 480 - - 960 - - 1920 - - 3840 - - 7680 - - 15360 - - 30720 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: dek_source_cache_size - subsystem: envelope_encryption - namespace: apiserver - help: Number of records in data encryption key (DEK) source cache. On a restart, - this value is an approximation of the number of decrypt RPC calls the server will - make to the KMS plugin. - type: Gauge - stabilityLevel: ALPHA - labels: - - provider_name - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: invalid_key_id_from_status_total - subsystem: envelope_encryption - namespace: apiserver - help: Number of times an invalid keyID is returned by the Status RPC call split - by error. - type: Counter - stabilityLevel: ALPHA - labels: - - error - - provider_name - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: key_id_hash_last_timestamp_seconds - subsystem: envelope_encryption - namespace: apiserver - help: The last time in seconds when a keyID was used. 
- type: Gauge - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - key_id_hash - - provider_name - - transformation_type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: key_id_hash_status_last_timestamp_seconds - subsystem: envelope_encryption - namespace: apiserver - help: The last time in seconds when a keyID was returned by the Status RPC call. - type: Gauge - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - key_id_hash - - provider_name - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: key_id_hash_total - subsystem: envelope_encryption - namespace: apiserver - help: Number of times a keyID is used split by transformation type, provider, and - apiserver identity. - type: Counter - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - key_id_hash - - provider_name - - transformation_type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: kms_operations_latency_seconds - subsystem: envelope_encryption - namespace: apiserver - help: KMS operation duration with gRPC error code status total. 
- type: Histogram - stabilityLevel: ALPHA - labels: - - grpc_status_code - - method_name - - provider_name - buckets: - - 0.0001 - - 0.0002 - - 0.0004 - - 0.0008 - - 0.0016 - - 0.0032 - - 0.0064 - - 0.0128 - - 0.0256 - - 0.0512 - - 0.1024 - - 0.2048 - - 0.4096 - - 0.8192 - - 1.6384 - - 3.2768 - - 6.5536 - - 13.1072 - - 26.2144 - - 52.4288 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: current_inqueue_seats - subsystem: flowcontrol - namespace: apiserver - help: Number of seats currently pending in queues of the API Priority and Fairness - subsystem - type: Gauge - stabilityLevel: ALPHA - labels: - - flow_schema - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: current_limit_seats - subsystem: flowcontrol - namespace: apiserver - help: current derived number of execution seats available to each priority level - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: current_r - subsystem: flowcontrol - namespace: apiserver - help: R(time of last change) - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: demand_seats - subsystem: flowcontrol - namespace: apiserver - help: Observations, at the end of every nanosecond, of (the number of seats each - priority level could use) / (nominal number of seats for that level) - type: TimingRatioHistogram - stabilityLevel: ALPHA - labels: - - priority_level - buckets: - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 1.2 - - 1.4 - - 1.7 - - 2 - - 2.8 - - 4 - - 6 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: demand_seats_average - subsystem: flowcontrol - namespace: apiserver - help: Time-weighted average, over last adjustment period, of demand_seats - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - 
componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: demand_seats_high_watermark - subsystem: flowcontrol - namespace: apiserver - help: High watermark, over last adjustment period, of demand_seats - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: demand_seats_smoothed - subsystem: flowcontrol - namespace: apiserver - help: Smoothed seat demands - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: demand_seats_stdev - subsystem: flowcontrol - namespace: apiserver - help: Time-weighted standard deviation, over last adjustment period, of demand_seats - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: dispatch_r - subsystem: flowcontrol - namespace: apiserver - help: R(time of last dispatch) - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: epoch_advance_total - subsystem: flowcontrol - namespace: apiserver - help: Number of times the queueset's progress meter jumped backward - type: Counter - stabilityLevel: ALPHA - labels: - - priority_level - - success - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: latest_s - subsystem: flowcontrol - namespace: apiserver - help: S(most recently dispatched request) - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: lower_limit_seats - subsystem: flowcontrol - namespace: apiserver - help: Configured lower bound on number of execution seats available to each priority - level - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - 
endpoint: /metrics -- name: next_discounted_s_bounds - subsystem: flowcontrol - namespace: apiserver - help: min and max, over queues, of S(oldest waiting request in queue) - estimated - work in progress - type: Gauge - stabilityLevel: ALPHA - labels: - - bound - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: next_s_bounds - subsystem: flowcontrol - namespace: apiserver - help: min and max, over queues, of S(oldest waiting request in queue) - type: Gauge - stabilityLevel: ALPHA - labels: - - bound - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: priority_level_request_utilization - subsystem: flowcontrol - namespace: apiserver - help: Observations, at the end of every nanosecond, of number of requests (as a - fraction of the relevant limit) waiting or in any stage of execution (but only - initial stage for WATCHes) - type: TimingRatioHistogram - stabilityLevel: ALPHA - labels: - - phase - - priority_level - buckets: - - 0 - - 0.001 - - 0.003 - - 0.01 - - 0.03 - - 0.1 - - 0.25 - - 0.5 - - 0.75 - - 1 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: priority_level_seat_utilization - subsystem: flowcontrol - namespace: apiserver - help: Observations, at the end of every nanosecond, of utilization of seats for - any stage of execution (but only initial stage for WATCHes) - type: TimingRatioHistogram - stabilityLevel: ALPHA - labels: - - priority_level - buckets: - - 0 - - 0.1 - - 0.2 - - 0.3 - - 0.4 - - 0.5 - - 0.6 - - 0.7 - - 0.8 - - 0.9 - - 0.95 - - 0.99 - - 1 - constLabels: - phase: executing - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: read_vs_write_current_requests - subsystem: flowcontrol - namespace: apiserver - help: Observations, at the end of every nanosecond, of the number of requests (as - a fraction of the relevant limit) waiting or in regular stage of execution - type: 
TimingRatioHistogram - stabilityLevel: ALPHA - labels: - - phase - - request_kind - buckets: - - 0 - - 0.001 - - 0.01 - - 0.1 - - 0.2 - - 0.3 - - 0.4 - - 0.5 - - 0.6 - - 0.7 - - 0.8 - - 0.9 - - 0.95 - - 0.99 - - 1 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_concurrency_in_use - subsystem: flowcontrol - namespace: apiserver - help: Concurrency (number of seats) occupied by the currently executing (initial - stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness - subsystem - type: Gauge - deprecatedVersion: 1.31.0 - stabilityLevel: ALPHA - labels: - - flow_schema - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_concurrency_limit - subsystem: flowcontrol - namespace: apiserver - help: Nominal number of execution seats configured for each priority level - type: Gauge - deprecatedVersion: 1.30.0 - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_dispatch_no_accommodation_total - subsystem: flowcontrol - namespace: apiserver - help: Number of times a dispatch attempt resulted in a non accommodation due to - lack of available seats - type: Counter - stabilityLevel: ALPHA - labels: - - flow_schema - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_execution_seconds - subsystem: flowcontrol - namespace: apiserver - help: Duration of initial stage (for a WATCH) or any (for a non-WATCH) stage of - request execution in the API Priority and Fairness subsystem - type: Histogram - stabilityLevel: ALPHA - labels: - - flow_schema - - priority_level - - type - buckets: - - 0 - - 0.005 - - 0.02 - - 0.05 - - 0.1 - - 0.2 - - 0.5 - - 1 - - 2 - - 5 - - 10 - - 15 - - 30 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: request_queue_length_after_enqueue - subsystem: flowcontrol 
- namespace: apiserver - help: Length of queue in the API Priority and Fairness subsystem, as seen by each - request after it is enqueued - type: Histogram - stabilityLevel: ALPHA - labels: - - flow_schema - - priority_level - buckets: - - 0 - - 10 - - 25 - - 50 - - 100 - - 250 - - 500 - - 1000 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: seat_fair_frac - subsystem: flowcontrol - namespace: apiserver - help: Fair fraction of server's concurrency to allocate to each priority level that - can use it - type: Gauge - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: target_seats - subsystem: flowcontrol - namespace: apiserver - help: Seat allocation targets - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: upper_limit_seats - subsystem: flowcontrol - namespace: apiserver - help: Configured upper bound on number of execution seats available to each priority - level - type: Gauge - stabilityLevel: ALPHA - labels: - - priority_level - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: watch_count_samples - subsystem: flowcontrol - namespace: apiserver - help: count of watchers for mutating requests in API Priority and Fairness - type: Histogram - stabilityLevel: ALPHA - labels: - - flow_schema - - priority_level - buckets: - - 0 - - 1 - - 10 - - 100 - - 1000 - - 10000 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: work_estimated_seats - subsystem: flowcontrol - namespace: apiserver - help: Number of estimated seats (maximum of initial and final seats) associated - with requests in API Priority and Fairness - type: Histogram - stabilityLevel: ALPHA - labels: - - flow_schema - - priority_level - buckets: - - 1 - - 2 - - 4 - - 8 - - 16 - - 32 - - 64 - - 100 - componentEndpoints: - - component: kube-apiserver - endpoint: 
/metrics -- name: init_events_total - namespace: apiserver - help: Counter of init events processed in watch cache broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_resource_objects - help: Number of stored objects at the time of last check split by kind. In case - of a fetching error, the value will be -1. - type: Gauge - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_resource_size_estimate_bytes - help: Estimated size of stored objects in database. Estimate is based on sum of - last observed sizes of serialized objects. In case of a fetching error, the value - will be -1. - type: Gauge - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: storage_consistency_checks_total - namespace: apiserver - help: Counter for status of consistency checks between etcd and watch cache - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - - status - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: data_key_generation_duration_seconds - subsystem: storage - namespace: apiserver - help: Latencies in seconds of data encryption key(DEK) generation operations. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 5e-06 - - 1e-05 - - 2e-05 - - 4e-05 - - 8e-05 - - 0.00016 - - 0.00032 - - 0.00064 - - 0.00128 - - 0.00256 - - 0.00512 - - 0.01024 - - 0.02048 - - 0.04096 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: data_key_generation_failures_total - subsystem: storage - namespace: apiserver - help: Total number of failed data encryption key(DEK) generation operations. 
- type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: storage_db_total_size_in_bytes - subsystem: apiserver - help: Total size of the storage database file physically allocated in bytes. - type: Gauge - deprecatedVersion: 1.28.0 - stabilityLevel: ALPHA - labels: - - endpoint - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: storage_decode_errors_total - namespace: apiserver - help: Number of stored object decode errors split by object type - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: envelope_transformation_cache_misses_total - subsystem: storage - namespace: apiserver - help: Total number of cache misses while accessing key decryption key(KEK). - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_storage_list_evaluated_objects_total - help: Number of objects tested in the course of serving a LIST request from storage - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_storage_list_fetched_objects_total - help: Number of objects read from storage in the course of serving a LIST request - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_storage_list_returned_objects_total - help: Number of objects returned for a LIST request from storage - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_storage_list_total - help: Number of LIST requests served from storage - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - 
componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: transformation_duration_seconds - subsystem: storage - namespace: apiserver - help: Latencies in seconds of value transformation operations. - type: Histogram - stabilityLevel: ALPHA - labels: - - transformation_type - - transformer_prefix - buckets: - - 5e-06 - - 1e-05 - - 2e-05 - - 4e-05 - - 8e-05 - - 0.00016 - - 0.00032 - - 0.00064 - - 0.00128 - - 0.00256 - - 0.00512 - - 0.01024 - - 0.02048 - - 0.04096 - - 0.08192 - - 0.16384 - - 0.32768 - - 0.65536 - - 1.31072 - - 2.62144 - - 5.24288 - - 10.48576 - - 20.97152 - - 41.94304 - - 83.88608 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: transformation_operations_total - subsystem: storage - namespace: apiserver - help: Total number of transformations. Successful transformation will have a status - 'OK' and a varied status string when the transformation fails. The status, resource, - and transformation_type fields can be used for alerting purposes. For example, - you can monitor for encryption/decryption failures using the transformation_type - (e.g., from_storage for decryption and to_storage for encryption). Additionally, - these fields can be used to ensure that the correct transformers are applied to - each resource. - type: Counter - stabilityLevel: ALPHA - labels: - - resource - - status - - transformation_type - - transformer_prefix - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: terminated_watchers_total - namespace: apiserver - help: Counter of watchers closed due to unresponsiveness broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: consistent_read_total - subsystem: watch_cache - namespace: apiserver - help: Counter for consistent reads from cache. 
- type: Counter - stabilityLevel: ALPHA - labels: - - fallback - - group - - resource - - success - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: events_dispatched_total - subsystem: watch_cache - namespace: apiserver - help: Counter of events dispatched in watch cache broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: events_received_total - subsystem: watch_cache - namespace: apiserver - help: Counter of events received in watch cache broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: initializations_total - subsystem: watch_cache - namespace: apiserver - help: Counter of watch cache initializations broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: read_wait_seconds - subsystem: watch_cache - namespace: apiserver - help: Histogram of time spent waiting for a watch cache to become fresh. - type: Histogram - stabilityLevel: ALPHA - labels: - - group - - resource - buckets: - - 0.005 - - 0.025 - - 0.05 - - 0.1 - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 1.25 - - 1.5 - - 2 - - 3 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: resource_version - subsystem: watch_cache - namespace: apiserver - help: Current resource version of watch cache broken by resource type. This is truncated - to the 15 least significant digits. - type: Gauge - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: watch_filtered_events_total - namespace: apiserver - help: Counter of events filtered out by shard selector during watch dispatch, broken - by resource type. 
- type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: watch_shards_total - namespace: apiserver - help: Number of active sharded watch connections broken by resource type. - type: Gauge - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: etcd_bookmark_counts - help: Number of etcd bookmarks (progress notify events) split by kind. - type: Gauge - deprecatedVersion: 1.36.0 - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: etcd_bookmark_total - help: Number of etcd bookmarks (progress notify events) split by kind. - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: etcd_lease_object_counts - help: Number of objects attached to a single etcd lease. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 10 - - 50 - - 100 - - 500 - - 1000 - - 2500 - - 5000 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: etcd_request_duration_seconds - help: Etcd request latency in seconds for each operation and object type. - type: Histogram - stabilityLevel: ALPHA - labels: - - group - - operation - - resource - buckets: - - 0.005 - - 0.025 - - 0.05 - - 0.1 - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 1.25 - - 1.5 - - 2 - - 3 - - 4 - - 5 - - 6 - - 8 - - 10 - - 15 - - 20 - - 30 - - 45 - - 60 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: etcd_request_errors_total - help: Etcd failed request counts for each operation and object type. 
- type: Counter - stabilityLevel: ALPHA - labels: - - group - - operation - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: etcd_requests_total - help: Etcd request counts for each operation and object type. - type: Counter - stabilityLevel: ALPHA - labels: - - group - - operation - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics - name: capacity subsystem: watch_cache help: Total capacity of watch cache broken by resource type. @@ -6946,6 +7121,24 @@ componentEndpoints: - component: kube-apiserver endpoint: /metrics +- name: compilation_duration_seconds + subsystem: cel + namespace: apiserver + help: CEL compilation time in seconds. + type: Histogram + stabilityLevel: BETA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: evaluation_duration_seconds + subsystem: cel + namespace: apiserver + help: CEL evaluation time in seconds. + type: Histogram + stabilityLevel: BETA + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics - name: current_executing_requests subsystem: flowcontrol namespace: apiserver @@ -7060,246 +7253,39 @@ componentEndpoints: - component: kube-apiserver endpoint: /metrics -- name: apiserver_storage_objects - help: '[DEPRECATED, consider using apiserver_resource_objects instead] Number of - stored objects at the time of last check split by kind. In case of a fetching - error, the value will be -1.' - type: Gauge - deprecatedVersion: 1.34.0 - stabilityLevel: STABLE - labels: - - resource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_storage_size_bytes - help: Size of the storage database file physically allocated in bytes. 
- type: Custom - stabilityLevel: STABLE - labels: - - storage_cluster_id - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: apiserver_authentication_jwt_authenticator_jwks_fetch_last_key_set_info - help: Information about the last JWKS fetched by the JWT authenticator with hash - as label, split by api server identity and jwt issuer. - type: Custom - stabilityLevel: ALPHA - labels: - - jwt_issuer_hash - - apiserver_id_hash - - hash - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: jwt_authenticator_jwks_fetch_last_timestamp_seconds - subsystem: authentication +- name: check_duration_seconds + subsystem: validating_admission_policy namespace: apiserver - help: Timestamp of the last successful or failed JWKS fetch split by result, api - server identity and jwt issuer for the JWT authenticator. - type: Gauge - stabilityLevel: ALPHA - labels: - - apiserver_id_hash - - jwt_issuer_hash - - result - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: jwt_authenticator_latency_seconds - subsystem: authentication - namespace: apiserver - help: Latency of jwt authentication operations in seconds. This is the time spent - authenticating a token for cache miss only (i.e. when the token is not found in - the cache). + help: Validation admission latency for individual validation expressions in seconds, + labeled by policy and further including binding and enforcement action taken. 
type: Histogram - stabilityLevel: ALPHA + stabilityLevel: BETA labels: - - jwt_issuer_hash - - result + - enforcement_action + - error_type + - policy + - policy_binding buckets: + - 5e-07 - 0.001 - - 0.005 - 0.01 - - 0.025 - - 0.05 - 0.1 - - 0.25 - - 0.5 - 1 - - 2.5 - - 5 - - 10 componentEndpoints: - component: kube-apiserver endpoint: /metrics -- name: webhook_duration_seconds - subsystem: authorization +- name: check_total + subsystem: validating_admission_policy namespace: apiserver - help: Request latency in seconds. - type: Histogram - stabilityLevel: ALPHA + help: Validation admission policy check total, labeled by policy and further identified + by binding and enforcement action taken. + type: Counter + stabilityLevel: BETA labels: - - name - - result - buckets: - - 0.005 - - 0.01 - - 0.025 - - 0.05 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: webhook_evaluations_fail_open_total - subsystem: authorization - namespace: apiserver - help: NoOpinion results due to webhook timeout or error. - type: Counter - stabilityLevel: ALPHA - labels: - - name - - result - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: webhook_evaluations_total - subsystem: authorization - namespace: apiserver - help: Round-trips to authorization webhooks. 
- type: Counter - stabilityLevel: ALPHA - labels: - - name - - result - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: peer_discovery_sync_errors_total - subsystem: apiserver - help: Total number of errors encountered while syncing discovery information from - a peer kube-apiserver - type: Counter - stabilityLevel: ALPHA - labels: - - type - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: peer_proxy_errors_total - subsystem: apiserver - help: Total number of errors encountered while proxying requests to a peer kube - apiserver - type: Counter - stabilityLevel: ALPHA - labels: - - group - - resource - - type - - version - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: rerouted_request_total - subsystem: apiserver - help: '`Total number of requests that were proxied to a peer kube-apiserver because - the local apiserver was not capable of serving it, broken down by ''group'', ''version'', - and ''resource'' indicating the GVR of the request. 
If all three are empty (""), - the request is a discovery request.`' - type: Counter - stabilityLevel: ALPHA - labels: - - code - - group - - resource - - version - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: stream_translator_requests_total - subsystem: apiserver - help: Total number of requests that were handled by the StreamTranslatorProxy, which - processes streaming RemoteCommand/V5 - type: Counter - stabilityLevel: ALPHA - labels: - - code - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: stream_tunnel_requests_total - subsystem: apiserver - help: Total number of requests that were handled by the StreamTunnelProxy, which - processes streaming PortForward/V2 - type: Counter - stabilityLevel: ALPHA - labels: - - code - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: declarative_validation_panics_total - subsystem: validation - namespace: apiserver - help: Number of panics in declarative validation, broken down by validation identifier. - type: Counter - stabilityLevel: ALPHA - labels: - - validation_identifier - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: declarative_validation_parity_discrepancies_total - subsystem: validation - namespace: apiserver - help: Number of discrepancies between declarative and handwritten validation, broken - down by validation identifier. 
- type: Counter - stabilityLevel: ALPHA - labels: - - validation_identifier - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: x509_insecure_sha1_total - subsystem: webhooks - namespace: apiserver - help: Counts the number of requests to servers with insecure SHA1 signatures in - their serving certificate OR the number of connection failures due to the insecure - SHA1 signatures (either/or, based on the runtime environment) - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: x509_missing_san_total - subsystem: webhooks - namespace: apiserver - help: Counts the number of requests to servers missing SAN extension in their serving - certificate OR the number of connection failures due to the lack of x509 certificate - SAN extension missing (either/or, based on the runtime environment) - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: websocket_streaming_requests_total - subsystem: apiserver - help: Total number of WebSocket streaming requests (exec/attach/portforward) routed - by the API server, labeled by subresource and proxy_type. proxy_type is proxied_to_kubelet - when the kubelet handles the request directly; otherwise translated_at_apiserver. - type: Counter - stabilityLevel: ALPHA - labels: - - proxy_type - - subresource + - enforcement_action + - error_type + - policy + - policy_binding componentEndpoints: - component: kube-apiserver endpoint: /metrics @@ -7322,687 +7308,38 @@ componentEndpoints: - component: kube-apiserver endpoint: /metrics -- name: request_duration_seconds - subsystem: cloud_provider_webhook - help: Request latency in seconds. Broken down by status code. +- name: watch_list_duration_seconds + subsystem: apiserver + help: Response latency distribution in seconds for watch list requests broken by + group, version, resource and scope. 
type: Histogram - stabilityLevel: ALPHA - labels: - - code - - webhook - buckets: - - 0.25 - - 0.5 - - 0.7 - - 1 - - 1.5 - - 3 - - 5 - - 10 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics -- name: request_total - subsystem: cloud_provider_webhook - help: Number of HTTP requests partitioned by status code. - type: Counter - stabilityLevel: ALPHA - labels: - - code - - webhook - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics -- name: cloud_provider_taint_removal_delay_seconds - subsystem: node_controller - help: Number of seconds after node creation when NodeController removed the cloud-provider - taint of a single node. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 1 - - 4 - - 16 - - 64 - - 256 - - 1024 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics -- name: initial_node_sync_delay_seconds - subsystem: node_controller - help: Number of seconds after node creation when NodeController finished the initial - synchronization of a single node. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 1 - - 4 - - 16 - - 64 - - 256 - - 1024 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics -- name: route_sync_total - subsystem: route_controller - help: A metric counting the amount of times routes have been synced with the cloud - provider. 
- type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics -- name: loadbalancer_sync_total - subsystem: service_controller - help: A metric counting the amount of times any load balancer has been configured, - as an effect of service/node changes on the cluster - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics -- name: nodesync_error_total - subsystem: service_controller - help: A metric counting the amount of times any load balancer has been configured - and errored, as an effect of node changes on the cluster - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics -- name: nodesync_latency_seconds - subsystem: service_controller - help: A metric measuring the latency for nodesync which updates loadbalancer hosts - on cluster node updates. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 1 - - 2 - - 4 - - 8 - - 16 - - 32 - - 64 - - 128 - - 256 - - 512 - - 1024 - - 2048 - - 4096 - - 8192 - - 16384 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics -- name: update_loadbalancer_host_latency_seconds - subsystem: service_controller - help: A metric measuring the latency for updating each load balancer hosts. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 1 - - 2 - - 4 - - 8 - - 16 - - 32 - - 64 - - 128 - - 256 - - 512 - - 1024 - - 2048 - - 4096 - - 8192 - - 16384 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics -- name: processing_latency_seconds - subsystem: informer - help: Time taken to process events after popping from the queue. 
- type: Histogram - stabilityLevel: ALPHA + stabilityLevel: BETA labels: - group - - name - resource + - scope - version buckets: - - 0.001 - - 0.005 - - 0.01 - - 0.025 - 0.05 - 0.1 - - 0.25 - - 0.5 + - 0.2 + - 0.4 + - 0.6 + - 0.8 - 1 - - 2.5 - - 5 + - 2 + - 4 + - 6 + - 8 - 10 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: queued_items - subsystem: informer - help: Number of items currently queued in the FIFO. - type: Gauge - stabilityLevel: ALPHA - labels: - - group - - name - - resource - - version - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: store_resource_version - subsystem: informer - help: The 15 least significant digits of the resource version of the store. - type: Gauge - stabilityLevel: ALPHA - labels: - - group - - name - - resource - - version - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: leader_election_master_status - help: Gauge of if the reporting system is master of the relevant lease, 0 indicates - backup, 1 indicates master. 'name' is the string used to identify the lease. Please - make sure to group by name. 
- type: Gauge - stabilityLevel: ALPHA - labels: - - name - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: leader_election_slowpath_total - help: Total number of slow path exercised in renewing leader leases. 'name' is the - string used to identify the lease. Please make sure to group by name. - type: Counter - stabilityLevel: ALPHA - labels: - - name - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_dns_resolution_duration_seconds - help: DNS resolver latency in seconds. Broken down by host. - type: Histogram - stabilityLevel: ALPHA - labels: - - host - buckets: - - 0.005 - - 0.025 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2 - - 4 - - 8 - - 15 - - 30 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_exec_plugin_call_total - help: Number of calls to an exec plugin, partitioned by the type of event encountered - (no_error, plugin_execution_error, plugin_not_found_error, client_internal_error) - and an optional exit code. The exit code will be set to 0 if and only if the plugin - call was successful. 
- type: Counter - stabilityLevel: ALPHA - labels: - - call_status - - code - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_exec_plugin_certificate_rotation_age - help: Histogram of the number of seconds the last auth exec plugin client certificate - lived before being rotated. If auth exec plugin client certificates are unused, - histogram will contain no data. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 600 - - 1800 - - 3600 - - 14400 - - 86400 - - 604800 - - 2.592e+06 - - 7.776e+06 - - 1.5552e+07 - - 3.1104e+07 - - 1.24416e+08 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_exec_plugin_policy_call_total - help: Number of comparisons of an exec plugin to the plugin policy and allowlist - (if any), partitioned by whether or not the policy permits the plugin - type: Counter - stabilityLevel: ALPHA - labels: - - allowed - - denied - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_exec_plugin_ttl_seconds - help: Gauge of the shortest TTL (time-to-live) of the client certificate(s) managed - by the auth exec plugin. 
The value is in seconds until certificate expiry (negative - if already expired). If auth exec plugins are unused or manage no TLS certificates, - the value will be +INF. - type: Gauge - stabilityLevel: ALPHA - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_rate_limiter_duration_seconds - help: Client side rate limiter latency in seconds. Broken down by verb, and host. - type: Histogram - stabilityLevel: ALPHA - labels: - - host - - verb - buckets: - - 0.005 - - 0.025 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2 - - 4 - - 8 - 15 + - 20 - 30 + - 45 - 60 componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - component: kube-apiserver endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_request_duration_seconds - help: Request latency in seconds. Broken down by verb, and host. - type: Histogram - stabilityLevel: ALPHA - labels: - - host - - verb - buckets: - - 0.005 - - 0.025 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2 - - 4 - - 8 - - 15 - - 30 - - 60 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_request_retries_total - help: Number of request retries, partitioned by status code, verb, and host. 
- type: Counter - stabilityLevel: ALPHA - labels: - - code - - host - - verb - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_request_size_bytes - help: Request size in bytes. Broken down by verb and host. - type: Histogram - stabilityLevel: ALPHA - labels: - - host - - verb - buckets: - - 64 - - 256 - - 512 - - 1024 - - 4096 - - 16384 - - 65536 - - 262144 - - 1.048576e+06 - - 4.194304e+06 - - 1.6777216e+07 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_requests_total - help: Number of HTTP requests, partitioned by status code, method, and host. - type: Counter - stabilityLevel: ALPHA - labels: - - code - - host - - method - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_response_size_bytes - help: Response size in bytes. Broken down by verb and host. 
- type: Histogram - stabilityLevel: ALPHA - labels: - - host - - verb - buckets: - - 64 - - 256 - - 512 - - 1024 - - 4096 - - 16384 - - 65536 - - 262144 - - 1.048576e+06 - - 4.194304e+06 - - 1.6777216e+07 - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_transport_ca_reload_total - help: Number of times a CA reload is attempted, partitioned by the result and reason - for the reload attempt - type: Counter - stabilityLevel: ALPHA - labels: - - reason - - result - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_transport_cache_entries - help: Number of transport entries in the internal cache. 
- type: Gauge - stabilityLevel: ALPHA - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_transport_cache_gc_calls_total - help: 'Number of times a GC cleanup attempts to delete a transport cache entry, - partitioned by the result: deleted, skipped' - type: Counter - stabilityLevel: ALPHA - labels: - - result - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_transport_cert_rotation_gc_calls_total - help: Number of times a cert rotation goroutine cancel func is called via GC cleanup - of the associated transport - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: rest_client_transport_create_calls_total - help: 'Number of calls to get a new transport, partitioned by the result of the - operation hit: obtained from the cache, miss: created and added to the cache, - miss-gc: recreated and added back to the cache after being garbage collected, - uncacheable: created and not cached' - type: Counter - stabilityLevel: ALPHA - labels: - - result - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: 
kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics -- name: version_info - help: Provides the compatibility version info of the component. The component label - is the name of the component, usually kube, but is relevant for aggregated-apiservers. - type: Gauge - stabilityLevel: ALPHA - labels: - - binary - - component - - emulation - - min_compat - componentEndpoints: - - component: cloud-controller-manager - endpoint: /metrics - - component: kube-apiserver - endpoint: /metrics - - component: kube-controller-manager - endpoint: /metrics - - component: kube-proxy - endpoint: /metrics - - component: kube-scheduler - endpoint: /metrics - - component: kubelet - endpoint: /metrics - name: disabled_metrics_total help: The count of disabled metrics. type: Counter @@ -8020,6 +7357,88 @@ endpoint: /metrics - component: kubelet endpoint: /metrics +- name: desired_endpoint_slices + subsystem: endpoint_slice_controller + help: Number of EndpointSlices that would exist with perfect endpoint allocation + type: Gauge + stabilityLevel: BETA + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: endpoints_added_per_sync + subsystem: endpoint_slice_controller + help: Number of endpoints added on each Service sync + type: Histogram + stabilityLevel: BETA + buckets: + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + - 16384 + - 32768 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: endpoints_desired + subsystem: endpoint_slice_controller + help: Number of endpoints desired + type: Gauge + stabilityLevel: BETA + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: endpoints_removed_per_sync + subsystem: endpoint_slice_controller + help: 
Number of endpoints removed on each Service sync + type: Histogram + stabilityLevel: BETA + buckets: + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + - 16384 + - 32768 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: num_endpoint_slices + subsystem: endpoint_slice_controller + help: Number of EndpointSlices + type: Gauge + stabilityLevel: BETA + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: services_count_by_traffic_distribution + subsystem: endpoint_slice_controller + help: Number of Services using some specific trafficDistribution + type: Gauge + stabilityLevel: BETA + labels: + - traffic_distribution + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics - name: hidden_metrics_total help: The count of hidden metrics. type: Counter @@ -8037,6 +7456,147 @@ endpoint: /metrics - component: kubelet endpoint: /metrics +- name: metric_computation_duration_seconds + subsystem: horizontal_pod_autoscaler_controller + help: The time(seconds) that the HPA controller takes to calculate one metric. The + label 'action' should be either 'scale_down', 'scale_up', or 'none'. The label + 'error' should be either 'spec', 'internal', or 'none'. The label 'metric_type' + corresponds to HPA.spec.metrics[*].type + type: Histogram + stabilityLevel: BETA + labels: + - action + - error + - metric_type + buckets: + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: metric_computation_total + subsystem: horizontal_pod_autoscaler_controller + help: Number of metric computations. The label 'action' should be either 'scale_down', + 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', + or 'none'. 
The label 'metric_type' corresponds to HPA.spec.metrics[*].type + type: Counter + stabilityLevel: BETA + labels: + - action + - error + - metric_type + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: reconciliation_duration_seconds + subsystem: horizontal_pod_autoscaler_controller + help: The time(seconds) that the HPA controller takes to reconcile once. The label + 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label + 'error' should be either 'spec', 'internal', or 'none'. Note that if both spec + and internal errors happen during a reconciliation, the first one to occur is + reported in `error` label. + type: Histogram + stabilityLevel: BETA + labels: + - action + - error + buckets: + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: reconciliations_total + subsystem: horizontal_pod_autoscaler_controller + help: Number of reconciliations of HPA controller. The label 'action' should be + either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be + either 'spec', 'internal', or 'none'. Note that if both spec and internal errors + happen during a reconciliation, the first one to occur is reported in `error` + label. + type: Counter + stabilityLevel: BETA + labels: + - action + - error + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: pod_failures_handled_by_failure_policy_total + subsystem: job_controller + help: "`The number of failed Pods handled by failure policy with\n\t\t\trespect + to the failure policy action applied based on the matched\n\t\t\trule. 
Possible + values of the action label correspond to the\n\t\t\tpossible values for the failure + policy rule action, which are:\n\t\t\t\"FailJob\", \"Ignore\" and \"Count\".`" + type: Counter + stabilityLevel: BETA + labels: + - action + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: terminated_pods_tracking_finalizer_total + subsystem: job_controller + help: |- + `The number of terminated pods (phase=Failed|Succeeded) + that have the finalizer batch.kubernetes.io/job-tracking + The event label can be "add" or "delete".` + type: Counter + stabilityLevel: BETA + labels: + - event + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: image_volume_mounted_errors_total + subsystem: kubelet + help: Number of failed image volume mounts. + type: Counter + stabilityLevel: BETA + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: image_volume_mounted_succeed_total + subsystem: kubelet + help: Number of successful image volume mounts. + type: Counter + stabilityLevel: BETA + componentEndpoints: + - component: kubelet + endpoint: /metrics +- name: image_volume_requested_total + subsystem: kubelet + help: Number of requested image volumes. + type: Counter + stabilityLevel: BETA + componentEndpoints: + - component: kubelet + endpoint: /metrics - name: kubernetes_build_info help: A metric with a constant '1' value labeled by major, minor, git version, git commit, git tree state, build date, Go version, and compiler from which Kubernetes @@ -8087,6 +7647,22 @@ endpoint: /metrics - component: kubelet endpoint: /metrics +- name: probe_total + subsystem: prober + help: Cumulative number of a liveness, readiness or startup probe for a container + by result. 
+ type: Counter + stabilityLevel: BETA + labels: + - container + - namespace + - pod + - pod_uid + - probe_type + - result + componentEndpoints: + - component: kubelet + endpoint: /metrics/probes - name: registered_metrics_total help: The count of registered metrics broken by stability level and deprecation version. @@ -8128,6 +7704,100 @@ endpoint: /metrics - component: kubelet endpoint: /metrics +- name: goroutines + subsystem: scheduler + help: Number of running goroutines split by the work they do such as binding. + type: Gauge + stabilityLevel: BETA + labels: + - operation + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: permit_wait_duration_seconds + subsystem: scheduler + help: Duration of waiting on permit. + type: Histogram + stabilityLevel: BETA + labels: + - result + buckets: + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: plugin_evaluation_total + subsystem: scheduler + help: Number of attempts to schedule pods by each plugin and the extension point + (available only in PreFilter, Filter, PreScore, and Score). + type: Counter + stabilityLevel: BETA + labels: + - extension_point + - plugin + - profile + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: pod_scheduling_sli_duration_seconds + subsystem: scheduler + help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling + queue and might involve multiple scheduling attempts. 
+ type: Histogram + stabilityLevel: BETA + labels: + - attempts + buckets: + - 0.01 + - 0.02 + - 0.04 + - 0.08 + - 0.16 + - 0.32 + - 0.64 + - 1.28 + - 2.56 + - 5.12 + - 10.24 + - 20.48 + - 40.96 + - 81.92 + - 163.84 + - 327.68 + - 655.36 + - 1310.72 + - 2621.44 + - 5242.88 + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: unschedulable_pods + subsystem: scheduler + help: The number of unschedulable pods broken down by plugin name. A pod will increment + the gauge for all plugins that caused it to not schedule and so this metric have + meaning only when broken down by plugin. + type: Gauge + stabilityLevel: BETA + labels: + - plugin + - profile + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics - name: adds_total subsystem: workqueue help: Total number of adds handled by workqueue @@ -8292,6 +7962,373 @@ endpoint: /metrics - component: kubelet endpoint: /metrics +- name: controller_admission_duration_seconds + subsystem: admission + namespace: apiserver + help: Admission controller latency histogram in seconds, identified by name and + broken out for each operation and API resource and type (validate or admit). + type: Histogram + stabilityLevel: STABLE + labels: + - name + - operation + - rejected + - type + buckets: + - 0.005 + - 0.025 + - 0.1 + - 0.5 + - 1 + - 2.5 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: step_admission_duration_seconds + subsystem: admission + namespace: apiserver + help: Admission sub-step latency histogram in seconds, broken out for each operation + and API resource and step type (validate or admit). 
+ type: Histogram + stabilityLevel: STABLE + labels: + - operation + - rejected + - type + buckets: + - 0.005 + - 0.025 + - 0.1 + - 0.5 + - 1 + - 2.5 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: webhook_admission_duration_seconds + subsystem: admission + namespace: apiserver + help: Admission webhook latency histogram in seconds, identified by name and broken + out for each operation and API resource and type (validate or admit). + type: Histogram + stabilityLevel: STABLE + labels: + - name + - operation + - rejected + - type + buckets: + - 0.005 + - 0.025 + - 0.1 + - 0.5 + - 1 + - 2.5 + - 10 + - 25 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: current_inflight_requests + subsystem: apiserver + help: Maximal number of currently used inflight request limit of this apiserver + per request kind in last second. + type: Gauge + stabilityLevel: STABLE + labels: + - request_kind + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: longrunning_requests + subsystem: apiserver + help: Gauge of all active long-running apiserver requests broken out by verb, group, + version, resource, scope and component. Not all requests are tracked this way. + type: Gauge + stabilityLevel: STABLE + labels: + - component + - group + - resource + - scope + - subresource + - verb + - version + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_duration_seconds + subsystem: apiserver + help: Response latency distribution in seconds for each verb, dry run value, group, + version, resource, subresource, scope and component. 
+ type: Histogram + stabilityLevel: STABLE + labels: + - component + - dry_run + - group + - resource + - scope + - subresource + - verb + - version + buckets: + - 0.005 + - 0.025 + - 0.05 + - 0.1 + - 0.2 + - 0.4 + - 0.6 + - 0.8 + - 1 + - 1.25 + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 8 + - 10 + - 15 + - 20 + - 30 + - 45 + - 60 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: request_total + subsystem: apiserver + help: Counter of apiserver requests broken out for each verb, dry run value, group, + version, resource, scope, component, and HTTP response code. + type: Counter + stabilityLevel: STABLE + labels: + - code + - component + - dry_run + - group + - resource + - scope + - subresource + - verb + - version + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: requested_deprecated_apis + subsystem: apiserver + help: Gauge of deprecated APIs that have been requested, broken out by API group, + version, resource, subresource, and removed_release. + type: Gauge + stabilityLevel: STABLE + labels: + - group + - removed_release + - resource + - subresource + - version + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: response_sizes + subsystem: apiserver + help: Response size distribution in bytes for each group, version, verb, resource, + subresource, scope and component. + type: Histogram + stabilityLevel: STABLE + labels: + - component + - group + - resource + - scope + - subresource + - verb + - version + buckets: + - 1000 + - 10000 + - 100000 + - 1e+06 + - 1e+07 + - 1e+08 + - 1e+09 + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_storage_objects + help: '[DEPRECATED, consider using apiserver_resource_objects instead] Number of + stored objects at the time of last check split by kind. In case of a fetching + error, the value will be -1.' 
+ type: Gauge + deprecatedVersion: 1.34.0 + stabilityLevel: STABLE + labels: + - resource + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: apiserver_storage_size_bytes + help: Size of the storage database file physically allocated in bytes. + type: Custom + stabilityLevel: STABLE + labels: + - storage_cluster_id + componentEndpoints: + - component: kube-apiserver + endpoint: /metrics +- name: container_cpu_usage_seconds_total + help: Cumulative cpu time consumed by the container in core-seconds + type: Custom + stabilityLevel: STABLE + labels: + - container + - pod + - namespace + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource +- name: container_memory_working_set_bytes + help: Current working set of the container in bytes + type: Custom + stabilityLevel: STABLE + labels: + - container + - pod + - namespace + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource +- name: container_start_time_seconds + help: Start time of the container since unix epoch in seconds + type: Custom + stabilityLevel: STABLE + labels: + - container + - pod + - namespace + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource +- name: job_creation_skew_duration_seconds + subsystem: cronjob_controller + help: Time between when a cronjob is scheduled to be run, and when the corresponding + job is created + type: Histogram + stabilityLevel: STABLE + buckets: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: job_pods_finished_total + subsystem: job_controller + help: The number of finished Pods that are fully tracked + type: Counter + stabilityLevel: STABLE + labels: + - completion_mode + - result + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: job_sync_duration_seconds + subsystem: job_controller + help: The time it took to sync a job + type: 
Histogram + stabilityLevel: STABLE + labels: + - action + - completion_mode + - result + buckets: + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 + - 32.768 + - 65.536 + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: job_syncs_total + subsystem: job_controller + help: The number of job syncs + type: Counter + stabilityLevel: STABLE + labels: + - action + - completion_mode + - result + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: jobs_finished_total + subsystem: job_controller + help: The number of finished jobs + type: Counter + stabilityLevel: STABLE + labels: + - completion_mode + - reason + - result + componentEndpoints: + - component: kube-controller-manager + endpoint: /metrics +- name: kube_pod_resource_limit + help: Resources limit for workloads on the cluster, broken down by pod. This shows + the resource usage the scheduler and kubelet expect per pod for resources along + with the unit for the resource if any. + type: Custom + stabilityLevel: STABLE + labels: + - namespace + - pod + - node + - scheduler + - priority + - resource + - unit + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: kube_pod_resource_request + help: Resources requested by workloads on the cluster, broken down by pod. This + shows the resource usage the scheduler and kubelet expect per pod for resources + along with the unit for the resource if any. + type: Custom + stabilityLevel: STABLE + labels: + - namespace + - pod + - node + - scheduler + - priority + - resource + - unit + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics - name: healthcheck namespace: kubernetes help: This metric records the result of a single healthcheck. 
@@ -8335,229 +8372,182 @@ endpoint: /metrics/slis - component: kubelet endpoint: /metrics/slis -- name: aggregator_openapi_v2_regeneration_count - help: Counter of OpenAPI v2 spec regeneration count broken down by causing APIService - name and reason. +- name: evictions_total + subsystem: node_collector + help: Number of Node evictions that happened since current instance of NodeController + started. type: Counter - stabilityLevel: ALPHA + stabilityLevel: STABLE labels: - - apiservice - - reason + - zone componentEndpoints: - - component: kube-apiserver + - component: kube-controller-manager endpoint: /metrics -- name: aggregator_openapi_v2_regeneration_duration - help: Gauge of OpenAPI v2 spec regeneration duration in seconds. - type: Gauge - stabilityLevel: ALPHA - labels: - - reason - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: aggregator_unavailable_apiservice - help: Gauge of APIServices which are marked as unavailable broken down by APIService - name. +- name: node_cpu_usage_seconds_total + help: Cumulative cpu time consumed by the node in core-seconds type: Custom - stabilityLevel: ALPHA + stabilityLevel: STABLE + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource +- name: node_memory_working_set_bytes + help: Current working set of the node in bytes + type: Custom + stabilityLevel: STABLE + componentEndpoints: + - component: kubelet + endpoint: /metrics/resource +- name: pod_cpu_usage_seconds_total + help: Cumulative cpu time consumed by the pod in core-seconds + type: Custom + stabilityLevel: STABLE labels: - - name + - pod + - namespace componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: aggregator_unavailable_apiservice_total - help: Counter of APIServices which are marked as unavailable broken down by APIService - name and reason. 
- type: Counter - stabilityLevel: ALPHA + - component: kubelet + endpoint: /metrics/resource +- name: pod_memory_working_set_bytes + help: Current working set of the pod in bytes + type: Custom + stabilityLevel: STABLE labels: - - name - - reason + - pod + - namespace componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: x509_insecure_sha1_total - subsystem: kube_aggregator - namespace: apiserver - help: Counts the number of requests to servers with insecure SHA1 signatures in - their serving certificate OR the number of connection failures due to the insecure - SHA1 signatures (either/or, based on the runtime environment) - type: Counter - stabilityLevel: ALPHA + - component: kubelet + endpoint: /metrics/resource +- name: resource_scrape_error + help: 1 if there was an error while getting container metrics, 0 otherwise + type: Custom + stabilityLevel: STABLE componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: x509_missing_san_total - subsystem: kube_aggregator - namespace: apiserver - help: Counts the number of requests to servers missing SAN extension in their serving - certificate OR the number of connection failures due to the lack of x509 certificate - SAN extension missing (either/or, based on the runtime environment) - type: Counter - stabilityLevel: ALPHA - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: changes - subsystem: endpoint_slice_controller - help: Number of EndpointSlice changes - type: Counter - stabilityLevel: ALPHA - labels: - - operation - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: endpointslices_changed_per_sync - subsystem: endpoint_slice_controller - help: Number of EndpointSlices changed on each Service sync + - component: kubelet + endpoint: /metrics/resource +- name: framework_extension_point_duration_seconds + subsystem: scheduler + help: Latency for running all plugins of a specific extension point. 
type: Histogram - stabilityLevel: ALPHA + stabilityLevel: STABLE labels: - - topology - - traffic_distribution + - extension_point + - profile + - status + buckets: + - 0.0001 + - 0.0002 + - 0.0004 + - 0.0008 + - 0.0016 + - 0.0032 + - 0.0064 + - 0.0128 + - 0.0256 + - 0.0512 + - 0.1024 + - 0.2048 componentEndpoints: - - component: kube-controller-manager + - component: kube-scheduler endpoint: /metrics -- name: syncs - subsystem: endpoint_slice_controller - help: Number of EndpointSlice syncs - type: Counter - stabilityLevel: ALPHA +- name: pending_pods + subsystem: scheduler + help: Number of pending pods, by the queue type. 'active' means number of pods in + activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number + of pods in unschedulablePods that the scheduler attempted to schedule and failed; + 'gated' is the number of unschedulable pods that the scheduler never attempted + to schedule because they are gated. + type: Gauge + stabilityLevel: STABLE labels: + - queue + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: pod_scheduling_attempts + subsystem: scheduler + help: Number of attempts to successfully schedule a pod. + type: Histogram + stabilityLevel: STABLE + buckets: + - 1 + - 2 + - 4 + - 8 + - 16 + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: preemption_attempts_total + subsystem: scheduler + help: Total preemption attempts in the cluster till now + type: Counter + stabilityLevel: STABLE + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: preemption_victims + subsystem: scheduler + help: Number of selected preemption victims + type: Histogram + stabilityLevel: STABLE + buckets: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: queue_incoming_pods_total + subsystem: scheduler + help: Number of pods added to scheduling queues by event and queue type. 
+ type: Counter + stabilityLevel: STABLE + labels: + - event + - queue + componentEndpoints: + - component: kube-scheduler + endpoint: /metrics +- name: schedule_attempts_total + subsystem: scheduler + help: Number of attempts to schedule pods, by the result. 'unschedulable' means + a pod could not be scheduled, while 'error' means an internal scheduler problem. + type: Counter + stabilityLevel: STABLE + labels: + - profile - result componentEndpoints: - - component: kube-controller-manager + - component: kube-scheduler endpoint: /metrics -- name: desired_endpoint_slices - subsystem: endpoint_slice_controller - help: Number of EndpointSlices that would exist with perfect endpoint allocation - type: Gauge - stabilityLevel: BETA - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: endpoints_added_per_sync - subsystem: endpoint_slice_controller - help: Number of endpoints added on each Service sync +- name: scheduling_attempt_duration_seconds + subsystem: scheduler + help: Scheduling attempt latency in seconds (scheduling algorithm + binding) type: Histogram - stabilityLevel: BETA + stabilityLevel: STABLE + labels: + - profile + - result buckets: - - 2 - - 4 - - 8 - - 16 - - 32 - - 64 - - 128 - - 256 - - 512 - - 1024 - - 2048 - - 4096 - - 8192 - - 16384 - - 32768 + - 0.001 + - 0.002 + - 0.004 + - 0.008 + - 0.016 + - 0.032 + - 0.064 + - 0.128 + - 0.256 + - 0.512 + - 1.024 + - 2.048 + - 4.096 + - 8.192 + - 16.384 componentEndpoints: - - component: kube-controller-manager + - component: kube-scheduler endpoint: /metrics -- name: endpoints_desired - subsystem: endpoint_slice_controller - help: Number of endpoints desired - type: Gauge - stabilityLevel: BETA - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: endpoints_removed_per_sync - subsystem: endpoint_slice_controller - help: Number of endpoints removed on each Service sync - type: Histogram - stabilityLevel: BETA - buckets: - - 2 - - 4 - - 
8 - - 16 - - 32 - - 64 - - 128 - - 256 - - 512 - - 1024 - - 2048 - - 4096 - - 8192 - - 16384 - - 32768 - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: num_endpoint_slices - subsystem: endpoint_slice_controller - help: Number of EndpointSlices - type: Gauge - stabilityLevel: BETA - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: services_count_by_traffic_distribution - subsystem: endpoint_slice_controller - help: Number of Services using some specific trafficDistribution - type: Gauge - stabilityLevel: BETA - labels: - - traffic_distribution - componentEndpoints: - - component: kube-controller-manager - endpoint: /metrics -- name: pod_security_errors_total - help: Number of errors preventing normal evaluation. Non-fatal errors may result - in the latest restricted profile being used for evaluation. - type: Counter - stabilityLevel: ALPHA - labels: - - fatal - - request_operation - - resource - - subresource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: pod_security_evaluations_total - help: Number of policy evaluations that occurred, not counting ignored or exempt - requests. - type: Counter - stabilityLevel: ALPHA - labels: - - decision - - mode - - policy_level - - policy_version - - request_operation - - resource - - subresource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: pod_security_exemptions_total - help: Number of exempt requests, not counting ignored or out of scope requests. 
- type: Counter - stabilityLevel: ALPHA - labels: - - request_operation - - resource - - subresource - componentEndpoints: - - component: kube-apiserver - endpoint: /metrics -- name: latency - type: Summary - stabilityLevel: ALPHA - labels: - - node - objectives: - 0.5: 0.05 - 0.75: 0.025 - 0.9: 0.01 - 0.99: 0.001 diff --git a/hack/tools/instrumentation/documentation/documentation.md b/hack/tools/instrumentation/documentation/documentation.md index 1dd0dc8ed50..747c5c55152 100644 --- a/hack/tools/instrumentation/documentation/documentation.md +++ b/hack/tools/instrumentation/documentation/documentation.md @@ -8,7 +8,7 @@ description: >- ## Metrics (v1.36) - + This page details the metrics that different Kubernetes components export. You can query the metrics endpoint for these components using an HTTP scrape, and fetch the current metrics data in Prometheus format. @@ -424,6 +424,48 @@ Beta metrics observe a looser API contract than its stable counterparts. No labe
  • Counter
    • cloud-controller-manager (/metrics)
    • kube-apiserver (/metrics)
    • kube-controller-manager (/metrics)
    • kube-proxy (/metrics)
    • kube-scheduler (/metrics)
    • kubelet (/metrics)
  • +
    endpoint_slice_controller_desired_endpoint_slices
    +
    Number of EndpointSlices that would exist with perfect endpoint allocation
    +
      +
    • BETA
    • +
    • Gauge
    • +
      • kube-controller-manager (/metrics)
    +
    +
    endpoint_slice_controller_endpoints_added_per_sync
    +
    Number of endpoints added on each Service sync
    +
      +
    • BETA
    • +
    • Histogram
    • +
      • kube-controller-manager (/metrics)
    +
    +
    endpoint_slice_controller_endpoints_desired
    +
    Number of endpoints desired
    +
      +
    • BETA
    • +
    • Gauge
    • +
      • kube-controller-manager (/metrics)
    +
    +
    endpoint_slice_controller_endpoints_removed_per_sync
    +
    Number of endpoints removed on each Service sync
    +
      +
    • BETA
    • +
    • Histogram
    • +
      • kube-controller-manager (/metrics)
    +
    +
    endpoint_slice_controller_num_endpoint_slices
    +
    Number of EndpointSlices
    +
      +
    • BETA
    • +
    • Gauge
    • +
      • kube-controller-manager (/metrics)
    +
    +
    endpoint_slice_controller_services_count_by_traffic_distribution
    +
    Number of Services using some specific trafficDistribution
    +
      +
    • BETA
    • +
    • Gauge
    • +
    • traffic_distribution
      • kube-controller-manager (/metrics)
    +
    hidden_metrics_total
    The count of hidden metrics.
      @@ -431,6 +473,48 @@ Beta metrics observe a looser API contract than its stable counterparts. No labe
    • Counter
      • cloud-controller-manager (/metrics)
      • kube-apiserver (/metrics)
      • kube-controller-manager (/metrics)
      • kube-proxy (/metrics)
      • kube-scheduler (/metrics)
      • kubelet (/metrics)
    +
    horizontal_pod_autoscaler_controller_metric_computation_duration_seconds
    +
    The time(seconds) that the HPA controller takes to calculate one metric. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. The label 'error' should be either 'spec', 'internal', or 'none'. The label 'metric_type' corresponds to HPA.spec.metrics[*].type
    +
      +
    • BETA
    • +
    • Histogram
    • +
    • action, error, metric_type
      • kube-controller-manager (/metrics)
    +
    +
    horizontal_pod_autoscaler_controller_metric_computation_total
    +
    Number of metric computations. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', or 'none'. The label 'metric_type' corresponds to HPA.spec.metrics[*].type
    +
      +
    • BETA
    • +
    • Counter
    • +
    • action, error, metric_type
      • kube-controller-manager (/metrics)
    +
    +
    horizontal_pod_autoscaler_controller_reconciliation_duration_seconds
    +
    The time(seconds) that the HPA controller takes to reconcile once. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', or 'none'. Note that if both spec and internal errors happen during a reconciliation, the first one to occur is reported in `error` label.
    +
      +
    • BETA
    • +
    • Histogram
    • +
    • action, error
      • kube-controller-manager (/metrics)
    +
    +
    horizontal_pod_autoscaler_controller_reconciliations_total
    +
    Number of reconciliations of HPA controller. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', or 'none'. Note that if both spec and internal errors happen during a reconciliation, the first one to occur is reported in `error` label.
    +
      +
    • BETA
    • +
    • Counter
    • +
    • action, error
      • kube-controller-manager (/metrics)
    +
    +
    job_controller_pod_failures_handled_by_failure_policy_total
    +
    The number of failed Pods handled by failure policy with respect to the failure policy action applied based on the matched rule. Possible values of the action label correspond to the possible values for the failure policy rule action, which are: "FailJob", "Ignore" and "Count".
    +
      +
    • BETA
    • +
    • Counter
    • +
    • action
      • kube-controller-manager (/metrics)
    +
    +
    job_controller_terminated_pods_tracking_finalizer_total
    +
    The number of terminated pods (phase=Failed|Succeeded) that have the finalizer batch.kubernetes.io/job-tracking. The event label can be "add" or "delete".
    +
      +
    • BETA
    • +
    • Counter
    • +
    • event
      • kube-controller-manager (/metrics)
    +
    kubelet_image_volume_mounted_errors_total
    Number of failed image volume mounts.
      @@ -487,6 +571,27 @@ Beta metrics observe a looser API contract than its stable counterparts. No labe
    • Gauge
    • managername
      • cloud-controller-manager (/metrics)
      • kube-apiserver (/metrics)
      • kube-controller-manager (/metrics)
      • kube-proxy (/metrics)
      • kube-scheduler (/metrics)
      • kubelet (/metrics)
    +
    scheduler_goroutines
    +
    Number of running goroutines split by the work they do such as binding.
    +
      +
    • BETA
    • +
    • Gauge
    • +
    • operation
      • kube-scheduler (/metrics)
    +
    +
    scheduler_permit_wait_duration_seconds
    +
    Duration of waiting on permit.
    +
      +
    • BETA
    • +
    • Histogram
    • +
    • result
      • kube-scheduler (/metrics)
    +
    +
    scheduler_plugin_evaluation_total
    +
    Number of attempts to schedule pods by each plugin and the extension point (available only in PreFilter, Filter, PreScore, and Score).
    +
      +
    • BETA
    • +
    • Counter
    • +
    • extension_point, plugin, profile
      • kube-scheduler (/metrics)
    +
    scheduler_pod_scheduling_sli_duration_seconds
    E2e latency for a pod being scheduled, from the time the pod enters the scheduling queue and might involve multiple scheduling attempts.
      @@ -494,6 +599,13 @@ Beta metrics observe a looser API contract than its stable counterparts. No labe
    • Histogram
    • attempts
      • kube-scheduler (/metrics)
    +
    scheduler_unschedulable_pods
    +
    The number of unschedulable pods broken down by plugin name. A pod will increment the gauge for all plugins that caused it to not schedule and so this metric has meaning only when broken down by plugin.
    +
      +
    • BETA
    • +
    • Gauge
    • +
    • plugin, profile
      • kube-scheduler (/metrics)
    +
    workqueue_adds_total
    Total number of adds handled by workqueue
      @@ -1222,6 +1334,34 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Histogram
    • flow_schema, priority_level
      • kube-apiserver (/metrics)
    +
    apiserver_impersonation_attempts_duration_seconds
    +
    Latency of impersonation attempts in seconds split by mode and decision.
    +
      +
    • ALPHA
    • +
    • Histogram
    • +
    • decision, mode
      • kube-apiserver (/metrics)
    +
    +
    apiserver_impersonation_attempts_total
    +
    Total number of impersonation attempts split by mode and decision.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • decision, mode
      • kube-apiserver (/metrics)
    +
    +
    apiserver_impersonation_authorization_attempts_duration_seconds
    +
    Latency of authorization checks made by the impersonation handler in seconds split by mode and decision.
    +
      +
    • ALPHA
    • +
    • Histogram
    • +
    • decision, mode
      • kube-apiserver (/metrics)
    +
    +
    apiserver_impersonation_authorization_attempts_total
    +
    Total number of authorization checks made by the impersonation handler split by mode and decision.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • decision, mode
      • kube-apiserver (/metrics)
    +
    apiserver_init_events_total
    Counter of init events processed in watch cache broken by resource type.
      @@ -1243,6 +1383,27 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Counter
      • kube-apiserver (/metrics)
    +
    apiserver_manifest_admission_config_controller_automatic_reload_last_timestamp_seconds
    +
    Timestamp of the last automatic reload of admission manifest configuration split by status, plugin, and apiserver identity.
    +
      +
    • ALPHA
    • +
    • Gauge
    • +
    • apiserver_id_hash, plugin, status
      • kube-apiserver (/metrics)
    +
    +
    apiserver_manifest_admission_config_controller_automatic_reloads_total
    +
    Total number of automatic reloads of admission manifest configuration split by status, plugin, and apiserver identity.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • apiserver_id_hash, plugin, status
      • kube-apiserver (/metrics)
    +
    +
    apiserver_manifest_admission_config_controller_last_config_info
    +
    Information about the last applied admission manifest configuration with hash as label, split by plugin and apiserver identity.
    +
      +
    • ALPHA
    • +
    • Custom
    • +
    • plugin, apiserver_id_hash, hash
      • kube-apiserver (/metrics)
    +
    apiserver_mutating_admission_policy_check_duration_seconds
    Mutation admission latency for individual mutation expressions in seconds, labeled by policy and binding.
      @@ -1531,7 +1692,7 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • group, resource
      • kube-apiserver (/metrics)
    apiserver_watch_cache_resource_version
    -
    Current resource version of watch cache broken by resource type.
    +
    Current resource version of watch cache broken by resource type. This is truncated to the 15 least significant digits.
    • ALPHA
    • Gauge
    • @@ -1551,6 +1712,20 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Counter
    • group, resource, version
      • kube-apiserver (/metrics)
    +
    apiserver_watch_filtered_events_total
    +
    Counter of events filtered out by shard selector during watch dispatch, broken by resource type.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • group, resource
      • kube-apiserver (/metrics)
    +
    +
    apiserver_watch_shards_total
    +
    Number of active sharded watch connections broken by resource type.
    +
      +
    • ALPHA
    • +
    • Gauge
    • +
    • group, resource
      • kube-apiserver (/metrics)
    +
    apiserver_webhooks_x509_insecure_sha1_total
    Counts the number of requests to servers with insecure SHA1 signatures in their serving certificate OR the number of connection failures due to the insecure SHA1 signatures (either/or, based on the runtime environment)
      @@ -1565,6 +1740,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Counter
      • kube-apiserver (/metrics)
    +
    apiserver_websocket_streaming_requests_total
    +
    Total number of WebSocket streaming requests (exec/attach/portforward) routed by the API server, labeled by subresource and proxy_type. proxy_type is proxied_to_kubelet when the kubelet handles the request directly; otherwise translated_at_apiserver.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • proxy_type, subresource
      • kube-apiserver (/metrics)
    +
    attach_detach_controller_attachdetach_controller_forced_detaches
    Number of times the A/D Controller performed a forced detach
      @@ -1740,34 +1922,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Counter
    • operation
      • kube-controller-manager (/metrics)
    -
    endpoint_slice_controller_desired_endpoint_slices
    -
    Number of EndpointSlices that would exist with perfect endpoint allocation
    -
      -
    • ALPHA
    • -
    • Gauge
    • -
      • kube-controller-manager (/metrics)
    -
    -
    endpoint_slice_controller_endpoints_added_per_sync
    -
    Number of endpoints added on each Service sync
    -
      -
    • ALPHA
    • -
    • Histogram
    • -
      • kube-controller-manager (/metrics)
    -
    -
    endpoint_slice_controller_endpoints_desired
    -
    Number of endpoints desired
    -
      -
    • ALPHA
    • -
    • Gauge
    • -
      • kube-controller-manager (/metrics)
    -
    -
    endpoint_slice_controller_endpoints_removed_per_sync
    -
    Number of endpoints removed on each Service sync
    -
      -
    • ALPHA
    • -
    • Histogram
    • -
      • kube-controller-manager (/metrics)
    -
    endpoint_slice_controller_endpointslices_changed_per_sync
    Number of EndpointSlices changed on each Service sync
      @@ -1775,20 +1929,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Histogram
    • topology, traffic_distribution
      • kube-controller-manager (/metrics)
    -
    endpoint_slice_controller_num_endpoint_slices
    -
    Number of EndpointSlices
    -
      -
    • ALPHA
    • -
    • Gauge
    • -
      • kube-controller-manager (/metrics)
    -
    -
    endpoint_slice_controller_services_count_by_traffic_distribution
    -
    Number of Services using some specific trafficDistribution
    -
      -
    • ALPHA
    • -
    • Gauge
    • -
    • traffic_distribution
      • kube-controller-manager (/metrics)
    -
    endpoint_slice_controller_syncs
    Number of EndpointSlice syncs
      @@ -1957,20 +2097,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Gauge
    • hpa_name, namespace
      • kube-controller-manager (/metrics)
    -
    horizontal_pod_autoscaler_controller_metric_computation_duration_seconds
    -
    The time(seconds) that the HPA controller takes to calculate one metric. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. The label 'error' should be either 'spec', 'internal', or 'none'. The label 'metric_type' corresponds to HPA.spec.metrics[*].type
    -
      -
    • ALPHA
    • -
    • Histogram
    • -
    • action, error, metric_type
      • kube-controller-manager (/metrics)
    -
    -
    horizontal_pod_autoscaler_controller_metric_computation_total
    -
    Number of metric computations. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', or 'none'. The label 'metric_type' corresponds to HPA.spec.metrics[*].type
    -
      -
    • ALPHA
    • -
    • Counter
    • -
    • action, error, metric_type
      • kube-controller-manager (/metrics)
    -
    horizontal_pod_autoscaler_controller_num_horizontal_pod_autoscalers
    Current number of controlled HPA objects.
      @@ -1978,20 +2104,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Gauge
      • kube-controller-manager (/metrics)
    -
    horizontal_pod_autoscaler_controller_reconciliation_duration_seconds
    -
    The time(seconds) that the HPA controller takes to reconcile once. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', or 'none'. Note that if both spec and internal errors happen during a reconciliation, the first one to occur is reported in `error` label.
    -
      -
    • ALPHA
    • -
    • Histogram
    • -
    • action, error
      • kube-controller-manager (/metrics)
    -
    -
    horizontal_pod_autoscaler_controller_reconciliations_total
    -
    Number of reconciliations of HPA controller. The label 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', or 'none'. Note that if both spec and internal errors happen during a reconciliation, the first one to occur is reported in `error` label.
    -
      -
    • ALPHA
    • -
    • Counter
    • -
    • action, error
      • kube-controller-manager (/metrics)
    -
    informer_processing_latency_seconds
    Time taken to process events after popping from the queue.
      @@ -2006,6 +2118,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Gauge
    • group, name, resource, version
      • cloud-controller-manager (/metrics)
      • kube-apiserver (/metrics)
      • kube-controller-manager (/metrics)
      • kube-proxy (/metrics)
      • kube-scheduler (/metrics)
      • kubelet (/metrics)
    +
    informer_store_resource_version
    +
    The 15 least significant digits of the resource version of the store.
    +
      +
    • ALPHA
    • +
    • Gauge
    • +
    • group, name, resource, version
      • cloud-controller-manager (/metrics)
      • kube-apiserver (/metrics)
      • kube-controller-manager (/metrics)
      • kube-proxy (/metrics)
      • kube-scheduler (/metrics)
      • kubelet (/metrics)
    +
    job_controller_job_finished_indexes_total
    The number of finished indexes. Possible values for the status label are: "succeeded", "failed". Possible values for the backoffLimit label are: "perIndex" and "global".
      @@ -2027,13 +2146,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Counter
    • controller_name
      • kube-controller-manager (/metrics)
    -
    job_controller_pod_failures_handled_by_failure_policy_total
    -
    The number of failed Pods handled by failure policy with respect to the failure policy action applied based on the matched rule. Possible values of the action label correspond to the possible values for the failure policy rule action, which are: "FailJob", "Ignore" and "Count".
    -
      -
    • ALPHA
    • -
    • Counter
    • -
    • action
      • kube-controller-manager (/metrics)
    -
    job_controller_stale_sync_skips_total
    Total number of Job syncs skipped due to a stale watch cache.
      @@ -2041,13 +2153,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Counter
    • group, resource
      • kube-controller-manager (/metrics)
    -
    job_controller_terminated_pods_tracking_finalizer_total
    -
    The number of terminated pods (phase=Failed|Succeeded) that have the finalizer batch.kubernetes.io/job-tracking. The event label can be "add" or "delete".
    -
      -
    • ALPHA
    • -
    • Counter
    • -
    • event
      • kube-controller-manager (/metrics)
    -
    kube_apiserver_clusterip_allocator_allocated_ips
    Gauge measuring the number of allocated IPs for Services
      @@ -2468,6 +2573,20 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Counter
      • kubelet (/metrics)
    +
    kubelet_memory_qos_node_memory_low_bytes
    +
    Total cgroup v2 memory.low in bytes for Burstable pods. This memory is soft-reserved and may be reclaimed under extreme pressure.
    +
      +
    • ALPHA
    • +
    • Gauge
    • +
      • kubelet (/metrics)
    +
    +
    kubelet_memory_qos_node_memory_min_bytes
    +
    Total cgroup v2 memory.min in bytes for Guaranteed pods. This memory is hard-reserved and never reclaimed by the kernel.
    +
      +
    • ALPHA
    • +
    • Gauge
    • +
      • kubelet (/metrics)
    +
    kubelet_metrics_provider
    Metrics provider used by kubelet to collect container stats. Values can be 'cadvisor' and 'cri'
      @@ -2559,6 +2678,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Gauge
      • kubelet (/metrics)
    +
    kubelet_pleg_pod_relist_duration_seconds
    +
    Duration in seconds for relisting a single pod in PLEG.
    +
      +
    • ALPHA
    • +
    • Histogram
    • +
      • kubelet (/metrics)
    +
    kubelet_pleg_relist_duration_seconds
    Duration in seconds for relisting pods in PLEG.
      @@ -2685,6 +2811,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Histogram
      • kubelet (/metrics)
    +
    kubelet_pod_watch_events_dropped_total
    +
    Cumulative number of pod watch events dropped.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
      • kubelet (/metrics)
    +
    kubelet_pod_worker_duration_seconds
    Duration in seconds to sync a single pod. Broken down by operation type: create, update, or sync
      @@ -2839,6 +2972,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Counter
      • kubelet (/metrics)
    +
    kubelet_terminated_containers_total
    +
    Cumulative number of container terminations.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • container_type, exit_code, reason
      • kubelet (/metrics)
    +
    kubelet_topology_manager_admission_duration_ms
    Duration in milliseconds to serve a pod admission request.
      @@ -2916,6 +3056,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Custom
    • namespace, persistentvolumeclaim
      • kubelet (/metrics)
    +
    kubelet_websocket_streaming_requests_total
    +
    Total number of WebSocket streaming requests (exec/attach/portforward) received by the kubelet.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • subresource
      • kubelet (/metrics)
    +
    kubelet_working_pods
    Number of pods the kubelet is actually running, broken down by lifecycle phase, whether the pod is desired, orphaned, or runtime only (also orphaned), and whether the pod is static. An orphaned pod has been removed from local configuration or force deleted in the API and consumes resources that are not otherwise visible.
      @@ -3315,6 +3462,27 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Counter
    • groupresource
      • kube-controller-manager (/metrics)
    +
    resource_manager_allocation_errors_total
    +
    Number of errors encountered during exclusive resource allocation.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • resource_name source
      • kubelet (/metrics)
    +
    +
    resource_manager_allocations_total
    +
    Number of exclusive resource allocations performed by a resource manager.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • resource_name source
      • kubelet (/metrics)
    +
    +
    resource_manager_container_assignments
    +
    Number of containers with a specific type of resource assignment.
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • assignment_type resource_name
      • kubelet (/metrics)
    +
    resourceclaim_controller_creates_total
    Number of ResourceClaims creation requests, categorized by creation status and admin access
      @@ -3329,6 +3497,27 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Custom
    • allocated admin_access source
      • kube-controller-manager (/metrics)
    +
    resourcepoolstatusrequest_controller_request_processing_duration_seconds
    +
    Time taken to process a ResourcePoolStatusRequest
    +
      +
    • ALPHA
    • +
    • Histogram
    • +
    • driver_name
      • kube-controller-manager (/metrics)
    +
    +
    resourcepoolstatusrequest_controller_request_processing_errors_total
    +
    Total number of errors encountered while processing ResourcePoolStatusRequests
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • driver_name
      • kube-controller-manager (/metrics)
    +
    +
    resourcepoolstatusrequest_controller_requests_processed_total
    +
    Total number of ResourcePoolStatusRequests processed
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • driver_name
      • kube-controller-manager (/metrics)
    +
    rest_client_dns_resolution_duration_seconds
    DNS resolver latency in seconds. Broken down by host.
      @@ -3406,6 +3595,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Histogram
    • host verb
      • cloud-controller-manager (/metrics)
      • kube-apiserver (/metrics)
      • kube-controller-manager (/metrics)
      • kube-proxy (/metrics)
      • kube-scheduler (/metrics)
      • kubelet (/metrics)
    +
    rest_client_transport_ca_reload_total
    +
    Number of times a CA reload is attempted, partitioned by the result and reason for the reload attempt
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • reason result
      • cloud-controller-manager (/metrics)
      • kube-apiserver (/metrics)
      • kube-controller-manager (/metrics)
      • kube-proxy (/metrics)
      • kube-scheduler (/metrics)
      • kubelet (/metrics)
    +
    rest_client_transport_cache_entries
    Number of transport entries in the internal cache.
      @@ -3413,8 +3609,22 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Gauge
      • cloud-controller-manager (/metrics)
      • kube-apiserver (/metrics)
      • kube-controller-manager (/metrics)
      • kube-proxy (/metrics)
      • kube-scheduler (/metrics)
      • kubelet (/metrics)
    +
    rest_client_transport_cache_gc_calls_total
    +
    Number of times a GC cleanup attempts to delete a transport cache entry, partitioned by the result: deleted, skipped
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • result
      • cloud-controller-manager (/metrics)
      • kube-apiserver (/metrics)
      • kube-controller-manager (/metrics)
      • kube-proxy (/metrics)
      • kube-scheduler (/metrics)
      • kubelet (/metrics)
    +
    +
    rest_client_transport_cert_rotation_gc_calls_total
    +
    Number of times a cert rotation goroutine cancel func is called via GC cleanup of the associated transport
    +
      +
    • ALPHA
    • +
    • Counter
    • +
      • cloud-controller-manager (/metrics)
      • kube-apiserver (/metrics)
      • kube-controller-manager (/metrics)
      • kube-proxy (/metrics)
      • kube-scheduler (/metrics)
      • kubelet (/metrics)
    +
    rest_client_transport_create_calls_total
    -
    Number of calls to get a new transport, partitioned by the result of the operation hit: obtained from the cache, miss: created and added to the cache, uncacheable: created and not cached
    +
    Number of calls to get a new transport, partitioned by the result of the operation hit: obtained from the cache, miss: created and added to the cache, miss-gc: recreated and added back to the cache after being garbage collected, uncacheable: created and not cached
    • ALPHA
    • Counter
    • @@ -3490,6 +3700,20 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Gauge
    • type
      • kube-scheduler (/metrics)
    +
    scheduler_dra_bindingconditions_allocations_total
    +
    Number of allocations using devices with BindingConditions, counted per driver per scheduling attempt
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • driver profile status
      • kube-scheduler (/metrics)
    +
    +
    scheduler_dra_bindingconditions_wait_duration_seconds
    +
    Time in seconds spent waiting for BindingConditions to be satisfied during PreBind.
    +
      +
    • ALPHA
    • +
    • Histogram
    • +
    • driver profile status
      • kube-scheduler (/metrics)
    +
    scheduler_event_handling_duration_seconds
    Event handling latency in seconds.
      @@ -3504,13 +3728,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Histogram
    • hinted profile
      • kube-scheduler (/metrics)
    -
    scheduler_goroutines
    -
    Number of running goroutines split by the work they do such as binding.
    -
      -
    • ALPHA
    • -
    • Gauge
    • -
    • operation
      • kube-scheduler (/metrics)
    -
    scheduler_inflight_events
    Number of events currently tracked in the scheduling queue.
      @@ -3525,20 +3742,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Gauge
    • call_type
      • kube-scheduler (/metrics)
    -
    scheduler_permit_wait_duration_seconds
    -
    Duration of waiting on permit.
    -
      -
    • ALPHA
    • -
    • Histogram
    • -
    • result
      • kube-scheduler (/metrics)
    -
    -
    scheduler_plugin_evaluation_total
    -
    Number of attempts to schedule pods by each plugin and the extension point (available only in PreFilter, Filter, PreScore, and Score).
    -
      -
    • ALPHA
    • -
    • Counter
    • -
    • extension_point plugin profile
      • kube-scheduler (/metrics)
    -
    scheduler_plugin_execution_duration_seconds
    Duration for running a plugin at a specific extension point.
      @@ -3616,13 +3819,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Histogram
    • profile
      • kube-scheduler (/metrics)
    -
    scheduler_unschedulable_pods
    -
    The number of unschedulable pods broken down by plugin name. A pod will increment the gauge for all plugins that caused it to not schedule and so this metric have meaning only when broken down by plugin.
    -
      -
    • ALPHA
    • -
    • Gauge
    • -
    • plugin profile
      • kube-scheduler (/metrics)
    -
    scheduler_volume_binder_cache_requests_total
    Total number for request volume binding cache
      @@ -3840,13 +4036,20 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Custom
    • plugin_name state
      • kubelet (/metrics)
    -
    volume_operation_total_errors
    +
    volume_operation_errors_total
    Total volume operation errors
    • ALPHA
    • Counter
    • operation_name plugin_name
      • kube-controller-manager (/metrics)
    +
    volume_operation_total_errors
    +
    Total volume operation errors
    +
      +
    • ALPHA
    • +
    • Counter
    • +
    • operation_name plugin_name
      • kube-controller-manager (/metrics)
    • 1.36.0
    +
    volume_operation_total_seconds
    Storage operation end to end duration in seconds
      diff --git a/hack/tools/instrumentation/main.go b/hack/tools/instrumentation/main.go index 6588cc625fa..c223d2e22aa 100644 --- a/hack/tools/instrumentation/main.go +++ b/hack/tools/instrumentation/main.go @@ -112,7 +112,13 @@ func main() { for scanner.Scan() { arg := scanner.Text() ms, es := searchPathForStableMetrics(arg, endpointConfig) - stableMetrics = append(stableMetrics, ms...) + for _, m := range ms { + fqName := m.BuildFQName() + if _, ok := stableMetricNames[fqName]; !ok { + stableMetrics = append(stableMetrics, m) + } + stableMetricNames[fqName] = struct{}{} + } errors = append(errors, es...) } } diff --git a/hack/tools/instrumentation/stability-utils.sh b/hack/tools/instrumentation/stability-utils.sh index 788d7a68e8e..b383d48ac70 100755 --- a/hack/tools/instrumentation/stability-utils.sh +++ b/hack/tools/instrumentation/stability-utils.sh @@ -31,6 +31,7 @@ stability_check_setup() { GOTOOLCHAIN="$(kube::golang::hack_tools_gotoolchain)" go -C "${KUBE_ROOT}/hack/tools/instrumentation" install . ./sort ./documentation } +# shellcheck disable=SC2120 function find_files_to_check() { # Similar to find but faster and easier to understand. 
We want to include # modified and untracked files because this might be running against code @@ -43,12 +44,15 @@ function find_files_to_check() { ':!:*/third_party/*' `# catches third_party/...` \ ':!:hack/*' `# catches hack/...` \ ':!:*/hack/*' `# catches any subdir/hack/...` \ + ':!:test/*' `# catches test/...` \ + ':!:*/test/*' `# catches any subdir/test/...` \ ':!:*/*_test.go' \ ':!:hack/tools/instrumentation' \ ':(glob)**/*.go' \ "$@" } +# shellcheck disable=SC2120 function find_test_files() { git ls-files -cmo --exclude-standard \ "$@" @@ -61,23 +65,15 @@ reset=$(tput sgr0) function kube::validate::stablemetrics() { stability_check_setup temp_file=$(mktemp) - temp_file2=$(mktemp) - doValidate=$(find_files_to_check -z \ - | sort -z \ - | KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \ - "${GOBIN}/instrumentation" \ + + find_files_to_check \ + | KUBE_ROOT=${KUBE_ROOT} "${GOBIN}/instrumentation" \ -- \ - 1>"${temp_file}") + - \ + 1>"${temp_file}" - if $doValidate; then - echo -e "${green}Diffing hack/tools/instrumentation/testdata/stable-metrics-list.yaml\n${reset}" - fi - doSort=$(KUBE_ROOT=${KUBE_ROOT} "${GOBIN}/sort" --sort-file="${temp_file}" 1>"${temp_file2}") - if ! $doSort; then - echo "${red}!!! sorting metrics has failed! 
${reset}" >&2 - exit 1 - fi - if diff -u "$KUBE_ROOT/hack/tools/instrumentation/testdata/stable-metrics-list.yaml" "$temp_file2"; then + echo -e "${green}Diffing hack/tools/instrumentation/testdata/stable-metrics-list.yaml\n${reset}" + if diff -u "$KUBE_ROOT/hack/tools/instrumentation/testdata/stable-metrics-list.yaml" "$temp_file"; then echo -e "${green}\nPASS metrics stability verification ${reset}" return 0 fi @@ -90,19 +86,17 @@ function kube::validate::test::stablemetrics() { stability_check_setup temp_file=$(mktemp) cd "${KUBE_ROOT}/hack/tools/instrumentation" - doValidate=$(find_test_files -z \ - | sort -z \ - | KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \ - "${GOBIN}/instrumentation" \ + + find_test_files \ + | KUBE_ROOT=${KUBE_ROOT} "${GOBIN}/instrumentation" \ -- \ - 1>"${temp_file}") + - \ + 1>"${temp_file}" - if $doValidate; then - echo -e "${green}Diffing hack/tools/instrumentation/testdata/test-stable-metrics-list.yaml\n${reset}" - if diff -u "$KUBE_ROOT/hack/tools/instrumentation/testdata/test-stable-metrics-list.yaml" "$temp_file"; then - echo -e "${green}\nPASS metrics stability verification ${reset}" - return 0 - fi + echo -e "${green}Diffing hack/tools/instrumentation/testdata/test-stable-metrics-list.yaml\n${reset}" + if diff -u "$KUBE_ROOT/hack/tools/instrumentation/testdata/test-stable-metrics-list.yaml" "$temp_file"; then + echo -e "${green}\nPASS metrics stability verification ${reset}" + return 0 fi echo "${red}!!! Metrics stability static analysis test has failed!${reset}" >&2 @@ -113,44 +107,29 @@ function kube::validate::test::stablemetrics() { function kube::update::stablemetrics() { stability_check_setup temp_file=$(mktemp) - temp_file2=$(mktemp) - doCheckStability=$(find_files_to_check -z \ - | sort -z \ - | KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \ - "${GOBIN}/instrumentation" \ + + find_files_to_check \ + | KUBE_ROOT=${KUBE_ROOT} "${GOBIN}/instrumentation" \ -- \ - 1>"${temp_file}") + - \ + 1>"${temp_file}" - if ! 
$doCheckStability; then - echo "${red}!!! updating golden list of metrics has failed! ${reset}" >&2 - exit 1 - fi mv -f "$temp_file" "${KUBE_ROOT}/hack/tools/instrumentation/testdata/stable-metrics-list.yaml" - doSort=$("${GOBIN}/sort" --sort-file="${KUBE_ROOT}/hack/tools/instrumentation/testdata/stable-metrics-list.yaml" 1>"${temp_file2}") - if ! $doSort; then - echo "${red}!!! sorting metrics has failed! ${reset}" >&2 - exit 1 - fi - mv -f "$temp_file2" "${KUBE_ROOT}/hack/tools/instrumentation/testdata/stable-metrics-list.yaml" echo "${green}Updated golden list of stable metrics.${reset}" } function kube::update::documentation::list() { stability_check_setup temp_file=$(mktemp) - doCheckStability=$(find_files_to_check -z \ - | sort -z \ - | KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \ - "${GOBIN}/instrumentation" \ + + find_files_to_check \ + | KUBE_ROOT=${KUBE_ROOT} "${GOBIN}/instrumentation" \ --allstabilityclasses \ --endpoint-mappings="hack/tools/instrumentation/endpoint-mappings.yaml" \ -- \ - 1>"${temp_file}") + - \ + 1>"${temp_file}" - if ! $doCheckStability; then - echo "${red}!!! updating golden list of metrics has failed! ${reset}" >&2 - exit 1 - fi mv -f "$temp_file" "${KUBE_ROOT}/hack/tools/instrumentation/documentation/documentation-list.yaml" echo "${green}Updated list of metrics for documentation ${reset}" } @@ -173,17 +152,13 @@ function kube::update::test::stablemetrics() { stability_check_setup temp_file=$(mktemp) cd "${KUBE_ROOT}/hack/tools/instrumentation" - doCheckStability=$(find_test_files -z \ - | sort -z \ - | KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \ - "${GOBIN}/instrumentation" \ + + find_test_files \ + | KUBE_ROOT=${KUBE_ROOT} "${GOBIN}/instrumentation" \ -- \ - 1>"${temp_file}") + - \ + 1>"${temp_file}" - if ! $doCheckStability; then - echo "${red}!!! updating golden list of test metrics has failed! 
${reset}" >&2 - exit 1 - fi mv -f "$temp_file" "${KUBE_ROOT}/hack/tools/instrumentation/testdata/test-stable-metrics-list.yaml" echo "${green}Updated test list of stable metrics.${reset}" }