From 05713e9c633c018956d8f0ea9494977cc1b3b783 Mon Sep 17 00:00:00 2001 From: Jules Casteran Date: Wed, 5 Jun 2024 16:18:02 +0200 Subject: [PATCH 01/23] feat(scaleway-sd): add labels for multiple routed IPs Signed-off-by: Jules Casteran --- discovery/scaleway/instance.go | 74 ++++++++++++++++++++--------- discovery/scaleway/instance_test.go | 2 + 2 files changed, 54 insertions(+), 22 deletions(-) diff --git a/discovery/scaleway/instance.go b/discovery/scaleway/instance.go index 2542c63253..a3ab71fb90 100644 --- a/discovery/scaleway/instance.go +++ b/discovery/scaleway/instance.go @@ -35,28 +35,30 @@ import ( const ( instanceLabelPrefix = metaLabelPrefix + "instance_" - instanceBootTypeLabel = instanceLabelPrefix + "boot_type" - instanceHostnameLabel = instanceLabelPrefix + "hostname" - instanceIDLabel = instanceLabelPrefix + "id" - instanceImageArchLabel = instanceLabelPrefix + "image_arch" - instanceImageIDLabel = instanceLabelPrefix + "image_id" - instanceImageNameLabel = instanceLabelPrefix + "image_name" - instanceLocationClusterID = instanceLabelPrefix + "location_cluster_id" - instanceLocationHypervisorID = instanceLabelPrefix + "location_hypervisor_id" - instanceLocationNodeID = instanceLabelPrefix + "location_node_id" - instanceNameLabel = instanceLabelPrefix + "name" - instanceOrganizationLabel = instanceLabelPrefix + "organization_id" - instancePrivateIPv4Label = instanceLabelPrefix + "private_ipv4" - instanceProjectLabel = instanceLabelPrefix + "project_id" - instancePublicIPv4Label = instanceLabelPrefix + "public_ipv4" - instancePublicIPv6Label = instanceLabelPrefix + "public_ipv6" - instanceSecurityGroupIDLabel = instanceLabelPrefix + "security_group_id" - instanceSecurityGroupNameLabel = instanceLabelPrefix + "security_group_name" - instanceStateLabel = instanceLabelPrefix + "status" - instanceTagsLabel = instanceLabelPrefix + "tags" - instanceTypeLabel = instanceLabelPrefix + "type" - instanceZoneLabel = instanceLabelPrefix + "zone" - instanceRegionLabel = instanceLabelPrefix + "region" + instanceBootTypeLabel = instanceLabelPrefix + "boot_type" + instanceHostnameLabel = instanceLabelPrefix + "hostname" + instanceIDLabel = instanceLabelPrefix + "id" + instanceImageArchLabel = instanceLabelPrefix + "image_arch" + instanceImageIDLabel = instanceLabelPrefix + "image_id" + instanceImageNameLabel = instanceLabelPrefix + "image_name" + instanceLocationClusterID = instanceLabelPrefix + "location_cluster_id" + instanceLocationHypervisorID = instanceLabelPrefix + "location_hypervisor_id" + instanceLocationNodeID = instanceLabelPrefix + "location_node_id" + instanceNameLabel = instanceLabelPrefix + "name" + instanceOrganizationLabel = instanceLabelPrefix + "organization_id" + instancePrivateIPv4Label = instanceLabelPrefix + "private_ipv4" + instanceProjectLabel = instanceLabelPrefix + "project_id" + instancePublicIPv4Label = instanceLabelPrefix + "public_ipv4" + instancePublicIPv6Label = instanceLabelPrefix + "public_ipv6" + instancePublicIPv4AddressesLabel = instanceLabelPrefix + "public_ipv4_addresses" + instancePublicIPv6AddressesLabel = instanceLabelPrefix + "public_ipv6_addresses" + instanceSecurityGroupIDLabel = instanceLabelPrefix + "security_group_id" + instanceSecurityGroupNameLabel = instanceLabelPrefix + "security_group_name" + instanceStateLabel = instanceLabelPrefix + "status" + instanceTagsLabel = instanceLabelPrefix + "tags" + instanceTypeLabel = instanceLabelPrefix + "type" + instanceZoneLabel = instanceLabelPrefix + "zone" + instanceRegionLabel = instanceLabelPrefix + "region" ) type instanceDiscovery struct { @@ -175,6 +177,34 @@ func (d *instanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, } addr := "" + if len(server.PublicIPs) > 0 { + var ipv4Addresses []string + var ipv6Addresses []string + + for _, ip := range server.PublicIPs { + if ip.Family == instance.ServerIPIPFamilyInet { + ipv4Addresses = append(ipv4Addresses, ip.Address.String()) + } else if ip.Family == instance.ServerIPIPFamilyInet6 { + ipv6Addresses = append(ipv6Addresses, ip.Address.String()) + } + } + + if len(ipv6Addresses) > 0 { + addr = ipv6Addresses[0] + labels[instancePublicIPv6AddressesLabel] = model.LabelValue( + separator + + strings.Join(ipv6Addresses, separator) + + separator) + } + if len(ipv4Addresses) > 0 { + addr = ipv4Addresses[0] + labels[instancePublicIPv4AddressesLabel] = model.LabelValue( + separator + + strings.Join(ipv4Addresses, separator) + + separator) + } + } + if server.IPv6 != nil { //nolint:staticcheck labels[instancePublicIPv6Label] = model.LabelValue(server.IPv6.Address.String()) //nolint:staticcheck addr = server.IPv6.Address.String() //nolint:staticcheck diff --git a/discovery/scaleway/instance_test.go b/discovery/scaleway/instance_test.go index ae70a9ed25..a8aabc5a9b 100644 --- a/discovery/scaleway/instance_test.go +++ b/discovery/scaleway/instance_test.go @@ -125,6 +125,8 @@ api_url: %s "__meta_scaleway_instance_organization_id": "20b3d507-96ac-454c-a795-bc731b46b12f", "__meta_scaleway_instance_project_id": "20b3d507-96ac-454c-a795-bc731b46b12f", "__meta_scaleway_instance_public_ipv4": "51.158.183.115", + "__meta_scaleway_instance_public_ipv4_addresses": ",51.158.183.115,", + "__meta_scaleway_instance_public_ipv6_addresses": ",2001:bc8:1640:1568:dc00:ff:fe21:91b,", "__meta_scaleway_instance_region": "nl-ams", "__meta_scaleway_instance_security_group_id": "984414da-9fc2-49c0-a925-fed6266fe092", "__meta_scaleway_instance_security_group_name": "Default security group", From 7766f2f06c45db4a5f6adc353053a9842fe827f1 Mon Sep 17 00:00:00 2001 From: Jules Casteran Date: Fri, 7 Jun 2024 16:35:55 +0200 Subject: [PATCH 02/23] tests(scaleway-sd): add case with multiple routed ips Signed-off-by: Jules Casteran --- discovery/scaleway/instance_test.go | 26 ++- discovery/scaleway/testdata/instance.json | 216 ++++++++++++++++++++++ 2 files changed, 241 insertions(+), 1 deletion(-) diff --git a/discovery/scaleway/instance_test.go b/discovery/scaleway/instance_test.go index a8aabc5a9b..11ef36d353 100644 --- a/discovery/scaleway/instance_test.go +++ b/discovery/scaleway/instance_test.go @@ -60,7 +60,7 @@ api_url: %s tg := tgs[0] require.NotNil(t, tg) require.NotNil(t, tg.Targets) - require.Len(t, tg.Targets, 3) + require.Len(t, tg.Targets, 4) for i, lbls := range []model.LabelSet{ { @@ -134,6 +134,30 @@ api_url: %s "__meta_scaleway_instance_type": "DEV1-S", "__meta_scaleway_instance_zone": "nl-ams-1", }, + { + "__address__": "163.172.136.10:80", + "__meta_scaleway_instance_boot_type": "local", + "__meta_scaleway_instance_hostname": "multiple-ips", + "__meta_scaleway_instance_id": "658abbf4-e6c6-4239-a483-3307763cf6e0", + "__meta_scaleway_instance_image_arch": "x86_64", + "__meta_scaleway_instance_image_id": "f583f58c-1ea5-44ab-a1e6-2b2e7df32a86", + "__meta_scaleway_instance_image_name": "Ubuntu 24.04 Noble Numbat", + "__meta_scaleway_instance_location_cluster_id": "7", + "__meta_scaleway_instance_location_hypervisor_id": "801", + "__meta_scaleway_instance_location_node_id": "95", + "__meta_scaleway_instance_name": "multiple-ips", + "__meta_scaleway_instance_organization_id": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b", + "__meta_scaleway_instance_project_id": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b", + "__meta_scaleway_instance_public_ipv4": "163.172.136.10", + "__meta_scaleway_instance_public_ipv4_addresses": ",163.172.136.10,212.47.248.223,51.15.231.134,", + "__meta_scaleway_instance_public_ipv6_addresses": ",2001:bc8:710:4a69:dc00:ff:fe58:40c1,2001:bc8:710:d::,2001:bc8:710:5417::,", + "__meta_scaleway_instance_region": "fr-par", + "__meta_scaleway_instance_security_group_id": "0fe819c3-274d-472a-b3f5-ddb258d2d8bb", + "__meta_scaleway_instance_security_group_name": "Default security group", + "__meta_scaleway_instance_status": "running", + "__meta_scaleway_instance_type": "PLAY2-PICO", + "__meta_scaleway_instance_zone": "fr-par-1", + }, } { t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) { require.Equal(t, lbls, tg.Targets[i]) diff --git a/discovery/scaleway/testdata/instance.json b/discovery/scaleway/testdata/instance.json index b433f7598e..19a6106daa 100644 --- a/discovery/scaleway/testdata/instance.json +++ b/discovery/scaleway/testdata/instance.json @@ -356,6 +356,222 @@ "placement_group": null, "private_nics": [], "zone": "nl-ams-1" + }, + { + "id": "658abbf4-e6c6-4239-a483-3307763cf6e0", + "name": "multiple-ips", + "arch": "x86_64", + "commercial_type": "PLAY2-PICO", + "boot_type": "local", + "organization": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b", + "project": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b", + "hostname": "multiple-ips", + "image": { + "id": "f583f58c-1ea5-44ab-a1e6-2b2e7df32a86", + "name": "Ubuntu 24.04 Noble Numbat", + "organization": "51b656e3-4865-41e8-adbc-0c45bdd780db", + "project": "51b656e3-4865-41e8-adbc-0c45bdd780db", + "root_volume": { + "id": "cfab1e2e-fa24-480a-a372-61b19e8e2fda", + "name": "Ubuntu 24.04 Noble Numbat", + "volume_type": "unified", + "size": 10000000000 + }, + "extra_volumes": { + + }, + "public": true, + "arch": "x86_64", + "creation_date": "2024-04-26T09:24:38.624912+00:00", + "modification_date": "2024-04-26T09:24:38.624912+00:00", + "default_bootscript": null, + "from_server": "", + "state": "available", + "tags": [ + + ], + "zone": "fr-par-1" + }, + "volumes": { + "0": { + "boot": false, + "id": "7d4dc5ae-3f3c-4f9c-91cf-4a47a2193e94", + "name": "Ubuntu 24.04 Noble Numbat", + "volume_type": "b_ssd", + "export_uri": null, + "organization": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b", + "project": "ee7bd9e1-9cbd-4724-b2f4-19e50f3cf38b", + "server": { + "id": "658abbf4-e6c6-4239-a483-3307763cf6e0", + "name": "multiple-ips" + }, + "size": 10000000000, + "state": "available", + "creation_date": "2024-06-07T13:33:17.697162+00:00", + "modification_date": "2024-06-07T13:33:17.697162+00:00", + "tags": [ + + ], + "zone": "fr-par-1" + } + }, + "tags": [ + + ], + "state": "running", + "protected": false, + "state_detail": "booted", + "public_ip": { + "id": "63fd9ede-58d7-482c-b21d-1d79d81a90dc", + "address": "163.172.136.10", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": "dhcp", + "tags": [ + + ], + "state": "attached", + "ipam_id": "700644ed-f6a2-4c64-8508-bb867bc07673" + }, + "public_ips": [ + { + "id": "63fd9ede-58d7-482c-b21d-1d79d81a90dc", + "address": "163.172.136.10", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": "dhcp", + "tags": [ + + ], + "state": "attached", + "ipam_id": "700644ed-f6a2-4c64-8508-bb867bc07673" + }, + { + "id": "eed4575b-90e5-4102-b956-df874c911e2b", + "address": "212.47.248.223", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": "manual", + "tags": [ + + ], + "state": "attached", + "ipam_id": "e2bdef64-828b-4f4a-a56b-954a85759adf" + }, + { + "id": "fca8b329-6c0e-4f9c-aa88-d5c197b5919a", + "address": "51.15.231.134", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": "manual", + "tags": [ + + ], + "state": "attached", + "ipam_id": "e56808db-b348-4b7e-ad23-995ae08dc1a1" + }, + { + "id": "3ffa6774-124c-4e64-8afb-148c15304b25", + "address": "2001:bc8:710:4a69:dc00:ff:fe58:40c1", + "dynamic": false, + "gateway": "fe80::dc00:ff:fe58:40c2", + "netmask": "64", + "family": "inet6", + "provisioning_mode": "slaac", + "tags": [ + + ], + "state": "attached", + "ipam_id": "d97773d9-fd2c-4085-92bb-5c471abd132e" + }, + { + "id": "28fcf539-8492-4603-b627-de0501ce8489", + "address": "2001:bc8:710:d::", + "dynamic": false, + "gateway": "fe80::dc00:ff:fe58:40c2", + "netmask": "64", + "family": "inet6", + "provisioning_mode": "manual", + "tags": [ + + ], + "state": "attached", + "ipam_id": "005d19ac-203c-4034-ab16-9ff4caadbdd5" + }, + { + "id": "db6fafda-3a12-403d-8c9c-1a1cb1c315ba", + "address": "2001:bc8:710:5417::", + "dynamic": false, + "gateway": "fe80::dc00:ff:fe58:40c2", + "netmask": "64", + "family": "inet6", + "provisioning_mode": "manual", + "tags": [ + + ], + "state": "attached", + "ipam_id": "8c62fac8-4134-462c-ba48-71ce4f8ac939" + } + ], + "mac_address": "de:00:00:58:40:c1", + "routed_ip_enabled": true, + "ipv6": null, + "extra_networks": [ + + ], + "dynamic_ip_required": false, + "enable_ipv6": false, + "private_ip": null, + "creation_date": "2024-06-07T13:33:17.697162+00:00", + "modification_date": "2024-06-07T13:33:26.021167+00:00", + "bootscript": { + "id": "fdfe150f-a870-4ce4-b432-9f56b5b995c1", + "public": true, + "title": "x86_64 mainline 4.4.230 rev1", + "architecture": "x86_64", + "organization": "11111111-1111-4111-8111-111111111111", + "project": "11111111-1111-4111-8111-111111111111", + "kernel": "http://10.194.3.9/kernel/x86_64-mainline-lts-4.4-4.4.230-rev1/vmlinuz-4.4.230", + "dtb": "", + "initrd": "http://10.194.3.9/initrd/initrd-Linux-x86_64-v3.14.6.gz", + "bootcmdargs": "LINUX_COMMON scaleway boot=local nbd.max_part=16", + "default": true, + "zone": "fr-par-1" + }, + "security_group": { + "id": "0fe819c3-274d-472a-b3f5-ddb258d2d8bb", + "name": "Default security group" + }, + "location": { + "zone_id": "par1", + "platform_id": "14", + "cluster_id": "7", + "hypervisor_id": "801", + "node_id": "95" + }, + "maintenances": [ + + ], + "allowed_actions": [ + "poweroff", + "terminate", + "reboot", + "stop_in_place", + "backup" + ], + "placement_group": null, + "private_nics": [ + + ], + "zone": "fr-par-1" } ] } \ No newline at end of file From c7378c566328efad5aa83aa93d4a0ac2611fe8dc Mon Sep 17 00:00:00 2001 From: Jules Casteran Date: Thu, 13 Jun 2024 11:42:37 +0200 Subject: [PATCH 03/23] docs(scaleway-sd): document public ipv4 and ipv6 addresses Signed-off-by: Jules Casteran --- docs/configuration/configuration.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 26c088e135..d6c896916e 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -2955,6 +2955,8 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_scaleway_instance_project_id`: project id of the server * `__meta_scaleway_instance_public_ipv4`: the public IPv4 address of the server * `__meta_scaleway_instance_public_ipv6`: the public IPv6 address of the server +* `__meta_scaleway_instance_public_ipv4_addresses`: the public IPv4 addresses of the server +* `__meta_scaleway_instance_public_ipv6_addresses`: the public IPv6 addresses of the server * `__meta_scaleway_instance_region`: the region of the server * `__meta_scaleway_instance_security_group_id`: the ID of the security group of the server * `__meta_scaleway_instance_security_group_name`: the name of the security group of the server From 9eac82de40b6d72d7cef8c28a21f6633b2542e08 Mon Sep 17 00:00:00 2001 From: Jules Casteran Date: Thu, 13 Jun 2024 11:43:35 +0200 Subject: [PATCH 04/23] fix(scaleway-sd): do not use ip list to fill server address Signed-off-by: Jules Casteran --- discovery/scaleway/instance.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/discovery/scaleway/instance.go b/discovery/scaleway/instance.go index a3ab71fb90..ad49a313ec 100644 --- a/discovery/scaleway/instance.go +++ b/discovery/scaleway/instance.go @@ -190,14 +190,12 @@ func (d *instanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, } if len(ipv6Addresses) > 0 { - addr = ipv6Addresses[0] labels[instancePublicIPv6AddressesLabel] = model.LabelValue( separator + strings.Join(ipv6Addresses, separator) + separator) } if len(ipv4Addresses) > 0 { - addr = ipv4Addresses[0] labels[instancePublicIPv4AddressesLabel] = model.LabelValue( separator + strings.Join(ipv4Addresses, separator) + From a307cd4633eedb88d859cc51f2a19de107f928b5 Mon Sep 17 00:00:00 2001 From: Jules Casteran Date: Thu, 13 Jun 2024 11:44:16 +0200 Subject: [PATCH 05/23] fix(scaleway-sd): do not fill public_ipv4 with ipv6 Signed-off-by: Jules Casteran --- discovery/scaleway/instance.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/discovery/scaleway/instance.go b/discovery/scaleway/instance.go index ad49a313ec..efdc14bd43 100644 --- a/discovery/scaleway/instance.go +++ b/discovery/scaleway/instance.go @@ -209,8 +209,10 @@ func (d *instanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, } if server.PublicIP != nil { //nolint:staticcheck - labels[instancePublicIPv4Label] = model.LabelValue(server.PublicIP.Address.String()) //nolint:staticcheck - addr = server.PublicIP.Address.String() //nolint:staticcheck + if server.PublicIP.Family != instance.ServerIPIPFamilyInet6 { //nolint:staticcheck + labels[instancePublicIPv4Label] = model.LabelValue(server.PublicIP.Address.String()) //nolint:staticcheck + } + addr = server.PublicIP.Address.String() //nolint:staticcheck } if server.PrivateIP != nil { From b472ce7010de872747af2b57188c2d32c7e1007b Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Mon, 10 Feb 2025 21:20:48 +0100 Subject: [PATCH 06/23] chore: enable early-return from revive Signed-off-by: Matthieu MOREL --- .golangci.yml | 7 +++++++ discovery/azure/azure.go | 5 ++--- discovery/linode/linode.go | 9 ++++---- discovery/manager.go | 6 +++--- discovery/moby/docker.go | 21 +++++++++---------- .../remote_storage_adapter/influxdb/client.go | 6 +++--- promql/engine.go | 6 +++--- storage/remote/azuread/azuread.go | 5 ++--- tsdb/agent/db.go | 10 ++++----- tsdb/isolation.go | 7 +++---- 10 files changed, 41 insertions(+), 41 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index f294a4d76e..56509fe13f 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -126,6 +126,9 @@ linters-settings: - allowTypesBefore: "*testing.T,testing.TB" - name: context-keys-type - name: dot-imports + - name: early-return + arguments: + - "preserveScope" # A lot of false positives: incorrectly identifies channel draining as "empty code block". # See https://github.com/mgechev/revive/issues/386 - name: empty-block @@ -137,6 +140,8 @@ linters-settings: - name: exported - name: increment-decrement - name: indent-error-flow + arguments: + - "preserveScope" - name: package-comments # TODO(beorn7): Currently, we have a lot of missing package doc comments. Maybe we should have them. disabled: true @@ -144,6 +149,8 @@ linters-settings: - name: receiver-naming - name: redefines-builtin-id - name: superfluous-else + arguments: + - "preserveScope" - name: time-naming - name: unexported-return - name: unreachable-code diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 862d86859b..8a6945790f 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -458,11 +458,10 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID) } if err != nil { - if errors.Is(err, errorNotFound) { - d.logger.Warn("Network interface does not exist", "name", nicID, "err", err) - } else { + if !errors.Is(err, errorNotFound) { return nil, err } + d.logger.Warn("Network interface does not exist", "name", nicID, "err", err) // Get out of this routine because we cannot continue without a network interface. return nil, nil } diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index 8e6fd8bf00..453901bc05 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -194,13 +194,12 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { events, err := d.client.ListEvents(ctx, &eventsOpts) if err != nil { var e *linodego.Error - if errors.As(err, &e) && e.Code == http.StatusUnauthorized { - // If we get a 401, the token doesn't have `events:read_only` scope. - // Disable event polling and fallback to doing a full refresh every interval. - d.eventPollingEnabled = false - } else { + if !(errors.As(err, &e) && e.Code == http.StatusUnauthorized) { return nil, err } + // If we get a 401, the token doesn't have `events:read_only` scope. + // Disable event polling and fallback to doing a full refresh every interval. + d.eventPollingEnabled = false } else { // Event polling tells us changes the Linode API is aware of. Actions issued outside of the Linode API, // such as issuing a `shutdown` at the VM's console instead of using the API to power off an instance, diff --git a/discovery/manager.go b/discovery/manager.go index 87e0ecc44b..3219117d2a 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -101,12 +101,12 @@ func NewManager(ctx context.Context, logger *slog.Logger, registerer prometheus. // Register the metrics. // We have to do this after setting all options, so that the name of the Manager is set. - if metrics, err := NewManagerMetrics(registerer, mgr.name); err == nil { - mgr.metrics = metrics - } else { + metrics, err := NewManagerMetrics(registerer, mgr.name) + if err != nil { logger.Error("Failed to create discovery manager metrics", "manager", mgr.name, "err", err) return nil } + mgr.metrics = metrics return mgr } diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go index de277a58db..fca97ccd47 100644 --- a/discovery/moby/docker.go +++ b/discovery/moby/docker.go @@ -237,17 +237,16 @@ func (d *DockerDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er if len(networks) == 0 { // Try to lookup shared networks for { - if containerNetworkMode.IsContainer() { - tmpContainer, exists := allContainers[containerNetworkMode.ConnectedContainer()] - if !exists { - break - } - networks = tmpContainer.NetworkSettings.Networks - containerNetworkMode = container.NetworkMode(tmpContainer.HostConfig.NetworkMode) - if len(networks) > 0 { - break - } - } else { + if !containerNetworkMode.IsContainer() { + break + } + tmpContainer, exists := allContainers[containerNetworkMode.ConnectedContainer()] + if !exists { + break + } + networks = tmpContainer.NetworkSettings.Networks + containerNetworkMode = container.NetworkMode(tmpContainer.HostConfig.NetworkMode) + if len(networks) > 0 { break } } diff --git a/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go b/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go index 12dc1732a5..a91b27360e 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go @@ -288,11 +288,11 @@ func valuesToSamples(timestamp time.Time, value interface{}) (prompb.Sample, err var valueInt64 int64 var ok bool if valueFloat64, ok = value.(float64); !ok { - if valueInt64, ok = value.(int64); ok { - valueFloat64 = float64(valueInt64) - } else { + valueInt64, ok = value.(int64) + if !ok { return prompb.Sample{}, fmt.Errorf("unable to convert sample value to float64: %v", value) } + valueFloat64 = float64(valueInt64) } return prompb.Sample{ diff --git a/promql/engine.go b/promql/engine.go index 3e14a2529d..b4a4899bec 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -3553,11 +3553,11 @@ func formatDate(t time.Time) string { // unwrapParenExpr does the AST equivalent of removing parentheses around a expression. func unwrapParenExpr(e *parser.Expr) { for { - if p, ok := (*e).(*parser.ParenExpr); ok { - *e = p.Expr - } else { + p, ok := (*e).(*parser.ParenExpr) + if !ok { break } + *e = p.Expr } } diff --git a/storage/remote/azuread/azuread.go b/storage/remote/azuread/azuread.go index 20ec53d6f6..aa3dee6b45 100644 --- a/storage/remote/azuread/azuread.go +++ b/storage/remote/azuread/azuread.go @@ -349,11 +349,10 @@ func (tokenProvider *tokenProvider) getToken(ctx context.Context) error { func (tokenProvider *tokenProvider) updateRefreshTime(accessToken azcore.AccessToken) error { tokenExpiryTimestamp := accessToken.ExpiresOn.UTC() deltaExpirytime := time.Now().Add(time.Until(tokenExpiryTimestamp) / 2) - if deltaExpirytime.After(time.Now().UTC()) { - tokenProvider.refreshTime = deltaExpirytime - } else { + if !deltaExpirytime.After(time.Now().UTC()) { return errors.New("access token expiry is less than the current time") } + tokenProvider.refreshTime = deltaExpirytime return nil } diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 0bcef8e7bc..1f876f4e1e 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -1026,12 +1026,11 @@ func (a *appender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.L return 0, storage.ErrOutOfOrderCT } - if ct > series.lastTs { - series.lastTs = ct - } else { + if ct <= series.lastTs { // discard the sample if it's out of order. return 0, storage.ErrOutOfOrderCT } + series.lastTs = ct switch { case h != nil: @@ -1091,12 +1090,11 @@ func (a *appender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, return 0, storage.ErrOutOfOrderSample } - if ct > series.lastTs { - series.lastTs = ct - } else { + if ct <= series.lastTs { // discard the sample if it's out of order. return 0, storage.ErrOutOfOrderCT } + series.lastTs = ct // NOTE: always modify pendingSamples and sampleSeries together. a.pendingSamples = append(a.pendingSamples, record.RefSample{ diff --git a/tsdb/isolation.go b/tsdb/isolation.go index 86330f36e4..1035991e74 100644 --- a/tsdb/isolation.go +++ b/tsdb/isolation.go @@ -275,12 +275,11 @@ func (txr *txRing) cleanupAppendIDsBelow(bound uint64) { pos := int(txr.txIDFirst) for txr.txIDCount > 0 { - if txr.txIDs[pos] < bound { - txr.txIDFirst++ - txr.txIDCount-- - } else { + if txr.txIDs[pos] >= bound { break } + txr.txIDFirst++ + txr.txIDCount-- pos++ if pos == len(txr.txIDs) { From d6443244077a06e1e0f81cc5558c0abf2ce2bdce Mon Sep 17 00:00:00 2001 From: machine424 Date: Thu, 2 Jan 2025 17:59:04 +0100 Subject: [PATCH 07/23] feat(tsdb/(head|agent)): reuse pools across segments to avoid generating garbage during WL replay This is part of the "reduce WAL replay overhead/garbage" effort to help with https://github.com/prometheus/prometheus/issues/6934. Signed-off-by: machine424 --- tsdb/agent/db.go | 27 +++++++++++----------- tsdb/head.go | 10 ++++++++ tsdb/head_test.go | 35 ++++++++++++++++++++-------- tsdb/head_wal.go | 59 ++++++++++++++++++----------------------------- 4 files changed, 72 insertions(+), 59 deletions(-) diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 0bcef8e7bc..32494a4d62 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -235,6 +235,12 @@ type DB struct { appenderPool sync.Pool bufPool sync.Pool + // These pools are used during WAL replay. + walReplaySeriesPool zeropool.Pool[[]record.RefSeries] + walReplaySamplesPool zeropool.Pool[[]record.RefSample] + walReplayHistogramsPool zeropool.Pool[[]record.RefHistogramSample] + walReplayFloatHistogramsPool zeropool.Pool[[]record.RefFloatHistogramSample] + nextRef *atomic.Uint64 series *stripeSeries // deleted is a map of (ref IDs that should be deleted from WAL) to (the WAL segment they @@ -426,11 +432,6 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H decoded = make(chan interface{}, 10) errCh = make(chan error, 1) - - seriesPool zeropool.Pool[[]record.RefSeries] - samplesPool zeropool.Pool[[]record.RefSample] - histogramsPool zeropool.Pool[[]record.RefHistogramSample] - floatHistogramsPool zeropool.Pool[[]record.RefFloatHistogramSample] ) go func() { @@ -440,7 +441,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H rec := r.Record() switch dec.Type(rec) { case record.Series: - series := seriesPool.Get()[:0] + series := db.walReplaySeriesPool.Get()[:0] series, err = dec.Series(rec, series) if err != nil { errCh <- &wlog.CorruptionErr{ @@ -452,7 +453,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H } decoded <- series case record.Samples: - samples := samplesPool.Get()[:0] + samples := db.walReplaySamplesPool.Get()[:0] samples, err = dec.Samples(rec, samples) if err != nil { errCh <- &wlog.CorruptionErr{ @@ -464,7 +465,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H } decoded <- samples case record.HistogramSamples, record.CustomBucketsHistogramSamples: - histograms := histogramsPool.Get()[:0] + histograms := db.walReplayHistogramsPool.Get()[:0] histograms, err = dec.HistogramSamples(rec, histograms) if err != nil { errCh <- &wlog.CorruptionErr{ @@ -476,7 +477,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H } decoded <- histograms case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples: - floatHistograms := floatHistogramsPool.Get()[:0] + floatHistograms := db.walReplayFloatHistogramsPool.Get()[:0] floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms) if err != nil { errCh <- &wlog.CorruptionErr{ @@ -521,7 +522,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H } } } - seriesPool.Put(v) + db.walReplaySeriesPool.Put(v) case []record.RefSample: for _, entry := range v { // Update the lastTs for the series based @@ -535,7 +536,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H series.lastTs = entry.T } } - samplesPool.Put(v) + db.walReplaySamplesPool.Put(v) case []record.RefHistogramSample: for _, entry := range v { // Update the lastTs for the series based @@ -549,7 +550,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H series.lastTs = entry.T } } - histogramsPool.Put(v) + db.walReplayHistogramsPool.Put(v) case []record.RefFloatHistogramSample: for _, entry := range v { // Update the lastTs for the series based @@ -563,7 +564,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H series.lastTs = entry.T } } - floatHistogramsPool.Put(v) + db.walReplayFloatHistogramsPool.Put(v) default: panic(fmt.Errorf("unexpected decoded type: %T", d)) } diff --git a/tsdb/head.go b/tsdb/head.go index 4fbb4b5710..6f9a287aa0 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -94,6 +94,16 @@ type Head struct { bytesPool zeropool.Pool[[]byte] memChunkPool sync.Pool + // These pools are used during WAL/WBL replay. + wlReplaySeriesPool zeropool.Pool[[]record.RefSeries] + wlReplaySamplesPool zeropool.Pool[[]record.RefSample] + wlReplaytStonesPool zeropool.Pool[[]tombstones.Stone] + wlReplayExemplarsPool zeropool.Pool[[]record.RefExemplar] + wlReplayHistogramsPool zeropool.Pool[[]record.RefHistogramSample] + wlReplayFloatHistogramsPool zeropool.Pool[[]record.RefFloatHistogramSample] + wlReplayMetadataPool zeropool.Pool[[]record.RefMetadata] + wlReplayMmapMarkersPool zeropool.Pool[[]record.RefMmapMarker] + // All series addressable by their ID or hash. series *stripeSeries diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 33b54a756f..e498578c10 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -46,6 +46,7 @@ import ( "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/fileutil" "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/tsdb/record" "github.com/prometheus/prometheus/tsdb/tombstones" @@ -440,27 +441,41 @@ func BenchmarkLoadWLs(b *testing.B) { // BenchmarkLoadRealWLs will be skipped unless the BENCHMARK_LOAD_REAL_WLS_DIR environment variable is set. // BENCHMARK_LOAD_REAL_WLS_DIR should be the folder where `wal` and `chunks_head` are located. +// +// Using an absolute path for BENCHMARK_LOAD_REAL_WLS_DIR is recommended. +// +// Because WLs loading may alter BENCHMARK_LOAD_REAL_WLS_DIR which can affect benchmark results and to ensure consistency, +// a copy of BENCHMARK_LOAD_REAL_WLS_DIR is made for each iteration and deleted at the end. +// Make sure there is sufficient disk space for that. func BenchmarkLoadRealWLs(b *testing.B) { - dir := os.Getenv("BENCHMARK_LOAD_REAL_WLS_DIR") - if dir == "" { + srcDir := os.Getenv("BENCHMARK_LOAD_REAL_WLS_DIR") + if srcDir == "" { b.SkipNow() } - wal, err := wlog.New(nil, nil, filepath.Join(dir, "wal"), wlog.CompressionNone) - require.NoError(b, err) - b.Cleanup(func() { wal.Close() }) - - wbl, err := wlog.New(nil, nil, filepath.Join(dir, "wbl"), wlog.CompressionNone) - require.NoError(b, err) - b.Cleanup(func() { wbl.Close() }) - // Load the WAL. for i := 0; i < b.N; i++ { + b.StopTimer() + dir := b.TempDir() + require.NoError(b, fileutil.CopyDirs(srcDir, dir)) + + wal, err := wlog.New(nil, nil, filepath.Join(dir, "wal"), wlog.CompressionNone) + require.NoError(b, err) + b.Cleanup(func() { wal.Close() }) + + wbl, err := wlog.New(nil, nil, filepath.Join(dir, "wbl"), wlog.CompressionNone) + require.NoError(b, err) + b.Cleanup(func() { wbl.Close() }) + b.StartTimer() + opts := DefaultHeadOptions() opts.ChunkDirRoot = dir h, err := NewHead(nil, nil, wal, wbl, opts, nil) require.NoError(b, err) require.NoError(b, h.Init(0)) + + b.StopTimer() + require.NoError(b, os.RemoveAll(dir)) } } diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index 0afe84a875..ad03fa4766 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -39,7 +39,6 @@ import ( "github.com/prometheus/prometheus/tsdb/record" "github.com/prometheus/prometheus/tsdb/tombstones" "github.com/prometheus/prometheus/tsdb/wlog" - "github.com/prometheus/prometheus/util/zeropool" ) // histogramRecord combines both RefHistogramSample and RefFloatHistogramSample @@ -73,14 +72,6 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch decoded = make(chan interface{}, 10) decodeErr, seriesCreationErr error - - seriesPool zeropool.Pool[[]record.RefSeries] - samplesPool zeropool.Pool[[]record.RefSample] - tstonesPool zeropool.Pool[[]tombstones.Stone] - exemplarsPool zeropool.Pool[[]record.RefExemplar] - histogramsPool zeropool.Pool[[]record.RefHistogramSample] - floatHistogramsPool zeropool.Pool[[]record.RefFloatHistogramSample] - metadataPool zeropool.Pool[[]record.RefMetadata] ) defer func() { @@ -140,7 +131,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch rec := r.Record() switch dec.Type(rec) { case record.Series: - series := seriesPool.Get()[:0] + series := h.wlReplaySeriesPool.Get()[:0] series, err = dec.Series(rec, series) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -152,7 +143,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decoded <- series case record.Samples: - samples := samplesPool.Get()[:0] + samples := h.wlReplaySamplesPool.Get()[:0] samples, err = dec.Samples(rec, samples) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -164,7 +155,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decoded <- samples case record.Tombstones: - tstones := tstonesPool.Get()[:0] + tstones := h.wlReplaytStonesPool.Get()[:0] tstones, err = dec.Tombstones(rec, tstones) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -176,7 +167,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decoded <- tstones case record.Exemplars: - exemplars := exemplarsPool.Get()[:0] + exemplars := h.wlReplayExemplarsPool.Get()[:0] exemplars, err = dec.Exemplars(rec, exemplars) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -188,7 +179,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decoded <- exemplars case record.HistogramSamples, record.CustomBucketsHistogramSamples: - hists := histogramsPool.Get()[:0] + hists := h.wlReplayHistogramsPool.Get()[:0] hists, err = dec.HistogramSamples(rec, hists) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -200,7 +191,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decoded <- hists case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples: - hists := floatHistogramsPool.Get()[:0] + hists := h.wlReplayFloatHistogramsPool.Get()[:0] hists, err = dec.FloatHistogramSamples(rec, hists) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -212,7 +203,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decoded <- hists case record.Metadata: - meta := metadataPool.Get()[:0] + meta := h.wlReplayMetadataPool.Get()[:0] meta, err := dec.Metadata(rec, meta) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -251,7 +242,7 @@ Outer: idx := uint64(mSeries.ref) % uint64(concurrency) processors[idx].input <- walSubsetProcessorInputItem{walSeriesRef: walSeries.Ref, existingSeries: mSeries} } - seriesPool.Put(v) + h.wlReplaySeriesPool.Put(v) case []record.RefSample: samples := v minValidTime := h.minValidTime.Load() @@ -287,7 +278,7 @@ Outer: } samples = samples[m:] } - samplesPool.Put(v) + h.wlReplaySamplesPool.Put(v) case []tombstones.Stone: for _, s := range v { for _, itv := range s.Intervals { @@ -301,12 +292,12 @@ Outer: h.tombstones.AddInterval(s.Ref, itv) } } - tstonesPool.Put(v) + h.wlReplaytStonesPool.Put(v) case []record.RefExemplar: for _, e := range v { exemplarsInput <- e } - exemplarsPool.Put(v) + h.wlReplayExemplarsPool.Put(v) case []record.RefHistogramSample: samples := v minValidTime := h.minValidTime.Load() @@ -342,7 +333,7 @@ Outer: } samples = samples[m:] } - histogramsPool.Put(v) + h.wlReplayHistogramsPool.Put(v) case []record.RefFloatHistogramSample: samples := v minValidTime := h.minValidTime.Load() @@ -378,7 +369,7 @@ Outer: } samples = samples[m:] } - floatHistogramsPool.Put(v) + h.wlReplayFloatHistogramsPool.Put(v) case []record.RefMetadata: for _, m := range v { s := h.series.getByID(m.Ref) @@ -392,7 +383,7 @@ Outer: Help: m.Help, } } - metadataPool.Put(v) + h.wlReplayMetadataPool.Put(v) default: panic(fmt.Errorf("unexpected decoded type: %T", d)) } @@ -659,12 +650,8 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch shards = make([][]record.RefSample, concurrency) histogramShards = make([][]histogramRecord, concurrency) - decodedCh = make(chan interface{}, 10) - decodeErr error - samplesPool zeropool.Pool[[]record.RefSample] - markersPool zeropool.Pool[[]record.RefMmapMarker] - histogramSamplesPool zeropool.Pool[[]record.RefHistogramSample] - floatHistogramSamplesPool zeropool.Pool[[]record.RefFloatHistogramSample] + decodedCh = make(chan interface{}, 10) + decodeErr error ) defer func() { @@ -700,7 +687,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch rec := r.Record() switch dec.Type(rec) { case record.Samples: - samples := samplesPool.Get()[:0] + samples := h.wlReplaySamplesPool.Get()[:0] samples, err = dec.Samples(rec, samples) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -712,7 +699,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decodedCh <- samples case record.MmapMarkers: - markers := markersPool.Get()[:0] + markers := h.wlReplayMmapMarkersPool.Get()[:0] markers, err = dec.MmapMarkers(rec, markers) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -724,7 +711,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decodedCh <- markers case record.HistogramSamples, record.CustomBucketsHistogramSamples: - hists := histogramSamplesPool.Get()[:0] + hists := h.wlReplayHistogramsPool.Get()[:0] hists, err = dec.HistogramSamples(rec, hists) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -736,7 +723,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decodedCh <- hists case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples: - hists := floatHistogramSamplesPool.Get()[:0] + hists := h.wlReplayFloatHistogramsPool.Get()[:0] hists, err = dec.FloatHistogramSamples(rec, hists) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -787,7 +774,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } samples = samples[m:] } - samplesPool.Put(v) + h.wlReplaySamplesPool.Put(v) case []record.RefMmapMarker: markers := v for _, rm := range markers { @@ -842,7 +829,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } samples = samples[m:] } - histogramSamplesPool.Put(v) + h.wlReplayHistogramsPool.Put(v) case []record.RefFloatHistogramSample: samples := v // We split up the samples into chunks of 5000 samples or less. @@ -874,7 +861,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } samples = samples[m:] } - floatHistogramSamplesPool.Put(v) + h.wlReplayFloatHistogramsPool.Put(v) default: panic(fmt.Errorf("unexpected decodedCh type: %T", d)) } From acd74b02fbebc10ebb559e2bc716b5fabb46bbb8 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 11 Feb 2025 14:46:03 +1100 Subject: [PATCH 08/23] Fix typo Signed-off-by: Charles Korn --- docs/querying/operators.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/querying/operators.md b/docs/querying/operators.md index 02cea500a1..df65a4335f 100644 --- a/docs/querying/operators.md +++ b/docs/querying/operators.md @@ -334,7 +334,7 @@ the input samples, while `limit_ratio(-0.9, ...)` returns precisely the remaining approximately 90% of the input samples not returned by `limit_ratio(0.1, ...)`. -`group` and `count` do not do not interact with the sample values, +`group` and `count` do not interact with the sample values, they work in the same way for float samples and histogram samples. `count_values` outputs one time series per unique sample value. Each series has From 2cfd402b3a0c092a22fedd92cbf942dd0c51ea48 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 11 Feb 2025 14:55:39 +1100 Subject: [PATCH 09/23] Clarify sorting behaviour of `topk` and `bottomk`. Signed-off-by: Charles Korn --- docs/querying/operators.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/querying/operators.md b/docs/querying/operators.md index df65a4335f..7242049784 100644 --- a/docs/querying/operators.md +++ b/docs/querying/operators.md @@ -319,7 +319,14 @@ the input samples, including the original labels, are returned in the result vector. `by` and `without` are only used to bucket the input vector. Similar to `min` and `max`, they only operate on float samples, considering `NaN` values to be farthest from the top or bottom, respectively. Histogram samples in the -input vector are ignored, flagged by an info-level annotation. +input vector are ignored, flagged by an info-level annotation. + +If used in an instant query, `topk` and `bottomk` return series ordered by +value in descending or ascending order, respectively. If used with `by` or +`without`, then series within each bucket are sorted by value, and series in +the same bucket are returned consecutively, but there is no guarantee that +buckets of series will be returned in any particular order. No sorting applies +to range queries. `limitk` and `limit_ratio` also return a subset of the input samples, including the original labels in the result vector. The subset is selected in a From 8baad1a73e471bd3cf3175a1608199e27484f179 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Tue, 11 Feb 2025 19:09:21 +0100 Subject: [PATCH 10/23] Bump promci to latest to avoid deprecated actions/cache CI issue. (#16013) Current main is broken due to: ``` Error: This request has been automatically failed because it uses a deprecated version of `actions/cache: 0c45773b623bea8c8e75f6c82b208c3cf94ea4f9`. Please update your workflow to use v3/v4 of actions/cache to avoid interruptions. Learn more: https://github.blog/changelog/2024-12-05-notice-of-upcoming-releases-and-breaking-changes-for-github-actions/#actions-cache-v1-v2-and-actions-toolkit-cache-package-closing-down ``` This is odd given `actions/cache: 0c45773b623bea8c8e75f6c82b208c3cf94ea4f9` has a comment of v4.0.2 (so not v1 and v2), trying 4.1.0. https://github.com/prometheus/prometheus/actions/runs/13267052807/job/37037025130?pr=16011 Signed-off-by: bwplotka --- .github/workflows/ci.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 57c47720c6..35a2651a0a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: image: quay.io/prometheus/golang-builder:1.23-base steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 + - uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6 - uses: ./.github/promci/actions/setup_environment with: enable_npm: true @@ -30,7 +30,7 @@ jobs: image: quay.io/prometheus/golang-builder:1.23-base steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 + - uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... - run: GOARCH=386 go test ./... @@ -63,7 +63,7 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 + - uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6 - uses: ./.github/promci/actions/setup_environment with: enable_go: false @@ -122,7 +122,7 @@ jobs: thread: [ 0, 1, 2 ] steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 + - uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6 - uses: ./.github/promci/actions/build with: promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" @@ -147,7 +147,7 @@ jobs: # should also be updated. steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 + - uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6 - uses: ./.github/promci/actions/build with: parallelism: 12 @@ -209,7 +209,7 @@ jobs: if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 + - uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6 - uses: ./.github/promci/actions/publish_main with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -226,7 +226,7 @@ jobs: (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.')) steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 + - uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6 - uses: ./.github/promci/actions/publish_release with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -241,7 +241,7 @@ jobs: steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 + - uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6 - name: Install nodejs uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 with: From 6518941d73ad60be8fcf812df6ba0e975dd0450e Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 6 Feb 2025 14:54:03 +0100 Subject: [PATCH 11/23] promql: Expose problem with histogram counter resets in subquery Signed-off-by: beorn7 --- promql/promqltest/testdata/subquery.test | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/promql/promqltest/testdata/subquery.test b/promql/promqltest/testdata/subquery.test index 5a5e4e0092..377ee1e5ce 100644 --- a/promql/promqltest/testdata/subquery.test +++ b/promql/promqltest/testdata/subquery.test @@ -134,3 +134,19 @@ eval instant at 20s min_over_time(metric_total[15s:10s]) eval instant at 20m min_over_time(rate(metric_total[5m])[20m:1m]) {} 0.12119047619047618 +clear + +# This native histogram series has a counter reset at 5m. +# TODO(beorn7): Write this more nicely once https://github.com/prometheus/prometheus/issues/15984 is fixed. +load 1m + native_histogram {{sum:100 count:100}} {{sum:103 count:103}} {{sum:106 count:106}} {{sum:109 count:109}} {{sum:112 count:112}} {{sum:3 count:3 counter_reset_hint:reset}} {{sum:6 count:6}}+{{sum:3 count:3}}x5 + +# This sub-query has to detect the counter reset even if it does +# not include the exact sample where the counter reset has happened. +eval instant at 10m increase(native_histogram[10m:3m]) + {} {{count:25 sum:25}} +# This sub-query must not detect the counter reset multiple times +# even though the sample where the counter reset happened is returned +# by the sub-query multiple times. +eval instant at 10m increase(native_histogram[10m:15s]) + {} {{count:30.769230769230766 sum:30.769230769230766}} From 14bb63c467963b5923aa01c565ddaffb3058d3b2 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 6 Feb 2025 14:54:37 +0100 Subject: [PATCH 12/23] promql: Fix counter reset detection in subqueries with histograms Signed-off-by: beorn7 --- promql/engine.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/promql/engine.go b/promql/engine.go index 3e14a2529d..579fcd1c06 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -1548,6 +1548,28 @@ func (ev *evaluator) evalSubquery(ctx context.Context, subq *parser.SubqueryExpr VectorSelector: vs, } for _, s := range mat { + // Set any "NotCounterReset" and "CounterReset" hints in native + // histograms to "UnknownCounterReset" because we might + // otherwise miss a counter reset happening in samples not + // returned by the subquery, or we might over-detect counter + // resets if the sample with a counter reset is returned + // multiple times by a high-res subquery. This intentionally + // does not attempt to be clever (like detecting if we are + // really missing underlying samples or returning underlying + // samples multiple times) because subqueries on counters are + // inherently problematic WRT counter reset handling, so we + // cannot really solve the problem for good. We only want to + // avoid problems that happen due to the explicitly set counter + // reset hints and go back to the behavior we already know from + // float samples. + for i, hp := range s.Histograms { + switch hp.H.CounterResetHint { + case histogram.NotCounterReset, histogram.CounterReset: + h := *hp.H // Shallow copy is sufficient, we only change CounterResetHint. + h.CounterResetHint = histogram.UnknownCounterReset + s.Histograms[i].H = &h + } + } vs.Series = append(vs.Series, NewStorageSeries(s)) } return ms, mat.TotalSamples(), ws From a944fa1e7af03b7bff0733bbd2ce03558a69e612 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Wed, 12 Feb 2025 15:56:00 +0100 Subject: [PATCH 13/23] Make theme switcher a single three-state toggle button (#16000) This should help a bit with the header icon overflow on narrow screens and also overall make things look less cluttered. Signed-off-by: Julius Volz --- .../src/components/ThemeSelector.tsx | 73 +++++++------------ 1 file changed, 25 insertions(+), 48 deletions(-) diff --git a/web/ui/mantine-ui/src/components/ThemeSelector.tsx b/web/ui/mantine-ui/src/components/ThemeSelector.tsx index a7f5d1dcaa..964d491a04 100644 --- a/web/ui/mantine-ui/src/components/ThemeSelector.tsx +++ b/web/ui/mantine-ui/src/components/ThemeSelector.tsx @@ -1,14 +1,8 @@ +import { useMantineColorScheme, rem, ActionIcon } from "@mantine/core"; import { - useMantineColorScheme, - SegmentedControl, - rem, - MantineColorScheme, - Tooltip, -} from "@mantine/core"; -import { + IconBrightnessFilled, IconMoonFilled, IconSunFilled, - IconUserFilled, } from "@tabler/icons-react"; import { FC } from "react"; @@ -20,45 +14,28 @@ export const ThemeSelector: FC = () => { }; return ( - setColorScheme(v as MantineColorScheme)} - data={[ - { - value: "light", - label: ( - - - - ), - }, - { - value: "dark", - label: ( - - - - ), - }, - { - value: "auto", - label: ( - - - - ), - }, - ]} - /> + + setColorScheme( + colorScheme === "light" + ? "dark" + : colorScheme === "dark" + ? "auto" + : "light" + ) + } + > + {colorScheme === "light" ? ( + + ) : colorScheme === "dark" ? ( + + ) : ( + + )} + ); }; From a323c23332954a0355792493ae778f49c288d9a4 Mon Sep 17 00:00:00 2001 From: Gabriel Bernal Date: Wed, 12 Feb 2025 16:08:45 +0100 Subject: [PATCH 14/23] set ttlAutopurge to true as ttl is set for codemirror prometheus client cache (#16024) Signed-off-by: Gabriel Bernal --- web/ui/module/codemirror-promql/src/client/prometheus.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/ui/module/codemirror-promql/src/client/prometheus.ts b/web/ui/module/codemirror-promql/src/client/prometheus.ts index 72e34d4ec5..7fce7f1a17 100644 --- a/web/ui/module/codemirror-promql/src/client/prometheus.ts +++ b/web/ui/module/codemirror-promql/src/client/prometheus.ts @@ -294,7 +294,7 @@ class Cache { constructor(config?: CacheConfig) { const maxAge = { ttl: config && config.maxAge ? config.maxAge : 5 * 60 * 1000, - ttlAutopurge: false, + ttlAutopurge: true, }; this.completeAssociation = new LRUCache>>(maxAge); this.metricMetadata = {}; From 00b69efabb3cd3e213a2a34939f745d7156f7d47 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Wed, 12 Feb 2025 16:47:56 +0100 Subject: [PATCH 15/23] model/textparse: Change parser interface Metric(...) string to Labels(...) (#16012) * model/textparse: Change parser interface Metric(...) string to Labels(...) Simplified the interface given no one is using the return argument. Renamed for clarity too. Found and discussed https://github.com/prometheus/prometheus/pull/15731#discussion_r1950916842 Signed-off-by: bwplotka * Fixed comments; optimized not needed copy for om and text. Signed-off-by: bwplotka --------- Signed-off-by: bwplotka --- cmd/promtool/backfill.go | 4 ++-- model/textparse/benchmark_test.go | 2 +- model/textparse/interface.go | 5 ++--- model/textparse/interface_test.go | 2 +- model/textparse/nhcbparse.go | 12 +++++------- model/textparse/openmetricsparse.go | 10 +++------- model/textparse/promparse.go | 10 +++------- model/textparse/protobufparse.go | 6 ++---- scrape/scrape.go | 2 +- scrape/scrape_test.go | 2 +- web/federate_test.go | 2 +- 11 files changed, 22 insertions(+), 35 deletions(-) diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index 125c9a08e6..473ea036ec 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -149,7 +149,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn _, ts, v := p.Series() if ts == nil { l := labels.Labels{} - p.Metric(&l) + p.Labels(&l) return fmt.Errorf("expected timestamp for series %v, got none", l) } if *ts < t { @@ -163,7 +163,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn } l := labels.Labels{} - p.Metric(&l) + p.Labels(&l) lb.Reset(l) for name, value := range customLabels { diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go index 4c5d850930..ce7845ed03 100644 --- a/model/textparse/benchmark_test.go +++ b/model/textparse/benchmark_test.go @@ -203,7 +203,7 @@ func benchParse(b *testing.B, data []byte, parser string) { b.Fatal("not implemented entry", t) } - _ = p.Metric(&res) + p.Labels(&res) _ = p.CreatedTimestamp() for hasExemplar := p.Exemplar(&e); hasExemplar; hasExemplar = p.Exemplar(&e) { } diff --git a/model/textparse/interface.go b/model/textparse/interface.go index 2682855281..58cdf52a03 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -57,11 +57,10 @@ type Parser interface { // The returned byte slice becomes invalid after the next call to Next. Comment() []byte - // Metric writes the labels of the current sample into the passed labels. - // It returns the string from which the metric was parsed. + // Labels writes the labels of the current sample into the passed labels. // The values of the "le" labels of classic histograms and "quantile" labels // of summaries should follow the OpenMetrics formatting rules. - Metric(l *labels.Labels) string + Labels(l *labels.Labels) // Exemplar writes the exemplar of the current sample into the passed // exemplar. It can be called repeatedly to retrieve multiple exemplars diff --git a/model/textparse/interface_test.go b/model/textparse/interface_test.go index 72c8284f2d..504a4917d9 100644 --- a/model/textparse/interface_test.go +++ b/model/textparse/interface_test.go @@ -238,7 +238,7 @@ func testParse(t *testing.T, p Parser) (ret []parsedEntry) { got.m = string(m) } - p.Metric(&got.lset) + p.Labels(&got.lset) // Parser reuses int pointer. if ct := p.CreatedTimestamp(); ct != nil { got.ct = int64p(*ct) diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go index 83e381539f..d8c2317980 100644 --- a/model/textparse/nhcbparse.go +++ b/model/textparse/nhcbparse.go @@ -67,8 +67,7 @@ type NHCBParser struct { h *histogram.Histogram fh *histogram.FloatHistogram // For Metric. - lset labels.Labels - metricString string + lset labels.Labels // For Type. bName []byte typ model.MetricType @@ -141,13 +140,12 @@ func (p *NHCBParser) Comment() []byte { return p.parser.Comment() } -func (p *NHCBParser) Metric(l *labels.Labels) string { +func (p *NHCBParser) Labels(l *labels.Labels) { if p.state == stateEmitting { *l = p.lsetNHCB - return p.metricStringNHCB + return } *l = p.lset - return p.metricString } func (p *NHCBParser) Exemplar(ex *exemplar.Exemplar) bool { @@ -200,7 +198,7 @@ func (p *NHCBParser) Next() (Entry, error) { switch p.entry { case EntrySeries: p.bytes, p.ts, p.value = p.parser.Series() - p.metricString = p.parser.Metric(&p.lset) + p.parser.Labels(&p.lset) // Check the label set to see if we can continue or need to emit the NHCB. var isNHCB bool if p.compareLabels() { @@ -224,7 +222,7 @@ func (p *NHCBParser) Next() (Entry, error) { return p.entry, p.err case EntryHistogram: p.bytes, p.ts, p.h, p.fh = p.parser.Histogram() - p.metricString = p.parser.Metric(&p.lset) + p.parser.Labels(&p.lset) p.storeExponentialLabels() case EntryType: p.bName, p.typ = p.parser.Type() diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index f0dd51afee..2f4d5ab2c5 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -197,11 +197,9 @@ func (p *OpenMetricsParser) Comment() []byte { return p.text } -// Metric writes the labels of the current sample into the passed labels. -// It returns the string from which the metric was parsed. -func (p *OpenMetricsParser) Metric(l *labels.Labels) string { - // Copy the buffer to a string: this is only necessary for the return value. - s := string(p.series) +// Labels writes the labels of the current sample into the passed labels. +func (p *OpenMetricsParser) Labels(l *labels.Labels) { + s := yoloString(p.series) p.builder.Reset() metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start]) @@ -220,8 +218,6 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string { p.builder.Sort() *l = p.builder.Labels() - - return s } // Exemplar writes the exemplar of the current sample into the passed exemplar. diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index 17b0c3db8b..844ab2f15d 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -223,11 +223,9 @@ func (p *PromParser) Comment() []byte { return p.text } -// Metric writes the labels of the current sample into the passed labels. -// It returns the string from which the metric was parsed. -func (p *PromParser) Metric(l *labels.Labels) string { - // Copy the buffer to a string: this is only necessary for the return value. - s := string(p.series) +// Labels writes the labels of the current sample into the passed labels. +func (p *PromParser) Labels(l *labels.Labels) { + s := yoloString(p.series) p.builder.Reset() metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start]) @@ -246,8 +244,6 @@ func (p *PromParser) Metric(l *labels.Labels) string { p.builder.Sort() *l = p.builder.Labels() - - return s } // Exemplar implements the Parser interface. However, since the classic diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index a77e1d728f..6d074a2695 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -296,9 +296,9 @@ func (p *ProtobufParser) Comment() []byte { return nil } -// Metric writes the labels of the current sample into the passed labels. +// Labels writes the labels of the current sample into the passed labels. // It returns the string from which the metric was parsed. -func (p *ProtobufParser) Metric(l *labels.Labels) string { +func (p *ProtobufParser) Labels(l *labels.Labels) { p.builder.Reset() p.builder.Add(labels.MetricName, p.getMagicName()) @@ -312,8 +312,6 @@ func (p *ProtobufParser) Metric(l *labels.Labels) string { // Sort labels to maintain the sorted labels invariant. p.builder.Sort() *l = p.builder.Labels() - - return p.metricBytes.String() } // Exemplar writes the exemplar of the current sample into the passed diff --git a/scrape/scrape.go b/scrape/scrape.go index 2e95ee2282..58314af4eb 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -1714,7 +1714,7 @@ loop: lset = ce.lset hash = ce.hash } else { - p.Metric(&lset) + p.Labels(&lset) hash = lset.Hash() // Hash label set as it is seen local to the target. Then add target labels diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 2185d028fb..ddc9b2853f 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -1988,7 +1988,7 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { var lset labels.Labels p.Next() - p.Metric(&lset) + p.Labels(&lset) hash := lset.Hash() // Create a fake entry in the cache diff --git a/web/federate_test.go b/web/federate_test.go index 056a95d67f..717c0a2356 100644 --- a/web/federate_test.go +++ b/web/federate_test.go @@ -403,7 +403,7 @@ func TestFederationWithNativeHistograms(t *testing.T) { } require.NoError(t, err) if et == textparse.EntryHistogram || et == textparse.EntrySeries { - p.Metric(&l) + p.Labels(&l) } switch et { case textparse.EntryHelp: From 8fe24ced8285a10b22724255d3e2c65d93ce9ebf Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Wed, 12 Feb 2025 17:51:40 +0100 Subject: [PATCH 16/23] ci: Add GH action that clicks "enable auto-merge" on dependabot PRs (#15967) This will merge on green all non-conflicting dependabot PRs. Signed-off-by: bwplotka --- .github/workflows/automerge-dependabot.yml | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/automerge-dependabot.yml diff --git a/.github/workflows/automerge-dependabot.yml b/.github/workflows/automerge-dependabot.yml new file mode 100644 index 0000000000..3909f57329 --- /dev/null +++ b/.github/workflows/automerge-dependabot.yml @@ -0,0 +1,30 @@ +--- +name: Dependabot auto-merge +on: pull_request + +concurrency: + group: ${{ github.workflow }}-${{ (github.event.pull_request && github.event.pull_request.number) || github.ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + dependabot: + permissions: + contents: write + pull-requests: write + runs-on: ubuntu-latest + if: ${{ github.event.pull_request.user.login == 'dependabot[bot]' && github.repository_owner == 'prometheus' }} + steps: + - name: Dependabot metadata + id: metadata + uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7 # v2.3.0 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + - name: Enable auto-merge for Dependabot PRs + if: ${{steps.metadata.outputs.update-type == 'version-update:semver-minor' || steps.metadata.outputs.update-type == 'version-update:semver-patch'}} + run: gh pr merge --auto --merge "$PR_URL" + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} From 509b978f0d675b4c9b3ccf8c0fc06961b0f03e8f Mon Sep 17 00:00:00 2001 From: Aman <95525722+amanycodes@users.noreply.github.com> Date: Wed, 12 Feb 2025 22:44:05 +0530 Subject: [PATCH 17/23] single-button-theme-toggle-and-responsive-logo (#16021) Signed-off-by: amanycodes --- web/ui/mantine-ui/src/App.tsx | 6 ++++-- web/ui/mantine-ui/src/components/ThemeSelector.tsx | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/web/ui/mantine-ui/src/App.tsx b/web/ui/mantine-ui/src/App.tsx index 3bec30fa31..2599c9f5aa 100644 --- a/web/ui/mantine-ui/src/App.tsx +++ b/web/ui/mantine-ui/src/App.tsx @@ -332,14 +332,16 @@ function App() { justify="space-between" wrap="nowrap" > - + - Prometheus{agentMode && " Agent"} + Prometheus + Prometheus + {agentMode && "Agent"} diff --git a/web/ui/mantine-ui/src/components/ThemeSelector.tsx b/web/ui/mantine-ui/src/components/ThemeSelector.tsx index 964d491a04..131327a1f6 100644 --- a/web/ui/mantine-ui/src/components/ThemeSelector.tsx +++ b/web/ui/mantine-ui/src/components/ThemeSelector.tsx @@ -1,8 +1,8 @@ import { useMantineColorScheme, rem, ActionIcon } from "@mantine/core"; import { - IconBrightnessFilled, IconMoonFilled, IconSunFilled, + IconBrightnessFilled, } from "@tabler/icons-react"; import { FC } from "react"; @@ -30,11 +30,11 @@ export const ThemeSelector: FC = () => { } > {colorScheme === "light" ? ( - - ) : colorScheme === "dark" ? ( - - ) : ( + ) : colorScheme === "dark" ? ( + + ) : ( + )} ); From 898af6102d19213199a9015bc4d97d109b35616f Mon Sep 17 00:00:00 2001 From: Nicolas Peugnet Date: Thu, 13 Feb 2025 03:03:31 +0100 Subject: [PATCH 18/23] promtool: support creating tsdb blocks from a pipe (#16011) This is very useful when piping the input file to stdin and then using /dev/stdin as the input file. e.g. xzcat dump.xz | promtool tsdb create-blocks-from openmetrics /dev/stdin /tmp/data Signed-off-by: Nicolas Peugnet --- cmd/promtool/tsdb.go | 20 ++++++++-- cmd/promtool/tsdb_posix_test.go | 69 +++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 cmd/promtool/tsdb_posix_test.go diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index b291d29bf7..6a62e2e8bc 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -826,17 +826,31 @@ func checkErr(err error) int { } func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) int { - inputFile, err := fileutil.OpenMmapFile(path) + var buf []byte + info, err := os.Stat(path) if err != nil { return checkErr(err) } - defer inputFile.Close() + if info.Mode()&(os.ModeNamedPipe|os.ModeCharDevice) != 0 { + // Read the pipe chunks by chunks as it cannot be mmap-ed + buf, err = os.ReadFile(path) + if err != nil { + return checkErr(err) + } + } else { + inputFile, err := fileutil.OpenMmapFile(path) + if err != nil { + return checkErr(err) + } + defer inputFile.Close() + buf = inputFile.Bytes() + } if err := os.MkdirAll(outputDir, 0o777); err != nil { return checkErr(fmt.Errorf("create output dir: %w", err)) } - return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration, customLabels)) + return checkErr(backfill(5000, buf, outputDir, humanReadable, quiet, maxBlockDuration, customLabels)) } func displayHistogram(dataType string, datas []int, total int) { diff --git a/cmd/promtool/tsdb_posix_test.go b/cmd/promtool/tsdb_posix_test.go new file mode 100644 index 0000000000..8a83aead70 --- /dev/null +++ b/cmd/promtool/tsdb_posix_test.go @@ -0,0 +1,69 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !windows + +package main + +import ( + "bytes" + "io" + "math" + "os" + "path" + "syscall" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/tsdb" +) + +func TestTSDBDumpOpenMetricsRoundTripPipe(t *testing.T) { + initialMetrics, err := os.ReadFile("testdata/dump-openmetrics-roundtrip-test.prom") + require.NoError(t, err) + initialMetrics = normalizeNewLine(initialMetrics) + + pipeDir := t.TempDir() + dbDir := t.TempDir() + + // create pipe + pipe := path.Join(pipeDir, "pipe") + err = syscall.Mkfifo(pipe, 0o666) + require.NoError(t, err) + + go func() { + // open pipe to write + in, err := os.OpenFile(pipe, os.O_WRONLY, os.ModeNamedPipe) + require.NoError(t, err) + defer func() { require.NoError(t, in.Close()) }() + _, err = io.Copy(in, bytes.NewReader(initialMetrics)) + require.NoError(t, err) + }() + + // Import samples from OM format + code := backfillOpenMetrics(pipe, dbDir, false, false, 2*time.Hour, map[string]string{}) + require.Equal(t, 0, code) + db, err := tsdb.Open(dbDir, nil, nil, tsdb.DefaultOptions(), nil) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + // Dump the blocks into OM format + dumpedMetrics := getDumpedSamples(t, dbDir, "", math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics) + + // Should get back the initial metrics. + require.Equal(t, string(initialMetrics), dumpedMetrics) +} From 9b4c8f6be28823c604aab50febcd32013aa4212c Mon Sep 17 00:00:00 2001 From: frazou <4470714+Frazew@users.noreply.github.com> Date: Thu, 13 Feb 2025 10:48:33 +0100 Subject: [PATCH 19/23] rulefmt: support YAML aliases for Alert/Record/Expr (#14957) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * rulefmt: add tests with YAML aliases for Alert/Record/Expr Altough somewhat discouraged in favour of using proper configuration management tools to generate full YAML, it can still be useful in some situations to use YAML anchors/aliases in rules. The current implementation is however confusing: aliases will work everywhere except on the alert/record name and expr This first commit adds (failing) tests to illustrate the issue, the next one fixes it. The YAML test file is intentionally filled with anchors and aliases. Although this is probably not representative of a real-world use case (which would have less of them), it errs on the safer side. Signed-off-by: François HORTA * rulefmt: support YAML aliases for Alert/Record/Expr This fixes the use of YAML aliases in alert/recording rule names and expressions. A side effect of this change is that the RuleNode YAML type is no longer propagated deeper in the codebase, instead the generic Rule type can now be used. Signed-off-by: François HORTA * rulefmt: Add test for YAML merge combined with aliases Currently this does work, but adding a test for the related functionally here makes sense. Signed-off-by: David Leadbeater * rulefmt: Rebase to latest changes Signed-off-by: David Leadbeater --------- Signed-off-by: François HORTA Signed-off-by: David Leadbeater Co-authored-by: David Leadbeater --- cmd/promtool/main.go | 8 +-- model/rulefmt/rulefmt.go | 75 ++++++++++++++---------- model/rulefmt/rulefmt_test.go | 27 +++++++++ model/rulefmt/testdata/test_aliases.yaml | 57 ++++++++++++++++++ rules/manager.go | 8 +-- rules/manager_test.go | 8 +-- 6 files changed, 139 insertions(+), 44 deletions(-) create mode 100644 model/rulefmt/testdata/test_aliases.yaml diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 4275b7b572..00280500ed 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -992,11 +992,11 @@ func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType { return duplicates } -func ruleMetric(rule rulefmt.RuleNode) string { - if rule.Alert.Value != "" { - return rule.Alert.Value +func ruleMetric(rule rulefmt.Rule) string { + if rule.Alert != "" { + return rule.Alert } - return rule.Record.Value + return rule.Record } var checkMetricsUsage = strings.TrimSpace(` diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index 3fc3fa0437..7b99c5fa33 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -92,7 +92,7 @@ type RuleGroups struct { } type ruleGroups struct { - Groups []yaml.Node `yaml:"groups"` + Groups []ruleGroupNode `yaml:"groups"` } // Validate validates all rules in the rule groups. @@ -128,9 +128,9 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { set[g.Name] = struct{}{} for i, r := range g.Rules { - for _, node := range g.Rules[i].Validate() { - var ruleName yaml.Node - if r.Alert.Value != "" { + for _, node := range r.Validate(node.Groups[j].Rules[i]) { + var ruleName string + if r.Alert != "" { ruleName = r.Alert } else { ruleName = r.Record @@ -138,7 +138,7 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { errs = append(errs, &Error{ Group: g.Name, Rule: i + 1, - RuleName: ruleName.Value, + RuleName: ruleName, Err: node, }) } @@ -154,7 +154,18 @@ type RuleGroup struct { Interval model.Duration `yaml:"interval,omitempty"` QueryOffset *model.Duration `yaml:"query_offset,omitempty"` Limit int `yaml:"limit,omitempty"` - Rules []RuleNode `yaml:"rules"` + Rules []Rule `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` +} + +// ruleGroupNode adds yaml.v3 layer to support line and columns outputs for invalid rule groups. +type ruleGroupNode struct { + yaml.Node + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + QueryOffset *model.Duration `yaml:"query_offset,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []ruleNode `yaml:"rules"` Labels map[string]string `yaml:"labels,omitempty"` } @@ -169,8 +180,8 @@ type Rule struct { Annotations map[string]string `yaml:"annotations,omitempty"` } -// RuleNode adds yaml.v3 layer to support line and column outputs for invalid rules. -type RuleNode struct { +// ruleNode adds yaml.v3 layer to support line and column outputs for invalid rules. +type ruleNode struct { Record yaml.Node `yaml:"record,omitempty"` Alert yaml.Node `yaml:"alert,omitempty"` Expr yaml.Node `yaml:"expr"` @@ -181,64 +192,64 @@ type RuleNode struct { } // Validate the rule and return a list of encountered errors. -func (r *RuleNode) Validate() (nodes []WrappedError) { - if r.Record.Value != "" && r.Alert.Value != "" { +func (r *Rule) Validate(node ruleNode) (nodes []WrappedError) { + if r.Record != "" && r.Alert != "" { nodes = append(nodes, WrappedError{ err: errors.New("only one of 'record' and 'alert' must be set"), - node: &r.Record, - nodeAlt: &r.Alert, + node: &node.Record, + nodeAlt: &node.Alert, }) } - if r.Record.Value == "" && r.Alert.Value == "" { + if r.Record == "" && r.Alert == "" { nodes = append(nodes, WrappedError{ err: errors.New("one of 'record' or 'alert' must be set"), - node: &r.Record, - nodeAlt: &r.Alert, + node: &node.Record, + nodeAlt: &node.Alert, }) } - if r.Expr.Value == "" { + if r.Expr == "" { nodes = append(nodes, WrappedError{ err: errors.New("field 'expr' must be set in rule"), - node: &r.Expr, + node: &node.Expr, }) - } else if _, err := parser.ParseExpr(r.Expr.Value); err != nil { + } else if _, err := parser.ParseExpr(r.Expr); err != nil { nodes = append(nodes, WrappedError{ err: fmt.Errorf("could not parse expression: %w", err), - node: &r.Expr, + node: &node.Expr, }) } - if r.Record.Value != "" { + if r.Record != "" { if len(r.Annotations) > 0 { nodes = append(nodes, WrappedError{ err: errors.New("invalid field 'annotations' in recording rule"), - node: &r.Record, + node: &node.Record, }) } if r.For != 0 { nodes = append(nodes, WrappedError{ err: errors.New("invalid field 'for' in recording rule"), - node: &r.Record, + node: &node.Record, }) } if r.KeepFiringFor != 0 { nodes = append(nodes, WrappedError{ err: errors.New("invalid field 'keep_firing_for' in recording rule"), - node: &r.Record, + node: &node.Record, }) } - if !model.IsValidMetricName(model.LabelValue(r.Record.Value)) { + if !model.IsValidMetricName(model.LabelValue(r.Record)) { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("invalid recording rule name: %s", r.Record.Value), - node: &r.Record, + err: fmt.Errorf("invalid recording rule name: %s", r.Record), + node: &node.Record, }) } // While record is a valid UTF-8 it's common mistake to put PromQL expression in the record name. // Disallow "{}" chars. - if strings.Contains(r.Record.Value, "{") || strings.Contains(r.Record.Value, "}") { + if strings.Contains(r.Record, "{") || strings.Contains(r.Record, "}") { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("braces present in the recording rule name; should it be in expr?: %s", r.Record.Value), - node: &r.Record, + err: fmt.Errorf("braces present in the recording rule name; should it be in expr?: %s", r.Record), + node: &node.Record, }) } } @@ -274,8 +285,8 @@ func (r *RuleNode) Validate() (nodes []WrappedError) { // testTemplateParsing checks if the templates used in labels and annotations // of the alerting rules are parsed correctly. -func testTemplateParsing(rl *RuleNode) (errs []error) { - if rl.Alert.Value == "" { +func testTemplateParsing(rl *Rule) (errs []error) { + if rl.Alert == "" { // Not an alerting rule. return errs } @@ -292,7 +303,7 @@ func testTemplateParsing(rl *RuleNode) (errs []error) { tmpl := template.NewTemplateExpander( context.TODO(), strings.Join(append(defs, text), ""), - "__alert_"+rl.Alert.Value, + "__alert_"+rl.Alert, tmplData, model.Time(timestamp.FromTime(time.Now())), nil, diff --git a/model/rulefmt/rulefmt_test.go b/model/rulefmt/rulefmt_test.go index 9e191820c5..e8ac3077bd 100644 --- a/model/rulefmt/rulefmt_test.go +++ b/model/rulefmt/rulefmt_test.go @@ -33,6 +33,33 @@ func TestParseFileSuccess(t *testing.T) { require.Empty(t, errs, "unexpected errors parsing file") } +func TestParseFileSuccessWithAliases(t *testing.T) { + exprString := `sum without(instance) (rate(errors_total[5m])) +/ +sum without(instance) (rate(requests_total[5m])) +` + rgs, errs := ParseFile("testdata/test_aliases.yaml", false) + require.Empty(t, errs, "unexpected errors parsing file") + for _, rg := range rgs.Groups { + require.Equal(t, "HighAlert", rg.Rules[0].Alert) + require.Equal(t, "critical", rg.Rules[0].Labels["severity"]) + require.Equal(t, "stuff's happening with {{ $.labels.service }}", rg.Rules[0].Annotations["description"]) + + require.Equal(t, "new_metric", rg.Rules[1].Record) + + require.Equal(t, "HighAlert", rg.Rules[2].Alert) + require.Equal(t, "critical", rg.Rules[2].Labels["severity"]) + require.Equal(t, "stuff's happening with {{ $.labels.service }}", rg.Rules[0].Annotations["description"]) + + require.Equal(t, "HighAlert2", rg.Rules[3].Alert) + require.Equal(t, "critical", rg.Rules[3].Labels["severity"]) + + for _, rule := range rg.Rules { + require.Equal(t, exprString, rule.Expr) + } + } +} + func TestParseFileFailure(t *testing.T) { for _, c := range []struct { filename string diff --git a/model/rulefmt/testdata/test_aliases.yaml b/model/rulefmt/testdata/test_aliases.yaml new file mode 100644 index 0000000000..326259b0b6 --- /dev/null +++ b/model/rulefmt/testdata/test_aliases.yaml @@ -0,0 +1,57 @@ +groups: + - name: my-group-name + interval: 30s # defaults to global interval + rules: + - &highalert + alert: &alertname HighAlert + expr: &expr | + sum without(instance) (rate(errors_total[5m])) + / + sum without(instance) (rate(requests_total[5m])) + for: 5m + labels: + severity: &severity critical + annotations: + description: &description "stuff's happening with {{ $.labels.service }}" + + # Mix recording rules in the same list + - record: &recordname "new_metric" + expr: *expr + labels: + abc: edf + uvw: xyz + + - alert: *alertname + expr: *expr + for: 5m + labels: + severity: *severity + annotations: + description: *description + + - <<: *highalert + alert: HighAlert2 + + - name: my-another-name + interval: 30s # defaults to global interval + rules: + - alert: *alertname + expr: *expr + for: 5m + labels: + severity: *severity + annotations: + description: *description + + - record: *recordname + expr: *expr + + - alert: *alertname + expr: *expr + labels: + severity: *severity + annotations: + description: *description + + - <<: *highalert + alert: HighAlert2 diff --git a/rules/manager.go b/rules/manager.go index b1d3e8e3d6..6e96df2168 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -324,16 +324,16 @@ func (m *Manager) LoadGroups( rules := make([]Rule, 0, len(rg.Rules)) for _, r := range rg.Rules { - expr, err := m.opts.GroupLoader.Parse(r.Expr.Value) + expr, err := m.opts.GroupLoader.Parse(r.Expr) if err != nil { return nil, []error{fmt.Errorf("%s: %w", fn, err)} } mLabels := FromMaps(rg.Labels, r.Labels) - if r.Alert.Value != "" { + if r.Alert != "" { rules = append(rules, NewAlertingRule( - r.Alert.Value, + r.Alert, expr, time.Duration(r.For), time.Duration(r.KeepFiringFor), @@ -347,7 +347,7 @@ func (m *Manager) LoadGroups( continue } rules = append(rules, NewRecordingRule( - r.Record.Value, + r.Record, expr, mLabels, )) diff --git a/rules/manager_test.go b/rules/manager_test.go index 34b77dd40c..6c32b6d0f1 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -842,7 +842,7 @@ func TestUpdate(t *testing.T) { // Change group rules and reload. for i, g := range rgs.Groups { for j, r := range g.Rules { - rgs.Groups[i].Rules[j].Expr.SetString(fmt.Sprintf("%s * 0", r.Expr.Value)) + rgs.Groups[i].Rules[j].Expr = fmt.Sprintf("%s * 0", r.Expr) } } reloadAndValidate(rgs, t, tmpFile, ruleManager, ogs) @@ -869,9 +869,9 @@ func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { rtmp := []rulefmt.Rule{} for _, r := range g.Rules { rtmp = append(rtmp, rulefmt.Rule{ - Record: r.Record.Value, - Alert: r.Alert.Value, - Expr: r.Expr.Value, + Record: r.Record, + Alert: r.Alert, + Expr: r.Expr, For: r.For, Labels: r.Labels, Annotations: r.Annotations, From 733a5e9eb433f2380e4c583ee07ff8bd21f06a8f Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Thu, 13 Feb 2025 11:38:35 +0100 Subject: [PATCH 20/23] textparse: Optimized protobuf parser with custom streaming unmarshal. (#15731) * textparse: Optimized protobuf parser with custom streaming decoder. Signed-off-by: bwplotka Update model/textparse/protobufparse.go Co-authored-by: George Krajcsovits Signed-off-by: Bartlomiej Plotka Addressing comments. Signed-off-by: bwplotka decoder: reuse histograms and summaries. Signed-off-by: bwplotka optimize help returning (5% of mem utilization). Signed-off-by: bwplotka Apply suggestions from code review Co-authored-by: George Krajcsovits Signed-off-by: Bartlomiej Plotka Update prompb/io/prometheus/client/decoder.go Co-authored-by: George Krajcsovits Signed-off-by: Bartlomiej Plotka Fix build. Signed-off-by: bwplotka * Update model/textparse/protobufparse.go Signed-off-by: Bartlomiej Plotka --------- Signed-off-by: bwplotka Signed-off-by: Bartlomiej Plotka --- model/textparse/interface_test.go | 27 +- model/textparse/nhcbparse_test.go | 2 +- model/textparse/promparse.go | 4 + model/textparse/protobufparse.go | 386 +++++----- model/textparse/protobufparse_test.go | 12 +- prompb/io/prometheus/client/decoder.go | 780 ++++++++++++++++++++ prompb/io/prometheus/client/decoder_test.go | 171 +++++ scrape/scrape_test.go | 1 + 8 files changed, 1183 insertions(+), 200 deletions(-) create mode 100644 prompb/io/prometheus/client/decoder.go create mode 100644 prompb/io/prometheus/client/decoder_test.go diff --git a/model/textparse/interface_test.go b/model/textparse/interface_test.go index 504a4917d9..a5ca12859e 100644 --- a/model/textparse/interface_test.go +++ b/model/textparse/interface_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -210,6 +211,17 @@ func requireEntries(t *testing.T, exp, got []parsedEntry) { t.Helper() testutil.RequireEqualWithOptions(t, exp, got, []cmp.Option{ + // We reuse slices so we sometimes have empty vs nil differences + // we need to ignore with cmpopts.EquateEmpty(). + // However we have to filter out labels, as only + // one comparer per type has to be specified, + // and RequireEqualWithOptions uses + // cmp.Comparer(labels.Equal). + cmp.FilterValues(func(x, y any) bool { + _, xIsLabels := x.(labels.Labels) + _, yIsLabels := y.(labels.Labels) + return !xIsLabels && !yIsLabels + }, cmpopts.EquateEmpty()), cmp.AllowUnexported(parsedEntry{}), }) } @@ -230,15 +242,20 @@ func testParse(t *testing.T, p Parser) (ret []parsedEntry) { case EntryInvalid: t.Fatal("entry invalid not expected") case EntrySeries, EntryHistogram: + var ts *int64 if et == EntrySeries { - m, got.t, got.v = p.Series() - got.m = string(m) + m, ts, got.v = p.Series() } else { - m, got.t, got.shs, got.fhs = p.Histogram() - got.m = string(m) + m, ts, got.shs, got.fhs = p.Histogram() } - + if ts != nil { + // TODO(bwplotka): Change to 0 in the interface for set check to + // avoid pointer mangling. + got.t = int64p(*ts) + } + got.m = string(m) p.Labels(&got.lset) + // Parser reuses int pointer. if ct := p.CreatedTimestamp(); ct != nil { got.ct = int64p(*ct) diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go index 859bcc1cb7..f75c7f5677 100644 --- a/model/textparse/nhcbparse_test.go +++ b/model/textparse/nhcbparse_test.go @@ -931,7 +931,7 @@ func createTestPromHistogram() string { return `# HELP test_histogram1 Test histogram 1 # TYPE test_histogram1 histogram test_histogram1_count 175 1234568 -test_histogram1_sum 0.0008280461746287094 1234768 +test_histogram1_sum 0.0008280461746287094 1234568 test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234568 test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234568 test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234568 diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index 844ab2f15d..f3f2571b00 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -502,6 +502,10 @@ func yoloString(b []byte) string { return unsafe.String(unsafe.SliceData(b), len(b)) } +func yoloBytes(b string) []byte { + return unsafe.Slice(unsafe.StringData(b), len(b)) +} + func parseFloat(s string) (float64, error) { // Keep to pre-Go 1.13 float formats. if strings.ContainsAny(s, "pP_") { diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index 6d074a2695..380c9918cf 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -15,7 +15,6 @@ package textparse import ( "bytes" - "encoding/binary" "errors" "fmt" "io" @@ -25,7 +24,6 @@ import ( "sync" "unicode/utf8" - "github.com/gogo/protobuf/proto" "github.com/gogo/protobuf/types" "github.com/prometheus/common/model" @@ -45,24 +43,24 @@ var floatFormatBufPool = sync.Pool{ }, } -// ProtobufParser is a very inefficient way of unmarshaling the old Prometheus -// protobuf format and then present it as it if were parsed by a -// Prometheus-2-style text parser. This is only done so that we can easily plug -// in the protobuf format into Prometheus 2. For future use (with the final -// format that will be used for native histograms), we have to revisit the -// parsing. A lot of the efficiency tricks of the Prometheus-2-style parsing -// could be used in a similar fashion (byte-slice pointers into the raw -// payload), which requires some hand-coded protobuf handling. But the current -// parsers all expect the full series name (metric name plus label pairs) as one -// string, which is not how things are represented in the protobuf format. If -// the re-arrangement work is actually causing problems (which has to be seen), -// that expectation needs to be changed. +// ProtobufParser parses the old Prometheus protobuf format and present it +// as the text-style textparse.Parser interface. +// +// It uses a tailored streaming protobuf dto.MetricStreamingDecoder that +// reuses internal protobuf structs and allows direct unmarshalling to Prometheus +// types like labels. type ProtobufParser struct { - in []byte // The input to parse. - inPos int // Position within the input. - metricPos int // Position within Metric slice. + dec *dto.MetricStreamingDecoder + + // Used for both the string returned by Series and Histogram, as well as, + // metric family for Type, Unit and Help. + entryBytes *bytes.Buffer + + lset labels.Labels + builder labels.ScratchBuilder // Held here to reduce allocations when building Labels. + // fieldPos is the position within a Summary or (legacy) Histogram. -2 - // is the count. -1 is the sum. Otherwise it is the index within + // is the count. -1 is the sum. Otherwise, it is the index within // quantiles/buckets. fieldPos int fieldsDone bool // true if no more fields of a Summary or (legacy) Histogram to be processed. @@ -78,27 +76,20 @@ type ProtobufParser struct { // that we have to decode the next MetricFamily. state Entry - builder labels.ScratchBuilder // held here to reduce allocations when building Labels - - mf *dto.MetricFamily - // Whether to also parse a classic histogram that is also present as a // native histogram. parseClassicHistograms bool - - // The following are just shenanigans to satisfy the Parser interface. - metricBytes *bytes.Buffer // A somewhat fluid representation of the current metric. } // NewProtobufParser returns a parser for the payload in the byte slice. func NewProtobufParser(b []byte, parseClassicHistograms bool, st *labels.SymbolTable) Parser { return &ProtobufParser{ - in: b, + dec: dto.NewMetricStreamingDecoder(b), + entryBytes: &bytes.Buffer{}, + builder: labels.NewScratchBuilderWithSymbolTable(st, 16), // TODO(bwplotka): Try base builder. + state: EntryInvalid, - mf: &dto.MetricFamily{}, - metricBytes: &bytes.Buffer{}, parseClassicHistograms: parseClassicHistograms, - builder: labels.NewScratchBuilderWithSymbolTable(st, 16), } } @@ -106,19 +97,18 @@ func NewProtobufParser(b []byte, parseClassicHistograms bool, st *labels.SymbolT // value, the timestamp if set, and the value of the current sample. func (p *ProtobufParser) Series() ([]byte, *int64, float64) { var ( - m = p.mf.GetMetric()[p.metricPos] - ts = m.GetTimestampMs() + ts = &p.dec.TimestampMs // To save memory allocations, never nil. v float64 ) - switch p.mf.GetType() { + switch p.dec.GetType() { case dto.MetricType_COUNTER: - v = m.GetCounter().GetValue() + v = p.dec.GetCounter().GetValue() case dto.MetricType_GAUGE: - v = m.GetGauge().GetValue() + v = p.dec.GetGauge().GetValue() case dto.MetricType_UNTYPED: - v = m.GetUntyped().GetValue() + v = p.dec.GetUntyped().GetValue() case dto.MetricType_SUMMARY: - s := m.GetSummary() + s := p.dec.GetSummary() switch p.fieldPos { case -2: v = float64(s.GetSampleCount()) @@ -133,7 +123,7 @@ func (p *ProtobufParser) Series() ([]byte, *int64, float64) { } case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM: // This should only happen for a classic histogram. - h := m.GetHistogram() + h := p.dec.GetHistogram() switch p.fieldPos { case -2: v = h.GetSampleCountFloat() @@ -159,8 +149,8 @@ func (p *ProtobufParser) Series() ([]byte, *int64, float64) { default: panic("encountered unexpected metric type, this is a bug") } - if ts != 0 { - return p.metricBytes.Bytes(), &ts, v + if *ts != 0 { + return p.entryBytes.Bytes(), ts, v } // TODO(beorn7): We assume here that ts==0 means no timestamp. That's // not true in general, but proto3 originally has no distinction between @@ -171,7 +161,7 @@ func (p *ProtobufParser) Series() ([]byte, *int64, float64) { // away from gogo-protobuf to an actively maintained protobuf // implementation. Once that's done, we can simply use the `optional` // keyword and check for the unset state explicitly. - return p.metricBytes.Bytes(), nil, v + return p.entryBytes.Bytes(), nil, v } // Histogram returns the bytes of a series with a native histogram as a value, @@ -186,47 +176,56 @@ func (p *ProtobufParser) Series() ([]byte, *int64, float64) { // value. func (p *ProtobufParser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) { var ( - m = p.mf.GetMetric()[p.metricPos] - ts = m.GetTimestampMs() - h = m.GetHistogram() + ts = &p.dec.TimestampMs // To save memory allocations, never nil. + h = p.dec.GetHistogram() ) + if p.parseClassicHistograms && len(h.GetBucket()) > 0 { p.redoClassic = true } if h.GetSampleCountFloat() > 0 || h.GetZeroCountFloat() > 0 { // It is a float histogram. fh := histogram.FloatHistogram{ - Count: h.GetSampleCountFloat(), - Sum: h.GetSampleSum(), - ZeroThreshold: h.GetZeroThreshold(), - ZeroCount: h.GetZeroCountFloat(), - Schema: h.GetSchema(), + Count: h.GetSampleCountFloat(), + Sum: h.GetSampleSum(), + ZeroThreshold: h.GetZeroThreshold(), + ZeroCount: h.GetZeroCountFloat(), + Schema: h.GetSchema(), + + // Decoder reuses slices, so we need to copy. PositiveSpans: make([]histogram.Span, len(h.GetPositiveSpan())), - PositiveBuckets: h.GetPositiveCount(), + PositiveBuckets: make([]float64, len(h.GetPositiveCount())), NegativeSpans: make([]histogram.Span, len(h.GetNegativeSpan())), - NegativeBuckets: h.GetNegativeCount(), + NegativeBuckets: make([]float64, len(h.GetNegativeCount())), } for i, span := range h.GetPositiveSpan() { fh.PositiveSpans[i].Offset = span.GetOffset() fh.PositiveSpans[i].Length = span.GetLength() } + for i, cnt := range h.GetPositiveCount() { + fh.PositiveBuckets[i] = cnt + } for i, span := range h.GetNegativeSpan() { fh.NegativeSpans[i].Offset = span.GetOffset() fh.NegativeSpans[i].Length = span.GetLength() } - if p.mf.GetType() == dto.MetricType_GAUGE_HISTOGRAM { + for i, cnt := range h.GetNegativeCount() { + fh.NegativeBuckets[i] = cnt + } + if p.dec.GetType() == dto.MetricType_GAUGE_HISTOGRAM { fh.CounterResetHint = histogram.GaugeType } fh.Compact(0) - if ts != 0 { - return p.metricBytes.Bytes(), &ts, nil, &fh + if *ts != 0 { + return p.entryBytes.Bytes(), ts, nil, &fh } // Nasty hack: Assume that ts==0 means no timestamp. That's not true in // general, but proto3 has no distinction between unset and // default. Need to avoid in the final format. - return p.metricBytes.Bytes(), nil, nil, &fh + return p.entryBytes.Bytes(), nil, nil, &fh } + // TODO(bwplotka): Create sync.Pool for those structs. sh := histogram.Histogram{ Count: h.GetSampleCount(), Sum: h.GetSampleSum(), @@ -234,41 +233,47 @@ func (p *ProtobufParser) Histogram() ([]byte, *int64, *histogram.Histogram, *his ZeroCount: h.GetZeroCount(), Schema: h.GetSchema(), PositiveSpans: make([]histogram.Span, len(h.GetPositiveSpan())), - PositiveBuckets: h.GetPositiveDelta(), + PositiveBuckets: make([]int64, len(h.GetPositiveDelta())), NegativeSpans: make([]histogram.Span, len(h.GetNegativeSpan())), - NegativeBuckets: h.GetNegativeDelta(), + NegativeBuckets: make([]int64, len(h.GetNegativeDelta())), } for i, span := range h.GetPositiveSpan() { sh.PositiveSpans[i].Offset = span.GetOffset() sh.PositiveSpans[i].Length = span.GetLength() } + for i, cnt := range h.GetPositiveDelta() { + sh.PositiveBuckets[i] = cnt + } for i, span := range h.GetNegativeSpan() { sh.NegativeSpans[i].Offset = span.GetOffset() sh.NegativeSpans[i].Length = span.GetLength() } - if p.mf.GetType() == dto.MetricType_GAUGE_HISTOGRAM { + for i, cnt := range h.GetNegativeDelta() { + sh.NegativeBuckets[i] = cnt + } + if p.dec.GetType() == dto.MetricType_GAUGE_HISTOGRAM { sh.CounterResetHint = histogram.GaugeType } sh.Compact(0) - if ts != 0 { - return p.metricBytes.Bytes(), &ts, &sh, nil + if *ts != 0 { + return p.entryBytes.Bytes(), ts, &sh, nil } - return p.metricBytes.Bytes(), nil, &sh, nil + return p.entryBytes.Bytes(), nil, &sh, nil } // Help returns the metric name and help text in the current entry. // Must only be called after Next returned a help entry. // The returned byte slices become invalid after the next call to Next. func (p *ProtobufParser) Help() ([]byte, []byte) { - return p.metricBytes.Bytes(), []byte(p.mf.GetHelp()) + return p.entryBytes.Bytes(), yoloBytes(p.dec.GetHelp()) } // Type returns the metric name and type in the current entry. // Must only be called after Next returned a type entry. // The returned byte slices become invalid after the next call to Next. func (p *ProtobufParser) Type() ([]byte, model.MetricType) { - n := p.metricBytes.Bytes() - switch p.mf.GetType() { + n := p.entryBytes.Bytes() + switch p.dec.GetType() { case dto.MetricType_COUNTER: return n, model.MetricTypeCounter case dto.MetricType_GAUGE: @@ -287,7 +292,7 @@ func (p *ProtobufParser) Type() ([]byte, model.MetricType) { // Must only be called after Next returned a unit entry. // The returned byte slices become invalid after the next call to Next. func (p *ProtobufParser) Unit() ([]byte, []byte) { - return p.metricBytes.Bytes(), []byte(p.mf.GetUnit()) + return p.entryBytes.Bytes(), []byte(p.dec.GetUnit()) } // Comment always returns nil because comments aren't supported by the protobuf @@ -297,21 +302,8 @@ func (p *ProtobufParser) Comment() []byte { } // Labels writes the labels of the current sample into the passed labels. -// It returns the string from which the metric was parsed. func (p *ProtobufParser) Labels(l *labels.Labels) { - p.builder.Reset() - p.builder.Add(labels.MetricName, p.getMagicName()) - - for _, lp := range p.mf.GetMetric()[p.metricPos].GetLabel() { - p.builder.Add(lp.GetName(), lp.GetValue()) - } - if needed, name, value := p.getMagicLabel(); needed { - p.builder.Add(name, value) - } - - // Sort labels to maintain the sorted labels invariant. - p.builder.Sort() - *l = p.builder.Labels() + *l = p.lset.Copy() } // Exemplar writes the exemplar of the current sample into the passed @@ -324,15 +316,14 @@ func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool { // We only ever return one exemplar per (non-native-histogram) series. return false } - m := p.mf.GetMetric()[p.metricPos] var exProto *dto.Exemplar - switch p.mf.GetType() { + switch p.dec.GetType() { case dto.MetricType_COUNTER: - exProto = m.GetCounter().GetExemplar() + exProto = p.dec.GetCounter().GetExemplar() case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM: isClassic := p.state == EntrySeries - if !isClassic && len(m.GetHistogram().GetExemplars()) > 0 { - exs := m.GetHistogram().GetExemplars() + if !isClassic && len(p.dec.GetHistogram().GetExemplars()) > 0 { + exs := p.dec.GetHistogram().GetExemplars() for p.exemplarPos < len(exs) { exProto = exs[p.exemplarPos] p.exemplarPos++ @@ -344,7 +335,7 @@ func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool { return false } } else { - bb := m.GetHistogram().GetBucket() + bb := p.dec.GetHistogram().GetBucket() if p.fieldPos < 0 { if isClassic { return false // At _count or _sum. @@ -392,13 +383,13 @@ func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool { // invalid (as timestamp e.g. negative value) on counters, summaries or histograms. func (p *ProtobufParser) CreatedTimestamp() *int64 { var ct *types.Timestamp - switch p.mf.GetType() { + switch p.dec.GetType() { case dto.MetricType_COUNTER: - ct = p.mf.GetMetric()[p.metricPos].GetCounter().GetCreatedTimestamp() + ct = p.dec.GetCounter().GetCreatedTimestamp() case dto.MetricType_SUMMARY: - ct = p.mf.GetMetric()[p.metricPos].GetSummary().GetCreatedTimestamp() + ct = p.dec.GetSummary().GetCreatedTimestamp() case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM: - ct = p.mf.GetMetric()[p.metricPos].GetHistogram().GetCreatedTimestamp() + ct = p.dec.GetHistogram().GetCreatedTimestamp() default: } ctAsTime, err := types.TimestampFromProto(ct) @@ -416,31 +407,34 @@ func (p *ProtobufParser) CreatedTimestamp() *int64 { func (p *ProtobufParser) Next() (Entry, error) { p.exemplarReturned = false switch p.state { + // Invalid state occurs on: + // * First Next() call. + // * Recursive call that tells Next to move to the next metric family. case EntryInvalid: - p.metricPos = 0 p.exemplarPos = 0 p.fieldPos = -2 - n, err := readDelimited(p.in[p.inPos:], p.mf) - p.inPos += n - if err != nil { + + if err := p.dec.NextMetricFamily(); err != nil { return p.state, err } - - // Skip empty metric families. - if len(p.mf.GetMetric()) == 0 { - return p.Next() + if err := p.dec.NextMetric(); err != nil { + // Skip empty metric families. + if errors.Is(err, io.EOF) { + return p.Next() + } + return EntryInvalid, err } // We are at the beginning of a metric family. Put only the name - // into metricBytes and validate only name, help, and type for now. - name := p.mf.GetName() + // into entryBytes and validate only name, help, and type for now. + name := p.dec.GetName() if !model.IsValidMetricName(model.LabelValue(name)) { return EntryInvalid, fmt.Errorf("invalid metric name: %s", name) } - if help := p.mf.GetHelp(); !utf8.ValidString(help) { + if help := p.dec.GetHelp(); !utf8.ValidString(help) { return EntryInvalid, fmt.Errorf("invalid help for metric %q: %s", name, help) } - switch p.mf.GetType() { + switch p.dec.GetType() { case dto.MetricType_COUNTER, dto.MetricType_GAUGE, dto.MetricType_HISTOGRAM, @@ -449,11 +443,11 @@ func (p *ProtobufParser) Next() (Entry, error) { dto.MetricType_UNTYPED: // All good. default: - return EntryInvalid, fmt.Errorf("unknown metric type for metric %q: %s", name, p.mf.GetType()) + return EntryInvalid, fmt.Errorf("unknown metric type for metric %q: %s", name, p.dec.GetType()) } - unit := p.mf.GetUnit() + unit := p.dec.GetUnit() if len(unit) > 0 { - if p.mf.GetType() == dto.MetricType_COUNTER && strings.HasSuffix(name, "_total") { + if p.dec.GetType() == dto.MetricType_COUNTER && strings.HasSuffix(name, "_total") { if !strings.HasSuffix(name[:len(name)-6], unit) || len(name)-6 < len(unit)+1 || name[len(name)-6-len(unit)-1] != '_' { return EntryInvalid, fmt.Errorf("unit %q not a suffix of counter %q", unit, name) } @@ -461,12 +455,11 @@ func (p *ProtobufParser) Next() (Entry, error) { return EntryInvalid, fmt.Errorf("unit %q not a suffix of metric %q", unit, name) } } - p.metricBytes.Reset() - p.metricBytes.WriteString(name) - + p.entryBytes.Reset() + p.entryBytes.WriteString(name) p.state = EntryHelp case EntryHelp: - if p.mf.Unit != "" { + if p.dec.Unit != "" { p.state = EntryUnit } else { p.state = EntryType @@ -474,48 +467,78 @@ func (p *ProtobufParser) Next() (Entry, error) { case EntryUnit: p.state = EntryType case EntryType: - t := p.mf.GetType() + t := p.dec.GetType() if (t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM) && - isNativeHistogram(p.mf.GetMetric()[0].GetHistogram()) { + isNativeHistogram(p.dec.GetHistogram()) { p.state = EntryHistogram } else { p.state = EntrySeries } - if err := p.updateMetricBytes(); err != nil { + if err := p.onSeriesOrHistogramUpdate(); err != nil { return EntryInvalid, err } - case EntryHistogram, EntrySeries: - if p.redoClassic { - p.redoClassic = false - p.state = EntrySeries - p.fieldPos = -3 - p.fieldsDone = false - } - t := p.mf.GetType() - if p.state == EntrySeries && !p.fieldsDone && - (t == dto.MetricType_SUMMARY || - t == dto.MetricType_HISTOGRAM || - t == dto.MetricType_GAUGE_HISTOGRAM) { - p.fieldPos++ - } else { - p.metricPos++ + case EntrySeries: + // Potentially a second series in the metric family. + t := p.dec.GetType() + if t == dto.MetricType_SUMMARY || + t == dto.MetricType_HISTOGRAM || + t == dto.MetricType_GAUGE_HISTOGRAM { + // Non-trivial series (complex metrics, with magic suffixes). + + // Did we iterate over all the classic representations fields? + // NOTE: p.fieldsDone is updated on p.onSeriesOrHistogramUpdate. + if !p.fieldsDone { + // Still some fields to iterate over. + p.fieldPos++ + if err := p.onSeriesOrHistogramUpdate(); err != nil { + return EntryInvalid, err + } + return p.state, nil + } + + // Reset histogram fields. p.fieldPos = -2 p.fieldsDone = false p.exemplarPos = 0 + // If this is a metric family containing native - // histograms, we have to switch back to native - // histograms after parsing a classic histogram. - if p.state == EntrySeries && - (t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM) && - isNativeHistogram(p.mf.GetMetric()[0].GetHistogram()) { + // histograms, it means we are here thanks to redoClassic state. + // Return to native histograms for the consistent flow. + if (t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM) && + isNativeHistogram(p.dec.GetHistogram()) { p.state = EntryHistogram } } - if p.metricPos >= len(p.mf.GetMetric()) { - p.state = EntryInvalid - return p.Next() + // Is there another series? + if err := p.dec.NextMetric(); err != nil { + if errors.Is(err, io.EOF) { + p.state = EntryInvalid + return p.Next() + } + return EntryInvalid, err } - if err := p.updateMetricBytes(); err != nil { + if err := p.onSeriesOrHistogramUpdate(); err != nil { + return EntryInvalid, err + } + case EntryHistogram: + // Was Histogram() called and parseClassicHistograms is true? + if p.redoClassic { + p.redoClassic = false + p.fieldPos = -3 + p.fieldsDone = false + p.state = EntrySeries + return p.Next() // Switch to classic histogram. + } + + // Is there another series? + if err := p.dec.NextMetric(); err != nil { + if errors.Is(err, io.EOF) { + p.state = EntryInvalid + return p.Next() + } + return EntryInvalid, err + } + if err := p.onSeriesOrHistogramUpdate(); err != nil { return EntryInvalid, err } default: @@ -524,30 +547,39 @@ func (p *ProtobufParser) Next() (Entry, error) { return p.state, nil } -func (p *ProtobufParser) updateMetricBytes() error { - b := p.metricBytes - b.Reset() - b.WriteString(p.getMagicName()) - for _, lp := range p.mf.GetMetric()[p.metricPos].GetLabel() { - b.WriteByte(model.SeparatorByte) - n := lp.GetName() - if !model.LabelName(n).IsValid() { - return fmt.Errorf("invalid label name: %s", n) - } - b.WriteString(n) - b.WriteByte(model.SeparatorByte) - v := lp.GetValue() - if !utf8.ValidString(v) { - return fmt.Errorf("invalid label value: %s", v) - } - b.WriteString(v) +// onSeriesOrHistogramUpdate updates internal state before returning +// a series or histogram. It updates: +// * p.lset. +// * p.entryBytes. +// * p.fieldsDone depending on p.fieldPos. +func (p *ProtobufParser) onSeriesOrHistogramUpdate() error { + p.builder.Reset() + p.builder.Add(labels.MetricName, p.getMagicName()) + + if err := p.dec.Label(&p.builder); err != nil { + return err } - if needed, n, v := p.getMagicLabel(); needed { - b.WriteByte(model.SeparatorByte) - b.WriteString(n) - b.WriteByte(model.SeparatorByte) - b.WriteString(v) + + if needed, name, value := p.getMagicLabel(); needed { + p.builder.Add(name, value) } + + // Sort labels to maintain the sorted labels invariant. + p.builder.Sort() + p.builder.Overwrite(&p.lset) + + // entryBytes has to be unique for each series. + p.entryBytes.Reset() + p.lset.Range(func(l labels.Label) { + if l.Name == labels.MetricName { + p.entryBytes.WriteString(l.Value) + return + } + p.entryBytes.WriteByte(model.SeparatorByte) + p.entryBytes.WriteString(l.Name) + p.entryBytes.WriteByte(model.SeparatorByte) + p.entryBytes.WriteString(l.Value) + }) return nil } @@ -555,36 +587,37 @@ func (p *ProtobufParser) updateMetricBytes() error { // ("_count", "_sum", "_bucket") if needed according to the current parser // state. func (p *ProtobufParser) getMagicName() string { - t := p.mf.GetType() + t := p.dec.GetType() if p.state == EntryHistogram || (t != dto.MetricType_HISTOGRAM && t != dto.MetricType_GAUGE_HISTOGRAM && t != dto.MetricType_SUMMARY) { - return p.mf.GetName() + return p.dec.GetName() } if p.fieldPos == -2 { - return p.mf.GetName() + "_count" + return p.dec.GetName() + "_count" } if p.fieldPos == -1 { - return p.mf.GetName() + "_sum" + return p.dec.GetName() + "_sum" } if t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM { - return p.mf.GetName() + "_bucket" + return p.dec.GetName() + "_bucket" } - return p.mf.GetName() + return p.dec.GetName() } // getMagicLabel returns if a magic label ("quantile" or "le") is needed and, if // so, its name and value. It also sets p.fieldsDone if applicable. func (p *ProtobufParser) getMagicLabel() (bool, string, string) { + // Native histogram or _count and _sum series. if p.state == EntryHistogram || p.fieldPos < 0 { return false, "", "" } - switch p.mf.GetType() { + switch p.dec.GetType() { case dto.MetricType_SUMMARY: - qq := p.mf.GetMetric()[p.metricPos].GetSummary().GetQuantile() + qq := p.dec.GetSummary().GetQuantile() q := qq[p.fieldPos] p.fieldsDone = p.fieldPos == len(qq)-1 return true, model.QuantileLabel, formatOpenMetricsFloat(q.GetQuantile()) case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM: - bb := p.mf.GetMetric()[p.metricPos].GetHistogram().GetBucket() + bb := p.dec.GetHistogram().GetBucket() if p.fieldPos >= len(bb) { p.fieldsDone = true return true, model.BucketLabel, "+Inf" @@ -596,29 +629,6 @@ func (p *ProtobufParser) getMagicLabel() (bool, string, string) { return false, "", "" } -var errInvalidVarint = errors.New("protobufparse: invalid varint encountered") - -// readDelimited is essentially doing what the function of the same name in -// github.com/matttproud/golang_protobuf_extensions/pbutil is doing, but it is -// specific to a MetricFamily, utilizes the more efficient gogo-protobuf -// unmarshaling, and acts on a byte slice directly without any additional -// staging buffers. -func readDelimited(b []byte, mf *dto.MetricFamily) (n int, err error) { - if len(b) == 0 { - return 0, io.EOF - } - messageLength, varIntLength := proto.DecodeVarint(b) - if varIntLength == 0 || varIntLength > binary.MaxVarintLen32 { - return 0, errInvalidVarint - } - totalLength := varIntLength + int(messageLength) - if totalLength > len(b) { - return 0, fmt.Errorf("protobufparse: insufficient length of buffer, expected at least %d bytes, got %d bytes", totalLength, len(b)) - } - mf.Reset() - return totalLength, mf.Unmarshal(b[varIntLength:totalLength]) -} - // formatOpenMetricsFloat works like the usual Go string formatting of a float // but appends ".0" if the resulting number would otherwise contain neither a // "." nor an "e". diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index 065459a69a..381e6a9355 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -1246,7 +1246,7 @@ func TestProtobufParse(t *testing.T) { ), }, { - m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.5", + m: "rpc_durations_seconds\xffquantile\xff0.5\xffservice\xffexponential", v: 6.442786329648548e-07, lset: labels.FromStrings( "__name__", "rpc_durations_seconds", @@ -1255,7 +1255,7 @@ func TestProtobufParse(t *testing.T) { ), }, { - m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.9", + m: "rpc_durations_seconds\xffquantile\xff0.9\xffservice\xffexponential", v: 1.9435742936658396e-06, lset: labels.FromStrings( "__name__", "rpc_durations_seconds", @@ -1264,7 +1264,7 @@ func TestProtobufParse(t *testing.T) { ), }, { - m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.99", + m: "rpc_durations_seconds\xffquantile\xff0.99\xffservice\xffexponential", v: 4.0471608667037015e-06, lset: labels.FromStrings( "__name__", "rpc_durations_seconds", @@ -2199,7 +2199,7 @@ func TestProtobufParse(t *testing.T) { ), }, { - m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.5", + m: "rpc_durations_seconds\xffquantile\xff0.5\xffservice\xffexponential", v: 6.442786329648548e-07, lset: labels.FromStrings( "__name__", "rpc_durations_seconds", @@ -2208,7 +2208,7 @@ func TestProtobufParse(t *testing.T) { ), }, { - m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.9", + m: "rpc_durations_seconds\xffquantile\xff0.9\xffservice\xffexponential", v: 1.9435742936658396e-06, lset: labels.FromStrings( "__name__", "rpc_durations_seconds", @@ -2217,7 +2217,7 @@ func TestProtobufParse(t *testing.T) { ), }, { - m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.99", + m: "rpc_durations_seconds\xffquantile\xff0.99\xffservice\xffexponential", v: 4.0471608667037015e-06, lset: labels.FromStrings( "__name__", "rpc_durations_seconds", diff --git a/prompb/io/prometheus/client/decoder.go b/prompb/io/prometheus/client/decoder.go new file mode 100644 index 0000000000..b21f78cc9c --- /dev/null +++ b/prompb/io/prometheus/client/decoder.go @@ -0,0 +1,780 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package io_prometheus_client //nolint:revive + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + "unicode/utf8" + "unsafe" + + proto "github.com/gogo/protobuf/proto" + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/labels" +) + +type MetricStreamingDecoder struct { + in []byte + inPos int + + // TODO(bwplotka): Switch to generator/plugin that won't have those fields accessible e.g. OpaqueAPI + // We leverage the fact those two don't collide. + *MetricFamily // Without Metric, guarded by overridden GetMetric method. + *Metric // Without Label, guarded by overridden GetLabel method. + + mfData []byte + metrics []pos + metricIndex int + + mData []byte + labels []pos +} + +// NewMetricStreamingDecoder returns a Go iterator that unmarshals given protobuf bytes one +// metric family and metric at the time, allowing efficient streaming. +// +// Do not modify MetricStreamingDecoder between iterations as it's reused to save allocations. +// GetGauge, GetCounter, etc are also cached, which means GetGauge will work for counter +// if previously gauge was parsed. It's up to the caller to use Type to decide what +// method to use when checking the value. +// +// TODO(bwplotka): io.Reader approach is possible too, but textparse has access to whole scrape for now. +func NewMetricStreamingDecoder(data []byte) *MetricStreamingDecoder { + return &MetricStreamingDecoder{ + in: data, + MetricFamily: &MetricFamily{}, + Metric: &Metric{}, + metrics: make([]pos, 0, 100), + } +} + +var errInvalidVarint = errors.New("clientpb: invalid varint encountered") + +func (m *MetricStreamingDecoder) NextMetricFamily() error { + b := m.in[m.inPos:] + if len(b) == 0 { + return io.EOF + } + messageLength, varIntLength := proto.DecodeVarint(b) // TODO(bwplotka): Get rid of gogo. + if varIntLength == 0 || varIntLength > binary.MaxVarintLen32 { + return errInvalidVarint + } + totalLength := varIntLength + int(messageLength) + if totalLength > len(b) { + return fmt.Errorf("clientpb: insufficient length of buffer, expected at least %d bytes, got %d bytes", totalLength, len(b)) + } + m.resetMetricFamily() + m.mfData = b[varIntLength:totalLength] + + m.inPos += totalLength + return m.MetricFamily.unmarshalWithoutMetrics(m, m.mfData) +} + +// resetMetricFamily resets all the fields in m to equal the zero value, but re-using slice memory. +func (m *MetricStreamingDecoder) resetMetricFamily() { + m.metrics = m.metrics[:0] + m.metricIndex = 0 + m.MetricFamily.Reset() +} + +func (m *MetricStreamingDecoder) NextMetric() error { + if m.metricIndex >= len(m.metrics) { + return io.EOF + } + + m.resetMetric() + m.mData = m.mfData[m.metrics[m.metricIndex].start:m.metrics[m.metricIndex].end] + if err := m.Metric.unmarshalWithoutLabels(m, m.mData); err != nil { + return err + } + m.metricIndex++ + return nil +} + +// resetMetric resets all the fields in m to equal the zero value, but re-using slices memory. +func (m *MetricStreamingDecoder) resetMetric() { + m.labels = m.labels[:0] + m.TimestampMs = 0 + + // TODO(bwplotka): Autogenerate reset functions. + if m.Metric.Counter != nil { + m.Metric.Counter.Value = 0 + m.Metric.Counter.CreatedTimestamp = nil + m.Metric.Counter.Exemplar = nil + } + if m.Metric.Gauge != nil { + m.Metric.Gauge.Value = 0 + } + if m.Metric.Histogram != nil { + m.Metric.Histogram.SampleCount = 0 + m.Metric.Histogram.SampleCountFloat = 0 + m.Metric.Histogram.SampleSum = 0 + m.Metric.Histogram.Bucket = m.Metric.Histogram.Bucket[:0] + m.Metric.Histogram.CreatedTimestamp = nil + m.Metric.Histogram.Schema = 0 + m.Metric.Histogram.ZeroThreshold = 0 + m.Metric.Histogram.ZeroCount = 0 + m.Metric.Histogram.ZeroCountFloat = 0 + m.Metric.Histogram.NegativeSpan = m.Metric.Histogram.NegativeSpan[:0] + m.Metric.Histogram.NegativeDelta = m.Metric.Histogram.NegativeDelta[:0] + m.Metric.Histogram.NegativeCount = m.Metric.Histogram.NegativeCount[:0] + m.Metric.Histogram.PositiveSpan = m.Metric.Histogram.PositiveSpan[:0] + m.Metric.Histogram.PositiveDelta = m.Metric.Histogram.PositiveDelta[:0] + m.Metric.Histogram.PositiveCount = m.Metric.Histogram.PositiveCount[:0] + m.Metric.Histogram.Exemplars = m.Metric.Histogram.Exemplars[:0] + } + if m.Metric.Summary != nil { + m.Metric.Summary.SampleCount = 0 + m.Metric.Summary.SampleSum = 0 + m.Metric.Summary.Quantile = m.Metric.Summary.Quantile[:0] + m.Metric.Summary.CreatedTimestamp = nil + } +} + +func (m *MetricStreamingDecoder) GetMetric() { + panic("don't use GetMetric, use Metric directly") +} + +func (m *MetricStreamingDecoder) GetLabel() { + panic("don't use GetLabel, use Label instead") +} + +// Label parses labels into labels scratch builder. Metric name is missing +// given the protobuf metric model and has to be deduced from the metric family name. +// TODO: The method name intentionally hide MetricStreamingDecoder.Metric.Label +// field to avoid direct use (it's not parsed). In future generator will generate +// structs tailored for streaming decoding. +func (m *MetricStreamingDecoder) Label(b *labels.ScratchBuilder) error { + for _, l := range m.labels { + if err := parseLabel(m.mData[l.start:l.end], b); err != nil { + return err + } + } + return nil +} + +// parseLabels is essentially LabelPair.Unmarshal but directly adding into scratch builder +// and reusing strings. +func parseLabel(dAtA []byte, b *labels.ScratchBuilder) error { + var name, value string + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return errors.New("proto: LabelPair: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: LabelPair: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + name = yoloString(dAtA[iNdEx:postIndex]) + if !model.LabelName(name).IsValid() { + return fmt.Errorf("invalid label name: %s", name) + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Value", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + value = yoloString(dAtA[iNdEx:postIndex]) + if !utf8.ValidString(value) { + return fmt.Errorf("invalid label value: %s", value) + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + if iNdEx > l { + return io.ErrUnexpectedEOF + } + b.Add(name, value) + return nil +} + +func yoloString(b []byte) string { + return unsafe.String(unsafe.SliceData(b), len(b)) +} + +type pos struct { + start, end int +} + +func (m *Metric) unmarshalWithoutLabels(p *MetricStreamingDecoder, dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return errors.New("proto: Metric: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Metric: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Label", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + p.labels = append(p.labels, pos{start: iNdEx, end: postIndex}) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Gauge", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Gauge == nil { + m.Gauge = &Gauge{} + } + if err := m.Gauge.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Counter", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Counter == nil { + m.Counter = &Counter{} + } + if err := m.Counter.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Summary", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Summary == nil { + m.Summary = &Summary{} + } + if err := m.Summary.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Untyped", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Untyped == nil { + m.Untyped = &Untyped{} + } + if err := m.Untyped.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 6: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field TimestampMs", wireType) + } + m.TimestampMs = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.TimestampMs |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 7: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Histogram", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Histogram == nil { + m.Histogram = &Histogram{} + } + if err := m.Histogram.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} + +func (m *MetricFamily) unmarshalWithoutMetrics(buf *MetricStreamingDecoder, dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return errors.New("proto: MetricFamily: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: MetricFamily: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Name = yoloString(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Help", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Help = yoloString(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) + } + m.Type = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Type |= MetricType(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Metric", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + buf.metrics = append(buf.metrics, pos{start: iNdEx, end: postIndex}) + iNdEx = postIndex + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Unit", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Unit = yoloString(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/prompb/io/prometheus/client/decoder_test.go b/prompb/io/prometheus/client/decoder_test.go new file mode 100644 index 0000000000..8697b78fca --- /dev/null +++ b/prompb/io/prometheus/client/decoder_test.go @@ -0,0 +1,171 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package io_prometheus_client //nolint:revive + +import ( + "bytes" + "encoding/binary" + "errors" + "io" + "testing" + + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/model/labels" +) + +const ( + testGauge = `name: "go_build_info" +help: "Build information about the main Go module." +type: GAUGE +metric: < + label: < + name: "checksum" + value: "" + > + label: < + name: "path" + value: "github.com/prometheus/client_golang" + > + label: < + name: "version" + value: "(devel)" + > + gauge: < + value: 1 + > +> +metric: < + label: < + name: "checksum" + value: "" + > + label: < + name: "path" + value: "github.com/prometheus/prometheus" + > + label: < + name: "version" + value: "v3.0.0" + > + gauge: < + value: 2 + > +> + +` + testCounter = `name: "go_memstats_alloc_bytes_total" +help: "Total number of bytes allocated, even if freed." +type: COUNTER +unit: "bytes" +metric: < + counter: < + value: 1.546544e+06 + exemplar: < + label: < + name: "dummyID" + value: "42" + > + value: 12 + timestamp: < + seconds: 1625851151 + nanos: 233181499 + > + > + > +> + +` +) + +func TestMetricStreamingDecoder(t *testing.T) { + varintBuf := make([]byte, binary.MaxVarintLen32) + buf := bytes.Buffer{} + for _, m := range []string{testGauge, testCounter} { + mf := &MetricFamily{} + require.NoError(t, proto.UnmarshalText(m, mf)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(mf) + require.NoError(t, err) + + // Write first length, then binary protobuf. + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + } + + d := NewMetricStreamingDecoder(buf.Bytes()) + require.NoError(t, d.NextMetricFamily()) + nextFn := func() error { + for { + err := d.NextMetric() + if errors.Is(err, io.EOF) { + if err := d.NextMetricFamily(); err != nil { + return err + } + continue + } + return err + } + } + + var firstMetricLset labels.Labels + { + require.NoError(t, nextFn()) + + require.Equal(t, "go_build_info", d.GetName()) + require.Equal(t, "Build information about the main Go module.", d.GetHelp()) + require.Equal(t, MetricType_GAUGE, d.GetType()) + + require.Equal(t, float64(1), d.GetGauge().GetValue()) + b := labels.NewScratchBuilder(0) + require.NoError(t, d.Label(&b)) + + firstMetricLset = b.Labels() + + require.Equal(t, `{checksum="", path="github.com/prometheus/client_golang", version="(devel)"}`, firstMetricLset.String()) + } + + { + require.NoError(t, nextFn()) + + require.Equal(t, "go_build_info", d.GetName()) + require.Equal(t, "Build information about the main Go module.", d.GetHelp()) + require.Equal(t, MetricType_GAUGE, d.GetType()) + + require.Equal(t, float64(2), d.GetGauge().GetValue()) + b := labels.NewScratchBuilder(0) + require.NoError(t, d.Label(&b)) + require.Equal(t, `{checksum="", path="github.com/prometheus/prometheus", version="v3.0.0"}`, b.Labels().String()) + } + { + // Different mf now. + require.NoError(t, nextFn()) + + require.Equal(t, "go_memstats_alloc_bytes_total", d.GetName()) + require.Equal(t, "Total number of bytes allocated, even if freed.", d.GetHelp()) + require.Equal(t, "bytes", d.GetUnit()) + require.Equal(t, MetricType_COUNTER, d.GetType()) + + require.Equal(t, 1.546544e+06, d.Metric.GetCounter().GetValue()) + b := labels.NewScratchBuilder(0) + require.NoError(t, d.Label(&b)) + require.Equal(t, `{}`, b.Labels().String()) + } + require.Equal(t, io.EOF, nextFn()) + + // Expect labels and metricBytes to be static and reusable even after parsing. + require.Equal(t, `{checksum="", path="github.com/prometheus/client_golang", version="(devel)"}`, firstMetricLset.String()) +} diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index ddc9b2853f..b30dbfa1b9 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -1895,6 +1895,7 @@ func TestScrapeLoopAppend(t *testing.T) { } func requireEqual(t *testing.T, expected, actual interface{}, msgAndArgs ...interface{}) { + t.Helper() testutil.RequireEqualWithOptions(t, expected, actual, []cmp.Option{cmp.Comparer(equalFloatSamples), cmp.AllowUnexported(histogramSample{})}, msgAndArgs...) From de23a9667cc670b26d8afdba8e3fa48a00e74ec0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Thu, 13 Feb 2025 13:16:33 +0100 Subject: [PATCH 21/23] prw2: Split PRW2.0 from metadata-wal-records feature (#16030) Rationales: * metadata-wal-records might be deprecated and replaced going forward: https://github.com/prometheus/prometheus/issues/15911 * PRW 2.0 works without metadata just fine (although it sends untyped metrics as expected). Signed-off-by: bwplotka --- cmd/prometheus/main.go | 4 ++-- docs/feature_flags.md | 3 +-- storage/remote/queue_manager_test.go | 6 +++--- storage/remote/read_test.go | 2 +- storage/remote/storage.go | 4 ++-- storage/remote/storage_test.go | 10 +++++----- storage/remote/write.go | 8 +------- storage/remote/write_test.go | 14 +++++++------- tsdb/agent/db_test.go | 6 +++--- web/api/v1/api_test.go | 2 +- 10 files changed, 26 insertions(+), 33 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 3c2b5ee0c1..d69648d88b 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -231,7 +231,7 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error { logger.Info("Experimental additional scrape metrics enabled") case "metadata-wal-records": c.scrape.AppendMetadata = true - logger.Info("Experimental metadata records in WAL enabled, required for remote write 2.0") + logger.Info("Experimental metadata records in WAL enabled") case "promql-per-step-stats": c.enablePerStepStats = true logger.Info("Experimental per-step statistics reporting") @@ -699,7 +699,7 @@ func main() { var ( localStorage = &readyStorage{stats: tsdb.NewDBStats()} scraper = &readyScrapeManager{} - remoteStorage = remote.NewStorage(logger.With("component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata) + remoteStorage = remote.NewStorage(logger.With("component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper) fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage) ) diff --git a/docs/feature_flags.md b/docs/feature_flags.md index 2a5a9263f5..6973d6d73b 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -114,8 +114,7 @@ Fall back to serving the old (Prometheus 2.x) web UI instead of the new UI. The When enabled, Prometheus will store metadata in-memory and keep track of metadata changes as WAL records on a per-series basis. -This must be used if -you are also using remote write 2.0 as it will only gather metadata from the WAL. +This must be used if you would like to send metadata using the new remote write 2.0. ## Delay compaction start time diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index 38eda81d97..27d5aa47b6 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -135,7 +135,7 @@ func TestBasicContentNegotiation(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { dir := t.TempDir() - s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, true) + s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil) defer s.Close() var ( @@ -243,7 +243,7 @@ func TestSampleDelivery(t *testing.T) { } { t.Run(fmt.Sprintf("%s-%s", tc.protoMsg, tc.name), func(t *testing.T) { dir := t.TempDir() - s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, true) + s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil) defer s.Close() var ( @@ -362,7 +362,7 @@ func TestMetadataDelivery(t *testing.T) { func TestWALMetadataDelivery(t *testing.T) { dir := t.TempDir() - s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, true) + s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil) defer s.Close() cfg := config.DefaultQueueConfig diff --git a/storage/remote/read_test.go b/storage/remote/read_test.go index b78a8c6215..8073f23b3b 100644 --- a/storage/remote/read_test.go +++ b/storage/remote/read_test.go @@ -93,7 +93,7 @@ func TestNoDuplicateReadConfigs(t *testing.T) { for _, tc := range cases { t.Run("", func(t *testing.T) { - s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, false) + s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil) conf := &config.Config{ GlobalConfig: config.DefaultGlobalConfig, RemoteReadConfigs: tc.cfgs, diff --git a/storage/remote/storage.go b/storage/remote/storage.go index 14c3c87d93..ba6d100bdf 100644 --- a/storage/remote/storage.go +++ b/storage/remote/storage.go @@ -64,7 +64,7 @@ type Storage struct { } // NewStorage returns a remote.Storage. -func NewStorage(l *slog.Logger, reg prometheus.Registerer, stCallback startTimeCallback, walDir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWAL bool) *Storage { +func NewStorage(l *slog.Logger, reg prometheus.Registerer, stCallback startTimeCallback, walDir string, flushDeadline time.Duration, sm ReadyScrapeManager) *Storage { if l == nil { l = promslog.NewNopLogger() } @@ -76,7 +76,7 @@ func NewStorage(l *slog.Logger, reg prometheus.Registerer, stCallback startTimeC deduper: deduper, localStartTimeCallback: stCallback, } - s.rws = NewWriteStorage(s.logger, reg, walDir, flushDeadline, sm, metadataInWAL) + s.rws = NewWriteStorage(s.logger, reg, walDir, flushDeadline, sm) return s } diff --git a/storage/remote/storage_test.go b/storage/remote/storage_test.go index 8c97d870e9..a62cd2da39 100644 --- a/storage/remote/storage_test.go +++ b/storage/remote/storage_test.go @@ -29,7 +29,7 @@ import ( func TestStorageLifecycle(t *testing.T) { dir := t.TempDir() - s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, false) + s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil) conf := &config.Config{ GlobalConfig: config.DefaultGlobalConfig, RemoteWriteConfigs: []*config.RemoteWriteConfig{ @@ -56,7 +56,7 @@ func TestStorageLifecycle(t *testing.T) { func TestUpdateRemoteReadConfigs(t *testing.T) { dir := t.TempDir() - s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, false) + s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil) conf := &config.Config{ GlobalConfig: config.GlobalConfig{}, @@ -77,7 +77,7 @@ func TestUpdateRemoteReadConfigs(t *testing.T) { func TestFilterExternalLabels(t *testing.T) { dir := t.TempDir() - s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, false) + s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil) conf := &config.Config{ GlobalConfig: config.GlobalConfig{ @@ -102,7 +102,7 @@ func TestFilterExternalLabels(t *testing.T) { func TestIgnoreExternalLabels(t *testing.T) { dir := t.TempDir() - s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, false) + s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil) conf := &config.Config{ GlobalConfig: config.GlobalConfig{ @@ -154,7 +154,7 @@ func baseRemoteReadConfig(host string) *config.RemoteReadConfig { // ApplyConfig runs concurrently with Notify // See https://github.com/prometheus/prometheus/issues/12747 func TestWriteStorageApplyConfigsDuringCommit(t *testing.T) { - s := NewStorage(nil, nil, nil, t.TempDir(), defaultFlushDeadline, nil, false) + s := NewStorage(nil, nil, nil, t.TempDir(), defaultFlushDeadline, nil) var wg sync.WaitGroup wg.Add(2000) diff --git a/storage/remote/write.go b/storage/remote/write.go index 0363095444..51daeedb72 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -15,7 +15,6 @@ package remote import ( "context" - "errors" "fmt" "log/slog" "math" @@ -67,7 +66,6 @@ type WriteStorage struct { externalLabels labels.Labels dir string queues map[string]*QueueManager - metadataInWAL bool samplesIn *ewmaRate flushDeadline time.Duration interner *pool @@ -79,7 +77,7 @@ type WriteStorage struct { } // NewWriteStorage creates and runs a WriteStorage. -func NewWriteStorage(logger *slog.Logger, reg prometheus.Registerer, dir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWal bool) *WriteStorage { +func NewWriteStorage(logger *slog.Logger, reg prometheus.Registerer, dir string, flushDeadline time.Duration, sm ReadyScrapeManager) *WriteStorage { if logger == nil { logger = promslog.NewNopLogger() } @@ -95,7 +93,6 @@ func NewWriteStorage(logger *slog.Logger, reg prometheus.Registerer, dir string, interner: newPool(), scraper: sm, quit: make(chan struct{}), - metadataInWAL: metadataInWal, highestTimestamp: &maxTimestamp{ Gauge: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, @@ -149,9 +146,6 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error { newQueues := make(map[string]*QueueManager) newHashes := []string{} for _, rwConf := range conf.RemoteWriteConfigs { - if rwConf.ProtobufMessage == config.RemoteWriteProtoMsgV2 && !rws.metadataInWAL { - return errors.New("invalid remote write configuration, if you are using remote write version 2.0 the `--enable-feature=metadata-wal-records` feature flag must be enabled") - } hash, err := toHash(rwConf) if err != nil { return err diff --git a/storage/remote/write_test.go b/storage/remote/write_test.go index 8125da7f6e..a3b30b6425 100644 --- a/storage/remote/write_test.go +++ b/storage/remote/write_test.go @@ -117,7 +117,7 @@ func TestWriteStorageApplyConfig_NoDuplicateWriteConfigs(t *testing.T) { }, } { t.Run("", func(t *testing.T) { - s := NewWriteStorage(nil, nil, dir, time.Millisecond, nil, false) + s := NewWriteStorage(nil, nil, dir, time.Millisecond, nil) conf := &config.Config{ GlobalConfig: config.DefaultGlobalConfig, RemoteWriteConfigs: tc.cfgs, @@ -143,7 +143,7 @@ func TestWriteStorageApplyConfig_RestartOnNameChange(t *testing.T) { hash, err := toHash(cfg) require.NoError(t, err) - s := NewWriteStorage(nil, nil, dir, time.Millisecond, nil, false) + s := NewWriteStorage(nil, nil, dir, time.Millisecond, nil) conf := &config.Config{ GlobalConfig: config.DefaultGlobalConfig, @@ -165,7 +165,7 @@ func TestWriteStorageApplyConfig_RestartOnNameChange(t *testing.T) { func TestWriteStorageApplyConfig_UpdateWithRegisterer(t *testing.T) { dir := t.TempDir() - s := NewWriteStorage(nil, prometheus.NewRegistry(), dir, time.Millisecond, nil, false) + s := NewWriteStorage(nil, prometheus.NewRegistry(), dir, time.Millisecond, nil) c1 := &config.RemoteWriteConfig{ Name: "named", URL: &common_config.URL{ @@ -206,7 +206,7 @@ func TestWriteStorageApplyConfig_UpdateWithRegisterer(t *testing.T) { func TestWriteStorageApplyConfig_Lifecycle(t *testing.T) { dir := t.TempDir() - s := NewWriteStorage(nil, nil, dir, defaultFlushDeadline, nil, false) + s := NewWriteStorage(nil, nil, dir, defaultFlushDeadline, nil) conf := &config.Config{ GlobalConfig: config.DefaultGlobalConfig, RemoteWriteConfigs: []*config.RemoteWriteConfig{ @@ -222,7 +222,7 @@ func TestWriteStorageApplyConfig_Lifecycle(t *testing.T) { func TestWriteStorageApplyConfig_UpdateExternalLabels(t *testing.T) { dir := t.TempDir() - s := NewWriteStorage(nil, prometheus.NewRegistry(), dir, time.Second, nil, false) + s := NewWriteStorage(nil, prometheus.NewRegistry(), dir, time.Second, nil) externalLabels := labels.FromStrings("external", "true") conf := &config.Config{ @@ -250,7 +250,7 @@ func TestWriteStorageApplyConfig_UpdateExternalLabels(t *testing.T) { func TestWriteStorageApplyConfig_Idempotent(t *testing.T) { dir := t.TempDir() - s := NewWriteStorage(nil, nil, dir, defaultFlushDeadline, nil, false) + s := NewWriteStorage(nil, nil, dir, defaultFlushDeadline, nil) conf := &config.Config{ GlobalConfig: config.GlobalConfig{}, RemoteWriteConfigs: []*config.RemoteWriteConfig{ @@ -274,7 +274,7 @@ func TestWriteStorageApplyConfig_Idempotent(t *testing.T) { func TestWriteStorageApplyConfig_PartialUpdate(t *testing.T) { dir := t.TempDir() - s := NewWriteStorage(nil, nil, dir, defaultFlushDeadline, nil, false) + s := NewWriteStorage(nil, nil, dir, defaultFlushDeadline, nil) c0 := &config.RemoteWriteConfig{ RemoteTimeout: model.Duration(10 * time.Second), diff --git a/tsdb/agent/db_test.go b/tsdb/agent/db_test.go index 0238a8e140..db98e87408 100644 --- a/tsdb/agent/db_test.go +++ b/tsdb/agent/db_test.go @@ -91,7 +91,7 @@ func createTestAgentDB(t testing.TB, reg prometheus.Registerer, opts *Options) * t.Helper() dbDir := t.TempDir() - rs := remote.NewStorage(promslog.NewNopLogger(), reg, startTime, dbDir, time.Second*30, nil, false) + rs := remote.NewStorage(promslog.NewNopLogger(), reg, startTime, dbDir, time.Second*30, nil) t.Cleanup(func() { require.NoError(t, rs.Close()) }) @@ -737,7 +737,7 @@ func TestLockfile(t *testing.T) { tsdbutil.TestDirLockerUsage(t, func(t *testing.T, data string, createLock bool) (*tsdbutil.DirLocker, testutil.Closer) { logger := promslog.NewNopLogger() reg := prometheus.NewRegistry() - rs := remote.NewStorage(logger, reg, startTime, data, time.Second*30, nil, false) + rs := remote.NewStorage(logger, reg, startTime, data, time.Second*30, nil) t.Cleanup(func() { require.NoError(t, rs.Close()) }) @@ -757,7 +757,7 @@ func TestLockfile(t *testing.T) { func Test_ExistingWAL_NextRef(t *testing.T) { dbDir := t.TempDir() - rs := remote.NewStorage(promslog.NewNopLogger(), nil, startTime, dbDir, time.Second*30, nil, false) + rs := remote.NewStorage(promslog.NewNopLogger(), nil, startTime, dbDir, time.Second*30, nil) defer func() { require.NoError(t, rs.Close()) }() diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index f9bdbe3947..fcf9c2a243 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -495,7 +495,7 @@ func TestEndpoints(t *testing.T) { remote := remote.NewStorage(promslog.New(&promslogConfig), prometheus.DefaultRegisterer, func() (int64, error) { return 0, nil - }, dbDir, 1*time.Second, nil, false) + }, dbDir, 1*time.Second, nil) err = remote.ApplyConfig(&config.Config{ RemoteReadConfigs: []*config.RemoteReadConfig{ From a5ffa83be83be22e2ec9fd1d4765299d8d16119e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laimis=20Juzeli=C5=ABnas?= <58551069+laimis9133@users.noreply.github.com> Date: Thu, 13 Feb 2025 15:25:50 +0200 Subject: [PATCH 22/23] scrape: reorder the switch case of errors (#15991) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change case order for switch scrapeLoop This commit changes the ordering in error identification switch cases for better production performance and adds reasonings behind error switch case order as comments. --------- Signed-off-by: Laimis Juzeliūnas --- scrape/scrape.go | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/scrape/scrape.go b/scrape/scrape.go index 58314af4eb..020bddb757 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -1962,12 +1962,24 @@ func isSeriesPartOfFamily(mName string, mfName []byte, typ model.MetricType) boo // Adds samples to the appender, checking the error, and then returns the # of samples added, // whether the caller should continue to process more samples, and any sample or bucket limit errors. +// Switch error cases for Sample and Bucket limits are checked first since they're more common +// during normal operation (e.g., accidental cardinality explosion, sudden traffic spikes). +// Current case ordering prevents exercising other cases when limits are exceeded. +// Remaining error cases typically occur only a few times, often during initial setup. func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucketLimitErr *error, appErrs *appendErrors) (bool, error) { switch { case err == nil: return true, nil - case errors.Is(err, storage.ErrNotFound): - return false, storage.ErrNotFound + case errors.Is(err, errSampleLimit): + // Keep on parsing output if we hit the limit, so we report the correct + // total number of samples scraped. + *sampleLimitErr = err + return false, nil + case errors.Is(err, errBucketLimit): + // Keep on parsing output if we hit the limit, so we report the bucket + // total number of samples scraped. + *bucketLimitErr = err + return false, nil case errors.Is(err, storage.ErrOutOfOrderSample): appErrs.numOutOfOrder++ sl.l.Debug("Out of order sample", "series", string(met)) @@ -1983,16 +1995,8 @@ func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucke sl.l.Debug("Out of bounds metric", "series", string(met)) sl.metrics.targetScrapeSampleOutOfBounds.Inc() return false, nil - case errors.Is(err, errSampleLimit): - // Keep on parsing output if we hit the limit, so we report the correct - // total number of samples scraped. - *sampleLimitErr = err - return false, nil - case errors.Is(err, errBucketLimit): - // Keep on parsing output if we hit the limit, so we report the correct - // total number of samples scraped. - *bucketLimitErr = err - return false, nil + case errors.Is(err, storage.ErrNotFound): + return false, storage.ErrNotFound default: return false, err } From 77a56981909c3478e423127fffb2e10008eeddbf Mon Sep 17 00:00:00 2001 From: Julien Duchesne Date: Mon, 17 Feb 2025 06:19:16 -0500 Subject: [PATCH 23/23] Ruler Concurrency: Consider `__name__` matchers (#16039) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calculating dependencies between rules, we sometimes run into `{__name__...}` matchers These can be used the same way as the actual rule names This will enable even more rules to run concurrently The new logic is also not slower: ``` julienduchesne@triceratops prometheus % benchstat test-old.txt test.txt goos: darwin goarch: arm64 pkg: github.com/prometheus/prometheus/rules cpu: Apple M3 Pro │ test-old.txt │ test.txt │ │ sec/op │ sec/op vs base │ DependencyMap-11 1.206µ ± 7% 1.024µ ± 7% -15.10% (p=0.000 n=10) │ test-old.txt │ test.txt │ │ B/op │ B/op vs base │ DependencyMap-11 1.720Ki ± 0% 1.438Ki ± 0% -16.35% (p=0.000 n=10) │ test-old.txt │ test.txt │ │ allocs/op │ allocs/op vs base │ DependencyMap-11 39.00 ± 0% 34.00 ± 0% -12.82% (p=0.000 n=10) ``` Signed-off-by: Julien Duchesne --- rules/group.go | 43 ++++++++++++++++++++++++++----------------- rules/manager_test.go | 14 +++++++++++--- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/rules/group.go b/rules/group.go index 9ad9aab093..a83c33a8e8 100644 --- a/rules/group.go +++ b/rules/group.go @@ -1110,9 +1110,6 @@ func buildDependencyMap(rules []Rule) dependencyMap { return dependencies } - inputs := make(map[string][]Rule, len(rules)) - outputs := make(map[string][]Rule, len(rules)) - var indeterminate bool for _, rule := range rules { @@ -1120,26 +1117,46 @@ func buildDependencyMap(rules []Rule) dependencyMap { break } - name := rule.Name() - outputs[name] = append(outputs[name], rule) - parser.Inspect(rule.Query(), func(node parser.Node, path []parser.Node) error { if n, ok := node.(*parser.VectorSelector); ok { + // Find the name matcher for the rule. + var nameMatcher *labels.Matcher + if n.Name != "" { + nameMatcher = labels.MustNewMatcher(labels.MatchEqual, model.MetricNameLabel, n.Name) + } else { + for _, m := range n.LabelMatchers { + if m.Name == model.MetricNameLabel { + nameMatcher = m + break + } + } + } + // A wildcard metric expression means we cannot reliably determine if this rule depends on any other, // which means we cannot safely run any rules concurrently. - if n.Name == "" && len(n.LabelMatchers) > 0 { + if nameMatcher == nil { indeterminate = true return nil } // Rules which depend on "meta-metrics" like ALERTS and ALERTS_FOR_STATE will have undefined behaviour // if they run concurrently. - if n.Name == alertMetricName || n.Name == alertForStateMetricName { + if nameMatcher.Matches(alertMetricName) || nameMatcher.Matches(alertForStateMetricName) { indeterminate = true return nil } - inputs[n.Name] = append(inputs[n.Name], rule) + // Find rules which depend on the output of this rule. + for _, other := range rules { + if other == rule { + continue + } + + otherName := other.Name() + if nameMatcher.Matches(otherName) { + dependencies[other] = append(dependencies[other], rule) + } + } } return nil }) @@ -1149,13 +1166,5 @@ func buildDependencyMap(rules []Rule) dependencyMap { return nil } - for output, outRules := range outputs { - for _, outRule := range outRules { - if inRules, found := inputs[output]; found && len(inRules) > 0 { - dependencies[outRule] = append(dependencies[outRule], inRules...) - } - } - } - return dependencies } diff --git a/rules/manager_test.go b/rules/manager_test.go index 6c32b6d0f1..9ef1fcf074 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -1601,10 +1601,14 @@ func TestDependencyMap(t *testing.T) { require.NoError(t, err) rule4 := NewRecordingRule("user:requests:increase1h", expr, labels.Labels{}) + expr, err = parser.ParseExpr(`sum by (user) ({__name__=~"user:requests.+5m"})`) + require.NoError(t, err) + rule5 := NewRecordingRule("user:requests:sum5m", expr, labels.Labels{}) + group := NewGroup(GroupOptions{ Name: "rule_group", Interval: time.Second, - Rules: []Rule{rule, rule2, rule3, rule4}, + Rules: []Rule{rule, rule2, rule3, rule4, rule5}, Opts: opts, }) @@ -1619,13 +1623,17 @@ func TestDependencyMap(t *testing.T) { require.Equal(t, []Rule{rule}, depMap.dependencies(rule2)) require.False(t, depMap.isIndependent(rule2)) - require.Zero(t, depMap.dependents(rule3)) + require.Equal(t, []Rule{rule5}, depMap.dependents(rule3)) require.Zero(t, depMap.dependencies(rule3)) - require.True(t, depMap.isIndependent(rule3)) + require.False(t, depMap.isIndependent(rule3)) require.Zero(t, depMap.dependents(rule4)) require.Equal(t, []Rule{rule}, depMap.dependencies(rule4)) require.False(t, depMap.isIndependent(rule4)) + + require.Zero(t, depMap.dependents(rule5)) + require.Equal(t, []Rule{rule3}, depMap.dependencies(rule5)) + require.False(t, depMap.isIndependent(rule5)) } func TestNoDependency(t *testing.T) {