From 8bdfba87722f1cd1d1ed9b3b69563ea76f648b1c Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Thu, 19 Feb 2026 11:47:32 +0100 Subject: [PATCH] Allow users to provide additional resource attributes --- doc/09-object-types.md | 2 + doc/14-features.md | 35 +++++++---- .../features-available/otlpmetrics.conf | 12 ++++ lib/perfdata/otlpmetricswriter.cpp | 61 ++++++++++++++++++- lib/perfdata/otlpmetricswriter.hpp | 4 ++ lib/perfdata/otlpmetricswriter.ti | 6 ++ 6 files changed, 108 insertions(+), 12 deletions(-) diff --git a/doc/09-object-types.md b/doc/09-object-types.md index 8bff4fb9a..4cc45d6c3 100644 --- a/doc/09-object-types.md +++ b/doc/09-object-types.md @@ -1898,6 +1898,8 @@ There are more configuration options available as described in the table below. | metrics\_endpoint | String | **Required.** OTLP metrics endpoint path. Defaults to `/v1/metrics`. | | service\_namespace | String | **Required.** The namespace to associate with emitted metrics used in the `service.namespace` OTel resource attribute. Defaults to `icinga`. | | basic\_auth | Dictionary | **Optional.** Username and password for HTTP basic authentication. | +| host\_resource\_attributes | Dictionary | **Optional.** Additional resource attributes to be included with host metrics. Defaults to none. | +| service\_resource\_attributes | Dictionary | **Optional.** Additional resource attributes to be included with service metrics. Defaults to none. | | flush\_interval | Duration | **Optional.** How long to buffer data points before transferring to the OTLP backend. Defaults to `15s`. | | flush\_threshold | Number | **Optional.** How many bytes to buffer before forcing a transfer to the OTLP backend. Defaults to `32MiB`. | | enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Has no effect in non-cluster setups. Defaults to `false`. 
| diff --git a/doc/14-features.md b/doc/14-features.md index 4400675a2..c8c854194 100644 --- a/doc/14-features.md +++ b/doc/14-features.md @@ -657,11 +657,6 @@ capabilities of OpenTelemetry for advanced analysis and visualization of your mo standardized way to collect, process, and export telemetry data, making it easier to integrate with numerous [monitoring and observability](https://opentelemetry.io/docs/collector/components/exporter/) tools effortlessly. -!!! note - - This feature has successfully been tested with OpenTelemetry Collector, Prometheus OTLP receiver, OpenSearch Data - Prepper, and Grafana Mimir. However, it should work with any backend that supports the OTLP HTTP protocol as well. - In order to enable this feature, you can use the following command: ```bash @@ -697,12 +692,30 @@ Additionally, each metric point will also include other relevant attributes such `icinga2.command.name`, etc. as resource attributes. You can find the full list of metric point formats and attributes in the [OTLPMetrics data format](#otlpmetrics-writer-data-format) section below. -At the moment, the OTLPMetrics Writer allows you to configure only a single metrics resource attribute -[`service.namespace`](https://opentelemetry.io/docs/specs/semconv/registry/attributes/service/#service-namespace) via -the `service_namespace` option in the OTLPMetrics Writer config. This attribute can be used to group related metrics -together in the backend. By default, it is set to `icinga`. You can customize it to better fit your monitoring -environment. For example, you might set it to `production`, `staging`, or any other relevant namespace that categorizes -your Icinga 2 metrics emitted to the OpenTelemetry backend effectively. +In addition to the default attributes, it is also possible to configure custom resource attributes that are sent along +with the metrics to the OpenTelemetry backend. 
You can use the `host_resource_attributes` and `service_resource_attributes` +options in the OTLPMetrics Writer configuration to define custom resource attributes for host and service checks, +respectively. You can use macros in the attribute values to dynamically populate them based on the check context. +For instance, you can add a custom resource attribute `host.os` with the value `$host.vars.os$` and it will be populated +with the value of `vars.os` for each host that has this variable defined; otherwise the attribute is silently omitted. +All custom resource attributes will be prefixed with `icinga2.custom.` to avoid naming conflicts with existing +OpenTelemetry and Icinga 2's built-in resource attributes. For example, if you define a custom resource attribute +`host.os`, it will be sent as `icinga2.custom.host.os` to OpenTelemetry. + +!!! warning + + Be cautious when defining custom resource attributes, as they are sent with every metric and can lead to high + cardinality issues if not used carefully. It is recommended to only define custom resource attributes that are + necessary for your monitoring use case and to avoid using attributes with high variability or a large number of + unique values. + +Apart from custom resource attributes, the OTLPMetrics Writer also allows you to configure an additional resource +attribute called [`service.namespace`](https://opentelemetry.io/docs/specs/semconv/registry/attributes/service/#service-namespace) +via the `service_namespace` option in the OTLPMetrics Writer configuration. This attribute is not specific to any host +or service but is a general attribute that applies to all metrics emitted by one OTLPMetrics Writer instance. +By default, it is set to `icinga`. You can customize it to better fit your monitoring environment. For example, you +might set it to `production`, `staging`, or any other relevant namespace that categorizes your Icinga 2 metrics emitted +to the OpenTelemetry backend effectively. 
#### OTLPMetrics in HA Cluster Zones diff --git a/etc/icinga2/features-available/otlpmetrics.conf b/etc/icinga2/features-available/otlpmetrics.conf index 39a2cacb0..9a2e635ca 100644 --- a/etc/icinga2/features-available/otlpmetrics.conf +++ b/etc/icinga2/features-available/otlpmetrics.conf @@ -18,6 +18,18 @@ object OTLPMetricsWriter "otlp-metrics" { // password = "otel_password" // } + # You can also add custom resource attributes to the exported metrics based on host and service variables. + # They are sent as OTel resource attributes for hosts and services, respectively, with each name prefixed by `icinga2.custom.`. + # By default, no additional attributes are added. Adjust the templates as needed to include the desired variables. + // host_resource_attributes = { + // "host.vars.env" = "$host.vars.env$" + // "host.vars.os" = "$host.vars.os$" + // } + // service_resource_attributes = { + // "service.vars.env" = "$service.vars.env$" + // "service.vars.os" = "$service.vars.os$" + // } + # These are the default settings used by the OTel writer. Adjust them as needed. # Please refer to the documentation for more details on each option. 
// enable_ha = false diff --git a/lib/perfdata/otlpmetricswriter.cpp b/lib/perfdata/otlpmetricswriter.cpp index 2ad480c71..269b26166 100644 --- a/lib/perfdata/otlpmetricswriter.cpp +++ b/lib/perfdata/otlpmetricswriter.cpp @@ -11,6 +11,7 @@ #include "base/statsfunction.hpp" #include "icinga/checkable.hpp" #include "icinga/checkcommand.hpp" +#include "icinga/macroprocessor.hpp" #include "icinga/service.hpp" #include @@ -200,7 +201,7 @@ void OTLPMetricsWriter::CheckResultHandler(const Checkable::Ptr& checkable, cons if (auto unit = pdv->GetUnit(); !unit.IsEmpty()) { attrs.emplace("unit", std::move(unit)); } - AddBytesAndFlushIfNeeded(Record(checkable, l_PerfdataMetric, pdv->GetValue(), startTime, endTime, std::move(attrs))); + AddBytesAndFlushIfNeeded(Record(checkable, cr, l_PerfdataMetric, pdv->GetValue(), startTime, endTime, std::move(attrs))); if (GetEnableSendThresholds()) { std::array, 4> thresholds{{ @@ -218,6 +219,7 @@ void OTLPMetricsWriter::CheckResultHandler(const Checkable::Ptr& checkable, cons AddBytesAndFlushIfNeeded( Record( checkable, + cr, l_ThresholdMetric, Convert::ToDouble(threshold), startTime, @@ -279,6 +281,7 @@ void OTLPMetricsWriter::AddBytesAndFlushIfNeeded(std::size_t newBytes) * @tparam T The type of the data point to record (e.g., int64_t, double). * * @param checkable The configuration object to associate the metric with. + * @param cr The check result associated with the metric data point, used for macro resolution in attributes. * @param metric The OTel metric enum value indicating which metric stream to record the data point for. * @param value The data point value to record. * @param startTime The start time of the data point in seconds. 
@@ -290,6 +293,7 @@ void OTLPMetricsWriter::AddBytesAndFlushIfNeeded(std::size_t newBytes) template std::size_t OTLPMetricsWriter::Record( const Checkable::Ptr& checkable, + const CheckResult::Ptr& cr, std::string_view metric, T value, double startTime, @@ -327,6 +331,31 @@ std::size_t OTLPMetricsWriter::Record( } attr = resource->add_attributes(); OTel::SetAttribute(*attr, "icinga2.command.name"sv, checkable->GetCheckCommand()->GetName()); + + if (Dictionary::Ptr tmpl = service ? GetServiceResourceAttributes() : GetHostResourceAttributes(); tmpl) { + MacroProcessor::ResolverList resolvers{{"host", host}}; + if (service) { + resolvers.emplace_back("service", service); + } + + ObjectLock olock(tmpl); + for (const Dictionary::Pair& pair : tmpl) { + String missingMacro; + auto resolvedVal = MacroProcessor::ResolveMacros(pair.second, resolvers, cr, &missingMacro); + if (missingMacro.IsEmpty()) { + attr = resource->add_attributes(); + try { + OTel::SetAttribute(*attr, "icinga2.custom." + pair.first, resolvedVal); + } catch (const std::exception& ex) { + Log(LogWarning, "OTLPMetricsWriter") + << "Ignoring invalid resource attribute '" << pair.first << "' for checkable '" + << checkable->GetName() << "': " << ex.what(); + // Remove the last attribute from the list which is the one we just attempted to set. 
+ resource->mutable_attributes()->RemoveLast(); + } + } + } + } bytes = resourceMetrics->ByteSizeLong(); } @@ -383,3 +412,33 @@ void OTLPMetricsWriter::ValidateFlushThreshold(const Lazy& lvalue, cons )); } } + +void OTLPMetricsWriter::ValidateHostResourceAttributes(const Lazy& lvalue, const ValidationUtils& utils) +{ + ObjectImpl::ValidateHostResourceAttributes(lvalue, utils); + if (const auto& tags{lvalue()}; tags) { + ValidateResourceAttributes(tags, "host_resource_attributes"); + } +} + +void OTLPMetricsWriter::ValidateServiceResourceAttributes(const Lazy& lvalue, const ValidationUtils& utils) +{ + ObjectImpl::ValidateServiceResourceAttributes(lvalue, utils); + if (const auto& tags{lvalue()}; tags) { + ValidateResourceAttributes(tags, "service_resource_attributes"); + } +} + +void OTLPMetricsWriter::ValidateResourceAttributes(const Dictionary::Ptr& tmpl, const String& attrName) +{ + ObjectLock olock(tmpl); + for (const auto& pair : tmpl) { + if (!MacroProcessor::ValidateMacroString(pair.second)) { + BOOST_THROW_EXCEPTION(ValidationError( + this, + {attrName, pair.first}, + "Closing $ not found in macro format string '" + pair.second + "'." 
+ )); + } + } +} diff --git a/lib/perfdata/otlpmetricswriter.hpp b/lib/perfdata/otlpmetricswriter.hpp index a473e5798..2a7eee8f8 100644 --- a/lib/perfdata/otlpmetricswriter.hpp +++ b/lib/perfdata/otlpmetricswriter.hpp @@ -29,15 +29,19 @@ protected: void ValidatePort(const Lazy& lvalue, const ValidationUtils& utils) override; void ValidateFlushInterval(const Lazy& lvalue, const ValidationUtils& utils) override; void ValidateFlushThreshold(const Lazy& lvalue, const ValidationUtils& utils) override; + void ValidateHostResourceAttributes(const Lazy& lvalue, const ValidationUtils& utils) override; + void ValidateServiceResourceAttributes(const Lazy& lvalue, const ValidationUtils& utils) override; private: void CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr); void Flush(bool fromTimer = false); void AddBytesAndFlushIfNeeded(std::size_t newBytes = 0); + void ValidateResourceAttributes(const Dictionary::Ptr& tmpl, const String& attrName); template [[nodiscard]] std::size_t Record( const Checkable::Ptr& checkable, + const CheckResult::Ptr& cr, std::string_view metric, T value, double startTime, diff --git a/lib/perfdata/otlpmetricswriter.ti b/lib/perfdata/otlpmetricswriter.ti index 3423305a6..214b12baa 100644 --- a/lib/perfdata/otlpmetricswriter.ti +++ b/lib/perfdata/otlpmetricswriter.ti @@ -28,6 +28,9 @@ class OTLPMetricsWriter : ConfigObject [config, no_user_view, no_user_modify] Dictionary::Ptr basic_auth; + [config] Dictionary::Ptr host_resource_attributes; + [config] Dictionary::Ptr service_resource_attributes; + [config] int flush_interval { default {{{ return 15; }}} }; @@ -63,6 +66,9 @@ validator OTLPMetricsWriter required password; String password; }; + + Dictionary host_resource_attributes { String "*"; }; + Dictionary service_resource_attributes { String "*"; }; }; } // namespace icinga