mirror of
https://github.com/Icinga/icinga2.git
synced 2026-06-09 08:42:59 -04:00
Allow users to provide additional resource attributes
This commit is contained in:
parent
8d4a69e343
commit
8bdfba8772
6 changed files with 108 additions and 12 deletions
|
|
@ -1898,6 +1898,8 @@ There are more configuration options available as described in the table below.
|
|||
| metrics\_endpoint | String | **Required.** OTLP metrics endpoint path. Defaults to `/v1/metrics`. |
|
||||
| service\_namespace | String | **Required.** The namespace to associate with emitted metrics used in the `service.namespace` OTel resource attribute. Defaults to `icinga`. |
|
||||
| basic\_auth | Dictionary | **Optional.** Username and password for HTTP basic authentication. |
|
||||
| host\_resource\_attributes | Dictionary | **Optional.** Additional resource attributes to be included with host metrics. Defaults to none. |
|
||||
| service\_resource\_attributes | Dictionary | **Optional.** Additional resource attributes to be included with service metrics. Defaults to none. |
|
||||
| flush\_interval | Duration | **Optional.** How long to buffer data points before transferring to the OTLP backend. Defaults to `15s`. |
|
||||
| flush\_threshold | Number | **Optional.** How many bytes to buffer before forcing a transfer to the OTLP backend. Defaults to `32MiB`. |
|
||||
| enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Has no effect in non-cluster setups. Defaults to `false`. |
|
||||
|
|
|
|||
|
|
@ -657,11 +657,6 @@ capabilities of OpenTelemetry for advanced analysis and visualization of your mo
|
|||
standardized way to collect, process, and export telemetry data, making it easier to integrate with numerous
|
||||
[monitoring and observability](https://opentelemetry.io/docs/collector/components/exporter/) tools effortlessly.
|
||||
|
||||
!!! note
|
||||
|
||||
This feature has successfully been tested with OpenTelemetry Collector, Prometheus OTLP receiver, OpenSearch Data
|
||||
Prepper, and Grafana Mimir. However, it should work with any backend that supports the OTLP HTTP protocol as well.
|
||||
|
||||
In order to enable this feature, you can use the following command:
|
||||
|
||||
```bash
|
||||
|
|
@ -697,12 +692,30 @@ Additionally, each metric point will also include other relevant attributes such
|
|||
`icinga2.command.name`, etc. as resource attributes. You can find the full list of metric point formats and attributes
|
||||
in the [OTLPMetrics data format](#otlpmetrics-writer-data-format) section below.
|
||||
|
||||
At the moment, the OTLPMetrics Writer allows you to configure only a single metrics resource attribute
|
||||
[`service.namespace`](https://opentelemetry.io/docs/specs/semconv/registry/attributes/service/#service-namespace) via
|
||||
the `service_namespace` option in the OTLPMetrics Writer config. This attribute can be used to group related metrics
|
||||
together in the backend. By default, it is set to `icinga`. You can customize it to better fit your monitoring
|
||||
environment. For example, you might set it to `production`, `staging`, or any other relevant namespace that categorizes
|
||||
your Icinga 2 metrics emitted to the OpenTelemetry backend effectively.
|
||||
In addition to the default attributes, it is also possible to configure custom resource attributes that are sent along
|
||||
with the metrics to the OpenTelemetry backend. You can use the `host_resource_attributes` and `service_resource_attributes`
|
||||
options in the OTLPMetrics Writer configuration to define custom resource attributes for host and service checks
|
||||
respectively. You can use macros in the attribute values to dynamically populate them based on the check context.
|
||||
For instance, you can add a custom resource attribute `host.os` with the value `$host.vars.os$` and it will be populated
|
||||
with the value of `vars.os` for each host that has this variable defined; for hosts without it, the attribute is silently omitted.
|
||||
All custom resource attributes will be prefixed with `icinga2.custom.` to avoid naming conflicts with existing
|
||||
OpenTelemetry and Icinga 2's built-in resource attributes. For example, if you define a custom resource attribute
|
||||
`host.os`, it will be sent as `icinga2.custom.host.os` to OpenTelemetry.
|
||||
|
||||
!!! warning
|
||||
|
||||
Be cautious when defining custom resource attributes, as they are sent with every metric and can lead to high
|
||||
cardinality issues if not used carefully. It is recommended to only define custom resource attributes that are
|
||||
necessary for your monitoring use case and to avoid using attributes with high variability or a large number of
|
||||
unique values.
|
||||
|
||||
Apart from custom resource attributes, the OTLPMetrics Writer also allows you to configure an additional resource
|
||||
attribute called [`service.namespace`](https://opentelemetry.io/docs/specs/semconv/registry/attributes/service/#service-namespace)
|
||||
via the `service_namespace` option in the OTLPMetrics Writer configuration. This attribute is not specific to any host
|
||||
or service but is a general attribute that applies to all metrics emitted by one OTLPMetrics Writer instance.
|
||||
By default, it is set to `icinga`. You can customize it to better fit your monitoring environment. For example, you
|
||||
might set it to `production`, `staging`, or any other relevant namespace that categorizes your Icinga 2 metrics emitted
|
||||
to the OpenTelemetry backend effectively.
|
||||
|
||||
#### OTLPMetrics in HA Cluster Zones <a id="otlpmetrics-writer-ha-cluster"></a>
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,18 @@ object OTLPMetricsWriter "otlp-metrics" {
|
|||
// password = "otel_password"
|
||||
// }
|
||||
|
||||
# You can also add custom resource attributes to the exported metrics based on host and service variables.
|
||||
# They are attached as OTel resource attributes to host and service metrics, respectively, with each key prefixed by `icinga2.custom.`.
|
||||
# By default, no additional attributes are added. Adjust the templates as needed to include the desired variables.
|
||||
// host_resource_attributes = {
|
||||
// "host.vars.env" = "$host.vars.env$"
|
||||
// "host.vars.os" = "$host.vars.os$"
|
||||
// }
|
||||
// service_resource_attributes = {
|
||||
// "service.vars.env" = "$service.vars.env$"
|
||||
// "service.vars.os" = "$service.vars.os$"
|
||||
// }
|
||||
|
||||
# These are the default settings used by the OTel writer. Adjust them as needed.
|
||||
# Please refer to the documentation for more details on each option.
|
||||
// enable_ha = false
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include "base/statsfunction.hpp"
|
||||
#include "icinga/checkable.hpp"
|
||||
#include "icinga/checkcommand.hpp"
|
||||
#include "icinga/macroprocessor.hpp"
|
||||
#include "icinga/service.hpp"
|
||||
#include <future>
|
||||
|
||||
|
|
@ -200,7 +201,7 @@ void OTLPMetricsWriter::CheckResultHandler(const Checkable::Ptr& checkable, cons
|
|||
if (auto unit = pdv->GetUnit(); !unit.IsEmpty()) {
|
||||
attrs.emplace("unit", std::move(unit));
|
||||
}
|
||||
AddBytesAndFlushIfNeeded(Record(checkable, l_PerfdataMetric, pdv->GetValue(), startTime, endTime, std::move(attrs)));
|
||||
AddBytesAndFlushIfNeeded(Record(checkable, cr, l_PerfdataMetric, pdv->GetValue(), startTime, endTime, std::move(attrs)));
|
||||
|
||||
if (GetEnableSendThresholds()) {
|
||||
std::array<std::pair<String, Value>, 4> thresholds{{
|
||||
|
|
@ -218,6 +219,7 @@ void OTLPMetricsWriter::CheckResultHandler(const Checkable::Ptr& checkable, cons
|
|||
AddBytesAndFlushIfNeeded(
|
||||
Record(
|
||||
checkable,
|
||||
cr,
|
||||
l_ThresholdMetric,
|
||||
Convert::ToDouble(threshold),
|
||||
startTime,
|
||||
|
|
@ -279,6 +281,7 @@ void OTLPMetricsWriter::AddBytesAndFlushIfNeeded(std::size_t newBytes)
|
|||
* @tparam T The type of the data point to record (e.g., int64_t, double).
|
||||
*
|
||||
* @param checkable The configuration object to associate the metric with.
|
||||
* @param cr The check result associated with the metric data point, used for macro resolution in attributes.
|
||||
* @param metric The OTel metric enum value indicating which metric stream to record the data point for.
|
||||
* @param value The data point value to record.
|
||||
* @param startTime The start time of the data point in seconds.
|
||||
|
|
@ -290,6 +293,7 @@ void OTLPMetricsWriter::AddBytesAndFlushIfNeeded(std::size_t newBytes)
|
|||
template<typename T>
|
||||
std::size_t OTLPMetricsWriter::Record(
|
||||
const Checkable::Ptr& checkable,
|
||||
const CheckResult::Ptr& cr,
|
||||
std::string_view metric,
|
||||
T value,
|
||||
double startTime,
|
||||
|
|
@ -327,6 +331,31 @@ std::size_t OTLPMetricsWriter::Record(
|
|||
}
|
||||
attr = resource->add_attributes();
|
||||
OTel::SetAttribute(*attr, "icinga2.command.name"sv, checkable->GetCheckCommand()->GetName());
|
||||
|
||||
if (Dictionary::Ptr tmpl = service ? GetServiceResourceAttributes() : GetHostResourceAttributes(); tmpl) {
|
||||
MacroProcessor::ResolverList resolvers{{"host", host}};
|
||||
if (service) {
|
||||
resolvers.emplace_back("service", service);
|
||||
}
|
||||
|
||||
ObjectLock olock(tmpl);
|
||||
for (const Dictionary::Pair& pair : tmpl) {
|
||||
String missingMacro;
|
||||
auto resolvedVal = MacroProcessor::ResolveMacros(pair.second, resolvers, cr, &missingMacro);
|
||||
if (missingMacro.IsEmpty()) {
|
||||
attr = resource->add_attributes();
|
||||
try {
|
||||
OTel::SetAttribute(*attr, "icinga2.custom." + pair.first, resolvedVal);
|
||||
} catch (const std::exception& ex) {
|
||||
Log(LogWarning, "OTLPMetricsWriter")
|
||||
<< "Ignoring invalid resource attribute '" << pair.first << "' for checkable '"
|
||||
<< checkable->GetName() << "': " << ex.what();
|
||||
// Remove the last attribute from the list which is the one we just attempted to set.
|
||||
resource->mutable_attributes()->RemoveLast();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bytes = resourceMetrics->ByteSizeLong();
|
||||
}
|
||||
|
||||
|
|
@ -383,3 +412,33 @@ void OTLPMetricsWriter::ValidateFlushThreshold(const Lazy<int64_t>& lvalue, cons
|
|||
));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Validates the `host_resource_attributes` config attribute.
 *
 * Runs the generated base-class validation first and then, if a dictionary was actually
 * provided, checks each of its values via ValidateResourceAttributes().
 *
 * @param lvalue Lazily evaluated value of the attribute; may yield a null dictionary.
 * @param utils Validation utilities forwarded to the base-class validator.
 */
void OTLPMetricsWriter::ValidateHostResourceAttributes(const Lazy<Dictionary::Ptr>& lvalue, const ValidationUtils& utils)
|
||||
{
|
||||
ObjectImpl::ValidateHostResourceAttributes(lvalue, utils);
|
||||
if (const auto& tags{lvalue()}; tags) {
|
||||
ValidateResourceAttributes(tags, "host_resource_attributes");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Validates the `service_resource_attributes` config attribute.
 *
 * Runs the generated base-class validation first and then, if a dictionary was actually
 * provided, checks each of its values via ValidateResourceAttributes().
 *
 * @param lvalue Lazily evaluated value of the attribute; may yield a null dictionary.
 * @param utils Validation utilities forwarded to the base-class validator.
 */
void OTLPMetricsWriter::ValidateServiceResourceAttributes(const Lazy<Dictionary::Ptr>& lvalue, const ValidationUtils& utils)
|
||||
{
|
||||
ObjectImpl::ValidateServiceResourceAttributes(lvalue, utils);
|
||||
if (const auto& tags{lvalue()}; tags) {
|
||||
ValidateResourceAttributes(tags, "service_resource_attributes");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks every value of a resource attribute template dictionary for a well-formed macro string.
 *
 * The dictionary is locked while it is iterated. The first value rejected by
 * MacroProcessor::ValidateMacroString() aborts validation with a ValidationError whose
 * attribute path is {attrName, <key>} and whose message reports a missing closing `$`.
 *
 * @param tmpl The dictionary of attribute name => macro template string to check; must not be null.
 * @param attrName Name of the config attribute being validated, used in the error's attribute path.
 *
 * @throws ValidationError if any value is not a valid macro format string.
 */
void OTLPMetricsWriter::ValidateResourceAttributes(const Dictionary::Ptr& tmpl, const String& attrName)
|
||||
{
|
||||
ObjectLock olock(tmpl);
|
||||
for (const auto& pair : tmpl) {
|
||||
if (!MacroProcessor::ValidateMacroString(pair.second)) {
|
||||
BOOST_THROW_EXCEPTION(ValidationError(
|
||||
this,
|
||||
{attrName, pair.first},
|
||||
"Closing $ not found in macro format string '" + pair.second + "'."
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,15 +29,19 @@ protected:
|
|||
void ValidatePort(const Lazy<int>& lvalue, const ValidationUtils& utils) override;
|
||||
void ValidateFlushInterval(const Lazy<int>& lvalue, const ValidationUtils& utils) override;
|
||||
void ValidateFlushThreshold(const Lazy<int64_t>& lvalue, const ValidationUtils& utils) override;
|
||||
void ValidateHostResourceAttributes(const Lazy<Dictionary::Ptr>& lvalue, const ValidationUtils& utils) override;
|
||||
void ValidateServiceResourceAttributes(const Lazy<Dictionary::Ptr>& lvalue, const ValidationUtils& utils) override;
|
||||
|
||||
private:
|
||||
void CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr);
|
||||
void Flush(bool fromTimer = false);
|
||||
void AddBytesAndFlushIfNeeded(std::size_t newBytes = 0);
|
||||
void ValidateResourceAttributes(const Dictionary::Ptr& tmpl, const String& attrName);
|
||||
|
||||
template<typename T>
|
||||
[[nodiscard]] std::size_t Record(
|
||||
const Checkable::Ptr& checkable,
|
||||
const CheckResult::Ptr& cr,
|
||||
std::string_view metric,
|
||||
T value,
|
||||
double startTime,
|
||||
|
|
|
|||
|
|
@ -28,6 +28,9 @@ class OTLPMetricsWriter : ConfigObject
|
|||
|
||||
[config, no_user_view, no_user_modify] Dictionary::Ptr basic_auth;
|
||||
|
||||
[config] Dictionary::Ptr host_resource_attributes;
|
||||
[config] Dictionary::Ptr service_resource_attributes;
|
||||
|
||||
[config] int flush_interval {
|
||||
default {{{ return 15; }}}
|
||||
};
|
||||
|
|
@ -63,6 +66,9 @@ validator OTLPMetricsWriter
|
|||
required password;
|
||||
String password;
|
||||
};
|
||||
|
||||
Dictionary host_resource_attributes { String "*"; };
|
||||
Dictionary service_resource_attributes { String "*"; };
|
||||
};
|
||||
|
||||
} // namespace icinga
|
||||
|
|
|
|||
Loading…
Reference in a new issue