diff --git a/doc/03-monitoring-basics.md b/doc/03-monitoring-basics.md index 79ef8cc5d..0c5281272 100644 --- a/doc/03-monitoring-basics.md +++ b/doc/03-monitoring-basics.md @@ -2738,6 +2738,27 @@ apply Dependency "internet" to Service { } ``` +### Redundancy Groups <a id="dependencies-redundancy-groups"></a> + +Sometimes you want dependencies to accumulate, +i.e. to consider the parent reachable only if no dependency is violated. +Sometimes you want them to be regarded as redundant, +i.e. to consider the parent unreachable only if no dependency is fulfilled. +Think of a host connected to both a network and a storage switch vs. a host connected to redundant routers. + +Sometimes you even want a mixture of both. +Think of a service like SSH depending on both LDAP and DNS to function, +while operating redundant LDAP servers as well as redundant DNS resolvers. + +Before v2.12, Icinga regarded all dependencies as cumulative. +In v2.12 and v2.13, Icinga regarded all dependencies as redundant. +The latter led to unrelated services being inadvertently regarded as redundant to each other. + +v2.14 restored the former behavior and allowed overriding it. +I.e. all dependencies are regarded as essential for the parent by default. +Specifying the `redundancy_group` attribute for two dependencies of a child object with the same value +causes them to be regarded as redundant (only inside that redundancy group). + diff --git a/doc/09-object-types.md b/doc/09-object-types.md index 0f92f28d8..c09471638 100644 --- a/doc/09-object-types.md +++ b/doc/09-object-types.md @@ -201,6 +201,7 @@ Configuration Attributes: parent\_service\_name | Object name | **Optional.** The parent service. If omitted, this dependency object is treated as host dependency. child\_host\_name | Object name | **Required.** The child host. child\_service\_name | Object name | **Optional.** The child service. If omitted, this dependency object is treated as host dependency. 
+ redundancy\_group | String | **Optional.** Puts the dependency into a group of [mutually redundant ones](03-monitoring-basics.md#dependencies-redundancy-groups). disable\_checks | Boolean | **Optional.** Whether to disable checks (i.e., don't schedule active checks and drop passive results) when this dependency fails. Defaults to false. disable\_notifications | Boolean | **Optional.** Whether to disable notifications when this dependency fails. Defaults to true. ignore\_soft\_states | Boolean | **Optional.** Whether to ignore soft states for the reachability calculation. Defaults to true. diff --git a/lib/icinga/checkable-dependency.cpp b/lib/icinga/checkable-dependency.cpp index 5ce92886c..58d6b578b 100644 --- a/lib/icinga/checkable-dependency.cpp +++ b/lib/icinga/checkable-dependency.cpp @@ -3,6 +3,7 @@ #include "icinga/service.hpp" #include "icinga/dependency.hpp" #include "base/logger.hpp" +#include using namespace icinga; @@ -74,25 +75,42 @@ bool Checkable::IsReachable(DependencyType dt, Dependency::Ptr *failedDependency auto deps = GetDependencies(); - int countDeps = deps.size(); - int countFailed = 0; + std::unordered_map violated; // key: redundancy group, value: nullptr if satisfied, violating dependency otherwise for (const Dependency::Ptr& dep : deps) { - if (!dep->IsAvailable(dt)) { - countFailed++; + std::string redundancy_group = dep->GetRedundancyGroup(); - if (failedDependency) - *failedDependency = dep; + if (!dep->IsAvailable(dt)) { + if (redundancy_group.empty()) { + Log(LogDebug, "Checkable") + << "Non-redundant dependency '" << dep->GetName() << "' failed for checkable '" << GetName() << "': Marking as unreachable."; + + if (failedDependency) + *failedDependency = dep; + + return false; + } + + // tentatively mark this dependency group as failed unless it is already marked; + // so it either passed before (don't overwrite) or already failed (so don't care) + // note that std::unordered_map::insert() will not overwrite an existing entry + 
violated.insert(std::make_pair(redundancy_group, dep)); + } else if (!redundancy_group.empty()) { + violated[redundancy_group] = nullptr; } } - /* If there are dependencies, and all of them failed, mark as unreachable. */ - if (countDeps > 0 && countFailed == countDeps) { + auto violator = std::find_if(violated.begin(), violated.end(), [](auto& v) { return v.second != nullptr; }); + if (violator != violated.end()) { Log(LogDebug, "Checkable") - << "All dependencies have failed for checkable '" << GetName() << "': Marking as unreachable."; + << "All dependencies in redundancy group '" << violator->first << "' have failed for checkable '" << GetName() << "': Marking as unreachable."; + + if (failedDependency) + *failedDependency = violator->second; return false; } + if (failedDependency) *failedDependency = nullptr; diff --git a/lib/icinga/dependency.ti b/lib/icinga/dependency.ti index 3fc832522..41de7ba23 100644 --- a/lib/icinga/dependency.ti +++ b/lib/icinga/dependency.ti @@ -77,6 +77,8 @@ class Dependency : CustomVarObject < DependencyNameComposer }}} }; + [config] String redundancy_group; + [config, navigation] name(TimePeriod) period (PeriodRaw) { navigate {{{ return TimePeriod::GetByName(GetPeriodRaw()); diff --git a/test/icinga-dependencies.cpp b/test/icinga-dependencies.cpp index b31f540b1..929b6ca0d 100644 --- a/test/icinga-dependencies.cpp +++ b/test/icinga-dependencies.cpp @@ -70,14 +70,26 @@ BOOST_AUTO_TEST_CASE(multi_parent) /* Test the reachability from this point. * parentHost1 is DOWN, parentHost2 is UP. - * Expected result: childHost is reachable. + * Expected result: childHost is unreachable. */ parentHost1->SetStateRaw(ServiceCritical); // parent Host 1 DOWN parentHost2->SetStateRaw(ServiceOK); // parent Host 2 UP + BOOST_CHECK(childHost->IsReachable() == false); + + /* The only DNS server is DOWN. + * Expected result: childHost is unreachable. 
+ */ + dep1->SetRedundancyGroup("DNS"); + BOOST_CHECK(childHost->IsReachable() == false); + + /* 1/2 DNS servers is DOWN. + * Expected result: childHost is reachable. + */ + dep2->SetRedundancyGroup("DNS"); BOOST_CHECK(childHost->IsReachable() == true); - /* parentHost1 is DOWN, parentHost2 is DOWN. + /* Both DNS servers are DOWN. * Expected result: childHost is unreachable. */ parentHost1->SetStateRaw(ServiceCritical); // parent Host 1 DOWN