Log pending items stats regularly & include them as perfdata in IcingaDB check

This commit is contained in:
Yonas Habteab 2026-04-15 09:34:43 +02:00
parent 267675e80b
commit 99328ec417
3 changed files with 60 additions and 3 deletions

View file

@ -3,6 +3,7 @@
#include "icingadb/icingadb.hpp"
#include "base/logger.hpp"
#include "base/perfdatavalue.hpp"
#include <vector>
using namespace icinga;
@ -47,16 +48,42 @@ void IcingaDB::PendingItemsThreadProc()
// Redis queries when the Redis connection is saturated.
constexpr std::size_t maxPendingQueries = 128;
// The minimum age an item must have before it can be processed.
constexpr ch::milliseconds minItemAge{300};
constexpr auto minItemAge{300ms};
std::unique_lock lock(m_PendingItemsMutex);
// Wait until the initial config dump is done. IcingaDB::OnConnectedHandler will notify us once it's finished.
while (GetActive() && !m_ConfigDumpDone) m_PendingItemsCV.wait(lock);
// The regular interval at which to log the statistics about the pending items queue.
constexpr auto logInterval{5min};
// The interval at which to log the statistics about the pending items when the worker is under heavy load.
constexpr auto heavyLoadLogInterval{10s};
auto lastLogTime = ch::steady_clock::now();
auto& seqView = m_PendingItems.get<1>();
while (GetActive()) {
auto now = ch::steady_clock::now();
auto elapsed{now - lastLogTime};
bool laggingBehind{!seqView.empty() && seqView.front().EnqueueTime + minItemAge < now - 5s};
// Log pending items stats at regular intervals, and more frequently when we're lagging behind to
// make the user aware of the situation as that's usually an indication of something being wrong.
if (elapsed >= logInterval || (laggingBehind && elapsed >= heavyLoadLogInterval)) {
lastLogTime = now;
Log log(laggingBehind ? LogWarning : LogInformation, "IcingaDB");
log << "Pending config and state updates: " << seqView.size() << " (Redis queries: "
<< (m_RconWorker ? m_RconWorker->GetPendingQueryCount() : 0);
if (!seqView.empty()) {
log << ", oldest item age: ";
if (auto age{now - seqView.front().EnqueueTime}; age < 1s) {
log << ch::duration_cast<ch::milliseconds>(age).count() << "ms";
} else {
log << ch::duration_cast<ch::seconds>(age).count() << "s";
}
}
log << ").";
}
if (!m_PendingItems.empty() && m_RconWorker && m_RconWorker->IsConnected() && m_RconWorker->GetPendingQueryCount() < maxPendingQueries) {
auto now = ch::steady_clock::now();
auto it = seqView.begin();
if (auto age = now - it->EnqueueTime; minItemAge > age) {
m_PendingItemsCV.wait_for(lock, minItemAge - age);
@ -417,3 +444,30 @@ void IcingaDB::EnqueueRelationsDeletion(const String& id, icingadb::task_queue::
}
m_PendingItemsCV.notify_one();
}
/**
* Load statistics about the pending items queue into the provided perfdata array.
*
* This function retrieves statistics about the pending items queue, such as the current queue size and
* the age of the oldest item, and adds them to the provided perfdata array as performance data values.
*
* @param perfdata The array to which the pending items statistics will be added.
*/
void IcingaDB::LoadPendingItemsStats(const Array::Ptr& perfdata) const
{
namespace ch = std::chrono;
std::size_t queueSize = 0;
double oldestItemAge{0};
{
std::lock_guard lock(m_PendingItemsMutex);
queueSize = m_PendingItems.size();
if (auto& seqView = m_PendingItems.get<1>(); !seqView.empty()) {
oldestItemAge = ch::duration_cast<ch::duration<double>>(
ch::steady_clock::now() - seqView.front().EnqueueTime
).count();
}
}
perfdata->Add(new PerfdataValue("icinga2_pending_config_and_state_updates_count", static_cast<double>(queueSize)));
perfdata->Add(new PerfdataValue("icinga2_pending_config_and_state_updates_backlog", oldestItemAge, false, "seconds"));
}

View file

@ -242,6 +242,8 @@ public:
return m_RconLocked.load();
}
void LoadPendingItemsStats(const Array::Ptr& perfdata) const;
template<class T>
static void AddKvsToMap(const Array::Ptr& kvs, T& map)
{
@ -469,7 +471,7 @@ private:
std::thread m_PendingItemsThread; // The background worker thread (consumer of m_PendingItems).
icingadb::task_queue::PendingItemsSet m_PendingItems; // Container for pending items with dirty bits (access protected by m_PendingItemsMutex).
std::mutex m_PendingItemsMutex; // Mutex to protect access to m_PendingItems.
mutable std::mutex m_PendingItemsMutex; // Mutex to protect access to m_PendingItems.
std::condition_variable m_PendingItemsCV; // Condition variable to forcefully wake up the worker thread.
void PendingItemsThreadProc();

View file

@ -479,6 +479,7 @@ void IcingadbCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckR
perfdata->Add(new PerfdataValue(String("icinga2_") + subject.Name + "_items_5mins", (redis.get()->*subject.Getter)(5 * 60, now), false, "", Empty, Empty, 0));
perfdata->Add(new PerfdataValue(String("icinga2_") + subject.Name + "_items_15mins", (redis.get()->*subject.Getter)(15 * 60, now), false, "", Empty, Empty, 0));
}
conn->LoadPendingItemsStats(perfdata);
ServiceState state;
std::ostringstream msgbuf;