diff --git a/lib/base/io-engine.cpp b/lib/base/io-engine.cpp index 2f93fc693..423f9f7c2 100644 --- a/lib/base/io-engine.cpp +++ b/lib/base/io-engine.cpp @@ -30,7 +30,7 @@ using namespace icinga; CpuBoundWork::CpuBoundWork(boost::asio::yield_context yc, boost::asio::io_context::strand& strand) : m_Done(false) { - VERIFY(strand.running_in_this_thread()); + VERIFY(IoEngine::IsStrandRunningOnThisThread(strand)); auto& ie (IoEngine::Get()); Shared::Ptr cv; diff --git a/lib/base/io-engine.hpp b/lib/base/io-engine.hpp index 5e9ef084c..cc2eb72de 100644 --- a/lib/base/io-engine.hpp +++ b/lib/base/io-engine.hpp @@ -96,6 +96,27 @@ public: static IoEngine& Get(); + /** + * Checks whether the given strand is currently running in the calling thread. + * + * This is a simple wrapper around @c running_in_this_thread() with a small but significant difference: + * It is marked as @c noinline to prevent the compiler from ever inlining the call to this function and + * thus potentially optimizing away the thread-local storage access that is required for this function + * to work correctly. This is especially important for the case where the caller is a coroutine that has + * suspension points between calls to this function, which may cause the compiler to assume that the + * thread-local access performed by @c running_in_this_thread() is invariant across these suspensions and + * thus optimize it by caching the result in a register or on the stack, which would lead to incorrect + * results after resuming the coroutine on a different thread. For more details, see [^1][^2][^3]. 
+ * + * [^1]: https://github.com/chriskohlhoff/asio/issues/1366 + * [^2]: https://bugs.llvm.org/show_bug.cgi?id=19177 + * [^3]: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=26461 + */ + BOOST_NOINLINE static bool IsStrandRunningOnThisThread(const boost::asio::io_context::strand& strand) + { + return strand.running_in_this_thread(); + } + boost::asio::io_context& GetIoContext(); static inline size_t GetCoroutineStackSize() { @@ -237,7 +258,7 @@ public: Timeout(boost::asio::io_context::strand& strand, const Timer::duration_type& timeoutFromNow, OnTimeout onTimeout) : m_Timer(strand.context(), timeoutFromNow), m_Cancelled(Shared>::Make(false)) { - VERIFY(strand.running_in_this_thread()); + ASSERT(IoEngine::IsStrandRunningOnThisThread(strand)); m_Timer.async_wait(boost::asio::bind_executor( strand, [cancelled = m_Cancelled, onTimeout = std::move(onTimeout)](boost::system::error_code ec) { diff --git a/lib/otel/otel.cpp b/lib/otel/otel.cpp index f2a0d6aca..42be64f3a 100644 --- a/lib/otel/otel.cpp +++ b/lib/otel/otel.cpp @@ -353,7 +353,7 @@ void OTel::ExportLoop(boost::asio::yield_context& yc) // indicate a broken connection and force a reconnect in those cases. For the `end_of_stream` case, // we downgrade the log severity to debug level since this is a normal occurrence when using an OTEL // collector compatible backend that don't honor keep-alive connections (e.g., OpenSearch Data Prepper). - if (m_Stopped || (ser && ser->code() == http::error::end_of_stream)) { + if (m_Stopped || (ser && (ser->code() == http::error::end_of_stream || ser->code() == boost::asio::error::broken_pipe))) { severity = LogDebug; } Log{severity, "OTelExporter", DiagnosticInformation(ex, false)};