From bd449f0ac823413a55069e3df9e163a4b4adbebd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 21 Aug 2025 21:39:29 +0200 Subject: [PATCH] mon/MonClient: post version request completions outside of monc_lock dispatch() is allowed to invoke the completion object in the current thread, before control returns from dispatch(). This isn't desirable when it comes to discarding version requests in MonClient::shutdown() and MonClient::_reopen_session() because completion objects could then be invoked under monc_lock. In case of MonClient::_reopen_session() in particular, this leads to an attempt to acquire monc_lock once again in MonClient::get_version() on a retry due to monc_errc::session_reset that is converted to errc::resource_unavailable_try_again: MonClient::ms_handle_reset < takes monc_lock > MonClient::_reopen_session < invokes the completion object via dispatch() with ec == monc_errc::session_reset > Objecter::CB_Objecter_GetVersion::operator() [ ec == errc::resource_unavailable_try_again ] Objecter::_wait_for_latest_osdmap MonClient::get_version < attempts to take monc_lock in the body of the lambda > The end result is either a lockup or some form of undefined behavior. The best possible outcome here is an exception (std::system_error with "Resource deadlock avoided" error) and a successive call to std::terminate(). This is a regression introduced in commit e81d4eae4e76 ("common/async: Update `use_blocked` for newer asio"). Revert to posting version request completions for the error cases in a way that is uniform with the success case in MonClient::handle_get_version_reply(). Fixes: https://tracker.ceph.com/issues/72692 Signed-off-by: Ilya Dryomov --- src/mon/MonClient.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc index 132be34c63332..164dd28dc76d4 100644 --- a/src/mon/MonClient.cc +++ b/src/mon/MonClient.cc @@ -535,11 +535,11 @@ void MonClient::shutdown() monc_lock.lock(); stopping = true; while (!version_requests.empty()) { - asio::dispatch( - asio::append(std::move(version_requests.begin()->second), - make_error_code(monc_errc::shutting_down), 0, 0)); ldout(cct, 20) << __func__ << " canceling and discarding version request " << version_requests.begin()->first << dendl; + asio::post(service.get_executor(), + asio::append(std::move(version_requests.begin()->second), + make_error_code(monc_errc::shutting_down), 0, 0)); version_requests.erase(version_requests.begin()); } while (!mon_commands.empty()) { @@ -769,9 +769,11 @@ void MonClient::_reopen_session(int rank) // throw out version check requests while (!version_requests.empty()) { - asio::dispatch(asio::append(std::move(version_requests.begin()->second), - make_error_code(monc_errc::session_reset), - 0, 0)); + ldout(cct, 20) << __func__ << " canceling and discarding version request " + << version_requests.begin()->first << dendl; + asio::post(service.get_executor(), + asio::append(std::move(version_requests.begin()->second), + make_error_code(monc_errc::session_reset), 0, 0)); version_requests.erase(version_requests.begin()); } -- 2.39.5