]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/async: race condition between reset_recv_state and shutdown_connections 65785/head
author Nitzan Mordechai <nmordech@redhat.com>
Tue, 10 Dec 2024 09:04:34 +0000 (09:04 +0000)
committer Nitzan Mordechai <nmordech@redhat.com>
Sun, 5 Oct 2025 10:56:58 +0000 (10:56 +0000)
When shutting down monitors while valgrind is involved, we can
sometimes hit a race condition and locking issues that cause the
shutdown process to hang for a long time.

reset_recv_state was issuing a log message without holding the proper
locks, which caused the shutdown to hang during shutdown_connections
(draining the network).

Fixes: https://tracker.ceph.com/issues/63501
Signed-off-by: Nitzan Mordechai <nmordech@redhat.com>
(cherry picked from commit b800149243b593ff7946d9a5df23f5a49247c0fd)

src/msg/async/ProtocolV1.cc
src/msg/async/ProtocolV2.cc

index b45ad8ca5155f37dcadbe2ef7b1dffa4b5122deb..bcab3cfc63fbdb801d240826b418be5351f4f2d9 100644 (file)
@@ -1277,11 +1277,11 @@ void ProtocolV1::reset_recv_state()
   // `write_message()`. `submit_to()` here is NOT blocking.
   if (!connection->center->in_thread()) {
     connection->center->submit_to(connection->center->get_id(), [this] {
-      ldout(cct, 5) << "reset_recv_state (warped) reseting security handlers"
-                    << dendl;
       // Possibly unnecessary. See the comment in `deactivate_existing`.
       std::lock_guard<std::mutex> l(connection->lock);
       std::lock_guard<std::mutex> wl(connection->write_lock);
+      ldout(cct, 5) << "reset_recv_state (warped) reseting security handlers"
+                    << dendl;
       reset_security();
     }, /* always_async = */true);
   } else {
index 08426b796b88b16c9e0142a7dc7d58d2d8d071f7..347fa27986c04c0f9c014acb8eccf6eb074e04d8 100644 (file)
@@ -250,11 +250,11 @@ void ProtocolV2::reset_recv_state() {
     // `write_event()` unlocks it just before calling `write_message()`.
     // `submit_to()` here is NOT blocking.
     connection->center->submit_to(connection->center->get_id(), [this] {
-      ldout(cct, 5) << "reset_recv_state (warped) reseting crypto and compression handlers"
-                    << dendl;
       // Possibly unnecessary. See the comment in `deactivate_existing`.
       std::lock_guard<std::mutex> l(connection->lock);
       std::lock_guard<std::mutex> wl(connection->write_lock);
+      ldout(cct, 5) << "reset_recv_state (warped) reseting crypto and compression handlers"
+                    << dendl;
       reset_security();
       reset_compression();
     }, /* always_async = */true);