git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
msg/async: race condition between reset_recv_state and shutdown_connections
author: Nitzan Mordechai <nmordech@redhat.com>
Tue, 10 Dec 2024 09:04:34 +0000 (09:04 +0000)
committer: Nitzan Mordechai <nmordech@redhat.com>
Sun, 15 Dec 2024 11:07:01 +0000 (11:07 +0000)
When shutting down monitors while running under valgrind, we can
sometimes hit a race condition and locks that cause the shutdown
process to hang for a long time.

reset_recv_state issues a log message without holding the proper locks,
which causes the shutdown to hang while shutting down connections
(draining the network).

Fixes: https://tracker.ceph.com/issues/63501
Signed-off-by: Nitzan Mordechai <nmordech@redhat.com>
src/msg/async/ProtocolV1.cc
src/msg/async/ProtocolV2.cc

index a53f6389c3101da81541ea93fbaf69802e5da269..17d3dcfbe2c8aece204164a8a0a8ccc8a1b7a10b 100644 (file)
@@ -1281,11 +1281,11 @@ void ProtocolV1::reset_recv_state()
   // `write_message()`. `submit_to()` here is NOT blocking.
   if (!connection->center->in_thread()) {
     connection->center->submit_to(connection->center->get_id(), [this] {
-      ldout(cct, 5) << "reset_recv_state (warped) reseting security handlers"
-                    << dendl;
       // Possibly unnecessary. See the comment in `deactivate_existing`.
       std::lock_guard<std::mutex> l(connection->lock);
       std::lock_guard<std::mutex> wl(connection->write_lock);
+      ldout(cct, 5) << "reset_recv_state (warped) reseting security handlers"
+                    << dendl;
       reset_security();
     }, /* always_async = */true);
   } else {
index ed6f93cdd481065f86c2b2bf8d3aecd2747aa45f..fd1dfb5470e2c6b89678e5f7351d2320718f78fd 100644 (file)
@@ -251,11 +251,11 @@ void ProtocolV2::reset_recv_state() {
     // `write_event()` unlocks it just before calling `write_message()`.
     // `submit_to()` here is NOT blocking.
     connection->center->submit_to(connection->center->get_id(), [this] {
-      ldout(cct, 5) << "reset_recv_state (warped) reseting crypto and compression handlers"
-                    << dendl;
       // Possibly unnecessary. See the comment in `deactivate_existing`.
       std::lock_guard<std::mutex> l(connection->lock);
       std::lock_guard<std::mutex> wl(connection->write_lock);
+      ldout(cct, 5) << "reset_recv_state (warped) reseting crypto and compression handlers"
+                    << dendl;
       reset_security();
       reset_compression();
     }, /* always_async = */true);