]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
msg/async: race condition between reset_recv_state and shutdown_connections
author Nitzan Mordechai <nmordech@redhat.com>
Tue, 10 Dec 2024 09:04:34 +0000 (09:04 +0000)
committer Nitzan Mordechai <nmordech@redhat.com>
Sun, 5 Oct 2025 10:59:23 +0000 (10:59 +0000)
When shutting down monitors while valgrind is involved, we can
sometimes hit a race condition and locks that cause the shutdown
process to hang for a long time.

reset_recv_state - issues a log message without holding the proper locks,
which causes the shutdown to hang during shutdown_connections (drain network).

Fixes: https://tracker.ceph.com/issues/63501
Signed-off-by: Nitzan Mordechai <nmordech@redhat.com>
(cherry picked from commit b800149243b593ff7946d9a5df23f5a49247c0fd)

src/msg/async/ProtocolV1.cc
src/msg/async/ProtocolV2.cc

index 041942fd906ac3ba34117a1efe2d6c157149ce2c..a73d173ae6016b28ee0004f4abe980d6f96dfd7a 100644 (file)
@@ -1282,11 +1282,11 @@ void ProtocolV1::reset_recv_state()
   // `write_message()`. `submit_to()` here is NOT blocking.
   if (!connection->center->in_thread()) {
     connection->center->submit_to(connection->center->get_id(), [this] {
-      ldout(cct, 5) << "reset_recv_state (warped) reseting security handlers"
-                    << dendl;
       // Possibly unnecessary. See the comment in `deactivate_existing`.
       std::lock_guard<std::mutex> l(connection->lock);
       std::lock_guard<std::mutex> wl(connection->write_lock);
+      ldout(cct, 5) << "reset_recv_state (warped) reseting security handlers"
+                    << dendl;
       reset_security();
     }, /* always_async = */true);
   } else {
index 7c4a4d0fe94137df39e12234ecfd986e1dbb3979..3631898da8ed83bab0829d3b4c261c99081054d5 100644 (file)
@@ -250,11 +250,11 @@ void ProtocolV2::reset_recv_state() {
     // `write_event()` unlocks it just before calling `write_message()`.
     // `submit_to()` here is NOT blocking.
     connection->center->submit_to(connection->center->get_id(), [this] {
-      ldout(cct, 5) << "reset_recv_state (warped) reseting crypto and compression handlers"
-                    << dendl;
       // Possibly unnecessary. See the comment in `deactivate_existing`.
       std::lock_guard<std::mutex> l(connection->lock);
       std::lock_guard<std::mutex> wl(connection->write_lock);
+      ldout(cct, 5) << "reset_recv_state (warped) reseting crypto and compression handlers"
+                    << dendl;
       reset_security();
       reset_compression();
     }, /* always_async = */true);