]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds/quiesce-agt: never send a synchronous ack
authorLeonid Usov <leonid.usov@ibm.com>
Sun, 26 May 2024 08:16:48 +0000 (11:16 +0300)
committerLeonid Usov <leonid.usov@ibm.com>
Sun, 26 May 2024 08:33:52 +0000 (11:33 +0300)
Defer to the agent thread to perform all acking.
This avoids race conditions between the updating thread
and the acking thread.

Fixes: https://tracker.ceph.com/issues/66219
Signed-off-by: Leonid Usov <leonid.usov@ibm.com>
src/mds/QuiesceAgent.cc
src/test/mds/TestQuiesceAgent.cc

index 6bc6770e31046f69c2258a5d07935124e4178ccb..3a01910cb264bb2423cd52a8b0e720a2be30597c 100644 (file)
@@ -82,8 +82,9 @@ bool QuiesceAgent::db_update(QuiesceMap& map)
   // ack with the known state stored in `map`
   set_pending_roots(map.db_version, std::move(new_roots));
 
-  // always send a synchronous ack
-  return true;
+  // to avoid ack races with the agent_thread,
+  // never send a synchronous ack
+  return false;
 }
 
 void* QuiesceAgent::agent_thread_main() {
index 130d319403ab444aaa1c97c4eabd67171ca1a047..aa108075faa59fec54bde8a62f4cb83caf9788b6 100644 (file)
@@ -278,16 +278,19 @@ TEST_F(QuiesceAgentTest, DbUpdates) {
   
     // manipulate root0 and root1 as if they were quiesced and root2 as if it was released
     auto& root0 = *roots.at("root0");
-    root0.quiesce_result = 0;
-    EXPECT_EQ(QS_QUIESCED, root0.get_actual_state());
+    complete_quiesce("root0", 0);
 
     auto& root1 = *roots.at("root1");
-    root1.quiesce_result = 0;
-    EXPECT_EQ(QS_QUIESCED, root1.get_actual_state());
+    complete_quiesce("root1", 0);
 
     auto& root2 = *roots.at("root2");
-    root2.quiesce_result = 0;
-    root2.cancel_result = 0;
+    complete_quiesce("root2", 0);
+    root2.cancel_result = root2.cancel(*root2.quiesce_request);
+
+    EXPECT_TRUE(await_idle());
+
+    EXPECT_EQ(QS_QUIESCED, root0.get_actual_state());
+    EXPECT_EQ(QS_QUIESCED, root1.get_actual_state());
     EXPECT_EQ(QS_RELEASED, root2.get_actual_state());
   }
 
@@ -501,13 +504,10 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) {
       { "root1", QS_QUIESCING },
       { "root2", QS_QUIESCING },
       { "root3", QS_QUIESCING },
-    });
+    }, WaitForAgent::No);
 
-    ASSERT_TRUE(ack.has_value());
-    EXPECT_EQ(3, ack->db_version);
-    // even though root1 is already quiesced,
-    // we should not know about it synchronously
-    EXPECT_EQ(0, ack->roots.size());
+    // no sync update
+    EXPECT_FALSE(ack.has_value());
   }
 
   EXPECT_TRUE(await_idle());
@@ -600,21 +600,17 @@ TEST_F(QuiesceAgentTest, RapidDbUpdates)
     auto ack = update(2, {
                              { "root1", QS_QUIESCING },
                              { "root2", QS_QUIESCING },
-                         });
+                         }, WaitForAgent::No);
 
-    ASSERT_TRUE(ack.has_value());
-    EXPECT_EQ(2, ack->db_version);
-    EXPECT_EQ(0, ack->roots.size());
+    EXPECT_FALSE(ack.has_value());
   };
 
   {
     auto ack = update(1, {
                              { "root1", QS_QUIESCING },
-                         });
+                         }, WaitForAgent::No);
 
-    ASSERT_TRUE(ack.has_value());
-    EXPECT_EQ(1, ack->db_version);
-    EXPECT_EQ(0, ack->roots.size());
+    EXPECT_FALSE(ack.has_value());
   }
 
   EXPECT_TRUE(await_idle_v(2));