]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson: Move send_to_osd call outside with_obc lambda 65849/head
authorMohit Agrawal <moagrawa@redhat.com>
Thu, 16 Oct 2025 10:58:08 +0000 (16:28 +0530)
committerMohit Agrawal <moagrawa@redhat.com>
Wed, 5 Nov 2025 23:57:34 +0000 (05:27 +0530)
The with_obc() function acquires a lock before invoking the
lambda it wraps. Previously, the lambda itself called send_to_osd(),
which returned a future to with_obc(). If that future was not
resolved immediately, a response could arrive and trigger
handle_pull_response(), which attempts to acquire an exclusive lock.

Because the future has not yet been returned to with_obc(), the
original lock is still held by with_obc(), and handle_pull_response()
hits an assertion failure, causing the OSD to crash.

Solution: Move the send_to_osd() call outside the with_obc() lambda
so that the lock is released before handle_pull_response() is triggered.

Fixes: https://tracker.ceph.com/issues/71861
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
src/crimson/osd/replicated_recovery_backend.cc

index 503d5283bd679b0ff908fb0e410bf6afb363a811..5503063cbc9d810d06fb6235755d73034f34a1e9 100644 (file)
@@ -132,29 +132,37 @@ ReplicatedRecoveryBackend::maybe_pull_missing_obj(
     using prepare_pull_iertr =
       crimson::osd::ObjectContextLoader::load_obc_iertr::extend<
         crimson::ct_error::eagain>;
-    return pg.obc_loader.with_obc<RWState::RWREAD>(soid.get_head(),
-      [this, soid, need](auto head, auto) {
-      PullOp pull_op;
-      auto& recovery_waiter = get_recovering(soid);
-      recovery_waiter.pull_info =
-        std::make_optional<RecoveryBackend::pull_info_t>();
-      auto& pull_info = *recovery_waiter.pull_info;
-      prepare_pull(head, pull_op, pull_info, soid, need);
-      auto msg = crimson::make_message<MOSDPGPull>();
-      msg->from = pg.get_pg_whoami();
-      msg->set_priority(pg.get_recovery_op_priority());
-      msg->pgid = pg.get_pgid();
-      msg->map_epoch = pg.get_osdmap_epoch();
-      msg->min_epoch = pg.get_last_peering_reset();
-      msg->set_pulls({std::move(pull_op)});
-      return shard_services.send_to_osd(
-        pull_info.from.osd,
-        std::move(msg),
-        pg.get_osdmap_epoch());
-    }).si_then([this, soid]() -> prepare_pull_iertr::future<> {
-      auto& recovery_waiter = get_recovering(soid);
-      return recovery_waiter.wait_for_pull();
-    });
+
+    return seastar::do_with(
+      PullOp{},
+      [this, soid, need](PullOp &pull_op) {
+        return pg.obc_loader.with_obc<RWState::RWREAD>(soid.get_head(),
+          [this, soid, need, &pull_op](auto head, auto) {
+          auto& recovery_waiter = get_recovering(soid);
+          recovery_waiter.pull_info.emplace();
+          auto &pull_info = *recovery_waiter.pull_info;
+          prepare_pull(head, pull_op, pull_info, soid, need);
+          return seastar::now();
+        }).si_then([this, soid, &pull_op]() {
+            auto& recovery_waiter = get_recovering(soid);
+            auto &pull_info = *recovery_waiter.pull_info;
+            auto msg = crimson::make_message<MOSDPGPull>();
+            msg->from = pg.get_pg_whoami();
+            msg->set_priority(pg.get_recovery_op_priority());
+            msg->pgid = pg.get_pgid();
+            msg->map_epoch = pg.get_osdmap_epoch();
+            msg->min_epoch = pg.get_last_peering_reset();
+            msg->set_pulls({std::move(pull_op)});
+            return shard_services.send_to_osd(
+              pull_info.from.osd,
+              std::move(msg),
+              pg.get_osdmap_epoch()
+            ).then([this, soid]() -> prepare_pull_iertr::future<> {
+                auto& recovery_waiter = get_recovering(soid);
+                return recovery_waiter.wait_for_pull();
+              });
+           });
+       });
   }).handle_error_interruptible(
     crimson::ct_error::assert_all(fmt::format(
       "{} {} error with {} need {} ", pg, FNAME, soid, need).c_str())