git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/osd/pg_recovery: backoff if the recovery/backfill is deferred
author Xuehan Xu <xuxuehan@qianxin.com>
Wed, 10 Apr 2024 06:51:47 +0000 (14:51 +0800)
committer Xuehan Xu <xuxuehan@qianxin.com>
Mon, 29 Apr 2024 09:27:21 +0000 (17:27 +0800)
Fixes: https://tracker.ceph.com/issues/65399
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/osd/pg_recovery.cc

index 13ac069c63d4166eaae66c4eb2aacf99792635f5..b2f813447b3710a76e209e0f628d3a4b54fd2904 100644 (file)
@@ -42,7 +42,12 @@ PGRecovery::start_recovery_ops(
 {
   assert(pg->is_primary());
   assert(pg->is_peered());
-  assert(pg->is_recovering());
+
+  if (!pg->is_recovering() && !pg->is_backfilling()) {
+    logger().debug("recovery raced and were queued twice, ignoring!");
+    return seastar::make_ready_future<bool>(false);
+  }
+
   // in ceph-osd the do_recovery() path handles both the pg log-based
   // recovery and the backfill, albeit they are separated at the layer
   // of PeeringState. In crimson-osd backfill has been cut from it, so
@@ -64,6 +69,12 @@ PGRecovery::start_recovery_ops(
                                        [] (auto&& ifut) {
     return std::move(ifut);
   }).then_interruptible([this] {
+    //TODO: maybe we should implement a recovery race interruptor in the future
+    if (!pg->is_recovering() && !pg->is_backfilling()) {
+      logger().debug("recovery raced and were queued twice, ignoring!");
+      return seastar::make_ready_future<bool>(false);
+    }
+
     bool done = !pg->get_peering_state().needs_recovery();
     if (done) {
       logger().debug("start_recovery_ops: AllReplicasRecovered for pg: {}",