git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd/osd_operations/background_recovery: mark PGs as unfound when
author: Xuehan Xu <xuxuehan@qianxin.com>
Tue, 30 Apr 2024 06:54:10 +0000 (14:54 +0800)
committer: Xuehan Xu <xxhdx1985126@gmail.com>
Mon, 10 Jun 2024 04:03:05 +0000 (12:03 +0800)
necessary

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/osd/osd_operations/background_recovery.cc
src/crimson/osd/pg.cc
src/crimson/osd/pg.h

index f74933ec26600228cd49513fc8d30825956ad484..ac94ea7eb8894007a626e9ab744bf0cc14903088 100644 (file)
@@ -116,15 +116,19 @@ UrgentRecovery::do_recovery()
 {
   LOG_PREFIX(UrgentRecovery::do_recovery);
   DEBUGDPPI("{}: {}", *pg, __func__, *this);
-  if (!pg->has_reset_since(epoch_started)) {
+  if (pg->has_reset_since(epoch_started)) {
+    return seastar::make_ready_future<bool>(false);
+  }
+
+  return pg->find_unfound(epoch_started
+  ).then_interruptible([this] {
     return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent,
                               interruptor>([this] (auto&& trigger) {
       return pg->get_recovery_handler()->recover_missing(trigger, soid, need);
     }).then_interruptible([] {
       return seastar::make_ready_future<bool>(false);
     });
-  }
-  return seastar::make_ready_future<bool>(false);
+  });
 }
 
 void UrgentRecovery::print(std::ostream &lhs) const
@@ -164,11 +168,14 @@ PglogBasedRecovery::do_recovery()
   if (pg->has_reset_since(epoch_started)) {
     return seastar::make_ready_future<bool>(false);
   }
-  return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent,
-                            interruptor>([this] (auto&& trigger) {
-    return pg->get_recovery_handler()->start_recovery_ops(
-      trigger,
-      crimson::common::local_conf()->osd_recovery_max_single_start);
+  return pg->find_unfound(epoch_started
+  ).then_interruptible([this] {
+    return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent,
+                              interruptor>([this] (auto&& trigger) {
+      return pg->get_recovery_handler()->start_recovery_ops(
+       trigger,
+       crimson::common::local_conf()->osd_recovery_max_single_start);
+    });
   });
 }
 
index 06749df2af63658a359c771c009e6fbc04030a4f..dd2f49744128a33cdf0c836a585b072ec7248c3f 100644 (file)
@@ -244,6 +244,40 @@ void PG::queue_check_readable(epoch_t last_peering_reset, ceph::timespan delay)
     std::chrono::duration_cast<seastar::lowres_clock::duration>(delay));
 }
 
+PG::interruptible_future<> PG::find_unfound(epoch_t epoch_started)
+{
+  if (!have_unfound()) {
+    return interruptor::now();
+  }
+  PeeringCtx rctx;
+  if (!peering_state.discover_all_missing(rctx)) {
+    if (peering_state.state_test(PG_STATE_BACKFILLING)) {
+      logger().debug(
+        "{} {} no luck, giving up on this pg for now (in backfill)",
+        *this, __func__);
+      std::ignore = get_shard_services().start_operation<LocalPeeringEvent>(
+        this,
+        get_pg_whoami(),
+        get_pgid(),
+        epoch_started,
+        epoch_started,
+        PeeringState::UnfoundBackfill());
+    } else if (peering_state.state_test(PG_STATE_RECOVERING)) {
+      logger().debug(
+        "{} {} no luck, giving up on this pg for now (in recovery)",
+        *this, __func__);
+      std::ignore = get_shard_services().start_operation<LocalPeeringEvent>(
+        this,
+        get_pg_whoami(),
+        get_pgid(),
+        epoch_started,
+        epoch_started,
+        PeeringState::UnfoundRecovery());
+    }
+  }
+  return get_shard_services().dispatch_context(get_collection_ref(), std::move(rctx));
+}
+
 void PG::recheck_readable()
 {
   bool changed = false;
index 9f49422bd1d06c8ffb914337705a4231ea510df6..bf853969acda8e961f03e61926a43ef4dafb098f 100644 (file)
@@ -734,6 +734,10 @@ public:
     // TODO: see PrimaryLogPG::mark_all_unfound_lost()
     return seastar::now();
   }
+  interruptible_future<> find_unfound(epoch_t epoch_started);
+  bool have_unfound() const {
+    return peering_state.have_unfound();
+  }
 
   bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch) const;
 
@@ -766,9 +770,6 @@ private:
   friend class SnapTrimEvent;
   friend class SnapTrimObjSubEvent;
 private:
-  seastar::future<bool> find_unfound() {
-    return seastar::make_ready_future<bool>(true);
-  }
 
   bool can_discard_replica_op(const Message& m, epoch_t m_map_epoch) const;
   bool can_discard_op(const MOSDOp& m) const;