git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd/osd_operations/background_recovery: mark PGs as unfound when necessary
author Xuehan Xu <xuxuehan@qianxin.com>
Tue, 30 Apr 2024 06:54:10 +0000 (14:54 +0800)
committer Matan Breizman <mbreizma@redhat.com>
Thu, 25 Jul 2024 07:29:05 +0000 (10:29 +0300)

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
(cherry picked from commit c5886d926cac4abb6eb0ec328d78f5cbd42c8f4c)

src/crimson/osd/osd_operations/background_recovery.cc
src/crimson/osd/pg.cc
src/crimson/osd/pg.h

index f74933ec26600228cd49513fc8d30825956ad484..ac94ea7eb8894007a626e9ab744bf0cc14903088 100644 (file)
@@ -116,15 +116,19 @@ UrgentRecovery::do_recovery()
 {
   LOG_PREFIX(UrgentRecovery::do_recovery);
   DEBUGDPPI("{}: {}", *pg, __func__, *this);
-  if (!pg->has_reset_since(epoch_started)) {
+  if (pg->has_reset_since(epoch_started)) {
+    return seastar::make_ready_future<bool>(false);
+  }
+
+  return pg->find_unfound(epoch_started
+  ).then_interruptible([this] {
     return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent,
                               interruptor>([this] (auto&& trigger) {
       return pg->get_recovery_handler()->recover_missing(trigger, soid, need);
     }).then_interruptible([] {
       return seastar::make_ready_future<bool>(false);
     });
-  }
-  return seastar::make_ready_future<bool>(false);
+  });
 }
 
 void UrgentRecovery::print(std::ostream &lhs) const
@@ -164,11 +168,14 @@ PglogBasedRecovery::do_recovery()
   if (pg->has_reset_since(epoch_started)) {
     return seastar::make_ready_future<bool>(false);
   }
-  return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent,
-                            interruptor>([this] (auto&& trigger) {
-    return pg->get_recovery_handler()->start_recovery_ops(
-      trigger,
-      crimson::common::local_conf()->osd_recovery_max_single_start);
+  return pg->find_unfound(epoch_started
+  ).then_interruptible([this] {
+    return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent,
+                              interruptor>([this] (auto&& trigger) {
+      return pg->get_recovery_handler()->start_recovery_ops(
+       trigger,
+       crimson::common::local_conf()->osd_recovery_max_single_start);
+    });
   });
 }
 
index ad777c49d60118b66973abd10116d61196173294..cebc12f6accc0cb9c595cacdeaceaee1f26bcb95 100644 (file)
@@ -244,6 +244,40 @@ void PG::queue_check_readable(epoch_t last_peering_reset, ceph::timespan delay)
     std::chrono::duration_cast<seastar::lowres_clock::duration>(delay));
 }
 
+PG::interruptible_future<> PG::find_unfound(epoch_t epoch_started)  // Runs discover_all_missing() when the PG has unfound objects, then dispatches the resulting PeeringCtx.
+{  // epoch_started is only forwarded to the LocalPeeringEvent queued below; it is not compared against anything here.
+  if (!have_unfound()) {  // nothing unfound: return an already-resolved future without building a PeeringCtx
+    return interruptor::now();
+  }
+  PeeringCtx rctx;  // filled by discover_all_missing() and handed to dispatch_context() at the end
+  if (!peering_state.discover_all_missing(rctx)) {  // NOTE(review): a false return presumably means no further source could be queried -- confirm in PeeringState
+    if (peering_state.state_test(PG_STATE_BACKFILLING)) {  // backfilling: report the failure via an UnfoundBackfill event
+      logger().debug(
+        "{} {} no luck, giving up on this pg for now (in backfill)",
+        *this, __func__);
+      std::ignore = get_shard_services().start_operation<LocalPeeringEvent>(  // result deliberately discarded; this function does not wait on the event
+        this,
+        get_pg_whoami(),
+        get_pgid(),
+        epoch_started,  // NOTE(review): same epoch passed twice, presumably as both sent and requested epoch -- confirm against LocalPeeringEvent's constructor
+        epoch_started,
+        PeeringState::UnfoundBackfill());
+    } else if (peering_state.state_test(PG_STATE_RECOVERING)) {  // recovering: same handling, but with an UnfoundRecovery event
+      logger().debug(
+        "{} {} no luck, giving up on this pg for now (in recovery)",
+        *this, __func__);
+      std::ignore = get_shard_services().start_operation<LocalPeeringEvent>(  // result deliberately discarded, as in the backfill branch
+        this,
+        get_pg_whoami(),
+        get_pgid(),
+        epoch_started,
+        epoch_started,
+        PeeringState::UnfoundRecovery());
+    }  // in neither state: no event is queued and nothing is logged
+  }
+  return get_shard_services().dispatch_context(get_collection_ref(), std::move(rctx));  // reached on every path past the early return, whether or not discovery succeeded
+}
+
 void PG::recheck_readable()
 {
   bool changed = false;
index a6a287e6223af6b36b9ddf0e00d82389a2702bbe..1f35e77bdc7d7bb284934a89418aa04309bb2c64 100644 (file)
@@ -739,6 +739,10 @@ public:
     // TODO: see PrimaryLogPG::mark_all_unfound_lost()
     return seastar::now();
   }
+  interruptible_future<> find_unfound(epoch_t epoch_started);
+  bool have_unfound() const {  // thin const accessor: forwards to peering_state.have_unfound()
+    return peering_state.have_unfound();
+  }
 
   bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch) const;
 
@@ -771,9 +775,6 @@ private:
   friend class SnapTrimEvent;
   friend class SnapTrimObjSubEvent;
 private:
-  seastar::future<bool> find_unfound() {
-    return seastar::make_ready_future<bool>(true);
-  }
 
   bool can_discard_replica_op(const Message& m, epoch_t m_map_epoch) const;
   bool can_discard_op(const MOSDOp& m) const;