]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: fix stuck recovery 62812/head
authorMatan Breizman <mbreizma@redhat.com>
Tue, 15 Apr 2025 07:35:31 +0000 (07:35 +0000)
committerMatan Breizman <mbreizma@redhat.com>
Tue, 15 Apr 2025 09:42:41 +0000 (09:42 +0000)
https://github.com/ceph/ceph/pull/62080 tested version was **different**
from the one that got merged.
The untested change was changing the boolean returned from start_recovery_ops.
While the seastar::repeat loop in BackgroundRecoveryT<T>::start() was changed accordingly,
other do_recovery() return cases were not considered.

See Tested / Merged here: https://github.com/Matan-B/ceph/pull/2/files

start_recovery_ops used by do_recovery should return whether the iteration (i.e recovery) keep going.

_Note: This has caused a regression in our suite_

Signed-off-by: Matan Breizman <mbreizma@redhat.com>
src/crimson/osd/osd_operations/background_recovery.cc
src/crimson/osd/pg_recovery.cc

index dbfeab07e6b69978b94387b151b082a1db2307d6..20e80bf1b84ecbb0ec063968b0b15d57a95ddc7c 100644 (file)
@@ -82,11 +82,11 @@ seastar::future<> BackgroundRecoveryT<T>::start()
        return do_recovery();
       }, [](std::exception_ptr) {
        return seastar::make_ready_future<bool>(false);
-      }, pg, epoch_started).then([](bool recovery_done) {
-       if (recovery_done) {
-         return seastar::stop_iteration::yes;
-       } else {
+      }, pg, epoch_started).then([](bool do_recovery) {
+       if (do_recovery) {
          return seastar::stop_iteration::no;
+       } else {
+         return seastar::stop_iteration::yes;
        }
       });
     });
index f219411acda9d660faf52ec584413ab7549e35c0..2b28f7e4f0f4b44e7b72aa62a762b4c4cb42cd5d 100644 (file)
@@ -82,8 +82,8 @@ PGRecovery::start_recovery_ops(
     ceph_assert(pg->is_recovering());
     ceph_assert(!pg->is_backfilling());
 
-    bool done = !pg->get_peering_state().needs_recovery();
-    if (done) {
+    bool do_recovery = pg->get_peering_state().needs_recovery();
+    if (!do_recovery) {
       logger().debug("start_recovery_ops: AllReplicasRecovered for pg: {}",
                      pg->get_pgid());
       using LocalPeeringEvent = crimson::osd::LocalPeeringEvent;
@@ -110,7 +110,7 @@ PGRecovery::start_recovery_ops(
       }
       pg->reset_pglog_based_recovery_op();
     }
-    return seastar::make_ready_future<bool>(done);
+    return seastar::make_ready_future<bool>(do_recovery);
   });
 }