From 8451cd2b4c8bdd861c4edb56e580723db769ba60 Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Tue, 15 Apr 2025 07:35:31 +0000 Subject: [PATCH] crimson/osd: fix stuck recovery https://github.com/ceph/ceph/pull/62080 tested version was **different** from the one that got merged. The untested change was changing the boolean returned from start_recovery_ops. While the seastar::repeat loop in BackgroundRecoveryT::start() was changed accordingly, other do_recovery() return cases were not considered. See Tested / Merged here: https://github.com/Matan-B/ceph/pull/2/files start_recovery_ops used by do_recovery should return whether the iteration (i.e recovery) keep going. _Note: This has caused a regression in our suite_ Signed-off-by: Matan Breizman --- src/crimson/osd/osd_operations/background_recovery.cc | 8 ++++---- src/crimson/osd/pg_recovery.cc | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/crimson/osd/osd_operations/background_recovery.cc b/src/crimson/osd/osd_operations/background_recovery.cc index dbfeab07e6b69..20e80bf1b84ec 100644 --- a/src/crimson/osd/osd_operations/background_recovery.cc +++ b/src/crimson/osd/osd_operations/background_recovery.cc @@ -82,11 +82,11 @@ seastar::future<> BackgroundRecoveryT::start() return do_recovery(); }, [](std::exception_ptr) { return seastar::make_ready_future(false); - }, pg, epoch_started).then([](bool recovery_done) { - if (recovery_done) { - return seastar::stop_iteration::yes; - } else { + }, pg, epoch_started).then([](bool do_recovery) { + if (do_recovery) { return seastar::stop_iteration::no; + } else { + return seastar::stop_iteration::yes; } }); }); diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc index f219411acda9d..2b28f7e4f0f4b 100644 --- a/src/crimson/osd/pg_recovery.cc +++ b/src/crimson/osd/pg_recovery.cc @@ -82,8 +82,8 @@ PGRecovery::start_recovery_ops( ceph_assert(pg->is_recovering()); ceph_assert(!pg->is_backfilling()); - bool done = !pg->get_peering_state().needs_recovery(); - if (done) { + bool do_recovery = pg->get_peering_state().needs_recovery(); + if (!do_recovery) { logger().debug("start_recovery_ops: AllReplicasRecovered for pg: {}", pg->get_pgid()); using LocalPeeringEvent = crimson::osd::LocalPeeringEvent; @@ -110,7 +110,7 @@ PGRecovery::start_recovery_ops( } pg->reset_pglog_based_recovery_op(); } - return seastar::make_ready_future(done); + return seastar::make_ready_future(do_recovery); }); } -- 2.39.5