From: Samuel Just Date: Fri, 9 May 2025 16:46:48 +0000 (+0000) Subject: crimson/osd/pg_recovery: only reset_pglog_based_recovery_op if complete X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=c95509edcdf387f8e57416f557bd154049ef34df;p=ceph.git crimson/osd/pg_recovery: only reset_pglog_based_recovery_op if complete ce4e9aaad, as part of the start_recovery_ops changed the call to reset_pglog_based_recovery_op to occur unconditionally rather than only if recovery has completed. Note, this fix only restores the prior behavior. There's actually still a race here where a DeferRecovery could be processed between the call to reset_pglog_based_recovery_op and the RequestBackfill or AllReplicasRecovered being processed. Introduced: ce4e9aaad8f2cafae24511fe1687c61dc41affc1 Related: https://tracker.ceph.com/issues/71267 Fixes: https://tracker.ceph.com/issues/70337 Signed-off-by: Samuel Just --- diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc index 2adbd33443f24..51f9faf18577c 100644 --- a/src/crimson/osd/pg_recovery.cc +++ b/src/crimson/osd/pg_recovery.cc @@ -80,17 +80,17 @@ PGRecovery::start_recovery_ops( ceph_assert(pg->is_recovering()); ceph_assert(!pg->is_backfilling()); - // move to unnamed placeholder when C++ 26 is available - auto reset_pglog_based_recovery_op = seastar::defer([this] { - pg->reset_pglog_based_recovery_op(); - }); - if (!pg->get_peering_state().needs_recovery()) { if (pg->get_peering_state().needs_backfill()) { request_backfill(); } else { all_replicas_recovered(); } + /* TODO: this is racy -- it's possible for a DeferRecovery + * event to be processed between this call and when the + * async RequestBackfill or AllReplicasRecovered events + * are processed -- see https://tracker.ceph.com/issues/71267 */ + pg->reset_pglog_based_recovery_op(); co_return seastar::stop_iteration::yes; } co_return seastar::stop_iteration::no;