From 278ace1bc4abf7a91a7e0932091353b19da11222 Mon Sep 17 00:00:00 2001 From: Aishwarya Mathuria Date: Tue, 14 Apr 2026 13:29:36 +0530 Subject: [PATCH] crimson/osd: fix race between AllReplicasRecovered and DeferRecovery MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fixes a crash where AllReplicasRecovered event arrives at NotRecovering state due to async event delivery race with DeferRecovery preemption. The issue occurs when: 1. Recovery completes and AllReplicasRecovered is queued asynchronously 2. A higher priority operation (e.g., client I/O) triggers AsyncReserver to preempt recovery, posting DeferRecovery event 3. DeferRecovery is processed first, transitioning PG to NotRecovering 4. AllReplicasRecovered arrives at wrong state → crash with "bad state machine event" because NotRecovering doesn't handle it The fix follows Classic OSD's approach in PrimaryLogPG::start_recovery_ops(): clear PG_STATE_RECOVERING before posting recovery completion events. This makes the existing safety check in PeeringState::Recovering::react() work: when DeferRecovery arrives and sees !state_test(PG_STATE_RECOVERING), it discards itself, preventing the state transition that would cause the crash. Fixes:https://tracker.ceph.com/issues/73314 Signed-off-by: Aishwarya Mathuria --- src/crimson/osd/pg_recovery.cc | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc index 0ff4abc8518f..cac3bfc53df2 100644 --- a/src/crimson/osd/pg_recovery.cc +++ b/src/crimson/osd/pg_recovery.cc @@ -83,15 +83,24 @@ PGRecovery::start_recovery_ops( ceph_assert(!pg->is_backfilling()); if (!pg->get_peering_state().needs_recovery()) { + /* Clear PG_STATE_RECOVERING before posting recovery completion events + * to prevent race condition with DeferRecovery. + * See https://tracker.ceph.com/issues/73314. + * + * This matches the Classic OSD approach in PrimaryLogPG::start_recovery_ops(). + * When DeferRecovery arrives, it checks !state_test(PG_STATE_RECOVERING) + * and discards itself if the flag is already clear, preventing the crash + * that occurs when AllReplicasRecovered arrives at NotRecovering state. */ + pg->get_peering_state().state_clear(PG_STATE_RECOVERING); + pg->get_peering_state().state_clear(PG_STATE_FORCED_RECOVERY); + if (pg->get_peering_state().needs_backfill()) { + DEBUGDPP("recovery done, queuing backfill", *pg->get_dpp()); request_backfill(); } else { + DEBUGDPP("recovery done, no backfill", *pg->get_dpp()); all_replicas_recovered(); } - /* TODO: this is racy -- it's possible for a DeferRecovery - * event to be processed between this call and when the - * async RequestBackfill or AllReplicasRecovered events - * are processed -- see https://tracker.ceph.com/issues/71267 */ pg->reset_pglog_based_recovery_op(); co_return seastar::stop_iteration::yes; } -- 2.47.3