]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd/pg_recovery: only reset_pglog_based_recovery_op if complete 63125/head
authorSamuel Just <sjust@redhat.com>
Fri, 9 May 2025 16:46:48 +0000 (16:46 +0000)
committerMatan Breizman <mbreizma@redhat.com>
Sun, 11 May 2025 08:00:48 +0000 (08:00 +0000)
ce4e9aaad, as part of the start_recovery_ops changed the call to
reset_pglog_based_recovery_op to occur unconditionally rather than only
if recovery has completed.

Note, this fix only restores the prior behavior.  There's actually still
a race here where a DeferRecovery could be processed between the call to
reset_pglog_based_recovery_op and the RequestBackfill or
AllReplicasRecovered being processed.

Introduced: ce4e9aaad8f2cafae24511fe1687c61dc41affc1
Related: https://tracker.ceph.com/issues/71267
Fixes: https://tracker.ceph.com/issues/70337
Signed-off-by: Samuel Just <sjust@redhat.com>
(cherry picked from commit c95509edcdf387f8e57416f557bd154049ef34df)

src/crimson/osd/pg_recovery.cc

index 2adbd33443f24e24dc0fcbd9f06e88b91acb6641..51f9faf18577c4b7827941b4696647fa9766a5bb 100644 (file)
@@ -80,17 +80,17 @@ PGRecovery::start_recovery_ops(
   ceph_assert(pg->is_recovering());
   ceph_assert(!pg->is_backfilling());
 
-  // move to unnamed placeholder when C++ 26 is available
-  auto reset_pglog_based_recovery_op = seastar::defer([this] {
-    pg->reset_pglog_based_recovery_op();
-  });
-
   if (!pg->get_peering_state().needs_recovery()) {
     if (pg->get_peering_state().needs_backfill()) {
       request_backfill();
     } else {
       all_replicas_recovered();
     }
+    /* TODO: this is racy -- it's possible for a DeferRecovery
+     * event to be processed between this call and when the
+     * async RequestBackfill or AllReplicasRecovered events
+     * are processed --  see https://tracker.ceph.com/issues/71267 */
+    pg->reset_pglog_based_recovery_op();
     co_return seastar::stop_iteration::yes;
   }
   co_return seastar::stop_iteration::no;