git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: skip PGAdvanceMap on a deleted PG 68814/head
author: Aishwarya Mathuria <amathuri@redhat.com>
Fri, 8 May 2026 07:32:01 +0000 (13:02 +0530)
committer: Aishwarya Mathuria <amathuri@redhat.com>
Fri, 8 May 2026 10:51:01 +0000 (10:51 +0000)
A PGAdvanceMap queued by broadcast_map_to_pgs holds a Ref<PG> and can sit
behind in-flight DeleteSome events on the peering pipeline.  By the time it
finally runs, the collection has already been removed in seastore, so
PGAdvanceMap drives handle_advance_map / check_for_splits on a stale PG,
thereby issuing ops on a collection that no longer exists and crashing the OSD.

Following the classic OSD, call peering_state.set_delete_complete() in the final
batch of PG::do_delete_work and bail out of PGAdvanceMap::start when pg->is_deleted() is true.

Fixes: https://tracker.ceph.com/issues/76447
Signed-off-by: Aishwarya Mathuria <amathuri@redhat.com>
src/crimson/osd/osd_operations/pg_advance_map.cc
src/crimson/osd/pg.cc
src/crimson/osd/pg.h

index ee809cc53a93348670868a90a733271d90b226c8..2e1885b6ac8bb458ed249a80d269fdec0c1352fc 100644 (file)
@@ -71,6 +71,12 @@ seastar::future<> PGAdvanceMap::start()
   return enter_stage<>(
     peering_pp(*pg).process
   ).then([this, FNAME] {
+    // pg may have been deleted while this op was queued; see PG::do_delete_work.
+    if (pg->is_deleted()) {
+      DEBUG("{}: pg is deleted, skipping advance", *this);
+      return seastar::now();
+    }
+
     /*
      * PGAdvanceMap is scheduled at pg creation and when
      * broadcasting new osdmaps to pgs. We are not able to serialize
index f1b890eb2bea57400a3fbfecb5136831787f349b..8c4d68d040e0742764b898f5f9153411431e8b49 100644 (file)
@@ -507,6 +507,17 @@ PG::do_delete_work(ceph::os::Transaction &t, ghobject_t _next)
     t.remove(coll_ref->get_cid(), pgid.make_snapmapper_oid());
     t.remove(coll_ref->get_cid(), pgmeta_oid);
     t.remove_collection(coll_ref->get_cid());
+    /*
+     * Mark the PG as fully deleted *before* dispatching the final
+     * RMCOLL transaction.  A PGAdvanceMap may already have been queued
+     * (with a Ref<PG>) by an earlier broadcast_map_to_pgs while this
+     * PG was still in pg_map, and is now sitting behind these
+     * DeleteSome events on the peering pipeline.  Setting the deleted
+     * flag now lets that queued PGAdvanceMap detect the situation and
+     * skip itself instead of issuing ops on a collection that is
+     * about to disappear.
+     */
+    peering_state.set_delete_complete();
     (void) crimson::os::with_store_do_transaction(
       shard_services.get_store(store_index),
       coll_ref,
index b68fa886ec1620a6c0e6f58e1c421b89871c2e93..ae433a549206cdda4ac2bfba3e497105ff697aec 100644 (file)
@@ -688,6 +688,9 @@ public:
   bool is_backfilling() const final {
     return peering_state.is_backfilling();
   }
+  bool is_deleted() const {
+    return peering_state.is_deleted();
+  }
   uint64_t get_last_user_version() const {
     return get_info().last_user_version;
   }