git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/osd: fix assertion fail in prepare_clone
author Kautilya Tripathi <kautilya.tripathi@ibm.com>
Tue, 9 Sep 2025 10:22:52 +0000 (10:22 +0000)
committer Kautilya Tripathi <kautilya.tripathi@ibm.com>
Thu, 6 Nov 2025 11:53:45 +0000 (11:53 +0000)
Ops like rbd.assert_snapc_seq can return -ERANGE, but we were preparing
clones in the OpsExecuter constructor, before the guard was evaluated.

On retry with sparse_copyup, this tripped ceph_assert(!existed) in
prepare_clone() because of the pre-created clone OBC.

This adds precise OBC invalidation during rollback, using a single-key
clear (clear_range(coid, coid)) so that other clones are not affected.

Fixes: https://tracker.ceph.com/issues/72705
Fixes: https://tracker.ceph.com/issues/72709
Fixes: https://tracker.ceph.com/issues/72041
Fixes: https://tracker.ceph.com/issues/72650
Signed-off-by: Kautilya Tripathi <kautilya.tripathi@ibm.com>
src/crimson/osd/ops_executer.h
src/crimson/osd/pg.cc

index 91c897b2a96b94b50e986ff9c887352da872ce01..00d9b1883da6a9bc15d62d1a59951b868dbcfc67 100644 (file)
@@ -450,6 +450,19 @@ public:
 
   version_t get_last_user_version() const;
 
+  bool has_cloning_ctx() const {
+    return cloning_ctx != nullptr;
+  }
+
+  const hobject_t& get_cloning_coid() const {
+    ceph_assert(has_cloning_ctx());
+    return cloning_ctx->coid;
+  }
+
+  void reset_cloning_ctx() {
+    cloning_ctx.reset();
+  }
+
   ObjectContextRef prepare_clone(
     const hobject_t& coid,
     const ObjectState& initial_obs);
index 6f12a82957d83be1ad1df9b02d47efefda330250..56f1120dd6d75f5ec013545a4df25eadb5e8d17a 100644 (file)
@@ -1155,8 +1155,19 @@ PG::run_executer_fut PG::run_executer(
 {
   LOG_PREFIX(PG::run_executer);
   auto rollbacker = ox.create_rollbacker(
-    [obc_data = duplicate_obc_data(obc)](auto &obc) mutable {
+    [FNAME, obc_data = duplicate_obc_data(obc), &ox, this](auto &obc) mutable {
+      // First, revert the OBC state
       obc->update_from(obc_data);
+      // Then clean up any prepared clone OBCs
+      if (ox.has_cloning_ctx()) {
+        const auto coid = ox.get_cloning_coid();
+        DEBUGDPP("cleaning up clone OBC for {} reqid={}",
+        FNAME, coid, ox.get_message().get_reqid());
+        // Use single-key clear by passing [coid, coid] to avoid impacting other clones
+        this->obc_registry.clear_range(coid, coid);
+        // Reset the cloning context directly
+        ox.reset_cloning_ctx();
+      }
     });
   auto rollback_on_error = seastar::defer([&rollbacker] {
     rollbacker.rollback_obc_if_modified();