]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: force rewrite transactions to conflict with others 69044/head
authorXuehan Xu <xuxuehan@qianxin.com>
Thu, 21 May 2026 12:35:47 +0000 (20:35 +0800)
committerXuehan Xu <xuxuehan@qianxin.com>
Wed, 27 May 2026 02:05:06 +0000 (10:05 +0800)
if it involve insertions on the lba tree

The issue was introduced since bd0ce704f24afbe11830d31b46d8b22771f54456

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cached_extent.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction.h
src/crimson/os/seastore/transaction_manager.cc

index 5bbb8feeb034c83e2db1e3ea80ba647167c1bf37..3d2efdbb6b2a570f5b1ad353a19fe7ccdfadeb6d 100644 (file)
@@ -929,7 +929,7 @@ void Cache::stage_visibility_handoff(
   assert(next->get_paddr().is_absolute() || next->get_paddr().is_root());
   assert(next->version == prev->version + 1);
   const auto t_src = t.get_src();
-  ceph_assert(should_use_no_conflict_publish(t_src, next->get_type()));
+  ceph_assert(should_use_no_conflict_publish(t, next->get_type()));
 
   bool was_stable_dirty = prev->is_stable_dirty();
   if (!was_stable_dirty) {
@@ -1386,7 +1386,7 @@ record_t Cache::prepare_record(
       DEBUGT("invalid mutated extent -- {}", t, *i);
       continue;
     }
-    if (should_use_no_conflict_publish(t.get_src(), i->get_type())) {
+    if (should_use_no_conflict_publish(t, i->get_type())) {
       i->new_committer(t);
       i->committer->block_trans(t);
     }
@@ -1507,7 +1507,8 @@ record_t Cache::prepare_record(
     }
 
     if (i->is_mutation_pending()) {
-      const bool use_no_conflict = should_use_no_conflict_publish(t.get_src(), i->get_type());
+      const bool use_no_conflict =
+        should_use_no_conflict_publish(t, i->get_type());
       // Block the new extent readers until the journal commit completes.
       i->set_io_wait(CachedExtent::extent_state_t::DIRTY, use_no_conflict);
 
@@ -1536,7 +1537,7 @@ record_t Cache::prepare_record(
     retire_stat.increment(extent->get_length());
     DEBUGT("retired and remove extent {}~0x{:x} -- {}",
           t, extent->get_paddr(), extent->get_length(), *extent);
-    if (should_use_no_conflict_publish(t.get_src(), extent->get_type())) {
+    if (should_use_no_conflict_publish(t, extent->get_type())) {
       // avoid extent invalidation on retirement
       // only adjust dirty bookkeeping
       // we would invalidate them in complete_commit final stage
@@ -1659,10 +1660,10 @@ record_t Cache::prepare_record(
          i->get_type()));
     }
     i->set_io_wait(CachedExtent::extent_state_t::CLEAN,
-                   should_use_no_conflict_publish(t.get_src(), i->get_type()));
+                   should_use_no_conflict_publish(t, i->get_type()));
     // Note, paddr is known until complete_commit(),
     // so add_extent() later.
-    if (should_use_no_conflict_publish(t.get_src(), i->get_type())) {
+    if (should_use_no_conflict_publish(t, i->get_type())) {
       assert(i->get_prior_instance());
       assert(!i->committer);
       assert(!i->get_prior_instance()->committer);
@@ -1671,7 +1672,8 @@ record_t Cache::prepare_record(
       auto &committer = *i->committer;
       committer.block_trans(t);
       i->get_prior_instance()->set_io_wait(
-        CachedExtent::extent_state_t::CLEAN, should_use_no_conflict_publish(t.get_src(), i->get_type()));
+        CachedExtent::extent_state_t::CLEAN,
+        should_use_no_conflict_publish(t, i->get_type()));
     }
   }
 
@@ -1696,7 +1698,7 @@ record_t Cache::prepare_record(
          i->get_length(),
          i->get_type()));
     }
-    if (should_use_no_conflict_publish(t.get_src(), i->get_type())) {
+    if (should_use_no_conflict_publish(t, i->get_type())) {
       assert(i->get_prior_instance());
       assert(!i->committer);
       assert(!i->get_prior_instance()->committer);
@@ -1709,7 +1711,7 @@ record_t Cache::prepare_record(
         CachedExtent::extent_state_t::CLEAN, true);
     }
     i->set_io_wait(CachedExtent::extent_state_t::CLEAN,
-                   should_use_no_conflict_publish(t.get_src(), i->get_type()));
+                   should_use_no_conflict_publish(t, i->get_type()));
     // Note, paddr is (can be) known until complete_commit(),
     // so add_extent() later.
   }
@@ -1758,7 +1760,7 @@ record_t Cache::prepare_record(
     } else {
       assert(i->is_exist_mutation_pending());
       i->set_io_wait(CachedExtent::extent_state_t::DIRTY,
-                     should_use_no_conflict_publish(t.get_src(), i->get_type()));
+                     should_use_no_conflict_publish(t, i->get_type()));
     }
 
     // exist mutation pending extents must be in t.mutated_block_list
@@ -1992,8 +1994,7 @@ void Cache::complete_commit(
             t, final_block_start, start_seq);
   for (auto &i: t.retired_set) {
     auto &extent = i.extent;
-    auto trans_src = t.get_src();
-    if (should_use_no_conflict_publish(trans_src, extent->get_type())) {
+    if (should_use_no_conflict_publish(t, extent->get_type())) {
      // retired extents should remain valid through complete_commit().
      // We only free space post-commit *AFTER* handoff.
       assert(extent->is_valid());
@@ -2023,7 +2024,7 @@ void Cache::complete_commit(
     i->pending_for_transaction = TRANS_ID_NULL;
     i->on_initial_write();
     const auto t_src = t.get_src();
-    if (should_use_no_conflict_publish(t_src, i->get_type())) {
+    if (should_use_no_conflict_publish(t, i->get_type())) {
       ceph_assert(i->committer);
       auto &committer = *i->committer;
       auto &prior = *i->get_prior_instance();
@@ -2106,7 +2107,7 @@ void Cache::complete_commit(
     if (i->version == 1 || is_root_type(i->get_type())) {
       i->dirty_from = start_seq;
       DEBUGT("commit extent done, become dirty -- {}", t, *i);
-      if (should_use_no_conflict_publish(t.get_src(), i->get_type())) {
+      if (should_use_no_conflict_publish(t, i->get_type())) {
         auto &prior = *i->get_prior_instance();
         prior.dirty_from = start_seq;
         ceph_assert(i->committer);
@@ -2117,7 +2118,7 @@ void Cache::complete_commit(
       DEBUGT("commit extent done -- {}", t, *i);
     }
     i->on_delta_write(final_block_start);
-    if (should_use_no_conflict_publish(t.get_src(), i->get_type())) {
+    if (should_use_no_conflict_publish(t, i->get_type())) {
       TRACET("committing paddr to prior for {}, prior={}",
         t, *i, *i->prior_instance);
       assert(i->committer);
@@ -2163,13 +2164,13 @@ void Cache::complete_commit(
   }
 
   t.for_each_finalized_fresh_block([&t](const CachedExtentRef &i) {
-    if (should_use_no_conflict_publish(t.get_src(), i->get_type())) {
+    if (should_use_no_conflict_publish(t, i->get_type())) {
       i->set_invalid(t);
     }
   });
 
   for (auto &i: t.mutated_block_list) {
-    if (should_use_no_conflict_publish(t.get_src(), i->get_type())) {
+    if (should_use_no_conflict_publish(t, i->get_type())) {
         i->set_invalid(t);
     }
   }
index 08951ee8f8f9dd16bb7a65fa02ef812750427d9d..bf30cf163eccf0597db424a16c7b3533e711ce0d 100644 (file)
@@ -502,7 +502,7 @@ void ExtentCommitter::_share_prior_data_to_pending_versions()
 }
 
 void CachedExtent::new_committer(Transaction &t) {
-  ceph_assert(should_use_no_conflict_publish(t.get_src(), this->get_type()));
+  ceph_assert(should_use_no_conflict_publish(t, this->get_type()));
   ceph_assert(!committer);
   committer = new ExtentCommitter(*this, t);
   assert(prior_instance);
index 878f6cf9ae2b9264716b2840df58b25ee27115db..6f29c9c9b70d85fba339c6b280f9b2e00193f669 100644 (file)
@@ -2756,34 +2756,6 @@ constexpr bool is_modify_transaction(transaction_type_t type) {
       is_background_transaction(type));
 }
 
-/**
- * should_use_no_conflict_publish()
- *
- * Returns true when this (transaction source, extent type) pair should take
- * the no-conflict publish path (i.e avoid invalidate-and-retry and use the
- * committer + visibility hand-off).
- *
- * Currently true for:
- *  - rewrite (background) transactions, for any non-root extent
- *
- *  To be expanded to:
- *  - user (txn_manager) transactions that mutate LBA nodes
- *  - Onode/Omap nodes
- */
-constexpr bool should_use_no_conflict_publish(transaction_type_t txn_type,
-                                              extent_types_t ext_type) {
-  // keep classic handling for ROOT
-  if (is_root_type(ext_type)) {
-    return false;
-  }
-
-  // TODO: Extend this as support grows (e.g. Onode/OMAP nodes).
-  //       is_user_transaction(txn_type) && is_lba_node(ext_type)
-
-  return is_rewrite_transaction(txn_type);
-}
-
-
 // Note: It is possible to statically introduce structs for OOL, which must be
 // more efficient, but that requires to specialize the RecordSubmitter as well.
 // Let's delay this optimization until necessary.
index a79d822de6a7af4a4e05ed23103a206b0d93301d..a6e2e1c72722449a21eb15f68bebf361e2970f83 100644 (file)
@@ -528,6 +528,7 @@ public:
     rewrite_stats = {};
     conflicted = false;
     need_wait_visibility = false;
+    force_rewrite_conflict = false;
     assert(backref_entries.empty());
     if (!has_reset) {
       has_reset = true;
@@ -647,6 +648,7 @@ public:
   btree_cursor_stats_t cursor_stats;
 
   bool need_wait_visibility = false;
+  bool force_rewrite_conflict = false;
 
   using update_copied_lba_key_func_t =
     std::function<void (Transaction&, laddr_t, paddr_t)>;
@@ -913,6 +915,33 @@ inline TransactionRef make_test_transaction() {
     CACHE_HINT_TOUCH)
   );
 }
+/**
+ * should_use_no_conflict_publish()
+ *
+ * Returns true when this (transaction source, extent type) pair should take
+ * the no-conflict publish path (i.e avoid invalidate-and-retry and use the
+ * committer + visibility hand-off).
+ *
+ * Currently true for:
+ *  - rewrite (background) transactions, for any non-root extent
+ *
+ *  To be expanded to:
+ *  - user (txn_manager) transactions that mutate LBA nodes
+ *  - Onode/Omap nodes
+ */
+constexpr bool should_use_no_conflict_publish(const Transaction &t,
+                                              extent_types_t ext_type) {
+  // keep classic handling for ROOT
+  if (is_root_type(ext_type)) {
+    return false;
+  }
+
+  // TODO: Extend this as support grows (e.g. Onode/OMAP nodes).
+  //       is_user_transaction(txn_type) && is_lba_node(ext_type)
+
+  return !t.force_rewrite_conflict && is_rewrite_transaction(t.get_src());
+}
+
 
 }
 
index 11b113b3378323cb241f774eff1be2180ddb98c9..408e7ef009e15d5db325f19f6e4d13f94503beb0 100644 (file)
@@ -750,6 +750,12 @@ TransactionManager::rewrite_logical_extent(
     extent_len_t off = 0;
     auto left = extent->get_length();
     extent_ref_count_t refcount = 0;
+    // if rewriting a logical range resolves to multiple extents,
+    // the LBA update likely involves insertions/splits (structural
+    // btree changes), which the no-conflict publish-to-prior path
+    // does not currently cover safely. Fall back to optimistic
+    // conflict handling.
+    t.force_rewrite_conflict = (extents.size() > 1);
     for (auto &_nextent : extents) {
       auto nextent = _nextent->template cast<LogicalChildNode>();
       bool first_extent = (off == 0);