]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/transaction_manager: block client transactions if
authorXuehan Xu <xuxuehan@qianxin.com>
Mon, 1 Dec 2025 09:44:45 +0000 (17:44 +0800)
committerXuehan Xu <xuxuehan@qianxin.com>
Mon, 9 Mar 2026 09:56:41 +0000 (17:56 +0800)
they conflict with rewriting transactions until the rewriting
transactions finish

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cached_extent.cc
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/transaction.h
src/crimson/os/seastore/transaction_manager.cc

index b3dab60882df3ab26f60a62eb7a7c13cc549b39c..2ecfc5d6d46e8062ec60ce4866d95f722de9b854 100644 (file)
@@ -1316,6 +1316,7 @@ record_t Cache::prepare_record(
     if (is_rewrite_transaction(t.get_src()) &&
         !is_root_type(i->get_type())) {
       i->new_committer(t);
+      i->committer->block_trans(t);
     }
     assert(i->is_exist_mutation_pending() ||
           i->prior_instance);
@@ -1591,6 +1592,7 @@ record_t Cache::prepare_record(
       if (is_lba_backref_node(i->get_type())) {
         committer.sync_checksum();
       }
+      committer.block_trans(t);
       i->get_prior_instance()->set_io_wait(
         CachedExtent::extent_state_t::CLEAN, true);
     }
@@ -1963,6 +1965,7 @@ void Cache::complete_commit(
       }
       touch_extent_fully(prior, &t_src, t.get_cache_hint());
       committer.sync_version();
+      committer.unblock_trans(t);
       prior.complete_io();
       i->committer.reset();
       prior.committer.reset();
@@ -2042,6 +2045,7 @@ void Cache::complete_commit(
         t, *i, *i->prior_instance);
       assert(i->committer);
       auto &committer = *i->committer;
+      committer.unblock_trans(t);
       auto &prior = *i->prior_instance;
       prior.pending_for_transaction = TRANS_ID_NULL;
       ceph_assert(prior.is_valid());
index 0445aca5eef8209084f1a4f93bfb0ecb8636603d..d5abf2e085326f8075a4e7b2c159d548e31e42d3 100644 (file)
@@ -18,6 +18,8 @@ namespace {
   }
 }
 
+SET_SUBSYS(seastore_cache);
+
 namespace crimson::os::seastore {
 
 #ifdef DEBUG_CACHED_EXTENT_REF
@@ -472,4 +474,24 @@ void CachedExtent::new_committer(Transaction &t) {
   prior_instance->committer = committer;
 }
 
+void ExtentCommitter::block_trans(Transaction &t) {  // sets need_wait_rewrite on every transaction in the prior extent's read_transactions; t is used only for logging
+  LOG_PREFIX(ExtentCommitter::block_trans);
+  auto &prior = *extent.prior_instance;  // dereferenced without a null check: extent.prior_instance must be set when this is called
+  for (auto &item : prior.read_transactions) {  // each entry exposes a transaction (item.t) and an extent reference (item.ref)
+    TRACET("blocking trans {} for rewriting {}",
+      t, item.t->get_trans_id(), *item.ref);
+    item.t->need_wait_rewrite = true;  // TransactionManager::do_submit_transaction yields in a loop while this flag is true
+  }
+}
+
+void ExtentCommitter::unblock_trans(Transaction &t) {  // clears need_wait_rewrite on every transaction in the prior extent's read_transactions; t is used only for logging
+  LOG_PREFIX(ExtentCommitter::unblock_trans);
+  auto &prior = *extent.prior_instance;  // dereferenced without a null check: extent.prior_instance must be set when this is called
+  for (auto &item : prior.read_transactions) {  // walks the read set as it is now; NOTE(review): may differ from the set seen by block_trans - confirm
+    TRACET("unblocking trans {} for rewriting {}",
+      t, item.t->get_trans_id(), *item.ref);
+    item.t->need_wait_rewrite = false;  // NOTE(review): plain bool, not a count - if two rewrites can block one transaction, the first unblock releases it; confirm
+  }
+}
+
 }
index 1a1144c31fa0f76468d15dfa78b8de327195c37d..e53ce32b70727d91b7310ed1c2a7868154ad6dd3 100644 (file)
@@ -279,6 +279,8 @@ public:
   ExtentCommitter(CachedExtent &extent, Transaction &t)
     : extent(extent), t(t) {}
 
+  void block_trans(Transaction &);
+  void unblock_trans(Transaction &);
   // commit all extent states to the prior instance,
   // except poffset and extent content
   void commit_state();
index 59662142ff0623b4688bf758743e0a937b45b741..9a119b3a3fd41a5e74af50f05398cf361a56ca9b 100644 (file)
@@ -550,6 +550,7 @@ public:
     ool_write_stats = {};
     rewrite_stats = {};
     conflicted = false;
+    need_wait_rewrite = false;
     assert(backref_entries.empty());
     if (!has_reset) {
       has_reset = true;
@@ -667,6 +668,8 @@ public:
   }
 
   btree_cursor_stats_t cursor_stats;
+  bool need_wait_rewrite = false;
+
 private:
   friend class Cache;
   friend Ref make_test_transaction();
index 7baea2cd82c4de6a4cfaa3fd83803e5e2dfc65f1..f9d9b5305ef7f95aae30a84cfc40f253de500d5a 100644 (file)
@@ -598,6 +598,9 @@ TransactionManager::do_submit_transaction(
     tref.get_handle().enter(write_pipeline.prepare)
   );
 
+  while (tref.need_wait_rewrite) {
+    co_await trans_intr::make_interruptible(seastar::yield());
+  }
   if (trim_alloc_to && *trim_alloc_to != JOURNAL_SEQ_NULL) {
     SUBTRACET(seastore_t, "trim backref_bufs to {}", tref, *trim_alloc_to);
     cache->trim_backref_bufs(*trim_alloc_to);