]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
Revert "crimson/os/seastore/cache: add facilities to synchronize data and states"
authorMatan Breizman <mbreizma@redhat.com>
Mon, 9 Feb 2026 08:49:54 +0000 (08:49 +0000)
committerMatan Breizman <mbreizma@redhat.com>
Mon, 9 Feb 2026 08:49:54 +0000 (08:49 +0000)
This reverts commit 2fc047c51f37cbdfd3952f616cc07f99cf2483d7.

Signed-off-by: Matan Breizman <mbreizma@redhat.com>
src/crimson/common/fixed_kv_node_layout.h
src/crimson/os/seastore/btree/fixed_kv_node.h
src/crimson/os/seastore/cached_extent.cc
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/lba/lba_btree_node.h
src/crimson/os/seastore/linked_tree_node.h
src/crimson/os/seastore/logical_child_node.h
src/crimson/os/seastore/object_data_handler.h
src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h
src/crimson/os/seastore/transaction.h

index fb8fc9bd1772689a77795f1a69bc13dab4f3bdb6..6ec85fbe77ab211022461860e7cf05e781d8e85a 100644 (file)
@@ -365,6 +365,7 @@ public:
   virtual ~FixedKVNodeLayout() = default;
 
   void set_layout_buf(char *_buf) {
+    assert(buf == nullptr);
     assert(_buf != nullptr);
     buf = _buf;
   }
index aba7fdc020180a11a03fe25c290267ec14fefcdb..de57a482c149dd5947be3b7c70335ceb22c9c97c 100644 (file)
@@ -49,10 +49,6 @@ struct FixedKVNode : CachedExtent {
   virtual ~FixedKVNode() = default;
   virtual void do_on_rewrite(Transaction &t, CachedExtent &extent) = 0;
 
-  virtual void on_state_commit() override {
-    auto &prior = static_cast<FixedKVNode&>(*get_prior_instance());
-    prior.range = std::move(range);
-  }
   bool is_in_range(const node_key_t key) const {
     return get_node_meta().is_in_range(key);
   }
@@ -181,10 +177,6 @@ struct FixedKVInternalNode
     return this->get_split_pivot().get_offset();
   }
 
-  void sync_layout_buf() {
-    this->set_layout_buf(this->get_bptr().c_str());
-  }
-
   void prepare_commit() final {
     parent_node_t::prepare_commit();
   }
@@ -207,10 +199,6 @@ struct FixedKVInternalNode
     }
   }
 
-  void on_data_commit() final {
-    this->set_layout_buf(this->get_bptr().c_str());
-  }
-
   void on_invalidated(Transaction &t) final {
     this->child_node_t::on_invalidated();
   }
@@ -495,52 +483,6 @@ struct FixedKVInternalNode
     assert(this->get_size() >= (get_min_capacity() - 1));
     return this->get_size() < get_min_capacity();
   }
-
-  void reapply_delta() final {
-    if (delta_buffer.empty()) {
-      return;
-    }
-    delta_buffer.replay(*this);
-  }
-
-  void merge_content_to_pending_versions(Transaction &t) {
-    ceph_assert(is_rewrite_transaction(t.get_src()));
-    this->for_each_copy_dest_set(t, [this, &t](auto &copy_dests) {
-      this->merge_content_to(t, copy_dests.dests_by_key);
-    });
-  }
-
-  template <template <typename...> typename Container, typename... T>
-  void merge_content_to(Transaction &t, Container<T...> &container) {
-    auto iter = this->begin();
-    for (auto &copy_dest : container) {
-      auto &pending_version = static_cast<this_type_t&>(*copy_dest);
-      auto it = pending_version.begin();
-      while (it != pending_version.end() && iter != this->end()) {
-        if (auto child = pending_version.children[it->get_offset()];
-            (is_valid_child_ptr(child) &&
-             (pending_version.is_pending() || child->_is_pending_io()))) {
-          it++;
-          continue;
-        }
-        if (it->get_key() == iter->get_key()) {
-          it->set_val(iter->get_val());
-          it++;
-          iter++;
-        } else if (it->get_key() > iter->get_key()) {
-          iter++;
-        } else {
-          it++;
-        }
-      }
-      if (pending_version.get_last_committed_crc()) {
-        // if pending_version has already calculated its crc,
-        // calculate it again.
-        pending_version.set_last_committed_crc(pending_version.calc_crc32c());
-      }
-    }
-  }
-
 };
 
 template <
@@ -609,25 +551,6 @@ struct FixedKVLeafNode
   // modifications can be detected (see BtreeLBAMapping.parent_modifications)
   uint64_t modifications = 0;
 
-  void on_state_commit() override {
-    base_t::on_state_commit();
-    auto &prior = static_cast<this_type_t&>(*this->get_prior_instance());
-    // We don't touch the prior's modifications field here, because there maybe
-    // other transactions accessing the prior, and the modifications field is
-    // not to be tainted.
-    if (!prior.is_mutation_pending()) {
-      assert(!prior.modifications);
-    }
-  }
-
-  void on_data_commit() final {
-    this->set_layout_buf(this->get_bptr().c_str());
-  }
-
-  void sync_layout_buf() {
-    this->set_layout_buf(this->get_bptr().c_str());
-  }
-
   void on_invalidated(Transaction &t) final {
     this->child_node_t::on_invalidated();
   }
@@ -868,13 +791,6 @@ struct FixedKVLeafNode
     assert(this->get_size() >= (get_min_capacity() - 1));
     return this->get_size() < get_min_capacity();
   }
-
-  void reapply_delta() final {
-    if (delta_buffer.empty()) {
-      return;
-    }
-    delta_buffer.replay(*this);
-  }
 };
 
 } // namespace crimson::os::seastore
index b290268dbfb4635d90f62c4392b19733bcf378fd..1662f5866e4b547324b758e85f90a3168158d3f5 100644 (file)
@@ -9,8 +9,6 @@
 #include "crimson/os/seastore/btree/fixed_kv_node.h"
 #include "crimson/os/seastore/lba_mapping.h"
 #include "crimson/os/seastore/logical_child_node.h"
-#include "crimson/os/seastore/lba/lba_btree_node.h"
-#include "crimson/os/seastore/backref/backref_tree_node.h"
 
 namespace {
   [[maybe_unused]] seastar::logger& logger() {
@@ -345,125 +343,4 @@ ceph::bufferptr BufferSpace::to_full_ptr(extent_len_t length)
   return ptr;
 }
 
-void ExtentCommitter::sync_version() {
-  assert(extent.prior_instance);
-  auto &prior = *extent.prior_instance;
-  for (auto &mext : prior.mutation_pending_extents) {
-    auto &mextent = static_cast<CachedExtent&>(mext);
-    mextent.version = extent.version + 1;
-  }
-}
-
-void ExtentCommitter::sync_dirty_from() {
-  assert(extent.prior_instance);
-  auto &prior = *extent.prior_instance;
-  for (auto &mext : prior.mutation_pending_extents) {
-    auto &mextent = static_cast<CachedExtent&>(mext);
-    mextent.dirty_from = extent.dirty_from;
-  }
-}
-
-void ExtentCommitter::sync_checksum() {
-  assert(extent.prior_instance);
-  auto &prior = *extent.prior_instance;
-  for (auto &mext : prior.mutation_pending_extents) {
-    auto &mextent = static_cast<CachedExtent&>(mext);
-    mextent.set_last_committed_crc(extent.last_committed_crc);
-  }
-}
-
-void ExtentCommitter::commit_data() {
-  assert(extent.prior_instance);
-  // extent and its prior are sharing the same bptr content
-  auto &prior = *extent.prior_instance;
-  prior.set_bptr(extent.get_bptr());
-  prior.on_data_commit();
-  _share_prior_data_to_mutations();
-  _share_prior_data_to_pending_versions();
-}
-
-void ExtentCommitter::commit_state() {
-  LOG_PREFIX(CachedExtent::commit_state_to_prior);
-  assert(extent.prior_instance);
-  SUBTRACET(seastore_cache, "{} prior={}",
-    t, extent, *extent.prior_instance);
-  auto &prior = *extent.prior_instance;
-  prior.pending_for_transaction = extent.pending_for_transaction;
-  prior.modify_time = extent.modify_time;
-  prior.last_committed_crc = extent.last_committed_crc;
-  prior.dirty_from = extent.dirty_from;
-  prior.length = extent.length;
-  prior.loaded_length = extent.loaded_length;
-  prior.buffer_space = std::move(extent.buffer_space);
-  // XXX: We can go ahead and change the prior's version because
-  // transactions don't hold a local view of the version field,
-  // unlike FixedKVLeafNode::modifications
-  prior.version = extent.version;
-  prior.user_hint = extent.user_hint;
-  prior.rewrite_generation = extent.rewrite_generation;
-  prior.state = extent.state;
-  extent.on_state_commit();
-}
-
-void ExtentCommitter::commit_and_share_paddr() {
-  auto &prior = *extent.prior_instance;
-  auto old_paddr = prior.get_prior_paddr_and_reset();
-  if (prior.get_paddr() == extent.get_paddr()) {
-    return;
-  }
-  if (prior.read_transactions.empty()) {
-    prior.set_paddr(
-      extent.get_paddr(),
-      prior.get_paddr().is_absolute());
-    return;
-  }
-  for (auto &item : prior.read_transactions) {
-    auto [removed, retired] = item.t->pre_stable_extent_paddr_mod(item);
-    if (prior.get_paddr() != extent.get_paddr()) {
-      prior.set_paddr(
-        extent.get_paddr(),
-        prior.get_paddr().is_absolute());
-    }
-    item.t->post_stable_extent_paddr_mod(item, retired);
-    item.t->maybe_update_pending_paddr(old_paddr, extent.get_paddr());
-  }
-}
-
-void ExtentCommitter::_share_prior_data_to_mutations() {
-  LOG_PREFIX(ExtentCommitter::_share_prior_data_to_mutations);
-  ceph_assert(is_lba_backref_node(extent.get_type()));
-  auto &prior = *extent.prior_instance;
-  for (auto &mext : prior.mutation_pending_extents) {
-    auto &mextent = static_cast<CachedExtent&>(mext);
-    TRACE("{} -> {}", extent, mextent);
-    extent.get_bptr().copy_out(
-      0, extent.get_length(), mextent.get_bptr().c_str());
-    mextent.on_data_commit();
-    mextent.reapply_delta();
-  }
-}
-
-void ExtentCommitter::_share_prior_data_to_pending_versions()
-{
-  ceph_assert(is_lba_backref_node(extent.get_type()));
-  auto &prior = *extent.prior_instance;
-  switch (extent.get_type()) {
-  case extent_types_t::LADDR_LEAF:
-    static_cast<lba::LBALeafNode&>(
-      prior).merge_content_to_pending_versions(t);
-    break;
-  case extent_types_t::LADDR_INTERNAL:
-    static_cast<lba::LBAInternalNode&>(prior
-      ).merge_content_to_pending_versions(t);
-    break;
-  case extent_types_t::BACKREF_INTERNAL:
-    static_cast<backref::BackrefInternalNode&>(prior
-      ).merge_content_to_pending_versions(t);
-    break;
-  default:
-    break;
-  }
-}
-
-
 }
index 35258c95c26676e6f9fc1172512851a1a0a6f763..bbd70d35da387facbbd02feb42966f2fb8974afc 100644 (file)
@@ -23,7 +23,6 @@ struct cache_test_t;
 
 namespace crimson::os::seastore {
 
-class ExtentCommitter;
 class Transaction;
 class CachedExtent;
 using CachedExtentRef = boost::intrusive_ptr<CachedExtent>;
@@ -273,36 +272,6 @@ enum class extent_2q_state_t : uint8_t {
   Max
 };
 
-class ExtentCommitter {
-public:
-  ExtentCommitter(CachedExtent &extent, Transaction &t)
-    : extent(extent), t(t) {}
-
-  // commit all extent states to the prior instance,
-  // except poffset and extent content
-  void commit_state();
-
-  void commit_data();
-
-  // synchronize last_committed_crc among mutation pending extents
-  void sync_checksum();
-
-  void sync_dirty_from();
-
-  void sync_version();
-
-  void commit_and_share_paddr();
-
-private:
-  // the rewritten extent
-  CachedExtent &extent;
-  Transaction &t;
-
-  void _share_prior_data_to_mutations();
-  void _share_prior_data_to_pending_versions();
-};
-using ExtentCommitterRef = boost::intrusive_ptr<ExtentCommitter>;
-
 class ExtentIndex;
 class CachedExtent
   : public boost::intrusive_ref_counter<
@@ -898,7 +867,6 @@ private:
   using index = boost::intrusive::set<CachedExtent, index_member_options>;
   friend class ExtentIndex;
   friend class Transaction;
-  friend class ExtentCommitter;
 
   bool is_linked_to_index() {
     return extent_index_hook.is_linked();
@@ -1027,16 +995,11 @@ protected:
       dirty_from(other.dirty_from),
       length(other.get_length()),
       loaded_length(other.get_loaded_length()),
-      version(other.version) {
+      version(other.version),
+      poffset(other.poffset) {
     // the extent must be fully loaded before CoW
     assert(other.is_fully_loaded());
     assert(is_aligned(length, CEPH_PAGE_SIZE));
-    if (other.poffset.is_absolute() ||
-        !other.prior_poffset.has_value()) {
-      poffset = other.poffset;
-    } else {
-      poffset = *other.prior_poffset;
-    }
     if (length > 0) {
       ptr = create_extent_ptr_rand(length);
       other.ptr->copy_out(0, length, ptr->c_str());
@@ -1056,9 +1019,7 @@ protected:
       length(other.get_length()),
       loaded_length(other.get_loaded_length()),
       version(other.version),
-      poffset(other.poffset.is_absolute()
-        ? other.poffset
-        : *other.prior_poffset) {
+      poffset(other.poffset) {
     // the extent must be fully loaded before CoW
     assert(other.is_fully_loaded());
     assert(is_aligned(length, CEPH_PAGE_SIZE));
@@ -1102,36 +1063,6 @@ protected:
     return new T();
   }
 
-  /**
-   * on_state_commit
-   *
-   * Called when the current extent's common states
-   * are copied to its prior instance, this should
-   * only be used in the context of rewriting transactions,
-   * e.g. TRIM_DIRTY and CLEANER
-   */
-  virtual void on_state_commit() {}
-
-  /**
-   * on_data_commit
-   *
-   * Called when the current extent's ptr is shared with
-   * its prior instance, this should only be used when
-   * commit extent rewriting transactions, e.g. TRIM_DIRTY
-   * and CLEANER
-   */
-  virtual void on_data_commit() {}
-
-  /**
-   * reapply_delta
-   *
-   * Called when there's need to reapply the current extent's
-   * deltas, this happens when the rewritting transaction
-   * overwrite the data of mutation pending extents, which erase
-   * all modifications and make the deltas needed to be reapplied
-   */
-  virtual void reapply_delta() {}
-
   void reset_prior_instance() {
     prior_instance.reset();
   }
@@ -1165,9 +1096,6 @@ protected:
   void set_bptr(ceph::bufferptr &&nptr) {
     ptr = nptr;
   }
-  void set_bptr(ceph::bufferptr &nptr) {
-    ptr = nptr;
-  }
 
   /**
    * maybe_generate_relative
@@ -1382,17 +1310,13 @@ public:
   }
 
   void erase(CachedExtent &extent) {
-    auto it = extent_index.s_iterator_to(extent);
-    erase(it);
-  }
-
-  void erase(CachedExtent::index::iterator &it) {
-    auto &extent = *it;
     assert(extent.parent_index);
     assert(extent.is_linked_to_index());
-    [[maybe_unused]] auto erased = extent_index.erase(it);
-    assert(erased);
+    [[maybe_unused]] auto erased = extent_index.erase(
+      extent_index.s_iterator_to(extent));
     extent.parent_index = nullptr;
+
+    assert(erased);
     bytes -= extent.get_length();
   }
 
@@ -1542,14 +1466,6 @@ protected:
     logical_on_delta_write();
   }
 
-  void on_state_commit() final {
-    auto &prior = static_cast<LogicalCachedExtent&>(*get_prior_instance());
-    prior.laddr = laddr;
-    do_on_state_commit();
-  }
-
-  virtual void do_on_state_commit() {}
-
 private:
   // the logical address of the extent, and if shared,
   // it is the intermediate_base, see BtreeLBAMapping comments.
index 7944b8f6cec66ebee91c9db2b16effa38b48b6f6..b4957fe48726a43aca501624c4ce6d22cdb27303 100644 (file)
@@ -287,55 +287,6 @@ struct LBALeafNode
   }
 
   std::ostream &print_detail(std::ostream &out) const final;
-
-  template <template <typename...> typename Container, typename... T>
-  void merge_content_to(Transaction &t, Container<T...> &container) {
-    auto iter = this->begin();
-    for (auto &copy_dest : container) {
-      auto &pending_version = static_cast<LBALeafNode&>(*copy_dest);
-      auto it = pending_version.begin();
-      while (it != pending_version.end() && iter != this->end()) {
-        if (iter->get_val().pladdr.is_laddr() ||
-            iter->get_val().pladdr.get_paddr().is_zero()) {
-          iter++;
-          continue;
-        }
-        if (auto child = pending_version.children[it->get_offset()];
-            is_valid_child_ptr(child) &&
-            (pending_version.is_pending() || child->_is_pending_io())) {
-          it++;
-          continue;
-        }
-        if (it->get_key() == iter->get_key()) {
-          it->set_val(iter->get_val());
-          it++;
-          iter++;
-        } else if (it->get_key() > iter->get_key()) {
-          iter++;
-        } else {
-          it++;
-        }
-      }
-      if (pending_version.get_last_committed_crc()) {
-        // if pending_version has already calculated its crc,
-        // calculate it again.
-        pending_version.set_last_committed_crc(pending_version.calc_crc32c());
-      }
-    }
-  }
-
-  void merge_content_to_pending_versions(Transaction &t) {
-    ceph_assert(is_rewrite_transaction(t.get_src()));
-    this->for_each_copy_dest_set(t, [this, &t](auto &copy_dests) {
-#ifndef NDEBUG
-      for (auto &copy_dest : copy_dests.dests_by_key) {
-        auto &pending_version = static_cast<LBALeafNode&>(*copy_dest);
-        ceph_assert(pending_version.is_pending());
-      }
-#endif
-      this->merge_content_to(t, copy_dests.dests_by_key);
-    });
-  }
 };
 using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
 
index 8a9ddee84620822e7a522b300bb04c90c3d43ab5..5849bc94cf718b83e7893a1f39fb376845256944 100644 (file)
@@ -230,7 +230,6 @@ public:
   }
   virtual key_t node_begin() const = 0;
   virtual bool is_retired_placeholder() const = 0;
-  virtual bool _is_pending_io() const = 0;
 protected:
   parent_tracker_ref<ParentT> parent_tracker;
   virtual bool _is_valid() const = 0;
@@ -462,17 +461,6 @@ protected:
     return *it;
   }
 
-  template <typename Func>
-  void for_each_copy_dest_set(Transaction &t, Func &&f) {
-    for (auto &dests : copy_dests_by_trans) {
-      if (dests.pending_for_transaction == t.get_trans_id()) {
-        continue;
-      }
-      auto &copy_dests = static_cast<copy_dests_t&>(dests);
-      std::invoke(f, copy_dests);
-    }
-  }
-
   void add_copy_dest(Transaction &t, TCachedExtentRef<T> dest) {
     ceph_assert(down_cast().is_stable());
     ceph_assert(dest->is_pending());
@@ -1001,7 +989,6 @@ protected:
   }
 
   parent_tracker_t<T>* my_tracker = nullptr;
-  std::vector<BaseChildNode<T, node_key_t>*> children;
 private:
   T& down_cast() {
     return *static_cast<T*>(this);
@@ -1030,6 +1017,7 @@ private:
     }
   }
 
+  std::vector<BaseChildNode<T, node_key_t>*> children;
   std::set<TCachedExtentRef<T>, Comparator> copy_sources;
 
   // copy dests points from a stable node back to its pending nodes
@@ -1165,9 +1153,6 @@ private:
   bool _is_stable() const final {
     return down_cast().is_stable();
   }
-  bool _is_pending_io() const final {
-    return down_cast().is_pending_io();
-  }
   key_t node_begin() const final {
     return down_cast().get_begin();
   }
index 1fc28d35489f2d56f7d13245e892b74d9ddd49ae..40eaea75aaea0357f55abc03f09038f045ff40dc 100644 (file)
@@ -134,9 +134,6 @@ protected:
     do_on_replace_prior();
   }
   virtual void do_on_replace_prior() {}
-  void on_data_commit() final {
-    ceph_abort("impossible");
-  }
 };
 using LogicalChildNodeRef = TCachedExtentRef<LogicalChildNode>;
 } // namespace crimson::os::seastore
index ffb22246b4f8b12d3ee1d0c9e50704e1aa83e47a..91d0d29b5e838cc6e32c8e38f5acd99f7234dc47 100644 (file)
@@ -254,13 +254,6 @@ struct ObjectDataBlock : crimson::os::seastore::LogicalChildNode {
   explicit ObjectDataBlock(extent_len_t length)
     : LogicalChildNode(length) {}
 
-  void do_on_state_commit() final {
-    auto &prior = static_cast<ObjectDataBlock&>(*get_prior_instance());
-    prior.delta = std::move(delta);
-    prior.modified_region = std::move(modified_region);
-    prior.cached_overwrites = std::move(cached_overwrites);
-  }
-
   CachedExtentRef duplicate_for_write(Transaction&) final {
     return CachedExtentRef(new ObjectDataBlock(*this, share_buffer_t{}));
   };
index 715b0b2ebe6d56c64c5ec682d1870bd69f8e2127..a24f88999883feb94d20c8eb1027d3a85d4bdbda 100644 (file)
@@ -57,11 +57,6 @@ class SeastoreNodeExtent final: public NodeExtent {
  protected:
   NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override;
 
-  void do_on_state_commit() final {
-    auto &prior = static_cast<SeastoreNodeExtent&>(*get_prior_instance());
-    prior.recorder = std::move(recorder);
-  }
-
   DeltaRecorder* get_recorder() const override {
     return recorder.get();
   }
index 59662142ff0623b4688bf758743e0a937b45b741..4541eb56e4e8927369702303954ef414e6b7e9c4 100644 (file)
@@ -408,53 +408,6 @@ public:
     }
   }
 
-  std::pair<bool, bool> pre_stable_extent_paddr_mod(
-    read_set_item_t<Transaction> &item)
-  {
-    LOG_PREFIX(Transaction::pre_stable_extent_paddr_mod);
-    SUBTRACET(seastore_t, "{}", *this, *item.ref);
-#ifndef NDEBUG
-    auto [existed, it] = lookup_trans_from_read_extent(item.ref);
-    assert(existed);
-    assert(item.ref.get() == it->ref.get());
-    assert(item.t = it->t);
-#endif
-
-    if (!item.is_extent_attached_to_trans()) {
-      return {false, false};
-    }
-    auto &extent = *item.ref;
-    read_set.erase(read_extent_set_t<Transaction>::s_iterator_to(item));
-    auto where1 = retired_set.find(extent.get_paddr());
-    bool retired = (where1 != retired_set.end());
-    if (where1 != retired_set.end()) {
-      retired_set.erase(where1);
-    }
-    return {true, retired};
-  }
-  void post_stable_extent_paddr_mod(
-    read_set_item_t<Transaction> &item,
-    bool retired) {
-    read_set.insert(item);
-    if (retired) {
-      retired_set.emplace(item.ref, trans_id);
-    }
-  }
-  void maybe_update_pending_paddr(
-    const paddr_t &old_paddr,
-    const paddr_t &new_paddr) {
-    if (!new_paddr.is_absolute()) {
-      return;
-    }
-    auto where2 = write_set.find_offset(old_paddr);
-    if (where2 != write_set.end()) {
-      auto &mextent = *where2;
-      write_set.erase(where2);
-      mextent.set_paddr(new_paddr);
-      write_set.insert(mextent);
-    }
-  }
-
   template <typename F>
   auto for_each_finalized_fresh_block(F &&f) const {
     std::for_each(ool_block_list.begin(), ool_block_list.end(), f);