]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore/btree: add copy_source->pending links to FixedKVNode
authorXuehan Xu <xuxuehan@qianxin.com>
Sat, 29 Jun 2024 13:23:33 +0000 (21:23 +0800)
committerMatan Breizman <mbreizma@redhat.com>
Wed, 31 Jul 2024 08:50:23 +0000 (11:50 +0300)
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
(cherry picked from commit b87f6141a8865c41943ba9d76cf0f55e675f4d0d)

src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/btree/fixed_kv_node.h
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/root_block.h
src/crimson/os/seastore/transaction.h
src/crimson/os/seastore/transaction_manager.cc
src/test/crimson/seastore/test_block.h

index d5f8ecb8f420a5b05632bfd36fd81f6177d51ab9..159ea269b74d62b56db16c5972c293c429c6cc1e 100644 (file)
@@ -1047,7 +1047,7 @@ public:
         fixed_kv_extent.get_user_hint(),
         // get target rewrite generation
         fixed_kv_extent.get_rewrite_generation());
-      n_fixed_kv_extent->rewrite(fixed_kv_extent, 0);
+      n_fixed_kv_extent->rewrite(c.trans, fixed_kv_extent, 0);
       
       SUBTRACET(
         seastore_fixedkv_tree,
index 8b31d82aad5900d0771d38573edf9ca38ebf49d1..536c11c479e60861c9b11619817d7f0be3daf54c 100644 (file)
@@ -32,7 +32,7 @@ struct FixedKVNode : ChildableCachedExtent {
   using FixedKVNodeRef = TCachedExtentRef<FixedKVNode>;
   fixed_kv_node_meta_t<node_key_t> range;
 
-  struct copy_source_cmp_t {
+  struct fixedkv_node_cmp_t {
     using is_transparent = node_key_t;
     bool operator()(const FixedKVNodeRef &l, const FixedKVNodeRef &r) const {
       assert(l->range.end <= r->range.begin
@@ -84,11 +84,56 @@ struct FixedKVNode : ChildableCachedExtent {
    *   cannot be rewritten) because their parents must be mutated upon remapping.
    */
   std::vector<ChildableCachedExtent*> children;
-  std::set<FixedKVNodeRef, copy_source_cmp_t> copy_sources;
+  std::set<FixedKVNodeRef, fixedkv_node_cmp_t> copy_sources;
   uint16_t capacity = 0;
   parent_tracker_t* my_tracker = nullptr;
   RootBlockRef root_block;
 
+  // copy dests points from a stable node back to its pending nodes
+  // having copy sources at the same tree level, it serves as a two-level index:
+  // transaction-id then node-key to the pending node.
+  //
+  // The copy dest pointers must be symmetric to the copy source pointers.
+  //
+  // copy_dests_t will be automatically unregisterred upon transaction destruction,
+  // see Transaction::views
+  struct copy_dests_t : trans_spec_view_t {
+    std::set<FixedKVNodeRef, fixedkv_node_cmp_t> dests_by_key;
+    copy_dests_t(Transaction &t) : trans_spec_view_t{t.get_trans_id()} {}
+    ~copy_dests_t() {
+      LOG_PREFIX(~copy_dests_t);
+      SUBTRACE(seastore_fixedkv_tree, "copy_dests_t destroyed");
+    }
+  };
+
+  trans_view_set_t copy_dests_by_trans;
+
+  void add_copy_dest(Transaction &t, FixedKVNodeRef dest) {
+    ceph_assert(is_stable());
+    ceph_assert(dest->is_pending());
+    auto tid = t.get_trans_id();
+    auto iter = copy_dests_by_trans.lower_bound(
+      tid, trans_spec_view_t::cmp_t());
+    if (iter == copy_dests_by_trans.end() ||
+       iter->pending_for_transaction != tid) {
+      iter = copy_dests_by_trans.insert_before(
+       iter, t.add_transactional_view<copy_dests_t>(t));
+    }
+    auto &copy_dests = static_cast<copy_dests_t&>(*iter);
+    auto [it, inserted] = copy_dests.dests_by_key.insert(dest);
+    assert(inserted || it->get() == dest.get());
+  }
+
+  void del_copy_dest(Transaction &t, FixedKVNodeRef dest) {
+    auto iter = copy_dests_by_trans.find(
+      t.get_trans_id(), trans_spec_view_t::cmp_t());
+    ceph_assert(iter != copy_dests_by_trans.end());
+    auto &copy_dests = static_cast<copy_dests_t&>(*iter);
+    auto it = copy_dests.dests_by_key.find(dest);
+    ceph_assert(it != copy_dests.dests_by_key.end());
+    copy_dests.dests_by_key.erase(dest);
+  }
+
   bool is_linked() {
     assert(!has_parent_tracker() || !(bool)root_block);
     return (bool)has_parent_tracker() || (bool)root_block;
@@ -161,7 +206,7 @@ struct FixedKVNode : ChildableCachedExtent {
 
   virtual bool have_children() const = 0;
 
-  void on_rewrite(CachedExtent &extent, extent_len_t off) final {
+  void on_rewrite(Transaction &t, CachedExtent &extent, extent_len_t off) final {
     assert(get_type() == extent.get_type());
     assert(off == 0);
     auto &foreign_extent = (FixedKVNode&)extent;
@@ -169,11 +214,14 @@ struct FixedKVNode : ChildableCachedExtent {
 
     if (have_children()) {
       if (!foreign_extent.is_pending()) {
+       foreign_extent.add_copy_dest(t, this);
        copy_sources.emplace(&foreign_extent);
       } else {
        ceph_assert(foreign_extent.is_mutation_pending());
-       copy_sources.emplace(
-         foreign_extent.get_prior_instance()->template cast<FixedKVNode>());
+       auto copy_source = 
+         foreign_extent.get_prior_instance()->template cast<FixedKVNode>();
+       copy_source->add_copy_dest(t, this);
+       copy_sources.emplace(copy_source);
        children = std::move(foreign_extent.children);
        adjust_ptracker_for_children();
       }
@@ -211,17 +259,24 @@ struct FixedKVNode : ChildableCachedExtent {
   }
 
   static void push_copy_sources(
+    Transaction &t,
     FixedKVNode &dest,
     FixedKVNode &src)
   {
     ceph_assert(dest.is_initial_pending());
     if (!src.is_pending()) {
+      src.add_copy_dest(t, &dest);
       dest.copy_sources.emplace(&src);
     } else if (src.is_mutation_pending()) {
-      dest.copy_sources.emplace(
-       src.get_prior_instance()->template cast<FixedKVNode>());
+      auto copy_src =
+       src.get_prior_instance()->template cast<FixedKVNode>();
+      copy_src->add_copy_dest(t, &dest);
+      dest.copy_sources.emplace(copy_src);
     } else {
       ceph_assert(src.is_initial_pending());
+      for (auto &cs : src.copy_sources) {
+       cs->add_copy_dest(t, &dest);
+      }
       dest.copy_sources.insert(
        src.copy_sources.begin(),
        src.copy_sources.end());
@@ -306,13 +361,20 @@ struct FixedKVNode : ChildableCachedExtent {
   }
 
   void split_child_ptrs(
+    Transaction &t,
     FixedKVNode &left,
     FixedKVNode &right)
   {
     assert(!left.my_tracker);
     assert(!right.my_tracker);
-    push_copy_sources(left, *this);
-    push_copy_sources(right, *this);
+    if (is_initial_pending()) {
+      for (auto &cs : copy_sources) {
+       cs->del_copy_dest(t, this);
+      }
+    }
+
+    push_copy_sources(t, left, *this);
+    push_copy_sources(t, right, *this);
     if (is_pending()) {
       uint16_t pivot = get_node_split_pivot();
       move_child_ptrs(left, *this, 0, 0, pivot);
@@ -322,12 +384,24 @@ struct FixedKVNode : ChildableCachedExtent {
   }
 
   void merge_child_ptrs(
+    Transaction &t,
     FixedKVNode &left,
     FixedKVNode &right)
   {
     ceph_assert(!my_tracker);
-    push_copy_sources(*this, left);
-    push_copy_sources(*this, right);
+
+    if (left.is_initial_pending()) {
+      for (auto &cs : left.copy_sources) {
+       cs->del_copy_dest(t, &left);
+      }
+    }
+    if (right.is_initial_pending()) {
+      for (auto &cs : right.copy_sources) {
+       cs->del_copy_dest(t, &right);
+      }
+    }
+    push_copy_sources(t, *this, left);
+    push_copy_sources(t, *this, right);
 
     if (left.is_pending()) {
       move_child_ptrs(*this, left, 0, 0, left.get_node_size());
@@ -341,6 +415,7 @@ struct FixedKVNode : ChildableCachedExtent {
   }
 
   static void balance_child_ptrs(
+    Transaction &t,
     FixedKVNode &left,
     FixedKVNode &right,
     bool prefer_left,
@@ -355,12 +430,23 @@ struct FixedKVNode : ChildableCachedExtent {
       pivot_idx++;
     }
 
+    if (left.is_initial_pending()) {
+      for (auto &cs : left.copy_sources) {
+       cs->del_copy_dest(t, &left);
+      }
+    }
+    if (right.is_initial_pending()) {
+      for (auto &cs : right.copy_sources) {
+       cs->del_copy_dest(t, &right);
+      }
+    }
+
     assert(!replacement_left.my_tracker);
     assert(!replacement_right.my_tracker);
     if (pivot_idx < l_size) {
       // deal with left
-      push_copy_sources(replacement_left, left);
-      push_copy_sources(replacement_right, left);
+      push_copy_sources(t, replacement_left, left);
+      push_copy_sources(t, replacement_right, left);
       if (left.is_pending()) {
        move_child_ptrs(replacement_left, left, 0, 0, pivot_idx);
        move_child_ptrs(replacement_right, left, 0, pivot_idx, l_size);
@@ -368,22 +454,22 @@ struct FixedKVNode : ChildableCachedExtent {
       }
 
       // deal with right
-      push_copy_sources(replacement_right, right);
+      push_copy_sources(t, replacement_right, right);
       if (right.is_pending()) {
        move_child_ptrs(replacement_right, right, l_size - pivot_idx, 0, r_size);
        right.my_tracker= nullptr;
       }
     } else {
       // deal with left
-      push_copy_sources(replacement_left, left);
+      push_copy_sources(t, replacement_left, left);
       if (left.is_pending()) {
        move_child_ptrs(replacement_left, left, 0, 0, l_size);
        left.my_tracker = nullptr;
       }
 
       // deal with right
-      push_copy_sources(replacement_left, right);
-      push_copy_sources(replacement_right, right);
+      push_copy_sources(t, replacement_left, right);
+      push_copy_sources(t, replacement_right, right);
       if (right.is_pending()) {
        move_child_ptrs(replacement_left, right, l_size, 0, pivot_idx - l_size);
        move_child_ptrs(replacement_right, right, 0, pivot_idx - l_size, r_size);
@@ -821,10 +907,10 @@ struct FixedKVInternalNode
       c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
     auto right = c.cache.template alloc_new_non_data_extent<node_type_t>(
       c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
-    this->split_child_ptrs(*left, *right);
     auto pivot = this->split_into(*left, *right);
     left->range = left->get_meta();
     right->range = right->get_meta();
+    this->split_child_ptrs(c.trans, *left, *right);
     return std::make_tuple(
       left,
       right,
@@ -836,9 +922,9 @@ struct FixedKVInternalNode
     Ref &right) {
     auto replacement = c.cache.template alloc_new_non_data_extent<node_type_t>(
       c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
-    replacement->merge_child_ptrs(*this, *right);
     replacement->merge_from(*this, *right->template cast<node_type_t>());
     replacement->range = replacement->get_meta();
+    replacement->merge_child_ptrs(c.trans, *this, *right);
     return replacement;
   }
 
@@ -860,15 +946,15 @@ struct FixedKVInternalNode
       prefer_left,
       *replacement_left,
       *replacement_right);
+    replacement_left->range = replacement_left->get_meta();
+    replacement_right->range = replacement_right->get_meta();
     this->balance_child_ptrs(
+      c.trans,
       *this,
       right,
       prefer_left,
       *replacement_left,
       *replacement_right);
-
-    replacement_left->range = replacement_left->get_meta();
-    replacement_right->range = replacement_right->get_meta();
     return std::make_tuple(
       replacement_left,
       replacement_right,
@@ -1231,12 +1317,12 @@ struct FixedKVLeafNode
       c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
     auto right = c.cache.template alloc_new_non_data_extent<node_type_t>(
       c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
-    if constexpr (has_children) {
-      this->split_child_ptrs(*left, *right);
-    }
     auto pivot = this->split_into(*left, *right);
     left->range = left->get_meta();
     right->range = right->get_meta();
+    if constexpr (has_children) {
+      this->split_child_ptrs(c.trans, *left, *right);
+    }
     return std::make_tuple(
       left,
       right,
@@ -1248,11 +1334,11 @@ struct FixedKVLeafNode
     Ref &right) {
     auto replacement = c.cache.template alloc_new_non_data_extent<node_type_t>(
       c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
-    if constexpr (has_children) {
-      replacement->merge_child_ptrs(*this, *right);
-    }
     replacement->merge_from(*this, *right->template cast<node_type_t>());
     replacement->range = replacement->get_meta();
+    if constexpr (has_children) {
+      replacement->merge_child_ptrs(c.trans, *this, *right);
+    }
     return replacement;
   }
 
@@ -1274,17 +1360,17 @@ struct FixedKVLeafNode
       prefer_left,
       *replacement_left,
       *replacement_right);
+    replacement_left->range = replacement_left->get_meta();
+    replacement_right->range = replacement_right->get_meta();
     if constexpr (has_children) {
       this->balance_child_ptrs(
+       c.trans,
        *this,
        right,
        prefer_left,
        *replacement_left,
        *replacement_right);
     }
-
-    replacement_left->range = replacement_left->get_meta();
-    replacement_right->range = replacement_right->get_meta();
     return std::make_tuple(
       replacement_left,
       replacement_right,
index e78a0d95028b71393affdfa4c7e0885fc95b1469..0214399426d4ef87d15e3dea9296d024a67bc378 100644 (file)
@@ -112,6 +112,9 @@ struct trans_spec_view_t {
   // if the extent is pending, contains the id of the owning transaction;
   // TRANS_ID_NULL otherwise
   transaction_id_t pending_for_transaction = TRANS_ID_NULL;
+  trans_spec_view_t() = default;
+  trans_spec_view_t(transaction_id_t id) : pending_for_transaction(id) {}
+  virtual ~trans_spec_view_t() = default;
 
   struct cmp_t {
     bool operator()(
@@ -307,7 +310,7 @@ public:
     return true;
   }
 
-  void rewrite(CachedExtent &e, extent_len_t o) {
+  void rewrite(Transaction &t, CachedExtent &e, extent_len_t o) {
     assert(is_initial_pending());
     if (!e.is_pending()) {
       prior_instance = &e;
@@ -321,7 +324,7 @@ public:
       get_bptr().c_str());
     set_modify_time(e.get_modify_time());
     set_last_committed_crc(e.get_last_committed_crc());
-    on_rewrite(e, o);
+    on_rewrite(t, e, o);
   }
 
   /**
@@ -330,7 +333,7 @@ public:
    * Called when this extent is rewriting another one.
    *
    */
-  virtual void on_rewrite(CachedExtent &, extent_len_t) = 0;
+  virtual void on_rewrite(Transaction &, CachedExtent &, extent_len_t) = 0;
 
   friend std::ostream &operator<<(std::ostream &, extent_state_t);
   virtual std::ostream &print_detail(std::ostream &out) const { return out; }
@@ -1221,7 +1224,7 @@ public:
     return false;
   }
 
-  void on_rewrite(CachedExtent&, extent_len_t) final {}
+  void on_rewrite(Transaction &, CachedExtent&, extent_len_t) final {}
 
   std::ostream &print_detail(std::ostream &out) const final {
     return out << ", RetiredExtentPlaceholder";
@@ -1308,7 +1311,7 @@ public:
     : ChildableCachedExtent(std::forward<T>(t)...)
   {}
 
-  void on_rewrite(CachedExtent &extent, extent_len_t off) final {
+  void on_rewrite(Transaction&, CachedExtent &extent, extent_len_t off) final {
     assert(get_type() == extent.get_type());
     auto &lextent = (LogicalCachedExtent&)extent;
     set_laddr(lextent.get_laddr() + off);
index a9a7cd1982210734d95bdfa311912f8854c260ff..942434dd5965fcd7a24196fd218d5c32b0308ffb 100644 (file)
@@ -50,7 +50,7 @@ struct RootBlock : CachedExtent {
       backref_root_node(nullptr)
   {}
 
-  void on_rewrite(CachedExtent&, extent_len_t) final {}
+  void on_rewrite(Transaction&, CachedExtent&, extent_len_t) final {}
 
   CachedExtentRef duplicate_for_write(Transaction&) final {
     return CachedExtentRef(new RootBlock(*this));
index f6af7cfc350854aa48f147d432d01a4ffaf3c28d..d7cdb30098a0fe34693fd93233d5b64f6564f032 100644 (file)
@@ -391,6 +391,7 @@ public:
     get_handle().exit();
     on_destruct(*this);
     invalidate_clear_write_set();
+    views.clear();
   }
 
   friend class crimson::os::seastore::SeaStore;
@@ -427,6 +428,7 @@ public:
       has_reset = true;
     }
     get_handle().exit();
+    views.clear();
   }
 
   bool did_reset() const {
@@ -512,6 +514,15 @@ public:
     return trans_id;
   }
 
+  using view_ref = std::unique_ptr<trans_spec_view_t>;
+  template <typename T, typename... Args,
+          std::enable_if_t<std::is_base_of_v<trans_spec_view_t, T>, int> = 0>
+  T& add_transactional_view(Args&&... args) {
+    auto &view = views.emplace_back(
+      std::make_unique<T>(std::forward<Args>(args)...));
+    return static_cast<T&>(*view);
+  }
+
 private:
   friend class Cache;
   friend Ref make_test_transaction();
@@ -577,6 +588,8 @@ private:
   std::list<CachedExtentRef> existing_block_list;
   existing_block_stats_t existing_block_stats;
 
+  std::list<view_ref> views;
+
   /**
    * retire_set
    *
index 93a22a8833177c4fc3f9a50b87f9239a84497969..5dd355f33584a684c31ffa393969250cc2bbb34f 100644 (file)
@@ -494,7 +494,7 @@ TransactionManager::rewrite_logical_extent(
       lextent->get_user_hint(),
       // get target rewrite generation
       lextent->get_rewrite_generation())->cast<LogicalCachedExtent>();
-    nlextent->rewrite(*lextent, 0);
+    nlextent->rewrite(t, *lextent, 0);
 
     DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent);
 
@@ -535,7 +535,7 @@ TransactionManager::rewrite_logical_extent(
         bool first_extent = (off == 0);
         ceph_assert(left >= nextent->get_length());
         auto nlextent = nextent->template cast<LogicalCachedExtent>();
-        nlextent->rewrite(*lextent, off);
+        nlextent->rewrite(t, *lextent, off);
         DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent);
 
         /* This update_mapping is, strictly speaking, unnecessary for delayed_alloc
index 76891076f3360d64b64d2032f8fdb18f8713766d..fde6ad99c414248b48c5a860bef087c3abb89706 100644 (file)
@@ -111,7 +111,7 @@ struct TestBlockPhysical : crimson::os::seastore::CachedExtent{
 
   std::vector<test_block_delta_t> delta = {};
 
-  void on_rewrite(CachedExtent&, extent_len_t) final {}
+  void on_rewrite(Transaction&, CachedExtent&, extent_len_t) final {}
 
   TestBlockPhysical(ceph::bufferptr &&ptr)
     : CachedExtent(std::move(ptr)) {}