git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: add backref manager
author Xuehan Xu <xxhdx1985126@gmail.com>
Fri, 18 Feb 2022 09:36:10 +0000 (17:36 +0800)
committer Xuehan Xu <xxhdx1985126@gmail.com>
Sat, 7 May 2022 04:51:33 +0000 (12:51 +0800)
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
src/crimson/os/seastore/CMakeLists.txt
src/crimson/os/seastore/backref/backref_tree_node.h
src/crimson/os/seastore/backref/btree_backref_manager.cc [new file with mode: 0644]
src/crimson/os/seastore/backref/btree_backref_manager.h [new file with mode: 0644]
src/crimson/os/seastore/backref_manager.cc [new file with mode: 0644]
src/crimson/os/seastore/backref_manager.h [new file with mode: 0644]
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction.h

index 127f4a23acee62f37fb8c2f2f90e370df152f492..cd5b8f94fb9475e6f523aa83115128d6f4e550fa 100644 (file)
@@ -9,7 +9,9 @@ set(crimson_seastore_srcs
   cache.cc
   lba_manager.cc
   segment_cleaner.cc
+  backref_manager.cc
   backref/backref_tree_node.cc
+  backref/btree_backref_manager.cc
   lba_manager/btree/btree_lba_manager.cc
   lba_manager/btree/lba_btree_node.cc
   omap_manager.cc
index 316787830a1c472a458903dfe742d8c620cf3a90..2a29f94068ef99d3e59fdb642304dcfc362171ec 100644 (file)
@@ -7,6 +7,7 @@
 
 namespace crimson::os::seastore::backref {
 
+using backref_node_meta_t = fixed_kv_node_meta_t<paddr_t>;
 using backref_node_meta_le_t = fixed_kv_node_meta_le_t<paddr_t>;
 
 constexpr size_t INTERNAL_NODE_CAPACITY = 254;
@@ -39,7 +40,8 @@ struct backref_map_val_le_t {
   extent_types_le_t type = 0;
 
   backref_map_val_le_t() = default;
-  backref_map_val_le_t(const backref_map_val_le_t &val)
+  backref_map_val_le_t(const backref_map_val_le_t &) = default;
+  explicit backref_map_val_le_t(const backref_map_val_t &val)
     : len(init_extent_len_le(val.len)),
       laddr(val.laddr),
       type(extent_types_le_t(val.type)) {}
diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.cc b/src/crimson/os/seastore/backref/btree_backref_manager.cc
new file mode 100644 (file)
index 0000000..d652da0
--- /dev/null
@@ -0,0 +1,473 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "crimson/os/seastore/backref/btree_backref_manager.h"
+
+SET_SUBSYS(seastore_backref);
+
+namespace crimson::os::seastore {
+
+template<>
+Transaction::tree_stats_t& get_tree_stats<
+  crimson::os::seastore::backref::BackrefBtree>(Transaction &t) {
+  return t.get_backref_tree_stats();
+}
+
+template<>
+phy_tree_root_t& get_phy_tree_root<
+  crimson::os::seastore::backref::BackrefBtree>(root_t &r) {
+  return r.backref_root;
+}
+
+}
+
+namespace crimson::os::seastore::backref {
+
+static depth_t get_depth(const CachedExtent &e)
+{
+  assert(is_backref_node(e.get_type()));
+  return e.cast<BackrefNode>()->get_node_meta().depth;
+}
+
+BtreeBackrefManager::mkfs_ret
+BtreeBackrefManager::mkfs(
+  Transaction &t)
+{
+  LOG_PREFIX(BtreeBackrefManager::mkfs);
+  INFOT("start", t);
+  return cache.get_root(t).si_then([this, &t](auto croot) {
+    croot->get_root().backref_root = BackrefBtree::mkfs(get_context(t));
+    return mkfs_iertr::now();
+  }).handle_error_interruptible(
+    mkfs_iertr::pass_further{},
+    crimson::ct_error::assert_all{
+      "Invalid error in BtreeBackrefManager::mkfs"
+    }
+  );
+}
+
+BtreeBackrefManager::get_mapping_ret
+BtreeBackrefManager::get_mapping(
+  Transaction &t,
+  paddr_t offset)
+{
+  LOG_PREFIX(BtreeBackrefManager::get_mapping);
+  TRACET("{}", t, offset);
+  auto c = get_context(t);
+  return with_btree_ret<BackrefBtree, BackrefPinRef>(
+    cache,
+    c,
+    [c, offset](auto &btree) {
+    return btree.lower_bound(
+      c, offset
+    ).si_then([offset, c](auto iter) -> get_mapping_ret {
+      LOG_PREFIX(BtreeBackrefManager::get_mapping);
+      if (iter.is_end() || iter.get_key() != offset) {
+       ERRORT("{} doesn't exist", c.trans, offset);
+       return crimson::ct_error::enoent::make();
+      } else {
+       TRACET("{} got {}, {}",
+              c.trans, offset, iter.get_key(), iter.get_val());
+       auto e = iter.get_pin();
+       return get_mapping_ret(
+         interruptible::ready_future_marker{},
+         std::move(e));
+      }
+    });
+  });
+}
+
+BtreeBackrefManager::get_mappings_ret
+BtreeBackrefManager::get_mappings(
+  Transaction &t,
+  paddr_t offset,
+  paddr_t end)
+{
+  LOG_PREFIX(BtreeBackrefManager::get_mappings);
+  TRACET("{}~{}", t, offset, end);
+  auto c = get_context(t);
+  return with_btree_state<BackrefBtree, backref_pin_list_t>(
+    cache,
+    c,
+    [c, offset, end](auto &btree, auto &ret) {
+      return BackrefBtree::iterate_repeat(
+       c,
+       btree.upper_bound_right(c, offset),
+       [&ret, offset, end, c](auto &pos) {
+         LOG_PREFIX(BtreeBackrefManager::get_mappings);
+         if (pos.is_end() || pos.get_key() >= end) {
+           TRACET("{}~{} done with {} results",
+                  c.trans, offset, end, ret.size());
+           return BackrefBtree::iterate_repeat_ret_inner(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::yes);
+         }
+         TRACET("{}~{} got {}, {}, repeat ...",
+                c.trans, offset, end, pos.get_key(), pos.get_val());
+         ceph_assert((pos.get_key() + pos.get_val().len) > offset);
+         ret.push_back(pos.get_pin());
+         return BackrefBtree::iterate_repeat_ret_inner(
+           interruptible::ready_future_marker{},
+           seastar::stop_iteration::no);
+       });
+    });
+}
+
+BtreeBackrefManager::new_mapping_ret
+BtreeBackrefManager::new_mapping(
+  Transaction &t,
+  paddr_t key,
+  extent_len_t len,
+  laddr_t addr,
+  extent_types_t type)
+{
+  ceph_assert(
+    is_aligned(
+      key.as_seg_paddr().get_segment_off(),
+      (uint64_t)sm_group.get_block_size()));
+  struct state_t {
+    paddr_t last_end;
+
+    std::optional<BackrefBtree::iterator> insert_iter;
+    std::optional<BackrefBtree::iterator> ret;
+
+    state_t(paddr_t hint) : last_end(hint) {}
+  };
+
+  LOG_PREFIX(BtreeBackrefManager::new_mapping);
+  DEBUGT("{}~{}, paddr={}", t, addr, len, key);
+  backref_map_val_t val{len, addr, type};
+  auto c = get_context(t);
+  //++stats.num_alloc_extents;
+  //auto lookup_attempts = stats.num_alloc_extents_iter_nexts;
+  return crimson::os::seastore::with_btree_state<BackrefBtree, state_t>(
+    cache,
+    c,
+    key,
+    [val, c, key, len, addr, /*lookup_attempts,*/ &t]
+    (auto &btree, auto &state) {
+      return BackrefBtree::iterate_repeat(
+       c,
+       btree.upper_bound_right(c, key),
+       [&state, len, addr, &t, key/*, lookup_attempts*/](auto &pos) {
+         LOG_PREFIX(BtreeBackrefManager::new_mapping);
+         //++stats.num_alloc_extents_iter_nexts;
+         if (pos.is_end()) {
+           DEBUGT("{}~{}, paddr={}, state: end, insert at {}",
+                   t, addr, len, key,
+                   //stats.num_alloc_extents_iter_nexts - lookup_attempts,
+                   state.last_end);
+           state.insert_iter = pos;
+           return BackrefBtree::iterate_repeat_ret_inner(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::yes);
+         } else if (pos.get_key() >= (state.last_end.add_offset(len))) {
+           DEBUGT("{}~{}, paddr={}, state: {}~{}, "
+                  "insert at {} -- {}",
+                   t, addr, len, key,
+                   pos.get_key(), pos.get_val().len,
+                   //stats.num_alloc_extents_iter_nexts - lookup_attempts,
+                   state.last_end,
+                   pos.get_val());
+           state.insert_iter = pos;
+           return BackrefBtree::iterate_repeat_ret_inner(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::yes);
+         } else {
+           ERRORT("{}~{}, paddr={}, state: {}~{}, repeat ... -- {}",
+                   t, addr, len, key,
+                   pos.get_key(), pos.get_val().len,
+                   pos.get_val());
+           ceph_abort("not possible for the backref tree");
+           return BackrefBtree::iterate_repeat_ret_inner(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::no);
+         }
+       }).si_then([c, addr, len, key, &btree, &state, val] {
+         return btree.insert(
+           c,
+           *state.insert_iter,
+           state.last_end,
+           val
+         ).si_then([&state, c, addr, len, key](auto &&p) {
+           LOG_PREFIX(BtreeBackrefManager::new_mapping);
+           auto [iter, inserted] = std::move(p);
+           TRACET("{}~{}, paddr={}, inserted at {}",
+                  c.trans, addr, len, key, state.last_end);
+           ceph_assert(inserted);
+           state.ret = iter;
+         });
+       });
+    }).si_then([](auto &&state) {
+      return state.ret->get_pin();
+    });
+}
+
+BtreeBackrefManager::batch_insert_ret
+BtreeBackrefManager::batch_insert_from_cache(
+  Transaction &t,
+  const journal_seq_t &limit,
+  const uint64_t max)
+{
+  LOG_PREFIX(BtreeBackrefManager::batch_insert_from_cache);
+  DEBUGT("insert up to {}", t, limit);
+  return seastar::do_with(
+    limit,
+    cache.get_backref_bufs_to_flush().begin(),
+    JOURNAL_SEQ_NULL,
+    [this, &t, max](auto &limit, auto &iter, auto &inserted_to) {
+    return trans_intr::repeat(
+      [&iter, this, &limit, &t, max, &inserted_to]()
+      -> batch_insert_iertr::future<seastar::stop_iteration> {
+      if (iter == cache.get_backref_bufs_to_flush().end())
+       return seastar::make_ready_future<seastar::stop_iteration>(
+         seastar::stop_iteration::yes);
+      auto &bbr = *iter;
+      LOG_PREFIX(BtreeBackrefManager::batch_insert_from_cache);
+      DEBUGT("backref buffer starting seq: {}", t, bbr->backrefs.begin()->first);
+      if (bbr->backrefs.begin()->first <= limit) {
+       return batch_insert(
+         t,
+         bbr,
+         limit,
+         max
+       ).si_then([max, &iter, &inserted_to, &t](auto new_inserted_to) {
+         assert(inserted_to == JOURNAL_SEQ_NULL
+           || new_inserted_to >= inserted_to);
+         inserted_to = new_inserted_to;
+         if (t.get_num_fresh_backref() * BACKREF_NODE_SIZE < max) {
+           iter++;
+           return seastar::make_ready_future<seastar::stop_iteration>(
+             seastar::stop_iteration::no);
+         } else {
+           return seastar::make_ready_future<seastar::stop_iteration>(
+             seastar::stop_iteration::yes);
+         }
+       });
+      } else {
+       return seastar::make_ready_future<seastar::stop_iteration>(
+         seastar::stop_iteration::yes);
+      }
+    }).si_then([&inserted_to, this, &iter, &limit, &t, max] {
+      auto &backref_buffer = cache.get_newest_backref_buffer();
+      if (iter == cache.get_backref_bufs_to_flush().end()
+         && backref_buffer) {
+       return batch_insert(
+         t,
+         backref_buffer,
+         limit,
+         max
+       ).si_then([&inserted_to](auto new_inserted_to) {
+         assert(inserted_to == JOURNAL_SEQ_NULL
+           || new_inserted_to >= inserted_to);
+         return seastar::make_ready_future<journal_seq_t>(
+           std::move(new_inserted_to));
+       });
+      }
+      return batch_insert_iertr::make_ready_future<journal_seq_t>(
+       std::move(inserted_to));
+    });
+  });
+}
+
+BtreeBackrefManager::scan_mapped_space_ret
+BtreeBackrefManager::scan_mapped_space(
+  Transaction &t,
+  BtreeBackrefManager::scan_mapped_space_func_t &&f)
+{
+  LOG_PREFIX(BtreeBackrefManager::scan_mapped_space);
+  DEBUGT("start", t);
+  auto c = get_context(t);
+  return seastar::do_with(
+    std::move(f),
+    [this, c](auto &visitor) {
+      return with_btree<BackrefBtree>(
+       cache,
+       c,
+       [c, &visitor](auto &btree) {
+         return BackrefBtree::iterate_repeat(
+           c,
+           btree.lower_bound(
+             c,
+             paddr_t::make_seg_paddr(
+               segment_id_t{0, 0}, 0),
+             &visitor),
+           [&visitor](auto &pos) {
+             if (pos.is_end()) {
+               return BackrefBtree::iterate_repeat_ret_inner(
+                 interruptible::ready_future_marker{},
+                 seastar::stop_iteration::yes);
+             }
+             visitor(pos.get_key(), pos.get_val().len, 0);
+             return BackrefBtree::iterate_repeat_ret_inner(
+               interruptible::ready_future_marker{},
+               seastar::stop_iteration::no);
+           },
+           &visitor);
+       });
+    });
+}
+
+BtreeBackrefManager::batch_insert_ret
+BtreeBackrefManager::batch_insert(
+  Transaction &t,
+  backref_buffer_ref &bbr,
+  const journal_seq_t &limit,
+  const uint64_t max)
+{
+  return seastar::do_with(
+    bbr->backrefs.begin(),
+    JOURNAL_SEQ_NULL,
+    [this, &t, &limit, &bbr, max](auto &iter, auto &inserted_to) {
+    return trans_intr::repeat(
+      [&iter, this, &t, &limit, &bbr, max, &inserted_to]()
+      -> batch_insert_iertr::future<seastar::stop_iteration> {
+      if (iter == bbr->backrefs.end())
+       return seastar::make_ready_future<seastar::stop_iteration>(
+         seastar::stop_iteration::yes);
+      auto &seq = iter->first;
+      auto &backref_list = iter->second;
+      LOG_PREFIX(BtreeBackrefManager::batch_insert);
+      DEBUGT("seq {}, limit {}, num_fresh_backref {}"
+       , t, seq, limit, t.get_num_fresh_backref());
+      if (seq <= limit && t.get_num_fresh_backref() * BACKREF_NODE_SIZE < max) {
+       inserted_to = seq;
+       return trans_intr::do_for_each(
+         backref_list,
+         [this, &t](auto &backref) {
+         LOG_PREFIX(BtreeBackrefManager::batch_insert);
+         if (backref->laddr != L_ADDR_NULL) {
+           DEBUGT("new mapping: {}~{} -> {}",
+             t, backref->paddr, backref->len, backref->laddr);
+           return new_mapping(
+             t,
+             backref->paddr,
+             backref->len,
+             backref->laddr,
+             backref->type).si_then([](auto &&pin) {
+             return seastar::now();
+           });
+         } else {
+           DEBUGT("remove mapping: {}", t, backref->paddr);
+           return remove_mapping(
+             t,
+             backref->paddr).si_then([](auto&&) {
+             return seastar::now();
+           }).handle_error_interruptible(
+             crimson::ct_error::input_output_error::pass_further(),
+             crimson::ct_error::assert_all("no enoent possible")
+           );
+         }
+       }).si_then([&iter] {
+         iter++;
+         return seastar::make_ready_future<seastar::stop_iteration>(
+           seastar::stop_iteration::no);
+       });
+      }
+      return seastar::make_ready_future<seastar::stop_iteration>(
+       seastar::stop_iteration::yes);
+    }).si_then([&inserted_to] {
+      return seastar::make_ready_future<journal_seq_t>(
+       std::move(inserted_to));
+    });
+  });
+}
+
+BtreeBackrefManager::base_iertr::future<> _init_cached_extent(
+  op_context_t<paddr_t> c,
+  const CachedExtentRef &e,
+  BackrefBtree &btree,
+  bool &ret)
+{
+  return btree.init_cached_extent(c, e
+  ).si_then([&ret](bool is_alive) {
+    ret = is_alive;
+  });
+}
+
+BtreeBackrefManager::init_cached_extent_ret BtreeBackrefManager::init_cached_extent(
+  Transaction &t,
+  CachedExtentRef e)
+{
+  LOG_PREFIX(BtreeBackrefManager::init_cached_extent);
+  TRACET("{}", t, *e);
+  return seastar::do_with(bool(), [this, e, &t](bool &ret) {
+    auto c = get_context(t);
+    return with_btree<BackrefBtree>(cache, c, [c, e, &ret](auto &btree)
+      -> base_iertr::future<> {
+      LOG_PREFIX(BtreeBackrefManager::init_cached_extent);
+      DEBUGT("extent {}", c.trans, *e);
+      return _init_cached_extent(c, e, btree, ret);
+    }).si_then([&ret] { return ret; });
+  });
+}
+
+BtreeBackrefManager::remove_mapping_ret
+BtreeBackrefManager::remove_mapping(
+  Transaction &t,
+  paddr_t addr)
+{
+  auto c = get_context(t);
+  return with_btree_ret<BackrefBtree, remove_mapping_result_t>(
+    cache,
+    c,
+    [c, addr](auto &btree) mutable {
+      return btree.lower_bound(
+       c, addr
+      ).si_then([&btree, c, addr](auto iter)
+               -> remove_mapping_ret {
+       if (iter.is_end() || iter.get_key() != addr) {
+         LOG_PREFIX(BtreeBackrefManager::remove_mapping);
+         ERRORT("paddr={} doesn't exist", c.trans, addr);
+         return crimson::ct_error::enoent::make();
+       }
+
+       auto ret = remove_mapping_result_t{
+         iter.get_key(),
+         iter.get_val().len,
+         iter.get_val().laddr};
+       return btree.remove(
+         c,
+         iter
+       ).si_then([ret] {
+         return ret;
+       });
+      });
+    });
+}
+
+void BtreeBackrefManager::complete_transaction(
+  Transaction &t,
+  std::vector<CachedExtentRef> &to_clear,
+  std::vector<CachedExtentRef> &to_link)
+{
+  LOG_PREFIX(BtreeBackrefManager::complete_transaction);
+  DEBUGT("start", t);
+  // need to call check_parent from leaf->parent
+  std::sort(
+    to_clear.begin(), to_clear.end(),
+    [](auto &l, auto &r) { return get_depth(*l) < get_depth(*r); });
+
+  for (auto &e: to_clear) {
+    auto &pin = e->cast<BackrefNode>()->pin;
+    DEBUGT("retiring extent {} -- {}", t, pin, *e);
+    pin_set.retire(pin);
+  }
+
+  std::sort(
+    to_link.begin(), to_link.end(),
+    [](auto &l, auto &r) -> bool { return get_depth(*l) > get_depth(*r); });
+
+  for (auto &e : to_link) {
+    DEBUGT("linking extent -- {}", t, *e);
+    pin_set.add_pin(e->cast<BackrefNode>()->pin);
+  }
+
+  for (auto &e: to_clear) {
+    auto &pin = e->cast<BackrefNode>()->pin;
+    TRACET("checking extent {} -- {}", t, pin, *e);
+    pin_set.check_parent(pin);
+  }
+}
+
+} // namespace crimson::os::seastore::backref
diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.h b/src/crimson/os/seastore/backref/btree_backref_manager.h
new file mode 100644 (file)
index 0000000..1ec9a38
--- /dev/null
@@ -0,0 +1,115 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "crimson/os/seastore/backref_manager.h"
+#include "crimson/os/seastore/backref/backref_tree_node.h"
+#include "crimson/os/seastore/btree/fixed_kv_btree.h"
+
+namespace crimson::os::seastore::backref {
+
+constexpr size_t BACKREF_BLOCK_SIZE = 4096;
+
+class BtreeBackrefPin : public BtreeNodePin<paddr_t, laddr_t> {
+  extent_types_t type;
+public:
+  BtreeBackrefPin() = default;
+  BtreeBackrefPin(
+    CachedExtentRef parent,
+    backref_map_val_t &val,
+    backref_node_meta_t &&meta)
+    : BtreeNodePin(
+       parent,
+       val.laddr,
+       val.len,
+       std::forward<backref_node_meta_t>(meta)),
+      type(val.type)
+  {}
+  extent_types_t get_type() const final {
+    return type;
+  }
+};
+
+using BackrefBtree = FixedKVBtree<
+  paddr_t, backref_map_val_t, BackrefInternalNode,
+  BackrefLeafNode, BtreeBackrefPin, BACKREF_BLOCK_SIZE>;
+
+class BtreeBackrefManager : public BackrefManager {
+public:
+
+  BtreeBackrefManager(
+    SegmentManagerGroup &sm_group,
+    Cache &cache)
+    : sm_group(sm_group),
+      cache(cache)
+  {}
+
+  mkfs_ret mkfs(
+    Transaction &t) final;
+
+  get_mapping_ret  get_mapping(
+    Transaction &t,
+    paddr_t offset) final;
+
+  get_mappings_ret get_mappings(
+    Transaction &t,
+    paddr_t offset,
+    paddr_t end) final;
+
+  new_mapping_ret new_mapping(
+    Transaction &t,
+    paddr_t key,
+    extent_len_t len,
+    laddr_t val,
+    extent_types_t type) final;
+
+  batch_insert_ret batch_insert(
+    Transaction &t,
+    backref_buffer_ref &bbr,
+    const journal_seq_t &limit,
+    const uint64_t max) final;
+
+  batch_insert_ret batch_insert_from_cache(
+    Transaction &t,
+    const journal_seq_t &limit,
+    const uint64_t max) final;
+
+  remove_mapping_ret remove_mapping(
+    Transaction &t,
+    paddr_t offset) final;
+
+  scan_mapped_space_ret scan_mapped_space(
+    Transaction &t,
+    scan_mapped_space_func_t &&f) final;
+
+  init_cached_extent_ret init_cached_extent(
+    Transaction &t,
+    CachedExtentRef e) final;
+
+  void complete_transaction(
+    Transaction &t,
+    std::vector<CachedExtentRef> &,
+    std::vector<CachedExtentRef> &) final;
+
+  void add_pin(BackrefPin &pin) final {
+    auto *bpin = reinterpret_cast<BtreeBackrefPin*>(&pin);
+    pin_set.add_pin(bpin->get_range_pin());
+    bpin->set_parent(nullptr);
+  }
+  void remove_pin(BackrefPin &pin) final {
+    auto *bpin = reinterpret_cast<BtreeBackrefPin*>(&pin);
+    pin_set.retire(bpin->get_range_pin());
+  }
+private:
+  SegmentManagerGroup &sm_group;
+  Cache &cache;
+
+  btree_pin_set_t<paddr_t> pin_set;
+
+  op_context_t<paddr_t> get_context(Transaction &t) {
+    return op_context_t<paddr_t>{cache, t, &pin_set};
+  }
+};
+
+} // namespace crimson::os::seastore::backref
diff --git a/src/crimson/os/seastore/backref_manager.cc b/src/crimson/os/seastore/backref_manager.cc
new file mode 100644 (file)
index 0000000..8e19599
--- /dev/null
@@ -0,0 +1,20 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/backref_manager.h"
+#include "crimson/os/seastore/backref/btree_backref_manager.h"
+
+namespace crimson::os::seastore {
+
+BackrefManagerRef create_backref_manager(
+  SegmentManagerGroup &sm_group,
+  Cache &cache)
+{
+  return BackrefManagerRef(
+    new backref::BtreeBackrefManager(
+      sm_group, cache));
+}
+
+} // namespace crimson::os::seastore
+
diff --git a/src/crimson/os/seastore/backref_manager.h b/src/crimson/os/seastore/backref_manager.h
new file mode 100644 (file)
index 0000000..3ebd106
--- /dev/null
@@ -0,0 +1,147 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/cached_extent.h"
+#include "crimson/os/seastore/segment_manager_group.h"
+#include "crimson/os/seastore/transaction.h"
+
+namespace crimson::os::seastore {
+
+/**
+ * Abstract interface for managing back references that map paddr_t to laddr_t
+ */
+class BackrefManager {
+public:
+  using base_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error>;
+  using base_iertr = trans_iertr<base_ertr>;
+
+  using mkfs_iertr = base_iertr;
+  using mkfs_ret = mkfs_iertr::future<>;
+  virtual mkfs_ret mkfs(
+    Transaction &t) = 0;
+
+  /**
+   * Fetches mappings for paddr_t in range [offset, offset + len)
+   *
+   * Future will not resolve until all pins have resolved
+   */
+  using get_mappings_iertr = base_iertr;
+  using get_mappings_ret = get_mappings_iertr::future<backref_pin_list_t>;
+  virtual get_mappings_ret get_mappings(
+    Transaction &t,
+    paddr_t offset,
+    paddr_t end) = 0;
+
+  /**
+   * Fetches the mapping for paddr_t
+   *
+   * Future will not resolve until the pin has resolved
+   */
+  using get_mapping_iertr = base_iertr::extend<
+    crimson::ct_error::enoent>;
+  using get_mapping_ret = get_mapping_iertr::future<BackrefPinRef>;
+  virtual get_mapping_ret  get_mapping(
+    Transaction &t,
+    paddr_t offset) = 0;
+
+  /**
+   * Insert new paddr_t -> laddr_t mapping
+   */
+  using new_mapping_iertr = base_iertr;
+  using new_mapping_ret = new_mapping_iertr::future<BackrefPinRef>;
+  virtual new_mapping_ret new_mapping(
+    Transaction &t,
+    paddr_t key,
+    extent_len_t len,
+    laddr_t val,
+    extent_types_t type) = 0;
+
+  /**
+   * Check if a CachedExtent is alive, should be called
+   * after replay on each cached extent.
+   *
+   * @return returns whether the extent is alive
+   */
+  using init_cached_extent_iertr = base_iertr;
+  using init_cached_extent_ret = init_cached_extent_iertr::future<bool>;
+  virtual init_cached_extent_ret init_cached_extent(
+    Transaction &t,
+    CachedExtentRef e) = 0;
+
+  /**
+   * insert new paddr_t -> laddr_t mappings in batches
+   */
+  using batch_insert_iertr = base_iertr;
+  using batch_insert_ret = batch_insert_iertr::future<journal_seq_t>;
+  virtual batch_insert_ret batch_insert(
+    Transaction &t,                    ///< Transaction that commits the updates
+    backref_buffer_ref &bbr,           ///< the set of backref mappings to be inserted
+    const journal_seq_t &limit,                ///< the journal seq upper bound that the insertion
+                                       //   shouldn't cross
+    const uint64_t max                 ///< maximum fresh backref extents that can be
+                                       //   created by this insertion
+  ) = 0;
+
+  /**
+   * insert new mappings directly from Cache
+   */
+  virtual batch_insert_ret batch_insert_from_cache(
+    Transaction &t,
+    const journal_seq_t &limit,
+    const uint64_t max) = 0;
+
+  struct remove_mapping_result_t {
+    paddr_t offset;
+    extent_len_t len;
+    laddr_t laddr;
+  };
+
+  /**
+   * delete the mapping for paddr_t offset
+   */
+  using remove_mapping_iertr = base_iertr::extend<
+    crimson::ct_error::enoent>;
+  using remove_mapping_ret = remove_mapping_iertr::future<remove_mapping_result_t>;
+  virtual remove_mapping_ret remove_mapping(
+    Transaction &t,
+    paddr_t offset) = 0;
+
+  /**
+   * scan all extents, including backref extents, logical extents and lba extents,
+   * visit them with scan_mapped_space_func_t
+   */
+  using scan_mapped_space_iertr = base_iertr::extend_ertr<
+    SegmentManager::read_ertr>;
+  using scan_mapped_space_ret = scan_mapped_space_iertr::future<>;
+  using scan_mapped_space_func_t = std::function<
+    void(paddr_t, extent_len_t, depth_t)>;
+  virtual scan_mapped_space_ret scan_mapped_space(
+    Transaction &t,
+    scan_mapped_space_func_t &&f) = 0;
+
+  virtual void complete_transaction(
+    Transaction &t,
+    std::vector<CachedExtentRef> &to_clear,    ///< extents whose pins are to be cleared,
+                                               //   as the results of their retirements
+    std::vector<CachedExtentRef> &to_link      ///< fresh extents whose pins are to be inserted
+                                               //   into backref manager's pin set
+  ) = 0;
+
+  virtual void add_pin(BackrefPin &pin) = 0;
+  virtual void remove_pin(BackrefPin &pin) = 0;
+
+  virtual ~BackrefManager() {}
+};
+
+using BackrefManagerRef =
+  std::unique_ptr<BackrefManager>;
+
+BackrefManagerRef create_backref_manager(
+  SegmentManagerGroup &sm_group,
+  Cache &cache);
+
+} // namespace crimson::os::seastore
index 5f7664886c11b0252c3d7286d0feef36677abe85..d2b12cfd9ad22d24b4e463480bb96ed0b788914f 100644 (file)
@@ -615,6 +615,10 @@ public:
     return backref_remove_set;
   }
 
+  backref_buffer_ref& get_newest_backref_buffer() {
+    return backref_buffer;
+  }
+
   std::list<backref_buffer_ref>& get_backref_bufs_to_flush() {
     return backref_bufs_to_flush;
   }
index f0b88133d3e7779942e3acd26b8a165f45bcb932..4cb86c49eff64ed36cc526da92d2b7f5e6c5b664 100644 (file)
@@ -701,6 +701,11 @@ using lba_pin_list_t = std::list<LBAPinRef>;
 
 std::ostream &operator<<(std::ostream &out, const lba_pin_list_t &rhs);
 
+using BackrefPin = PhysicalNodePin<paddr_t, laddr_t>;
+using BackrefPinRef = PhysicalNodePinRef<paddr_t, laddr_t>;
+
+using backref_pin_list_t = std::list<BackrefPinRef>;
+
 /**
  * RetiredExtentPlaceholder
  *
index ed87f276677bb4b7666638081358b1fd1d946c20..46b03c523e5311fd68c2e4fbb9982645778b6f9d 100644 (file)
@@ -1213,6 +1213,7 @@ public:
 };
 
 using lba_root_t = phy_tree_root_t;
+using backref_root_t = phy_tree_root_t;
 
 /**
  * root_t
@@ -1225,6 +1226,7 @@ struct __attribute__((packed)) root_t {
 
   static constexpr int MAX_META_LENGTH = 1024;
 
+  backref_root_t backref_root;
   lba_root_t lba_root;
   laddr_le_t onode_root;
   coll_root_le_t collection_root;
@@ -1237,6 +1239,7 @@ struct __attribute__((packed)) root_t {
 
   void adjust_addrs_from_base(paddr_t base) {
     lba_root.adjust_addrs_from_base(base);
+    backref_root.adjust_addrs_from_base(base);
   }
 
   meta_t get_meta() {
index 25ef0eae4c4959389efc71d08aac0d17713eb12c..a64d902264727e136c75c4f49a509ee3283f4d54 100644 (file)
@@ -298,6 +298,7 @@ public:
     retired_set.clear();
     onode_tree_stats = {};
     lba_tree_stats = {};
+    backref_tree_stats = {};
     ool_write_stats = {};
     to_release = NULL_SEG_ID;
     conflicted = false;
@@ -327,6 +328,9 @@ public:
   tree_stats_t& get_lba_tree_stats() {
     return lba_tree_stats;
   }
+  tree_stats_t& get_backref_tree_stats() {
+    return backref_tree_stats;
+  }
   void add_rbm_alloc_info_blocks(rbm_alloc_delta_t &d) {
     rbm_alloc_info_blocks.push_back(d);
   }
@@ -417,6 +421,7 @@ private:
   /// stats to collect when commit or invalidate
   tree_stats_t onode_tree_stats;
   tree_stats_t lba_tree_stats;
+  tree_stats_t backref_tree_stats;
   ool_write_stats_t ool_write_stats;
 
   ///< if != NULL_SEG_ID, release this segment after completion