]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: lazy backref tree updates for extent (re)placement
authorXuehan Xu <xxhdx1985126@gmail.com>
Thu, 10 Mar 2022 02:55:38 +0000 (10:55 +0800)
committerXuehan Xu <xxhdx1985126@gmail.com>
Sat, 7 May 2022 04:51:29 +0000 (12:51 +0800)
Buffer all backref updates(allocs/releases) in Cache.
Also, organize them into an intrusive set, so later backref query can be done efficiently
Backref query should always be first applied to this set and then to the backref manager

Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/seastore.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h

index 5b031457f2cb612ae93b0ed159a2597551dd327c..0cd0e85d56f9655525e3054ab8f1cfb025cbe7d5 100644 (file)
@@ -1166,6 +1166,50 @@ record_t Cache::prepare_record(
   return record;
 }
 
+void Cache::backref_batch_update(
+  std::vector<backref_buf_entry_ref> &&list,
+  const journal_seq_t &seq)
+{
+  LOG_PREFIX(Cache::backref_batch_update);
+  DEBUG("inserting {} entries", list.size());
+  if (!backref_buffer) {
+    backref_buffer = std::make_unique<backref_buffer_t>();
+  }
+  // backref_buf_entry_t::laddr == L_ADDR_NULL means erase
+  for (auto &ent : list) {
+    if (ent->laddr == L_ADDR_NULL) {
+      auto [it, insert] = backref_remove_set.insert(*ent);
+#ifndef NDEBUG
+      if (!insert) {
+       ERROR("backref_remove_set already contains {}", ent->paddr);
+      }
+#endif
+      assert(insert);
+    } else {
+#ifndef NDEBUG
+      auto r = backref_remove_set.erase(ent->paddr, backref_buf_entry_t::cmp_t());
+      if (r) {
+       ERROR("backref_remove_set contains: {}", ent->paddr);
+      }
+      assert(!r);
+#endif
+      auto [it, insert] = backref_inserted_set.insert(*ent);
+      assert(insert);
+    }
+  }
+
+  auto iter = backref_buffer->backrefs.find(seq);
+  if (iter == backref_buffer->backrefs.end()) {
+    backref_buffer->backrefs.emplace(
+      seq, std::move(list));
+  } else {
+    iter->second.insert(
+      iter->second.end(),
+      std::make_move_iterator(list.begin()),
+      std::make_move_iterator(list.end()));
+  }
+}
+
 void Cache::complete_commit(
   Transaction &t,
   paddr_t final_block_start,
@@ -1176,7 +1220,10 @@ void Cache::complete_commit(
   SUBTRACET(seastore_t, "final_block_start={}, seq={}",
             t, final_block_start, seq);
 
-  t.for_each_fresh_block([&](auto &i) {
+  may_roll_backref_buffer(final_block_start);
+
+  std::vector<backref_buf_entry_ref> backref_list;
+  t.for_each_fresh_block([&](const CachedExtentRef &i) {
     bool is_inline = false;
     if (i->is_inline()) {
       is_inline = true;
@@ -1201,6 +1248,19 @@ void Cache::complete_commit(
            ? i->last_rewritten
            : seastar::lowres_system_clock::time_point());
       }
+      if (is_backref_mapped_extent_node(i)) {
+       backref_list.emplace_back(
+         std::make_unique<backref_buf_entry_t>(
+           i->get_paddr(),
+           i->is_logical()
+           ? i->cast<LogicalCachedExtent>()->get_laddr()
+           : (is_lba_node(i->get_type())
+             ? i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin
+             : L_ADDR_NULL),
+           i->get_length(),
+           i->get_type(),
+           seq));
+      }
     }
   });
 
@@ -1240,7 +1300,19 @@ void Cache::complete_commit(
   last_commit = seq;
   for (auto &i: t.retired_set) {
     i->dirty_from_or_retired_at = last_commit;
+    if (is_backref_mapped_extent_node(i)
+         || is_retired_placeholder(i->get_type())) {
+      backref_list.emplace_back(
+       std::make_unique<backref_buf_entry_t>(
+         i->get_paddr(),
+         L_ADDR_NULL,
+         i->get_length(),
+         i->get_type(),
+         seq));
+    }
   }
+  if (!backref_list.empty())
+    backref_batch_update(std::move(backref_list), seq);
 }
 
 void Cache::init()
@@ -1291,6 +1363,8 @@ Cache::close_ertr::future<> Cache::close()
     dirty.erase(i++);
     intrusive_ptr_release(ptr);
   }
+  backref_bufs_to_flush.clear();
+  backref_buffer.reset();
   assert(stats.dirty_bytes == 0);
   lru.clear();
   return close_ertr::now();
index 789e5fa7a85774c5e5e81f64763e374883708ace..5f7664886c11b0252c3d7286d0feef36677abe85 100644 (file)
 
 namespace crimson::os::seastore {
 
+struct backref_buf_entry_t {
+  backref_buf_entry_t(
+    const paddr_t paddr,
+    const laddr_t laddr,
+    const extent_len_t len,
+    const extent_types_t type,
+    const journal_seq_t seq)
+    : paddr(paddr),
+      laddr(laddr),
+      len(len),
+      type(type),
+      seq(seq)
+  {}
+  const paddr_t paddr = P_ADDR_NULL;
+  const laddr_t laddr = L_ADDR_NULL;
+  const extent_len_t len = 0;
+  const extent_types_t type =
+    extent_types_t::ROOT;
+  const journal_seq_t seq;
+  friend bool operator< (
+    const backref_buf_entry_t &l,
+    const backref_buf_entry_t &r) {
+    return l.paddr < r.paddr;
+  }
+  friend bool operator> (
+    const backref_buf_entry_t &l,
+    const backref_buf_entry_t &r) {
+    return l.paddr > r.paddr;
+  }
+  friend bool operator== (
+    const backref_buf_entry_t &l,
+    const backref_buf_entry_t &r) {
+    return l.paddr == r.paddr;
+  }
+  using hook_t =
+    boost::intrusive::set_member_hook<
+      boost::intrusive::link_mode<
+       boost::intrusive::auto_unlink>>;
+  hook_t backref_set_hook;
+  using backref_set_member_options = boost::intrusive::member_hook<
+    backref_buf_entry_t,
+    hook_t,
+    &backref_buf_entry_t::backref_set_hook>;
+  using set_t = boost::intrusive::set<
+    backref_buf_entry_t,
+    backref_set_member_options,
+    boost::intrusive::constant_time_size<false>>;
+  struct cmp_t {
+    using is_transparent = paddr_t;
+    bool operator()(
+      const backref_buf_entry_t &l,
+      const backref_buf_entry_t &r) const {
+      return l.paddr < r.paddr;
+    }
+    bool operator()(const paddr_t l, const backref_buf_entry_t &r) const {
+      return l < r.paddr;
+    }
+    bool operator()(const backref_buf_entry_t &l, const paddr_t r) const {
+      return l.paddr < r;
+    }
+  };
+};
+
+using backref_buf_entry_ref =
+  std::unique_ptr<backref_buf_entry_t>;
+
+struct backref_buffer_t {
+  std::map<journal_seq_t, std::vector<backref_buf_entry_ref>> backrefs;
+};
+using backref_buffer_ref = std::unique_ptr<backref_buffer_t>;
+
 /**
  * Cache
  *
@@ -448,6 +519,12 @@ private:
     }
   }
 
+  backref_buffer_ref backref_buffer;
+  std::list<backref_buffer_ref> backref_bufs_to_flush;
+  // backrefs that needs to be inserted into the backref tree
+  backref_buf_entry_t::set_t backref_inserted_set;
+  backref_buf_entry_t::set_t backref_remove_set; // backrefs needs to be removed
+                                             // from the backref tree
 public:
   /**
    * get_extent_by_type
@@ -483,6 +560,96 @@ public:
       t, type, offset, laddr, length, [](CachedExtent &) {});
   }
 
+  std::set<
+    backref_buf_entry_t,
+    backref_buf_entry_t::cmp_t> get_backrefs_in_range(
+    paddr_t start,
+    paddr_t end) {
+    auto start_iter = backref_inserted_set.lower_bound(
+      start,
+      backref_buf_entry_t::cmp_t());
+    auto end_iter = backref_inserted_set.upper_bound(
+      end,
+      backref_buf_entry_t::cmp_t());
+    std::set<
+      backref_buf_entry_t,
+      backref_buf_entry_t::cmp_t> res;
+    for (auto it = start_iter;
+        it != end_iter;
+        it++) {
+      res.emplace(it->paddr, it->laddr, it->len, it->type, it->seq);
+    }
+    return res;
+  }
+
+  std::set<
+    backref_buf_entry_t,
+    backref_buf_entry_t::cmp_t> get_del_backrefs_in_range(
+    paddr_t start,
+    paddr_t end) {
+    LOG_PREFIX(Cache::get_del_backrefs_in_range);
+    SUBDEBUG(seastore_cache, "total {} del_backrefs", backref_remove_set.size());
+    auto start_iter = backref_remove_set.lower_bound(
+      start,
+      backref_buf_entry_t::cmp_t());
+    auto end_iter = backref_remove_set.upper_bound(
+      end,
+      backref_buf_entry_t::cmp_t());
+    std::set<
+      backref_buf_entry_t,
+      backref_buf_entry_t::cmp_t> res;
+    for (auto it = start_iter;
+        it != end_iter;
+        it++) {
+      res.emplace(it->paddr, it->laddr, it->len, it->type, it->seq);
+    }
+    SUBDEBUG(seastore_cache, "{} del_backrefs in range", res.size());
+    return res;
+  }
+
+  const backref_buf_entry_t::set_t& get_backrefs() {
+    return backref_inserted_set;
+  }
+
+  const backref_buf_entry_t::set_t& get_del_backrefs() {
+    return backref_remove_set;
+  }
+
+  std::list<backref_buffer_ref>& get_backref_bufs_to_flush() {
+    return backref_bufs_to_flush;
+  }
+
+  void trim_backref_bufs(const journal_seq_t &trim_to) {
+    LOG_PREFIX(Cache::trim_backref_bufs);
+    SUBDEBUG(seastore_cache, "trimming to {}", trim_to);
+    auto &backref_bufs = get_backref_bufs_to_flush();
+    for (auto iter = backref_bufs.begin();
+        iter != backref_bufs.end();) {
+      auto &backref_buf = *iter;
+      assert(backref_buf);
+      if (!backref_buf->backrefs.empty()
+         && backref_buf->backrefs.rbegin()->first > trim_to) {
+       auto iter2 = backref_buf->backrefs.upper_bound(trim_to);
+       SUBDEBUG(seastore_cache, "trim backref up to {}", iter2->first);
+       backref_buf->backrefs.erase(
+         backref_buf->backrefs.begin(), iter2);
+       break;
+      } else {
+       if (!backref_buf->backrefs.empty()) {
+         SUBDEBUG(seastore_cache, "trim backref buf {}",
+           backref_buf->backrefs.rbegin()->first);
+       }
+       iter = backref_bufs.erase(iter);
+      }
+    }
+    if (backref_bufs.empty() && backref_buffer) {
+      assert(backref_buffer->backrefs.rbegin()->first >= trim_to);
+      auto iter = backref_buffer->backrefs.upper_bound(trim_to);
+      SUBDEBUG(seastore_cache, "trim backref buffer up to {}", iter->first);
+      backref_buffer->backrefs.erase(
+        backref_buffer->backrefs.begin(), iter);
+    }
+  }
 
   /**
    * alloc_new_extent
@@ -738,6 +905,28 @@ public:
   /// Dump live extents
   void dump_contents();
 
+  void may_roll_backref_buffer(
+    const paddr_t &final_block_start,
+    bool force_roll = false) {
+    if (force_roll) {
+      if (backref_buffer) {
+       backref_bufs_to_flush.emplace_back(std::move(backref_buffer));
+      }
+      return;
+    }
+    if (backref_buffer && !backref_buffer->backrefs.empty()) {
+      auto &[seq, backref_list] = *backref_buffer->backrefs.rbegin();
+      if (backref_list.empty())
+       return;
+      auto &last_seg_paddr = seq.offset.as_seg_paddr();
+      if (last_seg_paddr.get_segment_id() !=
+         final_block_start.as_seg_paddr().get_segment_id()) {
+       // journal segment rolled
+       backref_bufs_to_flush.emplace_back(std::move(backref_buffer));
+      }
+    }
+  }
+
 private:
   ExtentPlacementManager& epm;
   RootBlockRef root;               ///< ref to current root
@@ -982,6 +1171,10 @@ private:
     }
   }
 
+  void backref_batch_update(
+    std::vector<backref_buf_entry_ref> &&,
+    const journal_seq_t &);
+
   /// Add extent to extents handling dirty and refcounting
   void add_extent(CachedExtentRef ref);
 
index 916fe7a40df5cb71e220601015db2a23b2a2ceb4..41fbcd37651dfd2468c516a7e6ba2de3539e2051 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "crimson/os/futurized_collection.h"
 
+#include "crimson/os/seastore/backref_manager.h"
 #include "crimson/os/seastore/segment_cleaner.h"
 #include "crimson/os/seastore/collection_manager/flat_collection_manager.h"
 #include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
index c62a63980224a7564fa878ca2954ec98068dc6e5..ed87f276677bb4b7666638081358b1fd1d946c20 100644 (file)
@@ -886,6 +886,11 @@ constexpr bool is_logical_type(extent_types_t type) {
   }
 }
 
+constexpr bool is_retired_placeholder(extent_types_t type)
+{
+  return type == extent_types_t::RETIRED_PLACEHOLDER;
+}
+
 constexpr bool is_lba_node(extent_types_t type)
 {
   return type == extent_types_t::LADDR_INTERNAL ||
index 7a098e799693d84777df1a33ac8dbe8ca306d73a..795dc29e572edd2407b5aec45c086e08975971aa 100644 (file)
@@ -7,6 +7,7 @@
 #include "crimson/os/seastore/logging.h"
 #include "crimson/os/seastore/transaction_manager.h"
 #include "crimson/os/seastore/journal.h"
+#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
 
 /*
  * TransactionManager logs
@@ -26,12 +27,14 @@ TransactionManager::TransactionManager(
   JournalRef _journal,
   CacheRef _cache,
   LBAManagerRef _lba_manager,
-  ExtentPlacementManagerRef &&epm)
+  ExtentPlacementManagerRef &&epm,
+  BackrefManagerRef&& backref_manager)
   : segment_cleaner(std::move(_segment_cleaner)),
     cache(std::move(_cache)),
     lba_manager(std::move(_lba_manager)),
     journal(std::move(_journal)),
     epm(std::move(epm)),
+    backref_manager(std::move(backref_manager)),
     sm_group(*segment_cleaner->get_segment_manager_group())
 {
   segment_cleaner->set_extent_callback(this);
@@ -58,6 +61,8 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs()
       return cache->mkfs(t
       ).si_then([this, &t] {
         return lba_manager->mkfs(t);
+      }).si_then([this, &t] {
+        return backref_manager->mkfs(t);
       }).si_then([this, FNAME, &t] {
         INFOT("submitting mkfs transaction", t);
         return submit_transaction_direct(t);
@@ -540,25 +545,28 @@ TransactionManager::~TransactionManager() {}
 
 TransactionManagerRef make_transaction_manager(bool detailed)
 {
+  auto epm = std::make_unique<ExtentPlacementManager>();
+  auto cache = std::make_unique<Cache>(*epm);
+  auto lba_manager = lba_manager::create_lba_manager(*cache);
   auto sms = std::make_unique<SegmentManagerGroup>();
+  auto backref_manager = create_backref_manager(*sms, *cache);
   auto segment_cleaner = std::make_unique<SegmentCleaner>(
     SegmentCleaner::config_t::get_default(),
     std::move(sms),
+    *backref_manager,
     detailed);
   auto journal = journal::make_segmented(*segment_cleaner);
-  auto epm = std::make_unique<ExtentPlacementManager>();
   epm->init_ool_writers(
       *segment_cleaner,
       segment_cleaner->get_ool_segment_seq_allocator());
-  auto cache = std::make_unique<Cache>(*epm);
-  auto lba_manager = lba_manager::create_lba_manager(*cache);
 
   return std::make_unique<TransactionManager>(
     std::move(segment_cleaner),
     std::move(journal),
     std::move(cache),
     std::move(lba_manager),
-    std::move(epm));
+    std::move(epm),
+    std::move(backref_manager));
 }
 
 }
index a2db409775d9a642d3403fb95d6e457bfe7dfdd0..fd7c743e26f1bd490b3fc53d1c6101f73b3bbaf5 100644 (file)
@@ -25,6 +25,7 @@
 #include "crimson/os/seastore/seastore_types.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/lba_manager.h"
+#include "crimson/os/seastore/backref_manager.h"
 #include "crimson/os/seastore/journal.h"
 #include "crimson/os/seastore/extent_placement_manager.h"
 #include "crimson/os/seastore/device.h"
@@ -69,7 +70,8 @@ public:
     JournalRef journal,
     CacheRef cache,
     LBAManagerRef lba_manager,
-    ExtentPlacementManagerRef &&epm);
+    ExtentPlacementManagerRef &&epm,
+    BackrefManagerRef&& backref_manager);
 
   /// Writes initial metadata to disk
   using mkfs_ertr = base_ertr;
@@ -555,6 +557,7 @@ private:
   LBAManagerRef lba_manager;
   JournalRef journal;
   ExtentPlacementManagerRef epm;
+  BackrefManagerRef backref_manager;
   SegmentManagerGroup &sm_group;
 
   WritePipeline write_pipeline;