git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/segment_cleaner: trim journal and reclaim space with the help...
author Xuehan Xu <xxhdx1985126@gmail.com>
Tue, 15 Mar 2022 12:44:30 +0000 (20:44 +0800)
committer Xuehan Xu <xxhdx1985126@gmail.com>
Sat, 7 May 2022 05:13:38 +0000 (13:13 +0800)
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
src/crimson/os/seastore/backref/btree_backref_manager.cc
src/crimson/os/seastore/backref/btree_backref_manager.h
src/crimson/os/seastore/backref_manager.h
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/segment_cleaner.cc
src/crimson/os/seastore/segment_cleaner.h
src/crimson/os/seastore/transaction.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h

index d652da030ad5393faa8071f1eda021329fef332d..18ee1a6a628cbd86a7f7f474a497c4370af90e75 100644 (file)
@@ -402,6 +402,28 @@ BtreeBackrefManager::init_cached_extent_ret BtreeBackrefManager::init_cached_ext
   });
 }
 
+BtreeBackrefManager::rewrite_extent_ret
+BtreeBackrefManager::rewrite_extent( // rewrites `extent` through the backref btree inside transaction `t`
+  Transaction &t,
+  CachedExtentRef extent)
+{
+  LOG_PREFIX(BtreeBackrefManager::rewrite_extent);
+  auto updated = cache.update_extent_from_transaction(t, extent); // a null result is treated below as "already retired"
+  if (!updated) {
+    DEBUGT("extent is already retired, skipping -- {}", t, *extent);
+    return rewrite_extent_iertr::now(); // nothing to rewrite: resolve immediately
+  }
+  extent = updated; // continue with the extent handed back by the cache
+
+  auto c = get_context(t);
+  return with_btree<BackrefBtree>(
+    cache,
+    c,
+    [c, extent](auto &btree) mutable {
+    return btree.rewrite_extent(c, extent); // the btree carries out the actual rewrite
+  });
+}
+
 BtreeBackrefManager::remove_mapping_ret
 BtreeBackrefManager::remove_mapping(
   Transaction &t,
index 1ec9a38308bdd3cc1336bb19d3bbc1b38255edc5..fb0dd50ab0f3b56adb1d2103a82e0ad017dd15d9 100644 (file)
@@ -92,6 +92,10 @@ public:
     std::vector<CachedExtentRef> &,
     std::vector<CachedExtentRef> &) final;
 
+  rewrite_extent_ret rewrite_extent( // final override of the virtual rewrite_extent interface
+    Transaction &t,
+    CachedExtentRef extent) final;
+
   void add_pin(BackrefPin &pin) final {
     auto *bpin = reinterpret_cast<BtreeBackrefPin*>(&pin);
     pin_set.add_pin(bpin->get_range_pin());
index 3ebd1064ccac0ca727e9d9aebbc59319f79c5b67..8afa21ef79eaed2db5aec0f6b94677fa25fd394d 100644 (file)
@@ -48,6 +48,17 @@ public:
     Transaction &t,
     paddr_t offset) = 0;
 
+  /**
+   * rewrite_extent
+   *
+   * Rewrite the given extent into the passed transaction t.
+   */
+  using rewrite_extent_iertr = base_iertr; // same error set as base_iertr
+  using rewrite_extent_ret = rewrite_extent_iertr::future<>; // resolves with no value
+  virtual rewrite_extent_ret rewrite_extent(
+    Transaction &t,
+    CachedExtentRef extent) = 0;
+
   /**
    * Insert new paddr_t -> laddr_t mapping
    */
index f36d4c9d31d698a43ba3bac16654156f71544e96..002d948da6eb90c73f642ef69af12267b5cb64d9 100644 (file)
@@ -1037,8 +1037,9 @@ record_t Cache::prepare_record(
     retire_stat.increment(i->get_length());
     DEBUGT("retired and remove extent -- {}", t, *i);
     commit_retire_extent(t, i);
-    if (is_backref_mapped_extent_node(i)
-         || is_retired_placeholder(i->get_type())) {
+    if ((is_backref_mapped_extent_node(i)
+         || is_retired_placeholder(i->get_type()))
+       && t.should_record_release(i->get_paddr())) {
       rel_delta.alloc_blk_ranges.emplace_back(
        i->get_paddr(),
        L_ADDR_NULL,
@@ -1334,13 +1335,15 @@ void Cache::complete_commit(
     i->dirty_from_or_retired_at = last_commit;
     if (is_backref_mapped_extent_node(i)
          || is_retired_placeholder(i->get_type())) {
-      backref_list.emplace_back(
-       std::make_unique<backref_buf_entry_t>(
-         i->get_paddr(),
-         L_ADDR_NULL,
-         i->get_length(),
-         i->get_type(),
-         seq));
+      if (t.should_record_release(i->get_paddr())) {
+       backref_list.emplace_back(
+         std::make_unique<backref_buf_entry_t>(
+           i->get_paddr(),
+           L_ADDR_NULL,
+           i->get_length(),
+           i->get_type(),
+           seq));
+      }
     } else if (is_backref_node(i->get_type())) {
       remove_backref_extent(i->get_paddr());
     } else {
index aa30c061d64daaff4a6ce59f54a47747ac45ef02..7270a4fe379078fdd975c60b7eca10ee8f0463fd 100644 (file)
@@ -614,6 +614,13 @@ public:
     return res;
   }
 
+  backref_buf_entry_t get_del_backref( // returns a copy of the backref_remove_set entry matching addr
+    paddr_t addr) {
+    auto it = backref_remove_set.find(addr, backref_buf_entry_t::cmp_t());
+    assert(it != backref_remove_set.end()); // addr must be present; this is only checked when asserts are compiled in
+    return *it;
+  }
+
   const backref_buf_entry_t::set_t& get_backrefs() {
     return backref_inserted_set;
   }
index 2f6ecf26b750c0aa719ebb82319a0d2d1093c26e..a5c11fb76a3fdaf435c38fcc78dcf856fb90a0b4 100644 (file)
@@ -377,11 +377,13 @@ SegmentCleaner::SegmentCleaner(
   config_t config,
   SegmentManagerGroupRef&& sm_group,
   BackrefManager &backref_manager,
+  Cache &cache,
   bool detailed)
   : detailed(detailed),
     config(config),
     sm_group(std::move(sm_group)),
     backref_manager(backref_manager),
+    cache(cache),
     ool_segment_seq_allocator(
       new SegmentSeqAllocator(segment_type_t::OOL)),
     gc_process(*this)
@@ -527,30 +529,37 @@ void SegmentCleaner::close_segment(segment_id_t segment)
        get_projected_reclaim_ratio());
 }
 
+SegmentCleaner::trim_backrefs_ret SegmentCleaner::trim_backrefs( // thin wrapper over backref_manager.batch_insert_from_cache
+  Transaction &t,
+  journal_seq_t limit)
+{
+  return backref_manager.batch_insert_from_cache(
+    t,
+    limit,
+    config.journal_rewrite_backref_per_cycle // third argument comes from the cleaner config
+  );
+}
+
 SegmentCleaner::rewrite_dirty_ret SegmentCleaner::rewrite_dirty(
   Transaction &t,
   journal_seq_t limit)
 {
-  LOG_PREFIX(SegmentCleaner::rewrite_dirty);
   return ecb->get_next_dirty_extents(
     t,
     limit,
-    config.journal_rewrite_per_cycle
+    config.journal_rewrite_dirty_per_cycle
   ).si_then([=, &t](auto dirty_list) {
+    LOG_PREFIX(SegmentCleaner::rewrite_dirty);
     DEBUGT("rewrite {} dirty extents", t, dirty_list.size());
     return seastar::do_with(
       std::move(dirty_list),
-      [FNAME, this, &t](auto &dirty_list) {
-       return backref_manager.batch_insert_from_cache(
-         t,
-         dirty_list.back()->get_dirty_from()
-       ).si_then([FNAME, this, &t, &dirty_list] {
-         return trans_intr::do_for_each(
-           dirty_list,
-           [FNAME, this, &t](auto &e) {
-             DEBUGT("cleaning {}", t, *e);
-             return ecb->rewrite_extent(t, e);
-           });
+      [this, &t](auto &dirty_list) {
+       return trans_intr::do_for_each(
+         dirty_list,
+         [this, &t](auto &e) {
+         LOG_PREFIX(SegmentCleaner::rewrite_dirty);
+         DEBUGT("cleaning {}", t, *e);
+         return ecb->rewrite_extent(t, e);
        });
       });
   });
@@ -597,130 +606,263 @@ SegmentCleaner::gc_cycle_ret SegmentCleaner::do_gc_cycle()
 
 SegmentCleaner::gc_trim_journal_ret SegmentCleaner::gc_trim_journal()
 {
-  return repeat_eagain([this] {
-    return ecb->with_transaction_intr(
-      Transaction::src_t::CLEANER_TRIM,
-      "trim_journal",
-      [this](auto& t)
-    {
-      return rewrite_dirty(t, get_dirty_tail()
-      ).si_then([this, &t] {
-        return ecb->submit_transaction_direct(t);
+  return ecb->with_transaction_intr( // step 1: a TRIM_BACKREF transaction that runs trim_backrefs
+    Transaction::src_t::TRIM_BACKREF,
+    "trim_backref",
+    [this](auto &t) {
+    return seastar::do_with(
+      get_dirty_tail(), // held by do_with and passed to trim_backrefs as `limit`
+      [this, &t](auto &limit) {
+      return trim_backrefs(t, limit).si_then(
+       [this, &t, &limit](auto trim_backrefs_to)
+       -> ExtentCallbackInterface::submit_transaction_direct_iertr::future<
+            journal_seq_t> {
+       if (trim_backrefs_to != JOURNAL_SEQ_NULL) { // non-null result: submit t and pass the seq as seq_to_trim
+         return ecb->submit_transaction_direct(
+           t, std::make_optional<journal_seq_t>(trim_backrefs_to)
+         ).si_then([trim_backrefs_to=std::move(trim_backrefs_to)]() mutable {
+           return seastar::make_ready_future<
+             journal_seq_t>(std::move(trim_backrefs_to));
+         });
+       }
+       return seastar::make_ready_future<journal_seq_t>(std::move(limit)); // null result: t is not submitted, the dirty-tail limit is returned instead
+      });
+    });
+  }).handle_error(
+    crimson::ct_error::eagain::handle([](auto) {
+      ceph_abort("unexpected eagain"); // eagain from step 1 is treated as fatal
+    }),
+    crimson::ct_error::pass_further_all()
+  ).safe_then([this](auto seq) { // step 2: rewrite dirty extents using the seq produced by step 1
+    return repeat_eagain([this, seq=std::move(seq)]() mutable {
+      return ecb->with_transaction_intr(
+       Transaction::src_t::CLEANER_TRIM,
+       "trim_journal",
+       [this, seq=std::move(seq)](auto& t)
+      {
+       return rewrite_dirty(t, seq
+       ).si_then([this, &t] {
+         return ecb->submit_transaction_direct(t); // no seq_to_trim is passed for this transaction
+       });
+      });
+    });
+  });
+}
+
+SegmentCleaner::retrieve_backref_extents_ret
+SegmentCleaner::_retrieve_backref_extents( // fetches every entry of backref_extents through the cache and appends it to `extents`
+  Transaction &t,
+  std::set<
+    Cache::backref_extent_buf_entry_t,
+    Cache::backref_extent_buf_entry_t::cmp_t> &&backref_extents, // only iterated here, not moved from; the caller's set must outlive the returned future
+  std::vector<CachedExtentRef> &extents)
+{
+  return trans_intr::parallel_for_each(
+    backref_extents,
+    [this, &extents, &t](auto &ent) {
+    // only the single gc fiber can rewrite backref extents,
+    // so the extent must still be alive
+    assert(is_backref_node(ent.type));
+    LOG_PREFIX(SegmentCleaner::_retrieve_backref_extents);
+    DEBUGT("getting backref extent of type {} at {}",
+      t,
+      ent.type,
+      ent.paddr);
+    return cache.get_extent_by_type(
+      t, ent.type, ent.paddr, L_ADDR_NULL, BACKREF_NODE_SIZE // read with a null laddr and the fixed BACKREF_NODE_SIZE length
+    ).si_then([&extents](auto ext) {
+      extents.emplace_back(std::move(ext));
+    });
+  });
+}
+
+SegmentCleaner::retrieve_live_extents_ret
+SegmentCleaner::_retrieve_live_extents( // appends live extents to `extents`; resolves to the largest del-backref seq among dead entries, or JOURNAL_SEQ_NULL
+  Transaction &t,
+  std::set<
+    backref_buf_entry_t,
+    backref_buf_entry_t::cmp_t> &&backrefs,
+  std::vector<CachedExtentRef> &extents)
+{
+  return seastar::do_with(
+    JOURNAL_SEQ_NULL, // seq: running maximum, starts as "none"
+    std::move(backrefs), // moved into do_with so the set lives until the future resolves
+    [this, &t, &extents](auto &seq, auto &backrefs) {
+    return trans_intr::do_for_each(
+      backrefs,
+      [this, &extents, &t, &seq](auto &ent) {
+      LOG_PREFIX(SegmentCleaner::gc_reclaim_space); // NOTE(review): prefix names gc_reclaim_space, not this helper
+      DEBUGT("getting extent of type {} at {}~{}",
+       t,
+       ent.type,
+       ent.paddr,
+       ent.len);
+      return ecb->get_extent_if_live(
+       t, ent.type, ent.paddr, ent.laddr, ent.len
+      ).si_then([this, &extents, &ent, &seq](auto ext) {
+       if (!ext) { // null extent: the address is no longer live
+         logger().debug(
+           "SegmentCleaner::gc_reclaim_space:"
+           " addr {} dead, skipping",
+           ent.paddr);
+         auto backref = cache.get_del_backref(ent.paddr); // get_del_backref asserts that the entry exists
+         if (seq == JOURNAL_SEQ_NULL || seq < backref.seq) {
+           seq = backref.seq; // keep the largest seq seen so far
+         }
+       } else {
+         extents.emplace_back(std::move(ext)); // live: queue it for rewriting by the caller
+       }
+       return ExtentCallbackInterface::rewrite_extent_iertr::now();
       });
+    }).si_then([&seq] {
+      return retrieve_live_extents_iertr::make_ready_future<
+       journal_seq_t>(std::move(seq));
     });
   });
 }
 
 SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space()
 {
-  if (!scan_cursor) {
+  if (!next_reclaim_pos) { // no reclaim in progress: start from the next gc target
     journal_seq_t next = get_next_gc_target();
-    if (next == JOURNAL_SEQ_NULL) {
-      logger().debug(
-       "SegmentCleaner::do_gc: no segments to gc");
-      return seastar::now();
-    }
-    scan_cursor =
-      std::make_unique<SegmentManagerGroup::scan_extents_cursor>(
-       next);
-    logger().debug(
-      "SegmentCleaner::do_gc: starting gc on segment {}",
-      scan_cursor->seq);
+    next_reclaim_pos = std::make_optional<paddr_t>(next.offset); // NOTE(review): `next` is no longer compared with JOURNAL_SEQ_NULL before use -- confirm callers guarantee a target
+  }
+  LOG_PREFIX(SegmentCleaner::gc_reclaim_space);
+  INFO("cleaning {}", *next_reclaim_pos);
+  auto &seg_paddr = next_reclaim_pos->as_seg_paddr();
+  paddr_t end_paddr;
+  auto segment_id = seg_paddr.get_segment_id();
+  if (final_reclaim()) { // last stride of this segment: the range ends at offset 0 of the following segment id
+    segment_id_t next_segment_id{
+      segment_id.device_id(),
+      segment_id.device_segment_id() + 1};
+    end_paddr = paddr_t::make_seg_paddr(next_segment_id, 0);
   } else {
-    ceph_assert(!scan_cursor->is_complete());
+    end_paddr = seg_paddr + config.reclaim_bytes_stride; // otherwise the range covers one stride
   }
 
-  return sm_group->scan_extents(
-    *scan_cursor,
-    config.reclaim_bytes_stride
-  ).safe_then([this](auto &&_extents) {
-    return seastar::do_with(
-        std::move(_extents),
-       (size_t)0,
-        [this](auto &extents, auto &reclaimed) {
-      return repeat_eagain([this, &extents, &reclaimed]() mutable {
-       reclaimed = 0;
-        logger().debug(
-          "SegmentCleaner::gc_reclaim_space: processing {} extents",
-          extents.size());
-        return ecb->with_transaction_intr(
-          Transaction::src_t::CLEANER_RECLAIM,
-          "reclaim_space",
-          [this, &extents, &reclaimed](auto& t)
-        {
-          return trans_intr::do_for_each(
-              extents,
-              [this, &t, &reclaimed](auto &extent) {
-           auto &addr = extent.first;
-           auto commit_time = extent.second.first.commit_time;
-           auto commit_type = extent.second.first.commit_type;
-           auto &info = extent.second.second;
-            logger().debug(
-              "SegmentCleaner::gc_reclaim_space: checking extent {}",
-              info);
-            return ecb->get_extent_if_live(
-              t,
-              info.type,
-              addr,
-              info.addr,
-              info.len
-            ).si_then([&info, commit_type, commit_time, addr=addr, &t, this, &reclaimed]
-             (CachedExtentRef ext) {
-              if (!ext) {
-                logger().debug(
-                  "SegmentCleaner::gc_reclaim_space: addr {} dead, skipping",
-                  addr);
-                return ExtentCallbackInterface::rewrite_extent_iertr::now();
-              } else {
-                logger().debug(
-                  "SegmentCleaner::gc_reclaim_space: addr {} alive, gc'ing {}",
-                  addr,
-                  *ext);
-               assert(commit_time);
-               assert(info.last_modified);
-               assert(commit_type == record_commit_type_t::MODIFY
-                 || commit_type == record_commit_type_t::REWRITE);
-               if (ext->get_last_modified() == time_point()) {
-                 assert(ext->get_last_rewritten() == time_point());
-                 ext->set_last_modified(duration(info.last_modified));
-               }
-               if (commit_type == record_commit_type_t::REWRITE
-                   && ext->get_last_rewritten() == time_point()) {
-                 ext->set_last_rewritten(duration(commit_time));
-               }
+  double pavail_ratio = get_projected_available_ratio(); // captured only for the INFO log at the end
+  seastar::lowres_system_clock::time_point start = seastar::lowres_system_clock::now(); // start time for the logged duration
 
-               assert(
-                 (commit_type == record_commit_type_t::MODIFY
-                   && commit_time <=
-                     ext->get_last_modified().time_since_epoch().count())
-                 || (commit_type == record_commit_type_t::REWRITE
-                     && commit_time ==
-                       ext->get_last_rewritten().time_since_epoch().count()));
-
-               reclaimed += ext->get_length();
-                return ecb->rewrite_extent(
-                  t,
-                  ext);
-              }
-            });
-          }).si_then([this, &t] {
-            if (scan_cursor->is_complete()) {
-              t.mark_segment_to_release(scan_cursor->get_segment_id());
-            }
-            return ecb->submit_transaction_direct(t);
-          });
-        });
-      }).safe_then([&reclaimed] {
-       return seastar::make_ready_future<size_t>(reclaimed);
+  return seastar::do_with(
+    (size_t)0, // reclaimed: bytes rewritten by the latest attempt
+    (size_t)0, // runs: number of attempts made inside repeat_eagain
+    [this, segment_id, pavail_ratio, start, end_paddr](
+      auto &reclaimed,
+      auto &runs) {
+    return repeat_eagain(
+      [this, &reclaimed, segment_id, &runs, end_paddr]() mutable {
+      reclaimed = 0; // reset at the start of every attempt
+      runs++;
+      return seastar::do_with(
+       cache.get_backref_extents_in_range( // backref extents the cache reports for the range
+         *next_reclaim_pos, end_paddr),
+       cache.get_backrefs_in_range(*next_reclaim_pos, end_paddr), // backrefs the cache reports for the range
+       cache.get_del_backrefs_in_range( // deleted backrefs the cache reports for the range
+         *next_reclaim_pos, end_paddr),
+       JOURNAL_SEQ_NULL, // seq: largest journal seq seen among the entries, later passed as seq_to_trim
+       [this, segment_id, &reclaimed, end_paddr]
+       (auto &backref_extents, auto &backrefs, auto &del_backrefs, auto &seq) {
+       return ecb->with_transaction_intr(
+         Transaction::src_t::CLEANER_RECLAIM,
+         "reclaim_space",
+         [segment_id, this, &backref_extents, &backrefs, &seq,
+         &del_backrefs, &reclaimed, end_paddr](auto &t) {
+         return backref_manager.get_mappings(
+           t, *next_reclaim_pos, end_paddr
+         ).si_then(
+           [segment_id, this, &backref_extents, &backrefs, &seq,
+           &del_backrefs, &reclaimed, &t](auto pin_list) {
+           LOG_PREFIX(SegmentCleaner::gc_reclaim_space);
+           DEBUG("{} backrefs, {} del_backrefs, {} pins",
+             backrefs.size(), del_backrefs.size(), pin_list.size());
+           for (auto &br : backrefs) { // raise seq to the largest non-null seq among cached backrefs
+             if (seq == JOURNAL_SEQ_NULL
+                 || (br.seq != JOURNAL_SEQ_NULL && br.seq > seq))
+               seq = br.seq;
+           }
+           for (auto &pin : pin_list) { // add the mappings returned by get_mappings to the same set
+             backrefs.emplace(
+               pin->get_key(),
+               pin->get_val(),
+               pin->get_length(),
+               pin->get_type(),
+               journal_seq_t()); // these entries get a default-constructed seq
+           }
+           for (auto &del_backref : del_backrefs) { // drop entries that have a pending delete and track their seq
+             INFO("del_backref {}~{} {} {}",
+               del_backref.paddr, del_backref.len, del_backref.type, del_backref.seq);
+             auto it = backrefs.find(del_backref.paddr);
+             if (it != backrefs.end())
+               backrefs.erase(it);
+             if (seq == JOURNAL_SEQ_NULL
+                 || (del_backref.seq != JOURNAL_SEQ_NULL && del_backref.seq > seq))
+               seq = del_backref.seq;
+           }
+           return seastar::do_with(
+             std::vector<CachedExtentRef>(), // extents to rewrite, filled by the two helpers below
+             [this, &backref_extents, &backrefs, &reclaimed, &t, &seq]
+             (auto &extents) {
+             return _retrieve_backref_extents(
+               t, std::move(backref_extents), extents
+             ).si_then([this, &extents, &t, &backrefs] {
+               return _retrieve_live_extents(
+                 t, std::move(backrefs), extents);
+             }).si_then([this, &seq, &t](auto nseq) {
+               if (nseq != JOURNAL_SEQ_NULL && nseq > seq)
+                 seq = nseq;
+               auto fut = BackrefManager::batch_insert_iertr::now();
+               if (seq != JOURNAL_SEQ_NULL) { // only call batch_insert_from_cache when a seq was found
+                 fut = backref_manager.batch_insert_from_cache(
+                   t, seq, std::numeric_limits<uint64_t>::max() // third argument is the maximum uint64_t value
+                 ).si_then([](auto) {
+                   return BackrefManager::batch_insert_iertr::now(); // the returned value is discarded
+                 });
+               }
+               return fut;
+             }).si_then([&extents, this, &t, &reclaimed] {
+               return trans_intr::do_for_each(
+                 extents,
+                 [this, &t, &reclaimed](auto &ext) {
+                 reclaimed += ext->get_length(); // count the bytes about to be rewritten
+                 return ecb->rewrite_extent(t, ext);
+               });
+             });
+           }).si_then([this, &t, segment_id, &seq] {
+             if (final_reclaim()) // last stride: mark the segment for release in this transaction
+               t.mark_segment_to_release(segment_id);
+             return ecb->submit_transaction_direct(
+               t, std::make_optional<journal_seq_t>(std::move(seq)));
+           });
+         });
+       });
       });
+    }).safe_then(
+      [&reclaimed, this, pavail_ratio, start, &runs, end_paddr] {
+      LOG_PREFIX(SegmentCleaner::gc_reclaim_space);
+#ifndef NDEBUG
+      auto ndel_backrefs = cache.get_del_backrefs_in_range( // debug builds only: no del backrefs may remain in the range
+       *next_reclaim_pos, end_paddr);
+      if (!ndel_backrefs.empty()) {
+       for (auto &del_br : ndel_backrefs) {
+         ERROR("unexpected del_backref {}~{} {} {}",
+           del_br.paddr, del_br.len, del_br.type, del_br.seq);
+       }
+       ceph_abort("impossible");
+      }
+#endif
+      stats.reclaiming_bytes += reclaimed;
+      auto d = seastar::lowres_system_clock::now() - start;
+      INFO("duration: {}, pavail_ratio before: {}, repeats: {}", d, pavail_ratio, runs);
+      if (final_reclaim()) { // segment finished: fold the running total into the stats and clear the position
+       stats.reclaim_rewrite_bytes += stats.reclaiming_bytes;
+       stats.reclaiming_bytes = 0;
+       next_reclaim_pos.reset();
+      } else {
+       next_reclaim_pos = // not finished: advance by one stride for the next call
+         paddr_t(*next_reclaim_pos + config.reclaim_bytes_stride);
+      }
     });
-  }).safe_then([this](size_t reclaimed) {
-    stats.reclaiming_bytes += reclaimed;
-    if (scan_cursor->is_complete()) {
-      stats.reclaim_rewrite_bytes += stats.reclaiming_bytes;
-      stats.reclaiming_bytes = 0;
-      scan_cursor.reset();
-    }
   });
 }
 
index b189af533b2ad9ad47890d4ad092d0b3888af637..f04073faa77b8bae111774c376b048209521292b 100644 (file)
@@ -433,7 +433,9 @@ public:
     size_t reclaim_bytes_stride = 0;
 
     /// Number of bytes of journal entries to rewrite per cycle
-    size_t journal_rewrite_per_cycle = 0;
+    size_t journal_rewrite_dirty_per_cycle = 0;
+
+    size_t journal_rewrite_backref_per_cycle = 0;
 
     static config_t get_default() {
       return config_t{
@@ -444,7 +446,8 @@ public:
          .6,   // reclaim_ratio_gc_threshhold
          .2,   // available_ratio_hard_limit
          1<<25,// reclaim 32MB (1<<25 bytes) per gc cycle
-         1<<25 // rewrite 64MB of journal entries per gc cycle
+         1<<25,// rewrite 32MB (1<<25 bytes) of journal entries per gc cycle
+         1<<24 // create 16MB of backref extents per gc cycle
        };
     }
   };
@@ -542,7 +545,8 @@ public:
     using submit_transaction_direct_ret =
       submit_transaction_direct_iertr::future<>;
     virtual submit_transaction_direct_ret submit_transaction_direct(
-      Transaction &t) = 0;
+      Transaction &t,
+      std::optional<journal_seq_t> seq_to_trim = std::nullopt) = 0; // optional journal seq passed along with the submission; defaults to none
   };
 
 private:
@@ -551,6 +555,7 @@ private:
 
   SegmentManagerGroupRef sm_group;
   BackrefManager &backref_manager;
+  Cache &cache;
 
   SpaceTrackerIRef space_tracker;
   segments_info_t segments;
@@ -596,6 +601,7 @@ public:
     config_t config,
     SegmentManagerGroupRef&& sm_group,
     BackrefManager &backref_manager,
+    Cache &cache,
     bool detailed = false);
 
   SegmentSeqAllocator& get_ool_segment_seq_allocator() {
@@ -810,6 +816,12 @@ private:
     Transaction &t,
     journal_seq_t limit);
 
+  using trim_backrefs_iertr = work_iertr;
+  using trim_backrefs_ret = trim_backrefs_iertr::future<journal_seq_t>; // resolves to a journal_seq_t
+  trim_backrefs_ret trim_backrefs( // called from gc_trim_journal with the dirty tail as `limit`
+    Transaction &t,
+    journal_seq_t limit);
+
   journal_seq_t get_dirty_tail() const {
     auto ret = journal_head;
     ret.segment_seq -= std::min(
@@ -827,9 +839,12 @@ private:
   }
 
   // GC status helpers
-  std::unique_ptr<
-    SegmentManagerGroup::scan_extents_cursor
-    > scan_cursor;
+  std::optional<paddr_t> next_reclaim_pos; // where the next gc_reclaim_space call starts; empty when no reclaim is in progress
+
+  bool final_reclaim() { // true when one more stride from next_reclaim_pos reaches or passes the segment size; next_reclaim_pos must be set
+    return next_reclaim_pos->as_seg_paddr().get_segment_off()
+      + config.reclaim_bytes_stride >= (size_t)segments.get_segment_size();
+  }
 
   /**
    * GCProcess
@@ -919,6 +934,26 @@ private:
   using gc_reclaim_space_ret = gc_reclaim_space_ertr::future<>;
   gc_reclaim_space_ret gc_reclaim_space();
 
+  using retrieve_backref_extents_iertr = work_iertr;
+  using retrieve_backref_extents_ret =
+    retrieve_backref_extents_iertr::future<>; // resolves with no value; results are appended to `extents`
+  retrieve_backref_extents_ret _retrieve_backref_extents(
+    Transaction &t,
+    std::set<
+      Cache::backref_extent_buf_entry_t,
+      Cache::backref_extent_buf_entry_t::cmp_t> &&backref_extents, // entries to fetch
+    std::vector<CachedExtentRef> &extents); // output: fetched extents are appended here
+
+  using retrieve_live_extents_iertr = work_iertr;
+  using retrieve_live_extents_ret =
+    retrieve_live_extents_iertr::future<journal_seq_t>; // resolves to a journal_seq_t (JOURNAL_SEQ_NULL when no dead entry was seen)
+  retrieve_live_extents_ret _retrieve_live_extents(
+    Transaction &t,
+    std::set<
+      backref_buf_entry_t,
+      backref_buf_entry_t::cmp_t> &&backrefs, // entries to check for liveness
+    std::vector<CachedExtentRef> &extents); // output: live extents are appended here
+
   size_t get_bytes_used_current_segment() const {
     auto& seg_addr = journal_head.offset.as_seg_paddr();
     return seg_addr.get_segment_off();
@@ -929,18 +964,6 @@ private:
     return segment_size - get_bytes_used_current_segment();
   }
 
-  /**
-   * get_bytes_scanned_current_segment
-   *
-   * Returns the number of bytes from the current gc segment that
-   * have been scanned.
-   */
-  size_t get_bytes_scanned_current_segment() const {
-    if (!scan_cursor)
-      return 0;
-    return scan_cursor->get_segment_offset();
-  }
-
   /// Returns free space available for writes
   size_t get_available_bytes() const {
     return segments.get_available_bytes();
index 19d0e7e2135c96fde11315bb3d1dd0b761bc78d5..17479795df1c794ec5944f889515a39a2ed290d6 100644 (file)
@@ -132,6 +132,12 @@ public:
     }
     fresh_block_stats.increment(ref->get_length());
     write_set.insert(*ref);
+    if (is_backref_node(ref->get_type()))
+      fresh_backref_extents++;
+  }
+
+  uint64_t get_num_fresh_backref() const { // current value of the fresh_backref_extents counter
+    return fresh_backref_extents;
   }
 
   void mark_delayed_extent_inline(LogicalCachedExtentRef& ref) {
@@ -212,6 +218,19 @@ public:
     return retired_set;
   }
 
+  bool should_record_release(paddr_t addr) { // false iff addr is in no_release_delta_retired_set
+    auto count = no_release_delta_retired_set.count(addr);
+#ifndef NDEBUG
+    if (count)
+      assert(retired_set.count(addr)); // an address exempted from release recording must also be in retired_set
+#endif
+    return count == 0;
+  }
+
+  void dont_record_release(CachedExtentRef ref) { // makes should_record_release() return false for this extent's address
+    no_release_delta_retired_set.insert(ref);
+  }
+
   template <typename F>
   auto for_each_fresh_block(F &&f) const {
     std::for_each(ool_block_list.begin(), ool_block_list.end(), f);
@@ -232,6 +251,7 @@ public:
     MUTATE = 0,
     READ, // including weak and non-weak read transactions
     CLEANER_TRIM,
+    TRIM_BACKREF,
     CLEANER_RECLAIM,
     MAX
   };
@@ -288,6 +308,7 @@ public:
     offset = 0;
     delayed_temp_offset = 0;
     read_set.clear();
+    fresh_backref_extents = 0;
     invalidate_clear_write_set();
     mutated_block_list.clear();
     fresh_block_stats = {};
@@ -296,6 +317,7 @@ public:
     inline_block_list.clear();
     ool_block_list.clear();
     retired_set.clear();
+    no_release_delta_retired_set.clear();
     onode_tree_stats = {};
     lba_tree_stats = {};
     backref_tree_stats = {};
@@ -375,6 +397,8 @@ private:
    */
   read_set_t<Transaction> read_set; ///< set of extents read by paddr
 
+  uint64_t fresh_backref_extents = 0; // counter of new backref extents
+
   /**
    * write_set
    *
@@ -407,6 +431,8 @@ private:
    */
   pextent_set_t retired_set;
 
+  pextent_set_t no_release_delta_retired_set; // extents registered via dont_record_release(); consulted by should_record_release()
+
   /// stats to collect when commit or invalidate
   tree_stats_t onode_tree_stats;
   tree_stats_t lba_tree_stats;
@@ -437,6 +463,8 @@ inline std::ostream& operator<<(std::ostream& os,
     return os << "READ";
   case Transaction::src_t::CLEANER_TRIM:
     return os << "CLEANER_TRIM";
+  case Transaction::src_t::TRIM_BACKREF:
+    return os << "TRIM_BACKREF";
   case Transaction::src_t::CLEANER_RECLAIM:
     return os << "CLEANER_RECLAIM";
   default:
index 1588d0a96a7cd8540594522c1574f5212cf57e05..aec0b57a9d45d498ce0d7709a5ab60e76182a815 100644 (file)
@@ -319,7 +319,8 @@ TransactionManager::submit_transaction(
 
 TransactionManager::submit_transaction_direct_ret
 TransactionManager::submit_transaction_direct(
-  Transaction &tref)
+  Transaction &tref,
+  std::optional<journal_seq_t> seq_to_trim)
 {
   LOG_PREFIX(TransactionManager::submit_transaction_direct);
   SUBTRACET(seastore_t, "start", tref);
@@ -352,7 +353,7 @@ TransactionManager::submit_transaction_direct(
   }).si_then([this, FNAME, &tref] {
     SUBTRACET(seastore_t, "about to prepare", tref);
     return tref.get_handle().enter(write_pipeline.prepare);
-  }).si_then([this, FNAME, &tref]() mutable
+  }).si_then([this, FNAME, &tref, seq_to_trim=std::move(seq_to_trim)]() mutable
              -> submit_transaction_iertr::future<> {
     auto record = cache->prepare_record(tref, segment_cleaner.get());
 
@@ -360,11 +361,15 @@ TransactionManager::submit_transaction_direct(
 
     SUBTRACET(seastore_t, "about to submit to journal", tref);
     return journal->submit_record(std::move(record), tref.get_handle()
-    ).safe_then([this, FNAME, &tref](auto submit_result) mutable {
+    ).safe_then([this, FNAME, &tref, seq_to_trim=std::move(seq_to_trim)]
+      (auto submit_result) mutable {
       SUBDEBUGT(seastore_t, "committed with {}", tref, submit_result);
       auto start_seq = submit_result.write_result.start_seq;
       auto end_seq = submit_result.write_result.get_end_seq();
       segment_cleaner->set_journal_head(end_seq);
+      if (seq_to_trim && *seq_to_trim != JOURNAL_SEQ_NULL) {
+       cache->trim_backref_bufs(*seq_to_trim);
+      }
       cache->complete_commit(
           tref,
           submit_result.record_block_base,
@@ -461,6 +466,11 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(
   CachedExtentRef extent)
 {
   LOG_PREFIX(TransactionManager::rewrite_extent);
+
+  if (is_backref_node(extent->get_type())) {
+    return backref_manager->rewrite_extent(t, extent);
+  }
+
   {
     auto updated = cache->update_extent_from_transaction(t, extent);
     if (!updated) {
@@ -476,12 +486,24 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(
     return rewrite_extent_iertr::now();
   }
 
+  auto fut = rewrite_extent_iertr::now();
   if (extent->is_logical()) {
-    return rewrite_logical_extent(t, extent->cast<LogicalCachedExtent>());
+    fut = rewrite_logical_extent(t, extent->cast<LogicalCachedExtent>());
   } else {
     DEBUGT("rewriting physical extent -- {}", t, *extent);
-    return lba_manager->rewrite_extent(t, extent);
+    fut = lba_manager->rewrite_extent(t, extent);
   }
+
+  return fut.si_then([this, extent, &t] {
+    t.dont_record_release(extent);
+    return backref_manager->remove_mapping(
+      t, extent->get_paddr()).si_then([](auto) {
+      return seastar::now();
+    }).handle_error_interruptible(
+      crimson::ct_error::input_output_error::pass_further(),
+      crimson::ct_error::assert_all()
+    );
+  });
 }
 
 TransactionManager::get_extent_if_live_ret TransactionManager::get_extent_if_live(
@@ -588,6 +610,7 @@ TransactionManagerRef make_transaction_manager(bool detailed)
     SegmentCleaner::config_t::get_default(),
     std::move(sms),
     *backref_manager,
+    *cache,
     detailed);
   auto journal = journal::make_segmented(*segment_cleaner);
   epm->init_ool_writers(
index b4e9ed92af47c7abf72d5ced5ed9309584863067..34490baec081585a46731759fb60f721c65ca9de 100644 (file)
@@ -382,7 +382,8 @@ public:
   /// SegmentCleaner::ExtentCallbackInterface
   using SegmentCleaner::ExtentCallbackInterface::submit_transaction_direct_ret;
   submit_transaction_direct_ret submit_transaction_direct(
-    Transaction &t) final;
+    Transaction &t,
+    std::optional<journal_seq_t> seq_to_trim = std::nullopt) final; // when set and not JOURNAL_SEQ_NULL, the definition passes it to cache->trim_backref_bufs
 
   /**
    * flush