git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
seastore: rename segment cleaner to async cleaner 46885/head
author myoungwon oh <ohmyoungwon@gmail.com>
Wed, 1 Jun 2022 12:55:27 +0000 (21:55 +0900)
committer myoungwon oh <ohmyoungwon@gmail.com>
Wed, 29 Jun 2022 01:18:45 +0000 (10:18 +0900)
Signed-off-by: Myoungwon Oh <myoungwon.oh@samsung.com>
18 files changed:
src/crimson/os/seastore/CMakeLists.txt
src/crimson/os/seastore/async_cleaner.cc [new file with mode: 0644]
src/crimson/os/seastore/async_cleaner.h [new file with mode: 0644]
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/journal/segment_allocator.cc
src/crimson/os/seastore/journal/segmented_journal.h
src/crimson/os/seastore/seastore.cc
src/crimson/os/seastore/segment_cleaner.cc [deleted file]
src/crimson/os/seastore/segment_cleaner.h [deleted file]
src/crimson/os/seastore/segment_seq_allocator.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc
src/test/crimson/seastore/onode_tree/test_staged_fltree.cc
src/test/crimson/seastore/test_seastore_journal.cc
src/test/crimson/seastore/test_transaction_manager.cc
src/test/crimson/seastore/transaction_manager_test_state.h

index 55d2168cebb1c9bb2837d410afb310a9adcbfe64..ee3aa47cc533226ca2ca8173637b5792ad3e7648 100644 (file)
@@ -8,7 +8,7 @@ set(crimson_seastore_srcs
   transaction.cc
   cache.cc
   lba_manager.cc
-  segment_cleaner.cc
+  async_cleaner.cc
   backref_manager.cc
   backref/backref_tree_node.cc
   backref/btree_backref_manager.cc
diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc
new file mode 100644 (file)
index 0000000..2e069a8
--- /dev/null
@@ -0,0 +1,1439 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <seastar/core/metrics.hh>
+
+#include "crimson/os/seastore/logging.h"
+
+#include "crimson/os/seastore/async_cleaner.h"
+#include "crimson/os/seastore/transaction_manager.h"
+
+SET_SUBSYS(seastore_cleaner);
+
+namespace crimson::os::seastore {
+
+// Turn this record into an OPEN segment carrying the given sequence number
+// and type. Both arguments must be non-null; the write position restarts
+// at zero.
+void segment_info_t::set_open(
+    segment_seq_t _seq, segment_type_t _type)
+{
+  ceph_assert(_seq != NULL_SEG_SEQ);
+  ceph_assert(_type != segment_type_t::NULL_SEG);
+
+  written_to = 0;
+  type = _type;
+  seq = _seq;
+  state = Segment::segment_state_t::OPEN;
+}
+
+// Return this record to the EMPTY state, clearing sequence, type, both
+// timestamps and the write position.
+void segment_info_t::set_empty()
+{
+  written_to = 0;
+  last_rewritten = {};
+  last_modified = {};
+  type = segment_type_t::NULL_SEG;
+  seq = NULL_SEG_SEQ;
+  state = Segment::segment_state_t::EMPTY;
+}
+
+// Flip the state to CLOSED. Only the state changes here: sequence, type,
+// timestamps and write position keep whatever values they had.
+void segment_info_t::set_closed()
+{
+  state = Segment::segment_state_t::CLOSED;
+}
+
+// Initialise this record directly as a CLOSED segment with the given
+// sequence and type (both must be non-null). The write position is set to
+// seg_size.
+void segment_info_t::init_closed(
+    segment_seq_t _seq, segment_type_t _type, std::size_t seg_size)
+{
+  ceph_assert(_seq != NULL_SEG_SEQ);
+  ceph_assert(_type != segment_type_t::NULL_SEG);
+
+  written_to = seg_size;
+  type = _type;
+  seq = _seq;
+  state = Segment::segment_state_t::CLOSED;
+}
+
+std::ostream& operator<<(std::ostream &out, const segment_info_t &info)
+{
+  out << "seg_info_t("
+      << "state=" << info.state;
+  if (info.is_empty()) {
+    // pass
+  } else { // open or closed
+    out << ", seq=" << segment_seq_printer_t{info.seq}
+        << ", type=" << info.type
+        << ", last_modified=" << info.last_modified.time_since_epoch()
+        << ", last_rewritten=" << info.last_rewritten.time_since_epoch()
+        << ", written_to=" << info.written_to;
+  }
+  return out << ")";
+}
+
+// Drop every tracked segment and zero all derived bookkeeping: segment
+// size, journal tracking, per-state and per-type counts, operation
+// counters and byte totals.
+void segments_info_t::reset()
+{
+  segments.clear();
+  segment_size = 0;
+
+  // journal tracking
+  journal_segment_id = NULL_SEG_ID;
+  num_in_journal_open = 0;
+
+  // number of segments per type
+  num_type_journal = 0;
+  num_type_ool = 0;
+
+  // number of segments per state
+  num_open = 0;
+  num_empty = 0;
+  num_closed = 0;
+
+  // operation counters
+  count_open_journal = 0;
+  count_open_ool = 0;
+  count_release_journal = 0;
+  count_release_ool = 0;
+  count_close_journal = 0;
+  count_close_ool = 0;
+
+  // byte totals
+  total_bytes = 0;
+  avail_bytes_in_open = 0;
+}
+
+void segments_info_t::add_segment_manager(
+    SegmentManager &segment_manager)
+{
+  LOG_PREFIX(segments_info_t::add_segment_manager);
+  device_id_t d_id = segment_manager.get_device_id();
+  auto ssize = segment_manager.get_segment_size();
+  auto nsegments = segment_manager.get_num_segments();
+  auto sm_size = segment_manager.get_size();
+  INFO("adding segment manager {}, size={}, ssize={}, segments={}",
+       device_id_printer_t{d_id}, sm_size, ssize, nsegments);
+  ceph_assert(ssize > 0);
+  ceph_assert(nsegments > 0);
+  ceph_assert(sm_size > 0);
+
+  // also validate if the device is duplicated
+  segments.add_device(d_id, nsegments, segment_info_t{});
+
+  // assume all the segment managers share the same settings as follows.
+  if (segment_size == 0) {
+    ceph_assert(ssize > 0);
+    segment_size = ssize;
+  } else {
+    ceph_assert(segment_size == (std::size_t)ssize);
+  }
+
+  // NOTE: by default the segments are empty
+  num_empty += nsegments;
+
+  total_bytes += sm_size;
+}
+
+void segments_info_t::init_closed(
+    segment_id_t segment, segment_seq_t seq, segment_type_t type)
+{
+  LOG_PREFIX(segments_info_t::init_closed);
+  auto& segment_info = segments[segment];
+  INFO("initiating {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
+       segment, segment_seq_printer_t{seq}, type,
+       segment_info, num_empty, num_open, num_closed);
+  ceph_assert(segment_info.is_empty());
+  segment_info.init_closed(seq, type, get_segment_size());
+  ceph_assert(num_empty > 0);
+  --num_empty;
+  ++num_closed;
+  if (type == segment_type_t::JOURNAL) {
+    // init_closed won't initialize journal_segment_id
+    ceph_assert(get_journal_head() == JOURNAL_SEQ_NULL);
+    ++num_type_journal;
+  } else {
+    ++num_type_ool;
+  }
+  // do not increment count_close_*;
+}
+
+void segments_info_t::mark_open(
+    segment_id_t segment, segment_seq_t seq, segment_type_t type)
+{
+  LOG_PREFIX(segments_info_t::mark_open);
+  auto& segment_info = segments[segment];
+  INFO("opening {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
+       segment, segment_seq_printer_t{seq}, type,
+       segment_info, num_empty, num_open, num_closed);
+  ceph_assert(segment_info.is_empty());
+  segment_info.set_open(seq, type);
+  ceph_assert(num_empty > 0);
+  --num_empty;
+  ++num_open;
+  if (type == segment_type_t::JOURNAL) {
+    if (journal_segment_id != NULL_SEG_ID) {
+      auto& last_journal_segment = segments[journal_segment_id];
+      ceph_assert(last_journal_segment.is_closed());
+      ceph_assert(last_journal_segment.type == segment_type_t::JOURNAL);
+      ceph_assert(last_journal_segment.seq + 1 == seq);
+    }
+    journal_segment_id = segment;
+
+    ++num_in_journal_open;
+    ++num_type_journal;
+    ++count_open_journal;
+  } else {
+    ++num_type_ool;
+    ++count_open_ool;
+  }
+  ceph_assert(segment_info.written_to == 0);
+  avail_bytes_in_open += get_segment_size();
+}
+
+void segments_info_t::mark_empty(
+    segment_id_t segment)
+{
+  LOG_PREFIX(segments_info_t::mark_empty);
+  auto& segment_info = segments[segment];
+  INFO("releasing {}, {}, num_segments(empty={}, opened={}, closed={})",
+       segment, segment_info,
+       num_empty, num_open, num_closed);
+  ceph_assert(segment_info.is_closed());
+  auto type = segment_info.type;
+  assert(type != segment_type_t::NULL_SEG);
+  segment_info.set_empty();
+  ceph_assert(num_closed > 0);
+  --num_closed;
+  ++num_empty;
+  if (type == segment_type_t::JOURNAL) {
+    ceph_assert(num_type_journal > 0);
+    --num_type_journal;
+    ++count_release_journal;
+  } else {
+    ceph_assert(num_type_ool > 0);
+    --num_type_ool;
+    ++count_release_ool;
+  }
+}
+
+void segments_info_t::mark_closed(
+    segment_id_t segment)
+{
+  LOG_PREFIX(segments_info_t::mark_closed);
+  auto& segment_info = segments[segment];
+  INFO("closing {}, {}, num_segments(empty={}, opened={}, closed={})",
+       segment, segment_info,
+       num_empty, num_open, num_closed);
+  ceph_assert(segment_info.is_open());
+  segment_info.set_closed();
+  ceph_assert(num_open > 0);
+  --num_open;
+  ++num_closed;
+  if (segment_info.type == segment_type_t::JOURNAL) {
+    ceph_assert(num_in_journal_open > 0);
+    --num_in_journal_open;
+    ++count_close_journal;
+  } else {
+    ++count_close_ool;
+  }
+  ceph_assert(get_segment_size() >= segment_info.written_to);
+  auto seg_avail_bytes = get_segment_size() - segment_info.written_to;
+  ceph_assert(avail_bytes_in_open >= seg_avail_bytes);
+  avail_bytes_in_open -= seg_avail_bytes;
+}
+
+void segments_info_t::update_written_to(
+    segment_type_t type,
+    paddr_t offset)
+{
+  LOG_PREFIX(segments_info_t::update_written_to);
+  auto& saddr = offset.as_seg_paddr();
+  auto& segment_info = segments[saddr.get_segment_id()];
+  if (!segment_info.is_open()) {
+    ERROR("segment is not open, not updating, type={}, offset={}, {}",
+          type, offset, segment_info);
+    ceph_abort();
+  }
+
+  auto new_written_to = static_cast<std::size_t>(saddr.get_segment_off());
+  ceph_assert(new_written_to <= get_segment_size());
+  if (segment_info.written_to > new_written_to) {
+    ERROR("written_to should not decrease! type={}, offset={}, {}",
+          type, offset, segment_info);
+    ceph_abort();
+  }
+
+  DEBUG("type={}, offset={}, {}", type, offset, segment_info);
+  ceph_assert(type == segment_info.type);
+  auto avail_deduction = new_written_to - segment_info.written_to;
+  ceph_assert(avail_bytes_in_open >= avail_deduction);
+  avail_bytes_in_open -= avail_deduction;
+  segment_info.written_to = new_written_to;
+}
+
+bool SpaceTrackerSimple::equals(const SpaceTrackerI &_other) const
+{
+  LOG_PREFIX(SpaceTrackerSimple::equals);
+  const auto &other = static_cast<const SpaceTrackerSimple&>(_other);
+
+  if (other.live_bytes_by_segment.size() != live_bytes_by_segment.size()) {
+    ERROR("different segment counts, bug in test");
+    assert(0 == "segment counts should match");
+    return false;
+  }
+
+  bool all_match = true;
+  for (auto i = live_bytes_by_segment.begin(), j = other.live_bytes_by_segment.begin();
+       i != live_bytes_by_segment.end(); ++i, ++j) {
+    if (i->second.live_bytes != j->second.live_bytes) {
+      all_match = false;
+      DEBUG("segment_id {} live bytes mismatch *this: {}, other: {}",
+            i->first, i->second.live_bytes, j->second.live_bytes);
+    }
+  }
+  return all_match;
+}
+
+int64_t SpaceTrackerDetailed::SegmentMap::allocate(
+  device_segment_id_t segment,
+  seastore_off_t offset,
+  extent_len_t len,
+  const extent_len_t block_size)
+{
+  LOG_PREFIX(SegmentMap::allocate);
+  assert(offset % block_size == 0);
+  assert(len % block_size == 0);
+
+  const auto b = (offset / block_size);
+  const auto e = (offset + len) / block_size;
+
+  bool error = false;
+  for (auto i = b; i < e; ++i) {
+    if (bitmap[i]) {
+      if (!error) {
+        ERROR("found allocated in {}, {} ~ {}", segment, offset, len);
+       error = true;
+      }
+      DEBUG("block {} allocated", i * block_size);
+    }
+    bitmap[i] = true;
+  }
+  return update_usage(len);
+}
+
+int64_t SpaceTrackerDetailed::SegmentMap::release(
+  device_segment_id_t segment,
+  seastore_off_t offset,
+  extent_len_t len,
+  const extent_len_t block_size)
+{
+  LOG_PREFIX(SegmentMap::release);
+  assert(offset % block_size == 0);
+  assert(len % block_size == 0);
+
+  const auto b = (offset / block_size);
+  const auto e = (offset + len) / block_size;
+
+  bool error = false;
+  for (auto i = b; i < e; ++i) {
+    if (!bitmap[i]) {
+      if (!error) {
+       ERROR("found unallocated in {}, {} ~ {}", segment, offset, len);
+       error = true;
+      }
+      DEBUG("block {} unallocated", i * block_size);
+    }
+    bitmap[i] = false;
+  }
+  return update_usage(-(int64_t)len);
+}
+
+bool SpaceTrackerDetailed::equals(const SpaceTrackerI &_other) const
+{
+  LOG_PREFIX(SpaceTrackerDetailed::equals);
+  const auto &other = static_cast<const SpaceTrackerDetailed&>(_other);
+
+  if (other.segment_usage.size() != segment_usage.size()) {
+    ERROR("different segment counts, bug in test");
+    assert(0 == "segment counts should match");
+    return false;
+  }
+
+  bool all_match = true;
+  for (auto i = segment_usage.begin(), j = other.segment_usage.begin();
+       i != segment_usage.end(); ++i, ++j) {
+    if (i->second.get_usage() != j->second.get_usage()) {
+      all_match = false;
+      ERROR("segment_id {} live bytes mismatch *this: {}, other: {}",
+            i->first, i->second.get_usage(), j->second.get_usage());
+    }
+  }
+  return all_match;
+}
+
+// Log the byte offset (block index * block_size) of every block whose
+// bitmap entry is still set.
+void SpaceTrackerDetailed::SegmentMap::dump_usage(extent_len_t block_size) const
+{
+  LOG_PREFIX(SegmentMap::dump_usage);
+  INFO("dump start");
+  for (unsigned blk = 0; blk < bitmap.size(); ++blk) {
+    if (!bitmap[blk]) {
+      continue;
+    }
+    LOCAL_LOGGER.info("    {} still live", blk * block_size);
+  }
+}
+
+// Log the segment id, then dump that segment's live blocks using the block
+// size recorded for the segment's device.
+void SpaceTrackerDetailed::dump_usage(segment_id_t id) const
+{
+  LOG_PREFIX(SpaceTrackerDetailed::dump_usage);
+  INFO("{}", id);
+  const auto block_size = block_size_by_segment_manager[id.device_id()];
+  segment_usage[id].dump_usage(block_size);
+}
+
+// Log the live byte count tracked for the given segment.
+void SpaceTrackerSimple::dump_usage(segment_id_t id) const
+{
+  LOG_PREFIX(SpaceTrackerSimple::dump_usage);
+  const auto &usage = live_bytes_by_segment[id];
+  INFO("id: {}, live_bytes: {}",
+       id, usage.live_bytes);
+}
+
+// Construct the cleaner: store the configuration, take ownership of the
+// segment manager group, keep a reference to the backref manager, create
+// the sequence allocator for OOL segments and bind the GC process to this
+// cleaner. The configuration is validated in the constructor body.
+AsyncCleaner::AsyncCleaner(
+  config_t config,
+  SegmentManagerGroupRef&& sm_group,
+  BackrefManager &backref_manager,
+  bool detailed)
+  : detailed(detailed),
+    config(config),
+    sm_group(std::move(sm_group)),
+    backref_manager(backref_manager),
+    ool_segment_seq_allocator(new SegmentSeqAllocator(segment_type_t::OOL)),
+    gc_process(*this)
+{
+  // `config` names the constructor parameter here (it shadows the member)
+  config.validate();
+}
+
+void AsyncCleaner::register_metrics()
+{
+  namespace sm = seastar::metrics;
+  stats.segment_util.buckets.resize(UTIL_BUCKETS);
+  std::size_t i;
+  for (i = 0; i < UTIL_BUCKETS; ++i) {
+    stats.segment_util.buckets[i].upper_bound = ((double)(i + 1)) / 10;
+    stats.segment_util.buckets[i].count = 0;
+  }
+  // NOTE: by default the segments are empty
+  i = get_bucket_index(UTIL_STATE_EMPTY);
+  stats.segment_util.buckets[i].count = segments.get_num_segments();
+
+  metrics.add_group("async_cleaner", {
+    sm::make_counter("segments_number",
+                    [this] { return segments.get_num_segments(); },
+                    sm::description("the number of segments")),
+    sm::make_counter("segment_size",
+                    [this] { return segments.get_segment_size(); },
+                    sm::description("the bytes of a segment")),
+    sm::make_counter("segments_in_journal",
+                    [this] { return get_segments_in_journal(); },
+                    sm::description("the number of segments in journal")),
+    sm::make_counter("segments_type_journal",
+                    [this] { return segments.get_num_type_journal(); },
+                    sm::description("the number of segments typed journal")),
+    sm::make_counter("segments_type_ool",
+                    [this] { return segments.get_num_type_ool(); },
+                    sm::description("the number of segments typed out-of-line")),
+    sm::make_counter("segments_open",
+                    [this] { return segments.get_num_open(); },
+                    sm::description("the number of open segments")),
+    sm::make_counter("segments_empty",
+                    [this] { return segments.get_num_empty(); },
+                    sm::description("the number of empty segments")),
+    sm::make_counter("segments_closed",
+                    [this] { return segments.get_num_closed(); },
+                    sm::description("the number of closed segments")),
+
+    sm::make_counter("segments_count_open_journal",
+                    [this] { return segments.get_count_open_journal(); },
+                    sm::description("the count of open journal segment operations")),
+    sm::make_counter("segments_count_open_ool",
+                    [this] { return segments.get_count_open_ool(); },
+                    sm::description("the count of open ool segment operations")),
+    sm::make_counter("segments_count_release_journal",
+                    [this] { return segments.get_count_release_journal(); },
+                    sm::description("the count of release journal segment operations")),
+    sm::make_counter("segments_count_release_ool",
+                    [this] { return segments.get_count_release_ool(); },
+                    sm::description("the count of release ool segment operations")),
+    sm::make_counter("segments_count_close_journal",
+                    [this] { return segments.get_count_close_journal(); },
+                    sm::description("the count of close journal segment operations")),
+    sm::make_counter("segments_count_close_ool",
+                    [this] { return segments.get_count_close_ool(); },
+                    sm::description("the count of close ool segment operations")),
+
+    sm::make_counter("total_bytes",
+                    [this] { return segments.get_total_bytes(); },
+                    sm::description("the size of the space")),
+    sm::make_counter("available_bytes",
+                    [this] { return segments.get_available_bytes(); },
+                    sm::description("the size of the space is available")),
+    sm::make_counter("unavailable_unreclaimable_bytes",
+                    [this] { return get_unavailable_unreclaimable_bytes(); },
+                    sm::description("the size of the space is unavailable and unreclaimable")),
+    sm::make_counter("unavailable_reclaimable_bytes",
+                    [this] { return get_unavailable_reclaimable_bytes(); },
+                    sm::description("the size of the space is unavailable and reclaimable")),
+    sm::make_counter("used_bytes", stats.used_bytes,
+                    sm::description("the size of the space occupied by live extents")),
+    sm::make_counter("unavailable_unused_bytes",
+                    [this] { return get_unavailable_unused_bytes(); },
+                    sm::description("the size of the space is unavailable and not alive")),
+
+    sm::make_counter("dirty_journal_bytes",
+                    [this] { return get_dirty_journal_size(); },
+                    sm::description("the size of the journal for dirty extents")),
+    sm::make_counter("alloc_journal_bytes",
+                    [this] { return get_alloc_journal_size(); },
+                    sm::description("the size of the journal for alloc info")),
+
+    sm::make_counter("projected_count", stats.projected_count,
+                   sm::description("the number of projected usage reservations")),
+    sm::make_counter("projected_used_bytes_sum", stats.projected_used_bytes_sum,
+                   sm::description("the sum of the projected usage in bytes")),
+
+    sm::make_counter("io_count", stats.io_count,
+                   sm::description("the sum of IOs")),
+    sm::make_counter("io_blocked_count", stats.io_blocked_count,
+                   sm::description("IOs that are blocked by gc")),
+    sm::make_counter("io_blocked_count_trim", stats.io_blocked_count_trim,
+                   sm::description("IOs that are blocked by trimming")),
+    sm::make_counter("io_blocked_count_reclaim", stats.io_blocked_count_reclaim,
+                   sm::description("IOs that are blocked by reclaimming")),
+    sm::make_counter("io_blocked_sum", stats.io_blocked_sum,
+                    sm::description("the sum of blocking IOs")),
+
+    sm::make_counter("reclaimed_bytes", stats.reclaimed_bytes,
+                    sm::description("rewritten bytes due to reclaim")),
+    sm::make_counter("reclaimed_segment_bytes", stats.reclaimed_segment_bytes,
+                    sm::description("rewritten bytes due to reclaim")),
+    sm::make_counter("closed_journal_used_bytes", stats.closed_journal_used_bytes,
+                    sm::description("used bytes when close a journal segment")),
+    sm::make_counter("closed_journal_total_bytes", stats.closed_journal_total_bytes,
+                    sm::description("total bytes of closed journal segments")),
+    sm::make_counter("closed_ool_used_bytes", stats.closed_ool_used_bytes,
+                    sm::description("used bytes when close a ool segment")),
+    sm::make_counter("closed_ool_total_bytes", stats.closed_ool_total_bytes,
+                    sm::description("total bytes of closed ool segments")),
+
+    sm::make_gauge("available_ratio",
+                   [this] { return segments.get_available_ratio(); },
+                   sm::description("ratio of available space to total space")),
+    sm::make_gauge("reclaim_ratio",
+                   [this] { return get_reclaim_ratio(); },
+                   sm::description("ratio of reclaimable space to unavailable space")),
+
+    sm::make_histogram("segment_utilization_distribution",
+                      [this]() -> seastar::metrics::histogram& {
+                        return stats.segment_util;
+                      },
+                      sm::description("utilization distribution of all segments"))
+  });
+}
+
+segment_id_t AsyncCleaner::allocate_segment(
+    segment_seq_t seq,
+    segment_type_t type)
+{
+  LOG_PREFIX(AsyncCleaner::allocate_segment);
+  assert(seq != NULL_SEG_SEQ);
+  for (auto it = segments.begin();
+       it != segments.end();
+       ++it) {
+    auto seg_id = it->first;
+    auto& segment_info = it->second;
+    if (segment_info.is_empty()) {
+      auto old_usage = calc_utilization(seg_id);
+      segments.mark_open(seg_id, seq, type);
+      auto new_usage = calc_utilization(seg_id);
+      adjust_segment_util(old_usage, new_usage);
+      INFO("opened, should_block_on_gc {}, projected_avail_ratio {}, "
+           "reclaim_ratio {}",
+           should_block_on_gc(),
+           get_projected_available_ratio(),
+           get_reclaim_ratio());
+      return seg_id;
+    }
+  }
+  ERROR("out of space with segment_seq={}", segment_seq_printer_t{seq});
+  ceph_abort();
+  return NULL_SEG_ID;
+}
+
+void AsyncCleaner::update_journal_tail_target(
+  journal_seq_t dirty_replay_from,
+  journal_seq_t alloc_replay_from)
+{
+  LOG_PREFIX(AsyncCleaner::update_journal_tail_target);
+  if (disable_trim) return;
+  assert(dirty_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
+  assert(alloc_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
+  if (dirty_extents_replay_from == JOURNAL_SEQ_NULL
+      || dirty_replay_from > dirty_extents_replay_from) {
+    DEBUG("dirty_extents_replay_from={} => {}",
+          dirty_extents_replay_from, dirty_replay_from);
+    dirty_extents_replay_from = dirty_replay_from;
+  }
+
+  update_alloc_info_replay_from(alloc_replay_from);
+
+  journal_seq_t target = std::min(dirty_replay_from, alloc_replay_from);
+  ceph_assert(target != JOURNAL_SEQ_NULL);
+  auto journal_head = segments.get_journal_head();
+  ceph_assert(journal_head == JOURNAL_SEQ_NULL ||
+              journal_head >= target);
+  if (journal_tail_target == JOURNAL_SEQ_NULL ||
+      target > journal_tail_target) {
+    if (!init_complete ||
+        journal_tail_target.segment_seq == target.segment_seq) {
+      DEBUG("journal_tail_target={} => {}", journal_tail_target, target);
+    } else {
+      INFO("journal_tail_target={} => {}", journal_tail_target, target);
+    }
+    journal_tail_target = target;
+  }
+  gc_process.maybe_wake_on_space_used();
+  maybe_wake_gc_blocked_io();
+}
+
+// Move alloc_info_replay_from forward to alloc_replay_from when it is
+// unset or older; it is never moved backwards.
+void AsyncCleaner::update_alloc_info_replay_from(
+  journal_seq_t alloc_replay_from)
+{
+  LOG_PREFIX(AsyncCleaner::update_alloc_info_replay_from);
+  const bool advances =
+    alloc_info_replay_from == JOURNAL_SEQ_NULL ||
+    alloc_replay_from > alloc_info_replay_from;
+  if (!advances) {
+    return;
+  }
+  DEBUG("alloc_info_replay_from={} => {}",
+        alloc_info_replay_from, alloc_replay_from);
+  alloc_info_replay_from = alloc_replay_from;
+}
+
+void AsyncCleaner::update_journal_tail_committed(journal_seq_t committed)
+{
+  LOG_PREFIX(AsyncCleaner::update_journal_tail_committed);
+  assert(committed.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
+  if (committed == JOURNAL_SEQ_NULL) {
+    return;
+  }
+  auto journal_head = segments.get_journal_head();
+  ceph_assert(journal_head == JOURNAL_SEQ_NULL ||
+              journal_head >= committed);
+
+  if (journal_tail_committed == JOURNAL_SEQ_NULL ||
+      committed > journal_tail_committed) {
+    DEBUG("update journal_tail_committed={} => {}",
+          journal_tail_committed, committed);
+    journal_tail_committed = committed;
+  }
+  if (journal_tail_target == JOURNAL_SEQ_NULL ||
+      committed > journal_tail_target) {
+    DEBUG("update journal_tail_target={} => {}",
+          journal_tail_target, committed);
+    journal_tail_target = committed;
+  }
+}
+
+void AsyncCleaner::close_segment(segment_id_t segment)
+{
+  LOG_PREFIX(AsyncCleaner::close_segment);
+  auto old_usage = calc_utilization(segment);
+  segments.mark_closed(segment);
+  auto &seg_info = segments[segment];
+  if (seg_info.type == segment_type_t::JOURNAL) {
+    stats.closed_journal_used_bytes += space_tracker->get_usage(segment);
+    stats.closed_journal_total_bytes += segments.get_segment_size();
+  } else {
+    stats.closed_ool_used_bytes += space_tracker->get_usage(segment);
+    stats.closed_ool_total_bytes += segments.get_segment_size();
+  }
+  auto new_usage = calc_utilization(segment);
+  adjust_segment_util(old_usage, new_usage);
+  INFO("closed, should_block_on_gc {}, projected_avail_ratio {}, "
+       "reclaim_ratio {}",
+       should_block_on_gc(),
+       get_projected_available_ratio(),
+       get_reclaim_ratio());
+}
+
+// Forward to BackrefManager::merge_cached_backrefs with the given limit
+// and the per-cycle byte budget from
+// config.rewrite_backref_bytes_per_cycle.
+AsyncCleaner::trim_backrefs_ret AsyncCleaner::trim_backrefs(
+  Transaction &t,
+  journal_seq_t limit)
+{
+  const auto &bytes_per_cycle = config.rewrite_backref_bytes_per_cycle;
+  return backref_manager.merge_cached_backrefs(t, limit, bytes_per_cycle);
+}
+
+AsyncCleaner::rewrite_dirty_ret AsyncCleaner::rewrite_dirty(
+  Transaction &t,
+  journal_seq_t limit)
+{
+  return ecb->get_next_dirty_extents(
+    t,
+    limit,
+    config.rewrite_dirty_bytes_per_cycle
+  ).si_then([=, &t](auto dirty_list) {
+    LOG_PREFIX(AsyncCleaner::rewrite_dirty);
+    DEBUGT("rewrite {} dirty extents", t, dirty_list.size());
+    return seastar::do_with(
+      std::move(dirty_list),
+      [this, FNAME, &t](auto &dirty_list) {
+       return trans_intr::do_for_each(
+         dirty_list,
+         [this, FNAME, &t](auto &e) {
+         DEBUGT("cleaning {}", t, *e);
+         return ecb->rewrite_extent(t, e);
+       });
+      });
+  });
+}
+
+AsyncCleaner::gc_cycle_ret AsyncCleaner::GCProcess::run()
+{
+  return seastar::do_until(
+    [this] { return is_stopping(); },
+    [this] {
+      return maybe_wait_should_run(
+      ).then([this] {
+       cleaner.log_gc_state("GCProcess::run");
+
+       if (is_stopping()) {
+         return seastar::now();
+       } else {
+         return cleaner.do_gc_cycle();
+       }
+      });
+    });
+}
+
+AsyncCleaner::gc_cycle_ret AsyncCleaner::do_gc_cycle()
+{
+  if (gc_should_trim_journal()) {
+    return gc_trim_journal(
+    ).handle_error(
+      crimson::ct_error::assert_all{
+       "GCProcess::run encountered invalid error in gc_trim_journal"
+      }
+    );
+  } else if (gc_should_trim_backref()) {
+    return gc_trim_backref(get_backref_tail()
+    ).safe_then([](auto) {
+      return seastar::now();
+    }).handle_error(
+      crimson::ct_error::assert_all{
+       "GCProcess::run encountered invalid error in gc_trim_backref"
+      }
+    );
+  } else if (gc_should_reclaim_space()) {
+    return gc_reclaim_space(
+    ).handle_error(
+      crimson::ct_error::assert_all{
+       "GCProcess::run encountered invalid error in gc_reclaim_space"
+      }
+    );
+  } else {
+    return seastar::now();
+  }
+}
+
+AsyncCleaner::gc_trim_backref_ret
+AsyncCleaner::gc_trim_backref(journal_seq_t limit) {
+  return seastar::do_with(
+    journal_seq_t(),
+    [this, limit=std::move(limit)](auto &seq) mutable {
+    return repeat_eagain([this, limit=std::move(limit), &seq] {
+      return ecb->with_transaction_intr(
+       Transaction::src_t::TRIM_BACKREF,
+       "trim_backref",
+       [this, limit](auto &t) {
+       return trim_backrefs(
+         t,
+         limit
+       ).si_then([this, &t, limit](auto trim_backrefs_to)
+         -> ExtentCallbackInterface::submit_transaction_direct_iertr::future<
+           journal_seq_t> {
+         if (trim_backrefs_to != JOURNAL_SEQ_NULL) {
+           return ecb->submit_transaction_direct(
+             t, std::make_optional<journal_seq_t>(trim_backrefs_to)
+           ).si_then([trim_backrefs_to=std::move(trim_backrefs_to)]() mutable {
+             return seastar::make_ready_future<
+               journal_seq_t>(std::move(trim_backrefs_to));
+           });
+         }
+         return seastar::make_ready_future<journal_seq_t>(std::move(limit));
+       });
+      }).safe_then([&seq](auto trim_backrefs_to) {
+       seq = std::move(trim_backrefs_to);
+      });
+    }).safe_then([&seq] {
+      return gc_trim_backref_ertr::make_ready_future<
+       journal_seq_t>(std::move(seq));
+    });
+  });
+}
+
+AsyncCleaner::gc_trim_journal_ret AsyncCleaner::gc_trim_journal()
+{
+  return gc_trim_backref(get_dirty_tail()
+  ).safe_then([this](auto seq) {
+    return repeat_eagain([this, seq=std::move(seq)]() mutable {
+      return ecb->with_transaction_intr(
+       Transaction::src_t::CLEANER_TRIM,
+       "trim_journal",
+       [this, seq=std::move(seq)](auto& t)
+      {
+       return rewrite_dirty(t, seq
+       ).si_then([this, &t] {
+         return ecb->submit_transaction_direct(t);
+       });
+      });
+    });
+  });
+}
+
+AsyncCleaner::retrieve_live_extents_ret
+AsyncCleaner::_retrieve_live_extents(
+  Transaction &t,
+  std::set<
+    backref_buf_entry_t,
+    backref_buf_entry_t::cmp_t> &&backrefs,
+  std::vector<CachedExtentRef> &extents)
+{
+  return seastar::do_with(
+    JOURNAL_SEQ_NULL,
+    std::move(backrefs),
+    [this, &t, &extents](auto &seq, auto &backrefs) {
+    return trans_intr::parallel_for_each(
+      backrefs,
+      [this, &extents, &t, &seq](auto &ent) {
+      LOG_PREFIX(AsyncCleaner::_retrieve_live_extents);
+      DEBUGT("getting extent of type {} at {}~{}",
+       t,
+       ent.type,
+       ent.paddr,
+       ent.len);
+      return ecb->get_extent_if_live(
+       t, ent.type, ent.paddr, ent.laddr, ent.len
+      ).si_then([this, FNAME, &extents, &ent, &seq, &t](auto ext) {
+       if (!ext) {
+         DEBUGT("addr {} dead, skipping", t, ent.paddr);
+         auto backref = backref_manager.get_cached_backref_removal(ent.paddr);
+         if (seq == JOURNAL_SEQ_NULL || seq < backref.seq) {
+           seq = backref.seq;
+         }
+       } else {
+         extents.emplace_back(std::move(ext));
+       }
+       return ExtentCallbackInterface::rewrite_extent_iertr::now();
+      });
+    }).si_then([&seq] {
+      return retrieve_live_extents_iertr::make_ready_future<
+       journal_seq_t>(std::move(seq));
+    });
+  });
+}
+
+// Collects the backref mappings that backref_manager.get_mappings() reports
+// for the range described by start_paddr and end_paddr.
+//
+// The lookup runs inside a READ transaction named "get_backref_mappings",
+// wrapped in repeat_eagain (presumably retried on eagain -- confirm against
+// the helper's definition). The result of each attempt overwrites pin_list,
+// which is finally moved out as the returned future's value.
+AsyncCleaner::retrieve_backref_mappings_ret
+AsyncCleaner::retrieve_backref_mappings(
+  paddr_t start_paddr,
+  paddr_t end_paddr)
+{
+  // pin_list is kept alive by do_with for the whole future chain
+  return seastar::do_with(
+    backref_pin_list_t(),
+    [this, start_paddr, end_paddr](auto &pin_list) {
+    return repeat_eagain([this, start_paddr, end_paddr, &pin_list] {
+      return ecb->with_transaction_intr(
+       Transaction::src_t::READ,
+       "get_backref_mappings",
+       [this, start_paddr, end_paddr](auto &t) {
+       return backref_manager.get_mappings(
+         t, start_paddr, end_paddr
+       );
+      }).safe_then([&pin_list](auto&& list) {
+       // keep the mappings of the latest attempt
+       pin_list = std::move(list);
+      });
+    }).safe_then([&pin_list] {
+      return seastar::make_ready_future<backref_pin_list_t>(std::move(pin_list));
+    });
+  });
+}
+
+// Runs one reclaim cycle over the segment currently being reclaimed.
+//
+// Steps, as visible in the body:
+//  1. If no reclaim is in progress, pick a segment with
+//     get_next_reclaim_segment(), assert it is closed and create a fresh
+//     reclaim_state for it.
+//  2. Advance reclaim_state by config.reclaim_bytes_per_cycle, which
+//     defines the start_pos/end_pos range handled by this cycle.
+//  3. Retrieve the backref mappings for that range.
+//  4. Inside repeat_eagain, open a CLEANER_RECLAIM transaction that
+//     combines those mappings with the cached backrefs and cached backref
+//     removals for the range, retrieves the extents, merges cached backrefs
+//     up to the newest sequence seen, rewrites every retrieved extent and
+//     submits the transaction.
+//  5. Update the reclaim statistics and, when the segment is completely
+//     processed, reset reclaim_state.
+AsyncCleaner::gc_reclaim_space_ret AsyncCleaner::gc_reclaim_space()
+{
+  LOG_PREFIX(AsyncCleaner::gc_reclaim_space);
+  if (!reclaim_state) {
+    segment_id_t seg_id = get_next_reclaim_segment();
+    auto &segment_info = segments[seg_id];
+    INFO("reclaim {} {} start", seg_id, segment_info);
+    ceph_assert(segment_info.is_closed());
+    reclaim_state = reclaim_state_t::create(
+        seg_id, segments.get_segment_size());
+  }
+  reclaim_state->advance(config.reclaim_bytes_per_cycle);
+
+  DEBUG("reclaiming {}~{}",
+        reclaim_state->start_pos,
+        reclaim_state->end_pos);
+  // captured only for the debug message emitted once the cycle finishes
+  double pavail_ratio = get_projected_available_ratio();
+  seastar::lowres_system_clock::time_point start = seastar::lowres_system_clock::now();
+
+  // reclaimed: bytes rewritten by the last attempt;
+  // runs: number of attempts made inside repeat_eagain
+  return seastar::do_with(
+    (size_t)0,
+    (size_t)0,
+    [this, pavail_ratio, start](
+      auto &reclaimed,
+      auto &runs) {
+    return retrieve_backref_mappings(
+      reclaim_state->start_pos,
+      reclaim_state->end_pos
+    ).safe_then([this, &reclaimed, &runs](auto pin_list) {
+      return seastar::do_with(
+       std::move(pin_list),
+       [this, &reclaimed, &runs](auto &pin_list) {
+       return repeat_eagain(
+         [this, &reclaimed, &runs, &pin_list]() mutable {
+         // every attempt starts counting from scratch
+         reclaimed = 0;
+         runs++;
+         // the cached state is re-read on every attempt
+         return seastar::do_with(
+           backref_manager.get_cached_backref_extents_in_range(
+             reclaim_state->start_pos, reclaim_state->end_pos),
+           backref_manager.get_cached_backrefs_in_range(
+             reclaim_state->start_pos, reclaim_state->end_pos),
+           backref_manager.get_cached_backref_removals_in_range(
+             reclaim_state->start_pos, reclaim_state->end_pos),
+           JOURNAL_SEQ_NULL,
+           [this, &reclaimed, &pin_list](
+             auto &backref_extents,
+             auto &backrefs,
+             auto &del_backrefs,
+             auto &seq) {
+           return ecb->with_transaction_intr(
+             Transaction::src_t::CLEANER_RECLAIM,
+             "reclaim_space",
+             [this, &backref_extents, &backrefs, &seq,
+             &del_backrefs, &reclaimed, &pin_list](auto &t) {
+             LOG_PREFIX(AsyncCleaner::gc_reclaim_space);
+             DEBUGT("{} backrefs, {} del_backrefs, {} pins", t,
+               backrefs.size(), del_backrefs.size(), pin_list.size());
+             // seq tracks the newest non-null sequence among cached backrefs
+             for (auto &br : backrefs) {
+               if (seq == JOURNAL_SEQ_NULL
+                   || (br.seq != JOURNAL_SEQ_NULL && br.seq > seq))
+                 seq = br.seq;
+             }
+             // add the retrieved mappings; they carry a default-constructed
+             // journal_seq_t
+             for (auto &pin : pin_list) {
+               backrefs.emplace(
+                 pin->get_key(),
+                 pin->get_val(),
+                 pin->get_length(),
+                 pin->get_type(),
+                 journal_seq_t());
+             }
+             // drop every backref that has a cached removal, and let the
+             // removals contribute to seq as well
+             for (auto &del_backref : del_backrefs) {
+               DEBUGT("del_backref {}~{} {} {}", t,
+                 del_backref.paddr, del_backref.len, del_backref.type, del_backref.seq);
+               auto it = backrefs.find(del_backref.paddr);
+               if (it != backrefs.end())
+                 backrefs.erase(it);
+               if (seq == JOURNAL_SEQ_NULL
+                   || (del_backref.seq != JOURNAL_SEQ_NULL && del_backref.seq > seq))
+                 seq = del_backref.seq;
+             }
+             // extents collects everything that has to be rewritten
+             return seastar::do_with(
+               std::vector<CachedExtentRef>(),
+               [this, &backref_extents, &backrefs, &reclaimed, &t, &seq]
+               (auto &extents) {
+               return backref_manager.retrieve_backref_extents(
+                 t, std::move(backref_extents), extents
+               ).si_then([this, &extents, &t, &backrefs] {
+                 return _retrieve_live_extents(
+                   t, std::move(backrefs), extents);
+               }).si_then([this, &seq, &t](auto nseq) {
+                 // take the sequence reported by _retrieve_live_extents
+                 // when it is newer than what was seen so far
+                 if (nseq != JOURNAL_SEQ_NULL &&
+                     (nseq > seq || seq == JOURNAL_SEQ_NULL))
+                   seq = nseq;
+                 auto fut = BackrefManager::merge_cached_backrefs_iertr::now();
+                 // merging is skipped when no sequence is known
+                 if (seq != JOURNAL_SEQ_NULL) {
+                   fut = backref_manager.merge_cached_backrefs(
+                     t, seq, std::numeric_limits<uint64_t>::max()
+                   ).si_then([](auto) {
+                     return BackrefManager::merge_cached_backrefs_iertr::now();
+                   });
+                 }
+                 return fut;
+               }).si_then([&extents, this, &t, &reclaimed] {
+                 return trans_intr::do_for_each(
+                   extents,
+                   [this, &t, &reclaimed](auto &ext) {
+                   reclaimed += ext->get_length();
+                   return ecb->rewrite_extent(t, ext);
+                 });
+               });
+             }).si_then([this, &t, &seq] {
+               // the segment is released by the transaction that finishes
+               // its last range
+               if (reclaim_state->is_complete()) {
+                 t.mark_segment_to_release(reclaim_state->get_segment_id());
+               }
+               return ecb->submit_transaction_direct(
+                 t, std::make_optional<journal_seq_t>(std::move(seq)));
+             });
+           });
+         });
+       });
+      });
+    }).safe_then(
+      [&reclaimed, this, pavail_ratio, start, &runs] {
+      LOG_PREFIX(AsyncCleaner::gc_reclaim_space);
+#ifndef NDEBUG
+      // debug builds: no cached removal may remain in the processed range
+      auto ndel_backrefs =
+       backref_manager.get_cached_backref_removals_in_range(
+         reclaim_state->start_pos, reclaim_state->end_pos);
+      if (!ndel_backrefs.empty()) {
+       for (auto &del_br : ndel_backrefs) {
+         ERROR("unexpected del_backref {}~{} {} {}",
+           del_br.paddr, del_br.len, del_br.type, del_br.seq);
+       }
+       ceph_abort("impossible");
+      }
+#endif
+      stats.reclaiming_bytes += reclaimed;
+      auto d = seastar::lowres_system_clock::now() - start;
+      DEBUG("duration: {}, pavail_ratio before: {}, repeats: {}", d, pavail_ratio, runs);
+      if (reclaim_state->is_complete()) {
+       INFO("reclaim {} finish, alive/total={}",
+             reclaim_state->get_segment_id(),
+             stats.reclaiming_bytes/(double)segments.get_segment_size());
+       // fold the per-segment counter into the totals and start over
+       stats.reclaimed_bytes += stats.reclaiming_bytes;
+       stats.reclaimed_segment_bytes += segments.get_segment_size();
+       stats.reclaiming_bytes = 0;
+       reclaim_state.reset();
+      }
+    });
+  });
+}
+
+// Resets the cleaner state and rebuilds the per-segment information by
+// reading the header (and, where available, the tail) of every segment.
+//
+// For each segment:
+//  - a header of type NULL_SEG aborts;
+//  - if the tail cannot be read (enodata) or its nonce differs from the
+//    header's, the segment is handed to scan_nonfull_segment();
+//  - otherwise the tail supplies the last modified/rewritten times and,
+//    for JOURNAL segments, the journal tail information, and the segment
+//    is registered through init_mark_segment_closed().
+// enoent/enodata while reading the header end the handling of that
+// segment without error; input_output_error is passed further and any
+// other error trips an assert.
+AsyncCleaner::mount_ret AsyncCleaner::mount()
+{
+  LOG_PREFIX(AsyncCleaner::mount);
+  const auto& sms = sm_group->get_segment_managers();
+  INFO("{} segment managers", sms.size());
+  init_complete = false;
+  stats = {};
+  journal_tail_target = JOURNAL_SEQ_NULL;
+  journal_tail_committed = JOURNAL_SEQ_NULL;
+  dirty_extents_replay_from = JOURNAL_SEQ_NULL;
+  alloc_info_replay_from = JOURNAL_SEQ_NULL;
+  
+  // a fresh tracker is created on every mount; `detailed` selects the kind
+  space_tracker.reset(
+    detailed ?
+    (SpaceTrackerI*)new SpaceTrackerDetailed(
+      sms) :
+    (SpaceTrackerI*)new SpaceTrackerSimple(
+      sms));
+  
+  segments.reset();
+  for (auto sm : sms) {
+    segments.add_segment_manager(*sm);
+  }
+  metrics.clear();
+  register_metrics();
+
+  INFO("{} segments", segments.get_num_segments());
+  // segment_set is only forwarded to scan_nonfull_segment()
+  return seastar::do_with(
+    std::vector<std::pair<segment_id_t, segment_header_t>>(),
+    [this, FNAME](auto& segment_set) {
+    return crimson::do_for_each(
+      segments.begin(),
+      segments.end(),
+      [this, FNAME, &segment_set](auto& it) {
+       auto segment_id = it.first;
+       return sm_group->read_segment_header(
+         segment_id
+       ).safe_then([segment_id, this, FNAME, &segment_set](auto header) {
+         INFO("segment_id={} -- {}", segment_id, header);
+         auto s_type = header.get_type();
+         if (s_type == segment_type_t::NULL_SEG) {
+           ERROR("got null segment, segment_id={} -- {}", segment_id, header);
+           ceph_abort();
+         }
+         return sm_group->read_segment_tail(
+           segment_id
+         ).safe_then([this, segment_id, &segment_set, header](auto tail)
+           -> scan_extents_ertr::future<> {
+           // a tail whose nonce differs from the header's is not trusted
+           if (tail.segment_nonce != header.segment_nonce) {
+             return scan_nonfull_segment(header, segment_set, segment_id);
+           }
+           time_point last_modified(duration(tail.last_modified));
+           time_point last_rewritten(duration(tail.last_rewritten));
+           segments.update_last_modified_rewritten(
+                segment_id, last_modified, last_rewritten);
+           if (tail.get_type() == segment_type_t::JOURNAL) {
+             update_journal_tail_committed(tail.journal_tail);
+             update_journal_tail_target(
+               tail.journal_tail,
+               tail.alloc_replay_from);
+           }
+           init_mark_segment_closed(
+             segment_id,
+             header.segment_seq,
+             header.type);
+           return seastar::now();
+         }).handle_error(
+           // no tail could be read: fall back to scanning the records
+           crimson::ct_error::enodata::handle(
+             [this, header, segment_id, &segment_set](auto) {
+             return scan_nonfull_segment(header, segment_set, segment_id);
+           }),
+           crimson::ct_error::pass_further_all{}
+         );
+       }).handle_error(
+         // no readable header: nothing is registered for this segment
+         crimson::ct_error::enoent::handle([](auto) {
+           return mount_ertr::now();
+         }),
+         crimson::ct_error::enodata::handle([](auto) {
+           return mount_ertr::now();
+         }),
+         crimson::ct_error::input_output_error::pass_further{},
+         crimson::ct_error::assert_all{"unexpected error"}
+       );
+      });
+  });
+}
+
+// Scans the valid records of a segment, starting at offset 0, and then
+// registers the segment through init_mark_segment_closed().
+//
+// What is extracted from each record group depends on the segment type
+// found in `header`:
+//  - OOL: the record headers are decoded and their commit times update the
+//    segment's last-modified (MODIFY) or last-rewritten (REWRITE) time;
+//  - otherwise: the deltas are decoded and every ALLOC_TAIL delta is
+//    decoded as a journal_seq_t and passed to
+//    update_alloc_info_replay_from().
+// A record group that cannot be decoded yields input_output_error.
+//
+// NOTE(review): segment_set is not used anywhere in this body.
+AsyncCleaner::scan_extents_ret AsyncCleaner::scan_nonfull_segment(
+  const segment_header_t& header,
+  scan_extents_ret_bare& segment_set,
+  segment_id_t segment_id)
+{
+  return seastar::do_with(
+    scan_valid_records_cursor({
+      segments[segment_id].seq,
+      paddr_t::make_seg_paddr(segment_id, 0)}),
+    // the segment header is copied into the closure as segment_header
+    [this, segment_id, segment_header=header](auto& cursor) {
+    return seastar::do_with(
+       SegmentManagerGroup::found_record_handler_t(
+       [this, segment_id, segment_header](
+         record_locator_t locator,
+         const record_group_header_t& header,
+         const bufferlist& mdbuf
+       ) mutable -> SegmentManagerGroup::scan_valid_records_ertr::future<> {
+       // NOTE: `header` in this lambda is the record group header and
+       // shadows the function parameter of the same name
+       LOG_PREFIX(AsyncCleaner::scan_nonfull_segment);
+       if (segment_header.get_type() == segment_type_t::OOL) {
+         DEBUG("out-of-line segment {}, decodeing {} records",
+           segment_id,
+           header.records);
+         auto maybe_headers = try_decode_record_headers(header, mdbuf);
+         if (!maybe_headers) {
+           ERROR("unable to decode record headers for record group {}",
+             locator.record_block_base);
+           return crimson::ct_error::input_output_error::make();
+         }
+
+         for (auto& header : *maybe_headers) {
+           mod_time_point_t ctime = header.commit_time;
+           auto commit_type = header.commit_type;
+           // a zero commit time is treated as fatal
+           if (!ctime) {
+             ERROR("AsyncCleaner::scan_nonfull_segment: extent {} 0 commit_time",
+               ctime);
+             ceph_abort("0 commit_time");
+           }
+           time_point commit_time{duration(ctime)};
+           assert(commit_type == record_commit_type_t::MODIFY
+             || commit_type == record_commit_type_t::REWRITE);
+           // a default-constructed time_point leaves the other value alone
+           if (commit_type == record_commit_type_t::MODIFY) {
+              segments.update_last_modified_rewritten(segment_id, commit_time, {});
+           }
+           if (commit_type == record_commit_type_t::REWRITE) {
+              segments.update_last_modified_rewritten(segment_id, {}, commit_time);
+           }
+         }
+       } else {
+         DEBUG("inline segment {}, decodeing {} records",
+           segment_id,
+           header.records);
+         auto maybe_record_deltas_list = try_decode_deltas(
+           header, mdbuf, locator.record_block_base);
+         if (!maybe_record_deltas_list) {
+           ERROR("unable to decode deltas for record {} at {}",
+                 header, locator);
+           return crimson::ct_error::input_output_error::make();
+         }
+         for (auto &record_deltas : *maybe_record_deltas_list) {
+           for (auto &[ctime, delta] : record_deltas.deltas) {
+             // only ALLOC_TAIL deltas are of interest here
+             if (delta.type == extent_types_t::ALLOC_TAIL) {
+               journal_seq_t seq;
+               decode(seq, delta.bl);
+               update_alloc_info_replay_from(seq);
+             }
+           }
+         }
+       }
+       return seastar::now();
+      }),
+      [&cursor, segment_header, this](auto& handler) {
+       return sm_group->scan_valid_records(
+         cursor,
+         segment_header.segment_nonce,
+         segments.get_segment_size(),
+         handler);
+      }
+    );
+  }).safe_then([this, segment_id, header](auto) {
+    // the value produced by the scan is ignored
+    init_mark_segment_closed(
+      segment_id,
+      header.segment_seq,
+      header.type);
+    return seastar::now();
+  });
+}
+
+AsyncCleaner::release_ertr::future<>
+AsyncCleaner::maybe_release_segment(Transaction &t)
+{
+  auto to_release = t.get_segment_to_release();
+  if (to_release != NULL_SEG_ID) {
+    LOG_PREFIX(AsyncCleaner::maybe_release_segment);
+    INFOT("releasing segment {}", t, to_release);
+    return sm_group->release_segment(to_release
+    ).safe_then([this, FNAME, &t, to_release] {
+      auto old_usage = calc_utilization(to_release);
+      ceph_assert(old_usage == 0);
+      segments.mark_empty(to_release);
+      auto new_usage = calc_utilization(to_release);
+      adjust_segment_util(old_usage, new_usage);
+      INFOT("released, should_block_on_gc {}, projected_avail_ratio {}, "
+           "reclaim_ratio {}",
+           t,
+           should_block_on_gc(),
+           get_projected_available_ratio(),
+           get_reclaim_ratio());
+      if (space_tracker->get_usage(to_release) != 0) {
+        space_tracker->dump_usage(to_release);
+        ceph_abort();
+      }
+      maybe_wake_gc_blocked_io();
+    });
+  } else {
+    return SegmentManager::release_ertr::now();
+  }
+}
+
+// Flags the cleaner as initialized and, unless trimming is disabled,
+// starts the background GC process.
+void AsyncCleaner::complete_init()
+{
+  LOG_PREFIX(AsyncCleaner::complete_init);
+  if (!disable_trim) {
+    INFO("done, start GC");
+    // GC may only start once a journal head is known
+    ceph_assert(segments.get_journal_head() != JOURNAL_SEQ_NULL);
+    init_complete = true;
+    gc_process.start();
+  } else {
+    // trimming disabled: only record that initialization is done
+    init_complete = true;
+  }
+}
+
+void AsyncCleaner::mark_space_used(
+  paddr_t addr,
+  extent_len_t len,
+  time_point last_modified,
+  time_point last_rewritten,
+  bool init_scan)
+{
+  LOG_PREFIX(AsyncCleaner::mark_space_used);
+  if (addr.get_addr_type() != addr_types_t::SEGMENT) {
+    return;
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+
+  if (!init_scan && !init_complete) {
+    return;
+  }
+
+  stats.used_bytes += len;
+  auto old_usage = calc_utilization(seg_addr.get_segment_id());
+  [[maybe_unused]] auto ret = space_tracker->allocate(
+    seg_addr.get_segment_id(),
+    seg_addr.get_segment_off(),
+    len);
+  auto new_usage = calc_utilization(seg_addr.get_segment_id());
+  adjust_segment_util(old_usage, new_usage);
+
+  // use the last extent's last modified time for the calculation of the projected
+  // time the segments' live extents are to stay unmodified; this is an approximation
+  // of the sprite lfs' segment "age".
+
+  segments.update_last_modified_rewritten(
+      seg_addr.get_segment_id(), last_modified, last_rewritten);
+
+  gc_process.maybe_wake_on_space_used();
+  assert(ret > 0);
+  DEBUG("segment {} new len: {}~{}, live_bytes: {}",
+        seg_addr.get_segment_id(),
+        addr,
+        len,
+        space_tracker->get_usage(seg_addr.get_segment_id()));
+}
+
+void AsyncCleaner::mark_space_free(
+  paddr_t addr,
+  extent_len_t len)
+{
+  LOG_PREFIX(AsyncCleaner::mark_space_free);
+  if (!init_complete) {
+    return;
+  }
+  if (addr.get_addr_type() != addr_types_t::SEGMENT) {
+    return;
+  }
+
+  ceph_assert(stats.used_bytes >= len);
+  stats.used_bytes -= len;
+  auto& seg_addr = addr.as_seg_paddr();
+
+  DEBUG("segment {} free len: {}~{}",
+        seg_addr.get_segment_id(), addr, len);
+  auto old_usage = calc_utilization(seg_addr.get_segment_id());
+  [[maybe_unused]] auto ret = space_tracker->release(
+    seg_addr.get_segment_id(),
+    seg_addr.get_segment_off(),
+    len);
+  auto new_usage = calc_utilization(seg_addr.get_segment_id());
+  adjust_segment_util(old_usage, new_usage);
+  maybe_wake_gc_blocked_io();
+  assert(ret >= 0);
+  DEBUG("segment {} free len: {}~{}, live_bytes: {}",
+        seg_addr.get_segment_id(),
+        addr,
+        len,
+        space_tracker->get_usage(seg_addr.get_segment_id()));
+}
+
+segment_id_t AsyncCleaner::get_next_reclaim_segment() const
+{
+  LOG_PREFIX(AsyncCleaner::get_next_reclaim_segment);
+  segment_id_t id = NULL_SEG_ID;
+  double max_benefit_cost = 0;
+  for (auto& [_id, segment_info] : segments) {
+    if (segment_info.is_closed() &&
+        !segment_info.is_in_journal(journal_tail_committed)) {
+      double benefit_cost = calc_gc_benefit_cost(_id);
+      if (benefit_cost > max_benefit_cost) {
+        id = _id;
+        max_benefit_cost = benefit_cost;
+      }
+    }
+  }
+  if (id != NULL_SEG_ID) {
+    DEBUG("segment {}, benefit_cost {}",
+          id, max_benefit_cost);
+    return id;
+  } else {
+    ceph_assert(get_segments_reclaimable() == 0);
+    // see gc_should_reclaim_space()
+    ceph_abort("impossible!");
+    return NULL_SEG_ID;
+  }
+}
+
+// Emits one debug line summarizing the GC related state: segment counts,
+// byte counters, ratios, the blocking/reclaim/trim decisions and the
+// journal positions.
+//
+// `caller` is printed first so the origin of the line can be identified.
+// Nothing is logged when debug logging is disabled or when trimming is
+// disabled.
+void AsyncCleaner::log_gc_state(const char *caller) const
+{
+  LOG_PREFIX(AsyncCleaner::log_gc_state);
+  // checked up front so the arguments below are not evaluated needlessly
+  if (LOCAL_LOGGER.is_enabled(seastar::log_level::debug) &&
+      !disable_trim) {
+    // the arguments below follow the order of the placeholders
+    DEBUG(
+      "caller {}, "
+      "empty {}, "
+      "open {}, "
+      "closed {}, "
+      "in_journal {}, "
+      "total {}B, "
+      "available {}B, "
+      "unavailable {}B, "
+      "unavailable_used {}B, "
+      "unavailable_unused {}B; "
+      "reclaim_ratio {}, "
+      "available_ratio {}, "
+      "should_block_on_gc {}, "
+      "gc_should_reclaim_space {}, "
+      "journal_head {}, "
+      "journal_tail_target {}, "
+      "journal_tail_commit {}, "
+      "dirty_tail {}, "
+      "dirty_tail_limit {}, "
+      "gc_should_trim_journal {}, ",
+      caller,
+      segments.get_num_empty(),
+      segments.get_num_open(),
+      segments.get_num_closed(),
+      get_segments_in_journal(),
+      segments.get_total_bytes(),
+      segments.get_available_bytes(),
+      segments.get_unavailable_bytes(),
+      stats.used_bytes,
+      get_unavailable_unused_bytes(),
+      get_reclaim_ratio(),
+      segments.get_available_ratio(),
+      should_block_on_gc(),
+      gc_should_reclaim_space(),
+      segments.get_journal_head(),
+      journal_tail_target,
+      journal_tail_committed,
+      get_dirty_tail(),
+      get_dirty_tail_limit(),
+      gc_should_trim_journal()
+    );
+  }
+}
+
+seastar::future<>
+AsyncCleaner::reserve_projected_usage(std::size_t projected_usage)
+{
+  if (disable_trim) {
+    return seastar::now();
+  }
+  ceph_assert(init_complete);
+  // The pipeline configuration prevents another IO from entering
+  // prepare until the prior one exits and clears this.
+  ceph_assert(!blocked_io_wake);
+  ++stats.io_count;
+  bool is_blocked = false;
+  if (should_block_on_trim()) {
+    is_blocked = true;
+    ++stats.io_blocked_count_trim;
+  }
+  if (should_block_on_reclaim()) {
+    is_blocked = true;
+    ++stats.io_blocked_count_reclaim;
+  }
+  if (is_blocked) {
+    ++stats.io_blocking_num;
+    ++stats.io_blocked_count;
+    stats.io_blocked_sum += stats.io_blocking_num;
+  }
+  return seastar::do_until(
+    [this] {
+      log_gc_state("await_hard_limits");
+      return !should_block_on_gc();
+    },
+    [this] {
+      blocked_io_wake = seastar::promise<>();
+      return blocked_io_wake->get_future();
+    }
+  ).then([this, projected_usage, is_blocked] {
+    ceph_assert(!blocked_io_wake);
+    stats.projected_used_bytes += projected_usage;
+    ++stats.projected_count;
+    stats.projected_used_bytes_sum += stats.projected_used_bytes;
+    if (is_blocked) {
+      assert(stats.io_blocking_num > 0);
+      --stats.io_blocking_num;
+    }
+  });
+}
+
+// Gives back `projected_usage` bytes of previously reserved projected
+// space and lets IO blocked on GC re-check whether it may continue.
+// Does nothing when trimming is disabled.
+void AsyncCleaner::release_projected_usage(std::size_t projected_usage)
+{
+  if (!disable_trim) {
+    ceph_assert(init_complete);
+    // more than what is currently reserved cannot be released
+    ceph_assert(stats.projected_used_bytes >= projected_usage);
+    stats.projected_used_bytes -= projected_usage;
+    maybe_wake_gc_blocked_io();
+  }
+}
+
+}
diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h
new file mode 100644 (file)
index 0000000..0f2ded6
--- /dev/null
@@ -0,0 +1,1296 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <boost/intrusive/set.hpp>
+#include <seastar/core/metrics_types.hh>
+
+#include "common/ceph_time.h"
+
+#include "osd/osd_types.h"
+
+#include "crimson/os/seastore/backref_manager.h"
+#include "crimson/os/seastore/cached_extent.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/segment_manager_group.h"
+#include "crimson/os/seastore/transaction.h"
+#include "crimson/os/seastore/segment_seq_allocator.h"
+
+namespace crimson::os::seastore {
+
+/*
+ * segment_info_t
+ *
+ * Maintains the tracked information for a segment.
+ * It is read-only outside segments_info_t.
+ */
+struct segment_info_t {
+  using time_point = seastar::lowres_system_clock::time_point;
+
+  // segment_info_t is initiated as set_empty()
+  Segment::segment_state_t state = Segment::segment_state_t::EMPTY;
+
+  // Will be non-null for any segments in the current journal
+  segment_seq_t seq = NULL_SEG_SEQ;
+
+  segment_type_t type = segment_type_t::NULL_SEG;
+
+  time_point last_modified;
+  time_point last_rewritten;
+
+  std::size_t written_to = 0;
+
+  bool is_in_journal(journal_seq_t tail_committed) const {
+    return type == segment_type_t::JOURNAL &&
+           tail_committed.segment_seq <= seq;
+  }
+
+  bool is_empty() const {
+    return state == Segment::segment_state_t::EMPTY;
+  }
+
+  bool is_closed() const {
+    return state == Segment::segment_state_t::CLOSED;
+  }
+
+  bool is_open() const {
+    return state == Segment::segment_state_t::OPEN;
+  }
+
+  void init_closed(segment_seq_t, segment_type_t, std::size_t);
+
+  void set_open(segment_seq_t, segment_type_t);
+
+  void set_empty();
+
+  void set_closed();
+
+  void update_last_modified_rewritten(
+      time_point _last_modified, time_point _last_rewritten) {
+    if (_last_modified != time_point() && last_modified < _last_modified) {
+      last_modified = _last_modified;
+    }
+    if (_last_rewritten != time_point() && last_rewritten < _last_rewritten) {
+      last_rewritten = _last_rewritten;
+    }
+  }
+};
+
+/// Stream output for segment_info_t (defined elsewhere).
+std::ostream& operator<<(std::ostream&, const segment_info_t&);
+
+/*
+ * segments_info_t
+ *
+ * Keep track of all segments and related information.
+ */
+class segments_info_t {
+public:
+  using time_point = seastar::lowres_system_clock::time_point;
+
+  // all members get their initial values from reset()
+  segments_info_t() {
+    reset();
+  }
+
+  /// read-only access to the information tracked for segment `id`
+  const segment_info_t& operator[](segment_id_t id) const {
+    return segments[id];
+  }
+
+  // const iteration over the tracked segments
+  auto begin() const {
+    return segments.begin();
+  }
+
+  auto end() const {
+    return segments.end();
+  }
+
+  // the two getters below assert that the value is already non-zero
+  std::size_t get_num_segments() const {
+    assert(segments.size() > 0);
+    return segments.size();
+  }
+  std::size_t get_segment_size() const {
+    assert(segment_size > 0);
+    return segment_size;
+  }
+  // plain accessors for the counters declared in the private section
+  std::size_t get_num_in_journal_open() const {
+    return num_in_journal_open;
+  }
+  std::size_t get_num_type_journal() const {
+    return num_type_journal;
+  }
+  std::size_t get_num_type_ool() const {
+    return num_type_ool;
+  }
+  std::size_t get_num_open() const {
+    return num_open;
+  }
+  std::size_t get_num_empty() const {
+    return num_empty;
+  }
+  std::size_t get_num_closed() const {
+    return num_closed;
+  }
+  std::size_t get_count_open_journal() const {
+    return count_open_journal;
+  }
+  std::size_t get_count_open_ool() const {
+    return count_open_ool;
+  }
+  std::size_t get_count_release_journal() const {
+    return count_release_journal;
+  }
+  std::size_t get_count_release_ool() const {
+    return count_release_ool;
+  }
+  std::size_t get_count_close_journal() const {
+    return count_close_journal;
+  }
+  std::size_t get_count_close_ool() const {
+    return count_close_ool;
+  }
+
+  std::size_t get_total_bytes() const {
+    return total_bytes;
+  }
+  /// the available space that is writable, including in open segments
+  std::size_t get_available_bytes() const {
+    return num_empty * get_segment_size() + avail_bytes_in_open;
+  }
+  /// the unavailable space that is not writable
+  std::size_t get_unavailable_bytes() const {
+    assert(total_bytes >= get_available_bytes());
+    return total_bytes - get_available_bytes();
+  }
+  std::size_t get_available_bytes_in_open() const {
+    return avail_bytes_in_open;
+  }
+  /// available bytes as a fraction of total_bytes
+  // NOTE(review): total_bytes is not checked against zero here
+  double get_available_ratio() const {
+    return (double)get_available_bytes() / (double)total_bytes;
+  }
+
+  /// Journal position made of the current journal segment's seq and its
+  /// written_to offset; JOURNAL_SEQ_NULL while no journal segment is set.
+  journal_seq_t get_journal_head() const {
+    if (unlikely(journal_segment_id == NULL_SEG_ID)) {
+      return JOURNAL_SEQ_NULL;
+    }
+    auto &segment_info = segments[journal_segment_id];
+    assert(!segment_info.is_empty());
+    assert(segment_info.type == segment_type_t::JOURNAL);
+    assert(segment_info.seq != NULL_SEG_SEQ);
+    return journal_seq_t{
+      segment_info.seq,
+      paddr_t::make_seg_paddr(
+        journal_segment_id,
+        segment_info.written_to)
+    };
+  }
+
+  void reset();
+
+  void add_segment_manager(SegmentManager &segment_manager);
+
+  // initiate non-empty segments, the others are by default empty
+  void init_closed(segment_id_t, segment_seq_t, segment_type_t);
+
+  void mark_open(segment_id_t, segment_seq_t, segment_type_t);
+
+  void mark_empty(segment_id_t);
+
+  void mark_closed(segment_id_t);
+
+  void update_written_to(segment_type_t, paddr_t);
+
+  /// forwards to segment_info_t::update_last_modified_rewritten() of `id`
+  void update_last_modified_rewritten(
+      segment_id_t id, time_point last_modified, time_point last_rewritten) {
+    segments[id].update_last_modified_rewritten(last_modified, last_rewritten);
+  }
+
+private:
+  // See reset() for member initialization
+  segment_map_t<segment_info_t> segments;
+
+  std::size_t segment_size;
+
+  // segment used by get_journal_head(); NULL_SEG_ID means "none"
+  segment_id_t journal_segment_id;
+  std::size_t num_in_journal_open;
+  std::size_t num_type_journal;
+  std::size_t num_type_ool;
+
+  std::size_t num_open;
+  std::size_t num_empty;
+  std::size_t num_closed;
+
+  std::size_t count_open_journal;
+  std::size_t count_open_ool;
+  std::size_t count_release_journal;
+  std::size_t count_release_ool;
+  std::size_t count_close_journal;
+  std::size_t count_close_ool;
+
+  std::size_t total_bytes;
+  std::size_t avail_bytes_in_open;
+};
+
+/**
+ * Callback interface for managing available segments
+ */
+// Pure interface; AsyncCleaner derives from it.
+class SegmentProvider {
+public:
+  virtual journal_seq_t get_journal_tail_target() const = 0;
+
+  /// read-only view of the information tracked for segment `id`
+  virtual const segment_info_t& get_seg_info(segment_id_t id) const = 0;
+
+  /// returns the id of a segment allocated for the given seq and type
+  virtual segment_id_t allocate_segment(
+      segment_seq_t seq, segment_type_t type) = 0;
+
+  virtual journal_seq_t get_dirty_extents_replay_from() const = 0;
+
+  virtual journal_seq_t get_alloc_info_replay_from() const = 0;
+
+  virtual void close_segment(segment_id_t) = 0;
+
+  virtual void update_journal_tail_committed(journal_seq_t tail_committed) = 0;
+
+  virtual void update_segment_avail_bytes(segment_type_t, paddr_t) = 0;
+
+  virtual SegmentManagerGroup* get_segment_manager_group() = 0;
+
+  virtual ~SegmentProvider() {}
+};
+
+// Interface for tracking how much space is in use within each segment.
+// SpaceTrackerSimple and SpaceTrackerDetailed implement it.
+class SpaceTrackerI {
+public:
+  /// record `len` bytes at `offset` of `segment` as used
+  virtual int64_t allocate(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) = 0;
+
+  /// record `len` bytes at `offset` of `segment` as no longer used
+  virtual int64_t release(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) = 0;
+
+  /// usage currently recorded for `segment`
+  virtual int64_t get_usage(
+    segment_id_t segment) const = 0;
+
+  virtual bool equals(const SpaceTrackerI &other) const = 0;
+
+  /// a new tracker obtained by copying this one and resetting the copy
+  /// (that is how both implementations in this file do it)
+  virtual std::unique_ptr<SpaceTrackerI> make_empty() const = 0;
+
+  virtual void dump_usage(segment_id_t) const = 0;
+
+  /// used bytes of `segment` divided by its total bytes
+  virtual double calc_utilization(segment_id_t segment) const = 0;
+
+  virtual void reset() = 0;
+
+  virtual ~SpaceTrackerI() = default;
+};
+using SpaceTrackerIRef = std::unique_ptr<SpaceTrackerI>;
+
+class SpaceTrackerSimple : public SpaceTrackerI {
+  struct segment_bytes_t {
+    int64_t live_bytes = 0;
+    seastore_off_t total_bytes = 0;
+  };
+  // Tracks live space for each segment
+  segment_map_t<segment_bytes_t> live_bytes_by_segment;
+
+  int64_t update_usage(segment_id_t segment, int64_t delta) {
+    live_bytes_by_segment[segment].live_bytes += delta;
+    assert(live_bytes_by_segment[segment].live_bytes >= 0);
+    return live_bytes_by_segment[segment].live_bytes;
+  }
+public:
+  SpaceTrackerSimple(const SpaceTrackerSimple &) = default;
+  SpaceTrackerSimple(const std::vector<SegmentManager*> &sms) {
+    for (auto sm : sms) {
+      live_bytes_by_segment.add_device(
+       sm->get_device_id(),
+       sm->get_num_segments(),
+       {0, sm->get_segment_size()});
+    }
+  }
+
+  int64_t allocate(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) final {
+    return update_usage(segment, len);
+  }
+
+  int64_t release(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) final {
+    return update_usage(segment, -(int64_t)len);
+  }
+
+  int64_t get_usage(segment_id_t segment) const final {
+    return live_bytes_by_segment[segment].live_bytes;
+  }
+
+  double calc_utilization(segment_id_t segment) const final {
+    auto& seg_bytes = live_bytes_by_segment[segment];
+    return (double)seg_bytes.live_bytes / (double)seg_bytes.total_bytes;
+  }
+
+  void dump_usage(segment_id_t) const final;
+
+  void reset() final {
+    for (auto &i : live_bytes_by_segment) {
+      i.second = {0, 0};
+    }
+  }
+
+  SpaceTrackerIRef make_empty() const final {
+    auto ret = SpaceTrackerIRef(new SpaceTrackerSimple(*this));
+    ret->reset();
+    return ret;
+  }
+
+  bool equals(const SpaceTrackerI &other) const;
+};
+
+// Space tracker that keeps, for every segment, a usage counter plus a
+// bitmap with one entry per block of the segment.
+class SpaceTrackerDetailed : public SpaceTrackerI {
+  // per-segment state
+  class SegmentMap {
+    // value reported by get_usage()
+    int64_t used = 0;
+    // denominator of calc_utilization()
+    seastore_off_t total_bytes = 0;
+    // sized to the number of blocks, all false initially
+    std::vector<bool> bitmap;
+
+  public:
+    SegmentMap(
+      size_t blocks,
+      seastore_off_t total_bytes)
+    : total_bytes(total_bytes),
+      bitmap(blocks, false) {}
+
+    /// applies `delta` to the counter and returns the new value
+    int64_t update_usage(int64_t delta) {
+      used += delta;
+      return used;
+    }
+
+    int64_t allocate(
+      device_segment_id_t segment,
+      seastore_off_t offset,
+      extent_len_t len,
+      const extent_len_t block_size);
+
+    int64_t release(
+      device_segment_id_t segment,
+      seastore_off_t offset,
+      extent_len_t len,
+      const extent_len_t block_size);
+
+    int64_t get_usage() const {
+      return used;
+    }
+
+    void dump_usage(extent_len_t block_size) const;
+
+    double calc_utilization() const {
+      return (double)used / (double)total_bytes;
+    }
+
+    /// clears the counter and the bitmap; total_bytes is kept
+    void reset() {
+      used = 0;
+      for (auto &&i: bitmap) {
+       i = false;
+      }
+    }
+  };
+
+  // Tracks live space for each segment
+  segment_map_t<SegmentMap> segment_usage;
+  // block size of each segment manager, indexed by device id
+  std::vector<size_t> block_size_by_segment_manager;
+
+public:
+  SpaceTrackerDetailed(const SpaceTrackerDetailed &) = default;
+  /// registers every segment of every segment manager and records the
+  /// manager's block size under its device id
+  SpaceTrackerDetailed(const std::vector<SegmentManager*> &sms)
+  {
+    block_size_by_segment_manager.resize(DEVICE_ID_MAX, 0);
+    for (auto sm : sms) {
+      segment_usage.add_device(
+       sm->get_device_id(),
+       sm->get_num_segments(),
+       SegmentMap(
+         sm->get_segment_size() / sm->get_block_size(),
+         sm->get_segment_size()));
+      block_size_by_segment_manager[sm->get_device_id()] = sm->get_block_size();
+    }
+  }
+
+  // allocate()/release() delegate to the segment's SegmentMap, passing
+  // along the block size of the device the segment belongs to
+  int64_t allocate(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) final {
+    return segment_usage[segment].allocate(
+      segment.device_segment_id(),
+      offset,
+      len,
+      block_size_by_segment_manager[segment.device_id()]);
+  }
+
+  int64_t release(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) final {
+    return segment_usage[segment].release(
+      segment.device_segment_id(),
+      offset,
+      len,
+      block_size_by_segment_manager[segment.device_id()]);
+  }
+
+  int64_t get_usage(segment_id_t segment) const final {
+    return segment_usage[segment].get_usage();
+  }
+
+  double calc_utilization(segment_id_t segment) const final {
+    return segment_usage[segment].calc_utilization();
+  }
+
+  void dump_usage(segment_id_t seg) const final;
+
+  /// resets the SegmentMap of every segment
+  void reset() final {
+    for (auto &i: segment_usage) {
+      i.second.reset();
+    }
+  }
+
+  /// a copy of this tracker with all usage cleared
+  SpaceTrackerIRef make_empty() const final {
+    auto ret = SpaceTrackerIRef(new SpaceTrackerDetailed(*this));
+    ret->reset();
+    return ret;
+  }
+
+  bool equals(const SpaceTrackerI &other) const;
+};
+
+
+class AsyncCleaner : public SegmentProvider {
+public:
+  using time_point = seastar::lowres_system_clock::time_point;
+  using duration = seastar::lowres_system_clock::duration;
+
+  /// Config
+  struct config_t {
+    /// Number of minimum journal segments to stop trimming.
+    size_t target_journal_segments = 0;
+    /// Number of maximum journal segments to block user transactions.
+    size_t max_journal_segments = 0;
+
+    /// Number of journal segments the transactions in which can
+    /// have their corresponding backrefs unmerged
+    size_t target_backref_inflight_segments = 0;
+
+    /// Ratio of maximum available space to disable reclaiming.
+    double available_ratio_gc_max = 0;
+    /// Ratio of minimum available space to force reclaiming.
+    double available_ratio_hard_limit = 0;
+
+    /// Ratio of minimum reclaimable space to stop reclaiming.
+    double reclaim_ratio_gc_threshold = 0;
+
+    /// Number of bytes to reclaim per cycle
+    size_t reclaim_bytes_per_cycle = 0;
+
+    /// Number of bytes to rewrite dirty per cycle
+    size_t rewrite_dirty_bytes_per_cycle = 0;
+
+    /// Number of bytes to rewrite backref per cycle
+    size_t rewrite_backref_bytes_per_cycle = 0;
+
+    void validate() const {
+      ceph_assert(max_journal_segments > target_journal_segments);
+      ceph_assert(available_ratio_gc_max > available_ratio_hard_limit);
+      ceph_assert(reclaim_bytes_per_cycle > 0);
+      ceph_assert(rewrite_dirty_bytes_per_cycle > 0);
+      ceph_assert(rewrite_backref_bytes_per_cycle > 0);
+    }
+
+    static config_t get_default() {
+      return config_t{
+         12,   // target_journal_segments
+         16,   // max_journal_segments
+         2,    // target_backref_inflight_segments
+         .1,   // available_ratio_gc_max
+         .05,  // available_ratio_hard_limit
+         .1,   // reclaim_ratio_gc_threshold
+         1<<20,// reclaim_bytes_per_cycle
+         1<<17,// rewrite_dirty_bytes_per_cycle
+         1<<24 // rewrite_backref_bytes_per_cycle
+       };
+    }
+
+    static config_t get_test() {
+      return config_t{
+         2,    // target_journal_segments
+         4,    // max_journal_segments
+         2,    // target_backref_inflight_segments
+         .99,  // available_ratio_gc_max
+         .2,   // available_ratio_hard_limit
+         .6,   // reclaim_ratio_gc_threshold
+         1<<20,// reclaim_bytes_per_cycle
+         1<<17,// rewrite_dirty_bytes_per_cycle
+         1<<24 // rewrite_backref_bytes_per_cycle
+       };
+    }
+  };
+
+  /// Callback interface for querying and operating on segments.
+  /// The cleaner holds a pointer to an implementation (see
+  /// set_extent_callback()).
+  class ExtentCallbackInterface {
+  public:
+    virtual ~ExtentCallbackInterface() = default;
+
+    /// Creates a transaction for the given source.  The const char*
+    /// argument is passed through as the transaction's name
+    /// (NOTE(review): presumably used for logging -- confirm).
+    virtual TransactionRef create_transaction(
+        Transaction::src_t, const char*) = 0;
+
+    /// Creates empty transaction with interruptible context
+    ///
+    /// The transaction comes from create_transaction(src, name) and is
+    /// kept alive by seastar::do_with while f runs; f is invoked with the
+    /// Transaction& inside with_trans_intr, and its result is returned.
+    template <typename Func>
+    auto with_transaction_intr(
+        Transaction::src_t src,
+        const char* name,
+        Func &&f) {
+      return seastar::do_with(
+        create_transaction(src, name),
+        // f is captured into the outer closure, then handed on to the
+        // inner closure when the outer one is invoked
+        [f=std::forward<Func>(f)](auto &ref_t) mutable {
+          return with_trans_intr(
+            *ref_t,
+            [f=std::forward<Func>(f)](auto& t) mutable {
+              return f(t);
+            }
+          );
+        }
+      );
+    }
+
+    /// See Cache::get_next_dirty_extents
+    using get_next_dirty_extents_iertr = trans_iertr<
+      crimson::errorator<
+        crimson::ct_error::input_output_error>
+      >;
+    using get_next_dirty_extents_ret = get_next_dirty_extents_iertr::future<
+      std::vector<CachedExtentRef>>;
+    virtual get_next_dirty_extents_ret get_next_dirty_extents(
+      Transaction &t,     ///< [in] current transaction
+      journal_seq_t bound,///< [in] return extents with dirty_from < bound
+      size_t max_bytes    ///< [in] return up to max_bytes of extents
+    ) = 0;
+
+    /// Error sets shared by the extent mapping operations below:
+    /// extent_mapping_ertr is the plain errorator (input_output_error and
+    /// eagain), extent_mapping_iertr the trans_iertr form over
+    /// input_output_error.
+    using extent_mapping_ertr = crimson::errorator<
+      crimson::ct_error::input_output_error,
+      crimson::ct_error::eagain>;
+    using extent_mapping_iertr = trans_iertr<
+      crimson::errorator<
+       crimson::ct_error::input_output_error>
+      >;
+
+    /**
+     * rewrite_extent
+     *
+     * Updates t with operations moving the passed extents to a new
+     * segment.  extent may be invalid, implementation must correctly
+     * handle finding the current instance if it is still alive and
+     * otherwise ignore it.
+     */
+    using rewrite_extent_iertr = extent_mapping_iertr;
+    using rewrite_extent_ret = rewrite_extent_iertr::future<>;
+    virtual rewrite_extent_ret rewrite_extent(
+      Transaction &t,
+      CachedExtentRef extent) = 0;
+
+    /**
+     * get_extent_if_live
+     *
+     * Returns extent at specified location if still referenced by
+     * lba_manager and not removed by t.
+     *
+     * See TransactionManager::get_extent_if_live and
+     * LBAManager::get_physical_extent_if_live.
+     */
+    using get_extent_if_live_iertr = extent_mapping_iertr;
+    using get_extent_if_live_ret = get_extent_if_live_iertr::future<
+      CachedExtentRef>;
+    virtual get_extent_if_live_ret get_extent_if_live(
+      Transaction &t,
+      extent_types_t type,
+      paddr_t addr,
+      laddr_t laddr,
+      seastore_off_t len) = 0;
+
+    /**
+     * submit_transaction_direct
+     *
+     * Submits transaction without any space throttling.
+     * seq_to_trim is optional and defaults to std::nullopt.
+     */
+    using submit_transaction_direct_iertr = trans_iertr<
+      crimson::errorator<
+        crimson::ct_error::input_output_error>
+      >;
+    using submit_transaction_direct_ret =
+      submit_transaction_direct_iertr::future<>;
+    virtual submit_transaction_direct_ret submit_transaction_direct(
+      Transaction &t,
+      std::optional<journal_seq_t> seq_to_trim = std::nullopt) = 0;
+  };
+
+private:
+  const bool detailed;
+  const config_t config;
+
+  SegmentManagerGroupRef sm_group;
+  BackrefManager &backref_manager;
+
+  SpaceTrackerIRef space_tracker;
+  segments_info_t segments;
+  bool init_complete = false;
+
+  struct {
+    /**
+     * used_bytes
+     *
+     * Bytes occupied by live extents
+     */
+    uint64_t used_bytes = 0;
+
+    /**
+     * projected_used_bytes
+     *
+     * Sum of projected bytes used by each transaction between throttle
+     * acquisition and commit completion.  See reserve_projected_usage()
+     */
+    uint64_t projected_used_bytes = 0;
+    uint64_t projected_count = 0;
+    uint64_t projected_used_bytes_sum = 0;
+
+    uint64_t closed_journal_used_bytes = 0;
+    uint64_t closed_journal_total_bytes = 0;
+    uint64_t closed_ool_used_bytes = 0;
+    uint64_t closed_ool_total_bytes = 0;
+
+    uint64_t io_blocking_num = 0;
+    uint64_t io_count = 0;
+    uint64_t io_blocked_count = 0;
+    uint64_t io_blocked_count_trim = 0;
+    uint64_t io_blocked_count_reclaim = 0;
+    uint64_t io_blocked_sum = 0;
+
+    uint64_t reclaiming_bytes = 0;
+    uint64_t reclaimed_bytes = 0;
+    uint64_t reclaimed_segment_bytes = 0;
+
+    seastar::metrics::histogram segment_util;
+  } stats;
+  seastar::metrics::metric_group metrics;
+  void register_metrics();
+
+  /// target journal_tail for next fresh segment
+  journal_seq_t journal_tail_target;
+
+  /// target replay_from for dirty extents
+  journal_seq_t dirty_extents_replay_from;
+
+  /// target replay_from for alloc infos
+  journal_seq_t alloc_info_replay_from;
+
+  /// most recently committed journal_tail
+  journal_seq_t journal_tail_committed;
+
+  ExtentCallbackInterface *ecb = nullptr;
+
+  /// populated if there is an IO blocked on hard limits
+  std::optional<seastar::promise<>> blocked_io_wake;
+
+  SegmentSeqAllocatorRef ool_segment_seq_allocator;
+
+  /**
+   * disable_trim
+   *
+   * added to enable unit testing of CircularBoundedJournal before
+   * proper support is added to AsyncCleaner.
+   * Should be removed once proper support is added. TODO
+   */
+  bool disable_trim = false;
+public:
+  AsyncCleaner(
+    config_t config,
+    SegmentManagerGroupRef&& sm_group,
+    BackrefManager &backref_manager,
+    bool detailed = false);
+
+  /// Returns the allocator held in ool_segment_seq_allocator.
+  SegmentSeqAllocator& get_ool_segment_seq_allocator() {
+    return *ool_segment_seq_allocator;
+  }
+
+  using mount_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error>;
+  using mount_ret = mount_ertr::future<>;
+  mount_ret mount();
+
+  /*
+   * SegmentProvider interfaces
+   */
+  /// Returns journal_tail_target, the target journal tail for the next
+  /// fresh segment.
+  journal_seq_t get_journal_tail_target() const final {
+    return journal_tail_target;
+  }
+
+  /// Returns the tracked info of segment id.
+  const segment_info_t& get_seg_info(segment_id_t id) const final {
+    return segments[id];
+  }
+
+  segment_id_t allocate_segment(
+      segment_seq_t seq, segment_type_t type) final;
+
+  void close_segment(segment_id_t segment) final;
+
+  void update_journal_tail_committed(journal_seq_t committed) final;
+
+  /// Records the new written-to position for the given segment type and
+  /// then lets the gc process re-check whether it should wake up.
+  void update_segment_avail_bytes(segment_type_t type, paddr_t offset) final {
+    segments.update_written_to(type, offset);
+    gc_process.maybe_wake_on_space_used();
+  }
+
+  /// Non-owning pointer to the segment manager group held by sm_group.
+  SegmentManagerGroup* get_segment_manager_group() final {
+    return sm_group.get();
+  }
+
+  /// Returns dirty_extents_replay_from, the target replay_from for dirty
+  /// extents.
+  journal_seq_t get_dirty_extents_replay_from() const final {
+    return dirty_extents_replay_from;
+  }
+
+  /// Returns alloc_info_replay_from, the target replay_from for alloc
+  /// infos.
+  journal_seq_t get_alloc_info_replay_from() const final {
+    return alloc_info_replay_from;
+  }
+
+  void update_journal_tail_target(
+    journal_seq_t dirty_replay_from,
+    journal_seq_t alloc_replay_from);
+
+  void update_alloc_info_replay_from(
+    journal_seq_t alloc_replay_from);
+
+  /// Seeds both journal tail markers (target and committed) from the
+  /// current journal head.  The head must be valid unless disable_trim
+  /// is set.
+  void init_mkfs() {
+    const auto head = segments.get_journal_head();
+    ceph_assert(disable_trim || head != JOURNAL_SEQ_NULL);
+    journal_tail_committed = head;
+    journal_tail_target = head;
+  }
+
+  using release_ertr = SegmentManagerGroup::release_ertr;
+  release_ertr::future<> maybe_release_segment(Transaction &t);
+
+  /// Moves one segment from the histogram bucket of old_usage to the
+  /// bucket of new_usage in stats.segment_util.
+  void adjust_segment_util(double old_usage, double new_usage) {
+    auto &buckets = stats.segment_util.buckets;
+    const auto from = get_bucket_index(old_usage);
+    const auto to = get_bucket_index(new_usage);
+    assert(buckets[from].count > 0);
+    --buckets[from].count;
+    ++buckets[to].count;
+  }
+
+  void mark_space_used(
+    paddr_t addr,
+    extent_len_t len,
+    time_point last_modified = time_point(),
+    time_point last_rewritten = time_point(),
+    bool init_scan = false);
+
+  void mark_space_free(
+    paddr_t addr,
+    extent_len_t len);
+
+  /// Returns the result of space_tracker->make_empty(); the cleaner's own
+  /// tracker is accessed through a const method only.
+  SpaceTrackerIRef get_empty_space_tracker() const {
+    return space_tracker->make_empty();
+  }
+
+  void complete_init();
+
+  /// Builds a store_statfs_t from the segment totals and stats.used_bytes:
+  /// available is total minus used; allocated and data_stored both equal
+  /// used.
+  store_statfs_t stat() const {
+    const auto total = segments.get_total_bytes();
+    const auto used = stats.used_bytes;
+
+    store_statfs_t st;
+    st.total = total;
+    st.available = total - used;
+    st.allocated = used;
+    st.data_stored = used;
+
+    // TODO add per extent type counters for omap_allocated and
+    // internal metadata
+    return st;
+  }
+
+  /// Stops the background gc process; see GCProcess::stop().
+  seastar::future<> stop() {
+    return gc_process.stop();
+  }
+
+  /// Runs gc cycles in the caller's context; see
+  /// GCProcess::run_until_halt().
+  seastar::future<> run_until_halt() {
+    return gc_process.run_until_halt();
+  }
+
+  /// Stores cb (non-owning) as the extent callback.
+  void set_extent_callback(ExtentCallbackInterface *cb) {
+    ecb = cb;
+  }
+
+  /// Compares the cleaner's space tracker against tracker via
+  /// SpaceTrackerI::equals().
+  bool debug_check_space(const SpaceTrackerI &tracker) {
+    return space_tracker->equals(tracker);
+  }
+
+  /// Sets the disable_trim flag (see the member's comment).
+  void set_disable_trim(bool val) {
+    disable_trim = val;
+  }
+
+  using work_ertr = ExtentCallbackInterface::extent_mapping_ertr;
+  using work_iertr = ExtentCallbackInterface::extent_mapping_iertr;
+
+private:
+  /*
+   * 10 buckets for the number of closed segments by usage
+   * 2 extra buckets for the number of open and empty segments
+   */
+  static constexpr double UTIL_STATE_OPEN = 1.05;
+  static constexpr double UTIL_STATE_EMPTY = 1.15;
+  static constexpr std::size_t UTIL_BUCKETS = 12;
+  /// Maps a utilization value to its histogram bucket, floor(util * 10);
+  /// the result must be below UTIL_BUCKETS.
+  static std::size_t get_bucket_index(double util) {
+    const double scaled = std::floor(util * 10);
+    assert(scaled < UTIL_BUCKETS);
+    return static_cast<std::size_t>(scaled);
+  }
+  /// Utilization of segment id for the segment_util histogram: the
+  /// sentinel UTIL_STATE_OPEN / UTIL_STATE_EMPTY for open / empty
+  /// segments, otherwise the space tracker's value, which must lie in
+  /// [0, 1).
+  double calc_utilization(segment_id_t id) const {
+    const auto &seg = segments[id];
+    if (seg.is_open()) {
+      return UTIL_STATE_OPEN;
+    }
+    if (seg.is_empty()) {
+      return UTIL_STATE_EMPTY;
+    }
+    const auto util = space_tracker->calc_utilization(id);
+    assert(util >= 0 && util < 1);
+    return util;
+  }
+
+  // journal status helpers
+
+  double calc_gc_benefit_cost(segment_id_t id) const {
+    double util = calc_utilization(id);
+    ceph_assert(util >= 0 && util < 1);
+    auto cur_time = seastar::lowres_system_clock::now();
+    auto segment = segments[id];
+    assert(cur_time >= segment.last_modified);
+    auto segment_age =
+      cur_time - std::max(segment.last_modified, segment.last_rewritten);
+    uint64_t age = segment_age.count();
+    return (1 - util) * age / (1 + util);
+  }
+
+  segment_id_t get_next_reclaim_segment() const;
+
+  /**
+   * rewrite_dirty
+   *
+   * Writes out dirty blocks dirtied earlier than limit.
+   */
+  using rewrite_dirty_iertr = work_iertr;
+  using rewrite_dirty_ret = rewrite_dirty_iertr::future<>;
+  rewrite_dirty_ret rewrite_dirty(
+    Transaction &t,
+    journal_seq_t limit);
+
+  using trim_backrefs_iertr = work_iertr;
+  using trim_backrefs_ret = trim_backrefs_iertr::future<journal_seq_t>;
+  trim_backrefs_ret trim_backrefs(
+    Transaction &t,
+    journal_seq_t limit);
+
+  journal_seq_t get_dirty_tail() const {
+    auto ret = segments.get_journal_head();
+    ceph_assert(ret != JOURNAL_SEQ_NULL);
+    if (ret.segment_seq >= config.target_journal_segments) {
+      ret.segment_seq -= config.target_journal_segments;
+    } else {
+      ret.segment_seq = 0;
+      ret.offset = P_ADDR_MIN;
+    }
+    return ret;
+  }
+
+  journal_seq_t get_dirty_tail_limit() const {
+    auto ret = segments.get_journal_head();
+    ceph_assert(ret != JOURNAL_SEQ_NULL);
+    if (ret.segment_seq >= config.max_journal_segments) {
+      ret.segment_seq -= config.max_journal_segments;
+    } else {
+      ret.segment_seq = 0;
+      ret.offset = P_ADDR_MIN;
+    }
+    return ret;
+  }
+
+  journal_seq_t get_backref_tail() const {
+    auto ret = segments.get_journal_head();
+    ceph_assert(ret != JOURNAL_SEQ_NULL);
+    if (ret.segment_seq >= config.target_backref_inflight_segments) {
+      ret.segment_seq -= config.target_backref_inflight_segments;
+    } else {
+      ret.segment_seq = 0;
+      ret.offset = P_ADDR_MIN;
+    }
+    return ret;
+  }
+
+  struct reclaim_state_t {
+    std::size_t segment_size;
+    paddr_t start_pos;
+    paddr_t end_pos;
+
+    static reclaim_state_t create(
+        segment_id_t segment_id,
+        std::size_t segment_size) {
+      return {segment_size,
+              P_ADDR_NULL,
+              paddr_t::make_seg_paddr(segment_id, 0)};
+    }
+
+    segment_id_t get_segment_id() const {
+      return end_pos.as_seg_paddr().get_segment_id();
+    }
+
+    bool is_complete() const {
+      return (std::size_t)end_pos.as_seg_paddr().get_segment_off() >= segment_size;
+    }
+
+    void advance(std::size_t bytes) {
+      assert(!is_complete());
+      start_pos = end_pos;
+      auto &end_seg_paddr = end_pos.as_seg_paddr();
+      auto next_off = end_seg_paddr.get_segment_off() + bytes;
+      if (next_off > segment_size) {
+        end_seg_paddr.set_segment_off(segment_size);
+      } else {
+        end_seg_paddr.set_segment_off(next_off);
+      }
+    }
+  };
+  std::optional<reclaim_state_t> reclaim_state;
+
+  /**
+   * GCProcess
+   *
+   * Background gc process.
+   */
+  using gc_cycle_ret = seastar::future<>;
+  class GCProcess {
+    /// Future of the running gc loop; empty while the process is not
+    /// started (is_stopping() is defined as "process_join is empty").
+    std::optional<gc_cycle_ret> process_join;
+
+    AsyncCleaner &cleaner;
+
+    /// Set while maybe_wait_should_run() is waiting; resolved by wake().
+    std::optional<seastar::promise<>> blocking;
+
+    bool is_stopping() const {
+      return !process_join;
+    }
+
+    gc_cycle_ret run();
+
+    /// Resolves and clears the pending promise, if there is one.
+    void wake() {
+      if (blocking) {
+       blocking->set_value();
+       blocking = std::nullopt;
+      }
+    }
+
+    /// Resolves once the process is stopping or cleaner.gc_should_run()
+    /// holds.  Each time the check fails, a fresh promise is installed in
+    /// blocking and awaited before checking again.
+    seastar::future<> maybe_wait_should_run() {
+      return seastar::do_until(
+       [this] {
+         cleaner.log_gc_state("GCProcess::maybe_wait_should_run");
+         return is_stopping() || cleaner.gc_should_run();
+       },
+       [this] {
+         ceph_assert(!blocking);
+         blocking = seastar::promise<>();
+         return blocking->get_future();
+       });
+    }
+  public:
+    GCProcess(AsyncCleaner &cleaner) : cleaner(cleaner) {}
+
+    /// Starts the background loop; must not already be started.
+    /// process_join is first set to a ready future so that is_stopping()
+    /// is already false while run() executes, then replaced by run()'s
+    /// future.
+    void start() {
+      ceph_assert(is_stopping());
+      process_join = seastar::now(); // allow run()
+      process_join = run();
+      assert(!is_stopping());
+    }
+
+    /// Stops the loop and returns its join future (a ready future if the
+    /// process was not started).  process_join is cleared before wake()
+    /// so that a blocked maybe_wait_should_run() sees is_stopping().
+    gc_cycle_ret stop() {
+      if (is_stopping()) {
+        return seastar::now();
+      }
+      auto ret = std::move(*process_join);
+      process_join.reset();
+      assert(is_stopping());
+      wake();
+      return ret;
+    }
+
+    /// Runs cleaner.do_gc_cycle() repeatedly until
+    /// cleaner.gc_should_run() is false.  Only valid while the background
+    /// loop is not started.
+    gc_cycle_ret run_until_halt() {
+      ceph_assert(is_stopping());
+      return seastar::do_until(
+       [this] {
+         cleaner.log_gc_state("GCProcess::run_until_halt");
+         return !cleaner.gc_should_run();
+       },
+       [this] {
+         return cleaner.do_gc_cycle();
+       });
+    }
+
+    /// Wakes the loop if it is started and cleaner.gc_should_run() holds;
+    /// does nothing otherwise.
+    void maybe_wake_on_space_used() {
+      if (is_stopping()) {
+        return;
+      }
+      if (cleaner.gc_should_run()) {
+       wake();
+      }
+    }
+  } gc_process;
+
+  using gc_ertr = work_ertr::extend_ertr<
+    SegmentManagerGroup::scan_extents_ertr
+    >;
+
+  gc_cycle_ret do_gc_cycle();
+
+  using gc_trim_journal_ertr = gc_ertr;
+  using gc_trim_journal_ret = gc_trim_journal_ertr::future<>;
+  gc_trim_journal_ret gc_trim_journal();
+
+  using gc_trim_backref_ertr = gc_ertr;
+  using gc_trim_backref_ret = gc_trim_backref_ertr::future<journal_seq_t>;
+  gc_trim_backref_ret gc_trim_backref(journal_seq_t limit);
+
+  using gc_reclaim_space_ertr = gc_ertr;
+  using gc_reclaim_space_ret = gc_reclaim_space_ertr::future<>;
+  gc_reclaim_space_ret gc_reclaim_space();
+
+
+  using retrieve_live_extents_iertr = work_iertr;
+  using retrieve_live_extents_ret =
+    retrieve_live_extents_iertr::future<journal_seq_t>;
+  retrieve_live_extents_ret _retrieve_live_extents(
+    Transaction &t,
+    std::set<
+      backref_buf_entry_t,
+      backref_buf_entry_t::cmp_t> &&backrefs,
+    std::vector<CachedExtentRef> &extents);
+
+  using retrieve_backref_mappings_ertr = work_ertr;
+  using retrieve_backref_mappings_ret =
+    retrieve_backref_mappings_ertr::future<backref_pin_list_t>;
+  retrieve_backref_mappings_ret retrieve_backref_mappings(
+    paddr_t start_paddr,
+    paddr_t end_paddr);
+
+  /*
+   * Segments calculations
+   */
+  /// Number of segments spanned by the journal: 0 before init completes,
+  /// the count of journal-type segments while no tail has been committed,
+  /// otherwise head.segment_seq + 1 - tail.segment_seq.
+  std::size_t get_segments_in_journal() const {
+    if (!init_complete) {
+      return 0;
+    }
+    if (journal_tail_committed == JOURNAL_SEQ_NULL) {
+      return segments.get_num_type_journal();
+    }
+    const auto head = segments.get_journal_head();
+    assert(head != JOURNAL_SEQ_NULL);
+    const auto head_seq = head.segment_seq;
+    const auto tail_seq = journal_tail_committed.segment_seq;
+    assert(head_seq >= tail_seq);
+    return head_seq + 1 - tail_seq;
+  }
+  /// Journal segments that are not open: get_segments_in_journal() minus
+  /// the open in-journal segments, floored at 0.
+  std::size_t get_segments_in_journal_closed() const {
+    const auto total = get_segments_in_journal();
+    const auto open = segments.get_num_in_journal_open();
+    return total >= open ? total - open : 0;
+  }
+  /// Closed segments that are not part of the journal.
+  std::size_t get_segments_reclaimable() const {
+    const auto closed = segments.get_num_closed();
+    const auto in_journal_closed = get_segments_in_journal_closed();
+    assert(closed >= in_journal_closed);
+    return closed - in_journal_closed;
+  }
+
+  /*
+   * Space calculations
+   */
+  /// the unavailable space that is not reclaimable yet
+  /// Bytes of the open segments and the closed journal segments, minus
+  /// the bytes still available in the open segments.
+  std::size_t get_unavailable_unreclaimable_bytes() const {
+    const auto pinned_segments =
+      segments.get_num_open() + get_segments_in_journal_closed();
+    const auto pinned_bytes = pinned_segments * segments.get_segment_size();
+    const auto open_avail = segments.get_available_bytes_in_open();
+    assert(pinned_bytes >= open_avail);
+    return pinned_bytes - open_avail;
+  }
+  /// the unavailable space that can be reclaimed
+  /// Bytes of all reclaimable segments.  Together with the unreclaimable
+  /// bytes this must add up to segments.get_unavailable_bytes().
+  std::size_t get_unavailable_reclaimable_bytes() const {
+    const auto reclaimable =
+      get_segments_reclaimable() * segments.get_segment_size();
+    ceph_assert(
+      reclaimable + get_unavailable_unreclaimable_bytes() ==
+      segments.get_unavailable_bytes());
+    return reclaimable;
+  }
+  /// the unavailable space that is not alive
+  /// Unavailable bytes that are not occupied by live extents
+  /// (segments.get_unavailable_bytes() - stats.used_bytes).
+  std::size_t get_unavailable_unused_bytes() const {
+    // the subtraction below only needs ">=": equal values (e.g. both 0)
+    // are legal and simply yield 0
+    assert(segments.get_unavailable_bytes() >= stats.used_bytes);
+    return segments.get_unavailable_bytes() - stats.used_bytes;
+  }
+  /// Share of the unavailable bytes that is not alive; 0 when there are
+  /// no unavailable bytes at all.
+  double get_reclaim_ratio() const {
+    const auto unavailable = segments.get_unavailable_bytes();
+    if (unavailable == 0) {
+      return 0;
+    }
+    return static_cast<double>(get_unavailable_unused_bytes()) /
+      static_cast<double>(unavailable);
+  }
+
+  /*
+   * Space calculations (projected)
+   */
+  /// Available bytes minus stats.projected_used_bytes, floored at 0.
+  std::size_t get_projected_available_bytes() const {
+    const auto available = segments.get_available_bytes();
+    const auto projected = stats.projected_used_bytes;
+    if (available > projected) {
+      return available - projected;
+    }
+    return 0;
+  }
+  /// get_projected_available_bytes() as a fraction of the total bytes.
+  double get_projected_available_ratio() const {
+    const auto projected = static_cast<double>(get_projected_available_bytes());
+    const auto total = static_cast<double>(segments.get_total_bytes());
+    return projected / total;
+  }
+
+  /*
+   * Journal sizes
+   */
+  std::size_t get_dirty_journal_size() const {
+    auto journal_head = segments.get_journal_head();
+    if (journal_head == JOURNAL_SEQ_NULL ||
+        dirty_extents_replay_from == JOURNAL_SEQ_NULL) {
+      return 0;
+    }
+    return (journal_head.segment_seq - dirty_extents_replay_from.segment_seq) *
+           segments.get_segment_size() +
+           journal_head.offset.as_seg_paddr().get_segment_off() -
+           segments.get_segment_size() -
+           dirty_extents_replay_from.offset.as_seg_paddr().get_segment_off();
+  }
+
+  std::size_t get_alloc_journal_size() const {
+    auto journal_head = segments.get_journal_head();
+    if (journal_head == JOURNAL_SEQ_NULL ||
+        alloc_info_replay_from == JOURNAL_SEQ_NULL) {
+      return 0;
+    }
+    return (journal_head.segment_seq - alloc_info_replay_from.segment_seq) *
+           segments.get_segment_size() +
+           journal_head.offset.as_seg_paddr().get_segment_off() -
+           segments.get_segment_size() -
+           alloc_info_replay_from.offset.as_seg_paddr().get_segment_off();
+  }
+
+  /**
+   * should_block_on_gc
+   *
+   * Encapsulates whether to block pending gc (journal trim or reclaim).
+   */
+  /// True when trimming is enabled and journal_tail_target lags behind
+  /// get_dirty_tail_limit().
+  bool should_block_on_trim() const {
+    return !disable_trim && get_dirty_tail_limit() > journal_tail_target;
+  }
+
+  /// True when trimming is enabled, at least one segment is reclaimable
+  /// and the projected available ratio is below
+  /// config.available_ratio_hard_limit.
+  bool should_block_on_reclaim() const {
+    if (disable_trim || get_segments_reclaimable() == 0) {
+      return false;
+    }
+    return get_projected_available_ratio() < config.available_ratio_hard_limit;
+  }
+
+  /// True if either the trim or the reclaim condition asks to block.
+  bool should_block_on_gc() const {
+    if (should_block_on_trim()) {
+      return true;
+    }
+    return should_block_on_reclaim();
+  }
+
+  void log_gc_state(const char *caller) const;
+
+public:
+  seastar::future<> reserve_projected_usage(std::size_t projected_usage);
+
+  void release_projected_usage(size_t projected_usage);
+
+private:
+  /// Once init is complete and gc no longer needs to block, resolves and
+  /// clears blocked_io_wake if it is populated.
+  void maybe_wake_gc_blocked_io() {
+    if (!init_complete) {
+      return;
+    }
+    if (should_block_on_gc()) {
+      return;
+    }
+    if (blocked_io_wake) {
+      blocked_io_wake->set_value();
+      blocked_io_wake.reset();
+    }
+  }
+
+  using scan_extents_ret_bare =
+    std::vector<std::pair<segment_id_t, segment_header_t>>;
+  using scan_extents_ertr = SegmentManagerGroup::scan_extents_ertr;
+  using scan_extents_ret = scan_extents_ertr::future<>;
+  scan_extents_ret scan_nonfull_segment(
+    const segment_header_t& header,
+    scan_extents_ret_bare& segment_set,
+    segment_id_t segment_id);
+
+  /**
+   * gc_should_reclaim_space
+   *
+   * Encapsulates logic for whether gc should be reclaiming segment space.
+   */
+  bool gc_should_reclaim_space() const {
+    if (disable_trim) return false;
+    if (get_segments_reclaimable() == 0) {
+      return false;
+    }
+    auto aratio = segments.get_available_ratio();
+    auto rratio = get_reclaim_ratio();
+    return (
+      (aratio < config.available_ratio_hard_limit) ||
+      ((aratio < config.available_ratio_gc_max) &&
+       (rratio > config.reclaim_ratio_gc_threshold))
+    );
+  }
+
+  /**
+   * gc_should_trim_journal
+   *
+   * Encapsulates logic for whether gc should be trimming the journal:
+   * true when journal_tail_target lags behind get_dirty_tail().
+   */
+  bool gc_should_trim_journal() const {
+    return get_dirty_tail() > journal_tail_target;
+  }
+
+  /// True when alloc_info_replay_from lags behind get_backref_tail().
+  bool gc_should_trim_backref() const {
+    return get_backref_tail() > alloc_info_replay_from;
+  }
+  /**
+   * gc_should_run
+   *
+   * True if gc should be running: trimming is enabled and at least one of
+   * space reclaim, journal trim or backref trim is due.  Requires
+   * init_complete unless trimming is disabled.
+   */
+  bool gc_should_run() const {
+    if (disable_trim) {
+      return false;
+    }
+    ceph_assert(init_complete);
+    if (gc_should_reclaim_space()) {
+      return true;
+    }
+    if (gc_should_trim_journal()) {
+      return true;
+    }
+    return gc_should_trim_backref();
+  }
+
+  /// Init-time only: marks segment as closed with the given seq and type,
+  /// moves it between utilization buckets accordingly and, for OOL
+  /// segments, passes seq on to the OOL segment seq allocator.
+  void init_mark_segment_closed(
+      segment_id_t segment,
+      segment_seq_t seq,
+      segment_type_t s_type) {
+    ceph_assert(!init_complete);
+    const auto usage_before = calc_utilization(segment);
+    segments.init_closed(segment, seq, s_type);
+    const auto usage_after = calc_utilization(segment);
+    adjust_segment_util(usage_before, usage_after);
+    if (s_type != segment_type_t::OOL) {
+      return;
+    }
+    ool_segment_seq_allocator->set_next_segment_seq(seq);
+  }
+};
+using AsyncCleanerRef = std::unique_ptr<AsyncCleaner>;
+
+}
index ee128db1d856127a1b93b3fc5ebbe2e30b974544..dc7fdffa43ef1e9944a7345d2926ce284dcb7184 100644 (file)
@@ -10,7 +10,7 @@
 
 #include "crimson/os/seastore/logging.h"
 #include "crimson/common/config_proxy.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
 
 // included for get_extent_by_type
 #include "crimson/os/seastore/collection_manager/collection_flat_node.h"
@@ -1371,7 +1371,7 @@ void Cache::complete_commit(
   Transaction &t,
   paddr_t final_block_start,
   journal_seq_t seq,
-  SegmentCleaner *cleaner)
+  AsyncCleaner *cleaner)
 {
   LOG_PREFIX(Cache::complete_commit);
   SUBTRACET(seastore_t, "final_block_start={}, seq={}",
index 081dd49743de977d9ff320289f0569652ca66ac8..3560406c4ebb348c7a2050beb8af7ede806c20f9 100644 (file)
@@ -26,7 +26,7 @@ class BtreeBackrefManager;
 namespace crimson::os::seastore {
 
 class BackrefManager;
-class SegmentCleaner;
+class AsyncCleaner;
 
 struct backref_buf_entry_t {
   backref_buf_entry_t(
@@ -747,7 +747,7 @@ public:
     Transaction &t,            ///< [in, out] current transaction
     paddr_t final_block_start, ///< [in] offset of initial block
     journal_seq_t seq,         ///< [in] journal commit seq
-    SegmentCleaner *cleaner=nullptr ///< [out] optional segment stat listener
+    AsyncCleaner *cleaner=nullptr ///< [out] optional segment stat listener
   );
 
   /**
index 0ea508b31517e7d8113d0bec68f38a50c4209dc3..2716228531d4388e6853253b4b797bf39d297d88 100644 (file)
@@ -6,7 +6,7 @@
 #include <fmt/format.h>
 
 #include "crimson/os/seastore/logging.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
 
 SET_SUBSYS(seastore_journal);
 
index 46c3675bdf1fd6ceb62695009babdad2c977c292..a97db1b74ce680b9329facd59403f86dc84642cd 100644 (file)
@@ -9,7 +9,7 @@
 #include "include/buffer.h"
 #include "include/denc.h"
 
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
 #include "crimson/os/seastore/journal.h"
 #include "crimson/os/seastore/segment_manager_group.h"
 #include "crimson/os/seastore/ordering_handle.h"
index 2d0428e8f0bf1a41f102dbad8f9da8840f64a144..a07bd50ba56036ffaf80feea09c11927db2b4a75 100644 (file)
@@ -23,7 +23,7 @@
 #include "crimson/os/futurized_collection.h"
 
 #include "crimson/os/seastore/backref_manager.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
 #include "crimson/os/seastore/collection_manager/flat_collection_manager.h"
 #include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
 #include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h"
diff --git a/src/crimson/os/seastore/segment_cleaner.cc b/src/crimson/os/seastore/segment_cleaner.cc
deleted file mode 100644 (file)
index a3c3d6b..0000000
+++ /dev/null
@@ -1,1439 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include <seastar/core/metrics.hh>
-
-#include "crimson/os/seastore/logging.h"
-
-#include "crimson/os/seastore/segment_cleaner.h"
-#include "crimson/os/seastore/transaction_manager.h"
-
-SET_SUBSYS(seastore_cleaner);
-
-namespace crimson::os::seastore {
-
-void segment_info_t::set_open(
-    segment_seq_t _seq, segment_type_t _type)
-{
-  ceph_assert(_seq != NULL_SEG_SEQ);
-  ceph_assert(_type != segment_type_t::NULL_SEG);
-  state = Segment::segment_state_t::OPEN;
-  seq = _seq;
-  type = _type;
-  written_to = 0;
-}
-
-void segment_info_t::set_empty()
-{
-  state = Segment::segment_state_t::EMPTY;
-  seq = NULL_SEG_SEQ;
-  type = segment_type_t::NULL_SEG;
-  last_modified = {};
-  last_rewritten = {};
-  written_to = 0;
-}
-
-void segment_info_t::set_closed()
-{
-  state = Segment::segment_state_t::CLOSED;
-  // the rest of information is unchanged
-}
-
-void segment_info_t::init_closed(
-    segment_seq_t _seq, segment_type_t _type, std::size_t seg_size)
-{
-  ceph_assert(_seq != NULL_SEG_SEQ);
-  ceph_assert(_type != segment_type_t::NULL_SEG);
-  state = Segment::segment_state_t::CLOSED;
-  seq = _seq;
-  type = _type;
-  written_to = seg_size;
-}
-
-std::ostream& operator<<(std::ostream &out, const segment_info_t &info)
-{
-  out << "seg_info_t("
-      << "state=" << info.state;
-  if (info.is_empty()) {
-    // pass
-  } else { // open or closed
-    out << ", seq=" << segment_seq_printer_t{info.seq}
-        << ", type=" << info.type
-        << ", last_modified=" << info.last_modified.time_since_epoch()
-        << ", last_rewritten=" << info.last_rewritten.time_since_epoch()
-        << ", written_to=" << info.written_to;
-  }
-  return out << ")";
-}
-
-void segments_info_t::reset()
-{
-  segments.clear();
-
-  segment_size = 0;
-
-  journal_segment_id = NULL_SEG_ID;
-  num_in_journal_open = 0;
-  num_type_journal = 0;
-  num_type_ool = 0;
-
-  num_open = 0;
-  num_empty = 0;
-  num_closed = 0;
-
-  count_open_journal = 0;
-  count_open_ool = 0;
-  count_release_journal = 0;
-  count_release_ool = 0;
-  count_close_journal = 0;
-  count_close_ool = 0;
-
-  total_bytes = 0;
-  avail_bytes_in_open = 0;
-}
-
-void segments_info_t::add_segment_manager(
-    SegmentManager &segment_manager)
-{
-  LOG_PREFIX(segments_info_t::add_segment_manager);
-  device_id_t d_id = segment_manager.get_device_id();
-  auto ssize = segment_manager.get_segment_size();
-  auto nsegments = segment_manager.get_num_segments();
-  auto sm_size = segment_manager.get_size();
-  INFO("adding segment manager {}, size={}, ssize={}, segments={}",
-       device_id_printer_t{d_id}, sm_size, ssize, nsegments);
-  ceph_assert(ssize > 0);
-  ceph_assert(nsegments > 0);
-  ceph_assert(sm_size > 0);
-
-  // also validate if the device is duplicated
-  segments.add_device(d_id, nsegments, segment_info_t{});
-
-  // assume all the segment managers share the same settings as follows.
-  if (segment_size == 0) {
-    ceph_assert(ssize > 0);
-    segment_size = ssize;
-  } else {
-    ceph_assert(segment_size == (std::size_t)ssize);
-  }
-
-  // NOTE: by default the segments are empty
-  num_empty += nsegments;
-
-  total_bytes += sm_size;
-}
-
-void segments_info_t::init_closed(
-    segment_id_t segment, segment_seq_t seq, segment_type_t type)
-{
-  LOG_PREFIX(segments_info_t::init_closed);
-  auto& segment_info = segments[segment];
-  INFO("initiating {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
-       segment, segment_seq_printer_t{seq}, type,
-       segment_info, num_empty, num_open, num_closed);
-  ceph_assert(segment_info.is_empty());
-  segment_info.init_closed(seq, type, get_segment_size());
-  ceph_assert(num_empty > 0);
-  --num_empty;
-  ++num_closed;
-  if (type == segment_type_t::JOURNAL) {
-    // init_closed won't initialize journal_segment_id
-    ceph_assert(get_journal_head() == JOURNAL_SEQ_NULL);
-    ++num_type_journal;
-  } else {
-    ++num_type_ool;
-  }
-  // do not increment count_close_*;
-}
-
-void segments_info_t::mark_open(
-    segment_id_t segment, segment_seq_t seq, segment_type_t type)
-{
-  LOG_PREFIX(segments_info_t::mark_open);
-  auto& segment_info = segments[segment];
-  INFO("opening {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
-       segment, segment_seq_printer_t{seq}, type,
-       segment_info, num_empty, num_open, num_closed);
-  ceph_assert(segment_info.is_empty());
-  segment_info.set_open(seq, type);
-  ceph_assert(num_empty > 0);
-  --num_empty;
-  ++num_open;
-  if (type == segment_type_t::JOURNAL) {
-    if (journal_segment_id != NULL_SEG_ID) {
-      auto& last_journal_segment = segments[journal_segment_id];
-      ceph_assert(last_journal_segment.is_closed());
-      ceph_assert(last_journal_segment.type == segment_type_t::JOURNAL);
-      ceph_assert(last_journal_segment.seq + 1 == seq);
-    }
-    journal_segment_id = segment;
-
-    ++num_in_journal_open;
-    ++num_type_journal;
-    ++count_open_journal;
-  } else {
-    ++num_type_ool;
-    ++count_open_ool;
-  }
-  ceph_assert(segment_info.written_to == 0);
-  avail_bytes_in_open += get_segment_size();
-}
-
-void segments_info_t::mark_empty(
-    segment_id_t segment)
-{
-  LOG_PREFIX(segments_info_t::mark_empty);
-  auto& segment_info = segments[segment];
-  INFO("releasing {}, {}, num_segments(empty={}, opened={}, closed={})",
-       segment, segment_info,
-       num_empty, num_open, num_closed);
-  ceph_assert(segment_info.is_closed());
-  auto type = segment_info.type;
-  assert(type != segment_type_t::NULL_SEG);
-  segment_info.set_empty();
-  ceph_assert(num_closed > 0);
-  --num_closed;
-  ++num_empty;
-  if (type == segment_type_t::JOURNAL) {
-    ceph_assert(num_type_journal > 0);
-    --num_type_journal;
-    ++count_release_journal;
-  } else {
-    ceph_assert(num_type_ool > 0);
-    --num_type_ool;
-    ++count_release_ool;
-  }
-}
-
-void segments_info_t::mark_closed(
-    segment_id_t segment)
-{
-  LOG_PREFIX(segments_info_t::mark_closed);
-  auto& segment_info = segments[segment];
-  INFO("closing {}, {}, num_segments(empty={}, opened={}, closed={})",
-       segment, segment_info,
-       num_empty, num_open, num_closed);
-  ceph_assert(segment_info.is_open());
-  segment_info.set_closed();
-  ceph_assert(num_open > 0);
-  --num_open;
-  ++num_closed;
-  if (segment_info.type == segment_type_t::JOURNAL) {
-    ceph_assert(num_in_journal_open > 0);
-    --num_in_journal_open;
-    ++count_close_journal;
-  } else {
-    ++count_close_ool;
-  }
-  ceph_assert(get_segment_size() >= segment_info.written_to);
-  auto seg_avail_bytes = get_segment_size() - segment_info.written_to;
-  ceph_assert(avail_bytes_in_open >= seg_avail_bytes);
-  avail_bytes_in_open -= seg_avail_bytes;
-}
-
-void segments_info_t::update_written_to(
-    segment_type_t type,
-    paddr_t offset)
-{
-  LOG_PREFIX(segments_info_t::update_written_to);
-  auto& saddr = offset.as_seg_paddr();
-  auto& segment_info = segments[saddr.get_segment_id()];
-  if (!segment_info.is_open()) {
-    ERROR("segment is not open, not updating, type={}, offset={}, {}",
-          type, offset, segment_info);
-    ceph_abort();
-  }
-
-  auto new_written_to = static_cast<std::size_t>(saddr.get_segment_off());
-  ceph_assert(new_written_to <= get_segment_size());
-  if (segment_info.written_to > new_written_to) {
-    ERROR("written_to should not decrease! type={}, offset={}, {}",
-          type, offset, segment_info);
-    ceph_abort();
-  }
-
-  DEBUG("type={}, offset={}, {}", type, offset, segment_info);
-  ceph_assert(type == segment_info.type);
-  auto avail_deduction = new_written_to - segment_info.written_to;
-  ceph_assert(avail_bytes_in_open >= avail_deduction);
-  avail_bytes_in_open -= avail_deduction;
-  segment_info.written_to = new_written_to;
-}
-
-bool SpaceTrackerSimple::equals(const SpaceTrackerI &_other) const
-{
-  LOG_PREFIX(SpaceTrackerSimple::equals);
-  const auto &other = static_cast<const SpaceTrackerSimple&>(_other);
-
-  if (other.live_bytes_by_segment.size() != live_bytes_by_segment.size()) {
-    ERROR("different segment counts, bug in test");
-    assert(0 == "segment counts should match");
-    return false;
-  }
-
-  bool all_match = true;
-  for (auto i = live_bytes_by_segment.begin(), j = other.live_bytes_by_segment.begin();
-       i != live_bytes_by_segment.end(); ++i, ++j) {
-    if (i->second.live_bytes != j->second.live_bytes) {
-      all_match = false;
-      DEBUG("segment_id {} live bytes mismatch *this: {}, other: {}",
-            i->first, i->second.live_bytes, j->second.live_bytes);
-    }
-  }
-  return all_match;
-}
-
-int64_t SpaceTrackerDetailed::SegmentMap::allocate(
-  device_segment_id_t segment,
-  seastore_off_t offset,
-  extent_len_t len,
-  const extent_len_t block_size)
-{
-  LOG_PREFIX(SegmentMap::allocate);
-  assert(offset % block_size == 0);
-  assert(len % block_size == 0);
-
-  const auto b = (offset / block_size);
-  const auto e = (offset + len) / block_size;
-
-  bool error = false;
-  for (auto i = b; i < e; ++i) {
-    if (bitmap[i]) {
-      if (!error) {
-        ERROR("found allocated in {}, {} ~ {}", segment, offset, len);
-       error = true;
-      }
-      DEBUG("block {} allocated", i * block_size);
-    }
-    bitmap[i] = true;
-  }
-  return update_usage(len);
-}
-
-int64_t SpaceTrackerDetailed::SegmentMap::release(
-  device_segment_id_t segment,
-  seastore_off_t offset,
-  extent_len_t len,
-  const extent_len_t block_size)
-{
-  LOG_PREFIX(SegmentMap::release);
-  assert(offset % block_size == 0);
-  assert(len % block_size == 0);
-
-  const auto b = (offset / block_size);
-  const auto e = (offset + len) / block_size;
-
-  bool error = false;
-  for (auto i = b; i < e; ++i) {
-    if (!bitmap[i]) {
-      if (!error) {
-       ERROR("found unallocated in {}, {} ~ {}", segment, offset, len);
-       error = true;
-      }
-      DEBUG("block {} unallocated", i * block_size);
-    }
-    bitmap[i] = false;
-  }
-  return update_usage(-(int64_t)len);
-}
-
-bool SpaceTrackerDetailed::equals(const SpaceTrackerI &_other) const
-{
-  LOG_PREFIX(SpaceTrackerDetailed::equals);
-  const auto &other = static_cast<const SpaceTrackerDetailed&>(_other);
-
-  if (other.segment_usage.size() != segment_usage.size()) {
-    ERROR("different segment counts, bug in test");
-    assert(0 == "segment counts should match");
-    return false;
-  }
-
-  bool all_match = true;
-  for (auto i = segment_usage.begin(), j = other.segment_usage.begin();
-       i != segment_usage.end(); ++i, ++j) {
-    if (i->second.get_usage() != j->second.get_usage()) {
-      all_match = false;
-      ERROR("segment_id {} live bytes mismatch *this: {}, other: {}",
-            i->first, i->second.get_usage(), j->second.get_usage());
-    }
-  }
-  return all_match;
-}
-
-void SpaceTrackerDetailed::SegmentMap::dump_usage(extent_len_t block_size) const
-{
-  LOG_PREFIX(SegmentMap::dump_usage);
-  INFO("dump start");
-  for (unsigned i = 0; i < bitmap.size(); ++i) {
-    if (bitmap[i]) {
-      LOCAL_LOGGER.info("    {} still live", i * block_size);
-    }
-  }
-}
-
-void SpaceTrackerDetailed::dump_usage(segment_id_t id) const
-{
-  LOG_PREFIX(SpaceTrackerDetailed::dump_usage);
-  INFO("{}", id);
-  segment_usage[id].dump_usage(
-    block_size_by_segment_manager[id.device_id()]);
-}
-
-void SpaceTrackerSimple::dump_usage(segment_id_t id) const
-{
-  LOG_PREFIX(SpaceTrackerSimple::dump_usage);
-  INFO("id: {}, live_bytes: {}",
-       id, live_bytes_by_segment[id].live_bytes);
-}
-
-SegmentCleaner::SegmentCleaner(
-  config_t config,
-  SegmentManagerGroupRef&& sm_group,
-  BackrefManager &backref_manager,
-  bool detailed)
-  : detailed(detailed),
-    config(config),
-    sm_group(std::move(sm_group)),
-    backref_manager(backref_manager),
-    ool_segment_seq_allocator(
-      new SegmentSeqAllocator(segment_type_t::OOL)),
-    gc_process(*this)
-{
-  config.validate();
-}
-
-void SegmentCleaner::register_metrics()
-{
-  namespace sm = seastar::metrics;
-  stats.segment_util.buckets.resize(UTIL_BUCKETS);
-  std::size_t i;
-  for (i = 0; i < UTIL_BUCKETS; ++i) {
-    stats.segment_util.buckets[i].upper_bound = ((double)(i + 1)) / 10;
-    stats.segment_util.buckets[i].count = 0;
-  }
-  // NOTE: by default the segments are empty
-  i = get_bucket_index(UTIL_STATE_EMPTY);
-  stats.segment_util.buckets[i].count = segments.get_num_segments();
-
-  metrics.add_group("segment_cleaner", {
-    sm::make_counter("segments_number",
-                    [this] { return segments.get_num_segments(); },
-                    sm::description("the number of segments")),
-    sm::make_counter("segment_size",
-                    [this] { return segments.get_segment_size(); },
-                    sm::description("the bytes of a segment")),
-    sm::make_counter("segments_in_journal",
-                    [this] { return get_segments_in_journal(); },
-                    sm::description("the number of segments in journal")),
-    sm::make_counter("segments_type_journal",
-                    [this] { return segments.get_num_type_journal(); },
-                    sm::description("the number of segments typed journal")),
-    sm::make_counter("segments_type_ool",
-                    [this] { return segments.get_num_type_ool(); },
-                    sm::description("the number of segments typed out-of-line")),
-    sm::make_counter("segments_open",
-                    [this] { return segments.get_num_open(); },
-                    sm::description("the number of open segments")),
-    sm::make_counter("segments_empty",
-                    [this] { return segments.get_num_empty(); },
-                    sm::description("the number of empty segments")),
-    sm::make_counter("segments_closed",
-                    [this] { return segments.get_num_closed(); },
-                    sm::description("the number of closed segments")),
-
-    sm::make_counter("segments_count_open_journal",
-                    [this] { return segments.get_count_open_journal(); },
-                    sm::description("the count of open journal segment operations")),
-    sm::make_counter("segments_count_open_ool",
-                    [this] { return segments.get_count_open_ool(); },
-                    sm::description("the count of open ool segment operations")),
-    sm::make_counter("segments_count_release_journal",
-                    [this] { return segments.get_count_release_journal(); },
-                    sm::description("the count of release journal segment operations")),
-    sm::make_counter("segments_count_release_ool",
-                    [this] { return segments.get_count_release_ool(); },
-                    sm::description("the count of release ool segment operations")),
-    sm::make_counter("segments_count_close_journal",
-                    [this] { return segments.get_count_close_journal(); },
-                    sm::description("the count of close journal segment operations")),
-    sm::make_counter("segments_count_close_ool",
-                    [this] { return segments.get_count_close_ool(); },
-                    sm::description("the count of close ool segment operations")),
-
-    sm::make_counter("total_bytes",
-                    [this] { return segments.get_total_bytes(); },
-                    sm::description("the size of the space")),
-    sm::make_counter("available_bytes",
-                    [this] { return segments.get_available_bytes(); },
-                    sm::description("the size of the space is available")),
-    sm::make_counter("unavailable_unreclaimable_bytes",
-                    [this] { return get_unavailable_unreclaimable_bytes(); },
-                    sm::description("the size of the space is unavailable and unreclaimable")),
-    sm::make_counter("unavailable_reclaimable_bytes",
-                    [this] { return get_unavailable_reclaimable_bytes(); },
-                    sm::description("the size of the space is unavailable and reclaimable")),
-    sm::make_counter("used_bytes", stats.used_bytes,
-                    sm::description("the size of the space occupied by live extents")),
-    sm::make_counter("unavailable_unused_bytes",
-                    [this] { return get_unavailable_unused_bytes(); },
-                    sm::description("the size of the space is unavailable and not alive")),
-
-    sm::make_counter("dirty_journal_bytes",
-                    [this] { return get_dirty_journal_size(); },
-                    sm::description("the size of the journal for dirty extents")),
-    sm::make_counter("alloc_journal_bytes",
-                    [this] { return get_alloc_journal_size(); },
-                    sm::description("the size of the journal for alloc info")),
-
-    sm::make_counter("projected_count", stats.projected_count,
-                   sm::description("the number of projected usage reservations")),
-    sm::make_counter("projected_used_bytes_sum", stats.projected_used_bytes_sum,
-                   sm::description("the sum of the projected usage in bytes")),
-
-    sm::make_counter("io_count", stats.io_count,
-                   sm::description("the sum of IOs")),
-    sm::make_counter("io_blocked_count", stats.io_blocked_count,
-                   sm::description("IOs that are blocked by gc")),
-    sm::make_counter("io_blocked_count_trim", stats.io_blocked_count_trim,
-                   sm::description("IOs that are blocked by trimming")),
-    sm::make_counter("io_blocked_count_reclaim", stats.io_blocked_count_reclaim,
-                   sm::description("IOs that are blocked by reclaimming")),
-    sm::make_counter("io_blocked_sum", stats.io_blocked_sum,
-                    sm::description("the sum of blocking IOs")),
-
-    sm::make_counter("reclaimed_bytes", stats.reclaimed_bytes,
-                    sm::description("rewritten bytes due to reclaim")),
-    sm::make_counter("reclaimed_segment_bytes", stats.reclaimed_segment_bytes,
-                    sm::description("rewritten bytes due to reclaim")),
-    sm::make_counter("closed_journal_used_bytes", stats.closed_journal_used_bytes,
-                    sm::description("used bytes when close a journal segment")),
-    sm::make_counter("closed_journal_total_bytes", stats.closed_journal_total_bytes,
-                    sm::description("total bytes of closed journal segments")),
-    sm::make_counter("closed_ool_used_bytes", stats.closed_ool_used_bytes,
-                    sm::description("used bytes when close a ool segment")),
-    sm::make_counter("closed_ool_total_bytes", stats.closed_ool_total_bytes,
-                    sm::description("total bytes of closed ool segments")),
-
-    sm::make_gauge("available_ratio",
-                   [this] { return segments.get_available_ratio(); },
-                   sm::description("ratio of available space to total space")),
-    sm::make_gauge("reclaim_ratio",
-                   [this] { return get_reclaim_ratio(); },
-                   sm::description("ratio of reclaimable space to unavailable space")),
-
-    sm::make_histogram("segment_utilization_distribution",
-                      [this]() -> seastar::metrics::histogram& {
-                        return stats.segment_util;
-                      },
-                      sm::description("utilization distribution of all segments"))
-  });
-}
-
-segment_id_t SegmentCleaner::allocate_segment(
-    segment_seq_t seq,
-    segment_type_t type)
-{
-  LOG_PREFIX(SegmentCleaner::allocate_segment);
-  assert(seq != NULL_SEG_SEQ);
-  for (auto it = segments.begin();
-       it != segments.end();
-       ++it) {
-    auto seg_id = it->first;
-    auto& segment_info = it->second;
-    if (segment_info.is_empty()) {
-      auto old_usage = calc_utilization(seg_id);
-      segments.mark_open(seg_id, seq, type);
-      auto new_usage = calc_utilization(seg_id);
-      adjust_segment_util(old_usage, new_usage);
-      INFO("opened, should_block_on_gc {}, projected_avail_ratio {}, "
-           "reclaim_ratio {}",
-           should_block_on_gc(),
-           get_projected_available_ratio(),
-           get_reclaim_ratio());
-      return seg_id;
-    }
-  }
-  ERROR("out of space with segment_seq={}", segment_seq_printer_t{seq});
-  ceph_abort();
-  return NULL_SEG_ID;
-}
-
-void SegmentCleaner::update_journal_tail_target(
-  journal_seq_t dirty_replay_from,
-  journal_seq_t alloc_replay_from)
-{
-  LOG_PREFIX(SegmentCleaner::update_journal_tail_target);
-  if (disable_trim) return;
-  assert(dirty_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
-  assert(alloc_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
-  if (dirty_extents_replay_from == JOURNAL_SEQ_NULL
-      || dirty_replay_from > dirty_extents_replay_from) {
-    DEBUG("dirty_extents_replay_from={} => {}",
-          dirty_extents_replay_from, dirty_replay_from);
-    dirty_extents_replay_from = dirty_replay_from;
-  }
-
-  update_alloc_info_replay_from(alloc_replay_from);
-
-  journal_seq_t target = std::min(dirty_replay_from, alloc_replay_from);
-  ceph_assert(target != JOURNAL_SEQ_NULL);
-  auto journal_head = segments.get_journal_head();
-  ceph_assert(journal_head == JOURNAL_SEQ_NULL ||
-              journal_head >= target);
-  if (journal_tail_target == JOURNAL_SEQ_NULL ||
-      target > journal_tail_target) {
-    if (!init_complete ||
-        journal_tail_target.segment_seq == target.segment_seq) {
-      DEBUG("journal_tail_target={} => {}", journal_tail_target, target);
-    } else {
-      INFO("journal_tail_target={} => {}", journal_tail_target, target);
-    }
-    journal_tail_target = target;
-  }
-  gc_process.maybe_wake_on_space_used();
-  maybe_wake_gc_blocked_io();
-}
-
-void SegmentCleaner::update_alloc_info_replay_from(
-  journal_seq_t alloc_replay_from)
-{
-  LOG_PREFIX(SegmentCleaner::update_alloc_info_replay_from);
-  if (alloc_info_replay_from == JOURNAL_SEQ_NULL
-      || alloc_replay_from > alloc_info_replay_from) {
-    DEBUG("alloc_info_replay_from={} => {}",
-          alloc_info_replay_from, alloc_replay_from);
-    alloc_info_replay_from = alloc_replay_from;
-  }
-}
-
-void SegmentCleaner::update_journal_tail_committed(journal_seq_t committed)
-{
-  LOG_PREFIX(SegmentCleaner::update_journal_tail_committed);
-  assert(committed.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
-  if (committed == JOURNAL_SEQ_NULL) {
-    return;
-  }
-  auto journal_head = segments.get_journal_head();
-  ceph_assert(journal_head == JOURNAL_SEQ_NULL ||
-              journal_head >= committed);
-
-  if (journal_tail_committed == JOURNAL_SEQ_NULL ||
-      committed > journal_tail_committed) {
-    DEBUG("update journal_tail_committed={} => {}",
-          journal_tail_committed, committed);
-    journal_tail_committed = committed;
-  }
-  if (journal_tail_target == JOURNAL_SEQ_NULL ||
-      committed > journal_tail_target) {
-    DEBUG("update journal_tail_target={} => {}",
-          journal_tail_target, committed);
-    journal_tail_target = committed;
-  }
-}
-
-void SegmentCleaner::close_segment(segment_id_t segment)
-{
-  LOG_PREFIX(SegmentCleaner::close_segment);
-  auto old_usage = calc_utilization(segment);
-  segments.mark_closed(segment);
-  auto &seg_info = segments[segment];
-  if (seg_info.type == segment_type_t::JOURNAL) {
-    stats.closed_journal_used_bytes += space_tracker->get_usage(segment);
-    stats.closed_journal_total_bytes += segments.get_segment_size();
-  } else {
-    stats.closed_ool_used_bytes += space_tracker->get_usage(segment);
-    stats.closed_ool_total_bytes += segments.get_segment_size();
-  }
-  auto new_usage = calc_utilization(segment);
-  adjust_segment_util(old_usage, new_usage);
-  INFO("closed, should_block_on_gc {}, projected_avail_ratio {}, "
-       "reclaim_ratio {}",
-       should_block_on_gc(),
-       get_projected_available_ratio(),
-       get_reclaim_ratio());
-}
-
-SegmentCleaner::trim_backrefs_ret SegmentCleaner::trim_backrefs(
-  Transaction &t,
-  journal_seq_t limit)
-{
-  return backref_manager.merge_cached_backrefs(
-    t,
-    limit,
-    config.rewrite_backref_bytes_per_cycle
-  );
-}
-
-SegmentCleaner::rewrite_dirty_ret SegmentCleaner::rewrite_dirty(
-  Transaction &t,
-  journal_seq_t limit)
-{
-  return ecb->get_next_dirty_extents(
-    t,
-    limit,
-    config.rewrite_dirty_bytes_per_cycle
-  ).si_then([=, &t](auto dirty_list) {
-    LOG_PREFIX(SegmentCleaner::rewrite_dirty);
-    DEBUGT("rewrite {} dirty extents", t, dirty_list.size());
-    return seastar::do_with(
-      std::move(dirty_list),
-      [this, FNAME, &t](auto &dirty_list) {
-       return trans_intr::do_for_each(
-         dirty_list,
-         [this, FNAME, &t](auto &e) {
-         DEBUGT("cleaning {}", t, *e);
-         return ecb->rewrite_extent(t, e);
-       });
-      });
-  });
-}
-
-SegmentCleaner::gc_cycle_ret SegmentCleaner::GCProcess::run()
-{
-  return seastar::do_until(
-    [this] { return is_stopping(); },
-    [this] {
-      return maybe_wait_should_run(
-      ).then([this] {
-       cleaner.log_gc_state("GCProcess::run");
-
-       if (is_stopping()) {
-         return seastar::now();
-       } else {
-         return cleaner.do_gc_cycle();
-       }
-      });
-    });
-}
-
-SegmentCleaner::gc_cycle_ret SegmentCleaner::do_gc_cycle()
-{
-  if (gc_should_trim_journal()) {
-    return gc_trim_journal(
-    ).handle_error(
-      crimson::ct_error::assert_all{
-       "GCProcess::run encountered invalid error in gc_trim_journal"
-      }
-    );
-  } else if (gc_should_trim_backref()) {
-    return gc_trim_backref(get_backref_tail()
-    ).safe_then([](auto) {
-      return seastar::now();
-    }).handle_error(
-      crimson::ct_error::assert_all{
-       "GCProcess::run encountered invalid error in gc_trim_backref"
-      }
-    );
-  } else if (gc_should_reclaim_space()) {
-    return gc_reclaim_space(
-    ).handle_error(
-      crimson::ct_error::assert_all{
-       "GCProcess::run encountered invalid error in gc_reclaim_space"
-      }
-    );
-  } else {
-    return seastar::now();
-  }
-}
-
-SegmentCleaner::gc_trim_backref_ret
-SegmentCleaner::gc_trim_backref(journal_seq_t limit) {
-  return seastar::do_with(
-    journal_seq_t(),
-    [this, limit=std::move(limit)](auto &seq) mutable {
-    return repeat_eagain([this, limit=std::move(limit), &seq] {
-      return ecb->with_transaction_intr(
-       Transaction::src_t::TRIM_BACKREF,
-       "trim_backref",
-       [this, limit](auto &t) {
-       return trim_backrefs(
-         t,
-         limit
-       ).si_then([this, &t, limit](auto trim_backrefs_to)
-         -> ExtentCallbackInterface::submit_transaction_direct_iertr::future<
-           journal_seq_t> {
-         if (trim_backrefs_to != JOURNAL_SEQ_NULL) {
-           return ecb->submit_transaction_direct(
-             t, std::make_optional<journal_seq_t>(trim_backrefs_to)
-           ).si_then([trim_backrefs_to=std::move(trim_backrefs_to)]() mutable {
-             return seastar::make_ready_future<
-               journal_seq_t>(std::move(trim_backrefs_to));
-           });
-         }
-         return seastar::make_ready_future<journal_seq_t>(std::move(limit));
-       });
-      }).safe_then([&seq](auto trim_backrefs_to) {
-       seq = std::move(trim_backrefs_to);
-      });
-    }).safe_then([&seq] {
-      return gc_trim_backref_ertr::make_ready_future<
-       journal_seq_t>(std::move(seq));
-    });
-  });
-}
-
-SegmentCleaner::gc_trim_journal_ret SegmentCleaner::gc_trim_journal()
-{
-  return gc_trim_backref(get_dirty_tail()
-  ).safe_then([this](auto seq) {
-    return repeat_eagain([this, seq=std::move(seq)]() mutable {
-      return ecb->with_transaction_intr(
-       Transaction::src_t::CLEANER_TRIM,
-       "trim_journal",
-       [this, seq=std::move(seq)](auto& t)
-      {
-       return rewrite_dirty(t, seq
-       ).si_then([this, &t] {
-         return ecb->submit_transaction_direct(t);
-       });
-      });
-    });
-  });
-}
-
-SegmentCleaner::retrieve_live_extents_ret
-SegmentCleaner::_retrieve_live_extents(
-  Transaction &t,
-  std::set<
-    backref_buf_entry_t,
-    backref_buf_entry_t::cmp_t> &&backrefs,
-  std::vector<CachedExtentRef> &extents)
-{
-  return seastar::do_with(
-    JOURNAL_SEQ_NULL,
-    std::move(backrefs),
-    [this, &t, &extents](auto &seq, auto &backrefs) {
-    return trans_intr::parallel_for_each(
-      backrefs,
-      [this, &extents, &t, &seq](auto &ent) {
-      LOG_PREFIX(SegmentCleaner::_retrieve_live_extents);
-      DEBUGT("getting extent of type {} at {}~{}",
-       t,
-       ent.type,
-       ent.paddr,
-       ent.len);
-      return ecb->get_extent_if_live(
-       t, ent.type, ent.paddr, ent.laddr, ent.len
-      ).si_then([this, FNAME, &extents, &ent, &seq, &t](auto ext) {
-       if (!ext) {
-         DEBUGT("addr {} dead, skipping", t, ent.paddr);
-         auto backref = backref_manager.get_cached_backref_removal(ent.paddr);
-         if (seq == JOURNAL_SEQ_NULL || seq < backref.seq) {
-           seq = backref.seq;
-         }
-       } else {
-         extents.emplace_back(std::move(ext));
-       }
-       return ExtentCallbackInterface::rewrite_extent_iertr::now();
-      });
-    }).si_then([&seq] {
-      return retrieve_live_extents_iertr::make_ready_future<
-       journal_seq_t>(std::move(seq));
-    });
-  });
-}
-
-SegmentCleaner::retrieve_backref_mappings_ret
-SegmentCleaner::retrieve_backref_mappings(
-  paddr_t start_paddr,
-  paddr_t end_paddr)
-{
-  return seastar::do_with(
-    backref_pin_list_t(),
-    [this, start_paddr, end_paddr](auto &pin_list) {
-    return repeat_eagain([this, start_paddr, end_paddr, &pin_list] {
-      return ecb->with_transaction_intr(
-       Transaction::src_t::READ,
-       "get_backref_mappings",
-       [this, start_paddr, end_paddr](auto &t) {
-       return backref_manager.get_mappings(
-         t, start_paddr, end_paddr
-       );
-      }).safe_then([&pin_list](auto&& list) {
-       pin_list = std::move(list);
-      });
-    }).safe_then([&pin_list] {
-      return seastar::make_ready_future<backref_pin_list_t>(std::move(pin_list));
-    });
-  });
-}
-
-/**
- * gc_reclaim_space
- *
- * Runs one reclaim cycle. When no reclaim is in progress, the segment
- * returned by get_next_reclaim_segment() is chosen and a reclaim_state is
- * created for it. The state is then advanced by
- * config.reclaim_bytes_per_cycle, the extents referenced from
- * reclaim_state->start_pos ~ reclaim_state->end_pos are rewritten through
- * ecb->rewrite_extent() and the transaction is submitted. Once
- * reclaim_state->is_complete(), the segment is marked for release on the
- * transaction, the reclaim stats are folded in and reclaim_state is reset.
- */
-SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space()
-{
-  LOG_PREFIX(SegmentCleaner::gc_reclaim_space);
-  if (!reclaim_state) {
-    // no reclaim in flight: start on a new (closed) segment
-    segment_id_t seg_id = get_next_reclaim_segment();
-    auto &segment_info = segments[seg_id];
-    INFO("reclaim {} {} start", seg_id, segment_info);
-    ceph_assert(segment_info.is_closed());
-    reclaim_state = reclaim_state_t::create(
-        seg_id, segments.get_segment_size());
-  }
-  reclaim_state->advance(config.reclaim_bytes_per_cycle);
-
-  DEBUG("reclaiming {}~{}",
-        reclaim_state->start_pos,
-        reclaim_state->end_pos);
-  // sampled up front; only used for the duration/ratio log line at the end
-  double pavail_ratio = get_projected_available_ratio();
-  seastar::lowres_system_clock::time_point start = seastar::lowres_system_clock::now();
-
-  // reclaimed: bytes rewritten by the last attempt; runs: number of attempts
-  return seastar::do_with(
-    (size_t)0,
-    (size_t)0,
-    [this, pavail_ratio, start](
-      auto &reclaimed,
-      auto &runs) {
-    return retrieve_backref_mappings(
-      reclaim_state->start_pos,
-      reclaim_state->end_pos
-    ).safe_then([this, &reclaimed, &runs](auto pin_list) {
-      return seastar::do_with(
-       std::move(pin_list),
-       [this, &reclaimed, &runs](auto &pin_list) {
-       return repeat_eagain(
-         [this, &reclaimed, &runs, &pin_list]() mutable {
-         // every attempt restarts the byte count and bumps the attempt counter
-         reclaimed = 0;
-         runs++;
-         // fetch the cached backref extents, backrefs and backref removals
-         // of the range anew for this attempt
-         return seastar::do_with(
-           backref_manager.get_cached_backref_extents_in_range(
-             reclaim_state->start_pos, reclaim_state->end_pos),
-           backref_manager.get_cached_backrefs_in_range(
-             reclaim_state->start_pos, reclaim_state->end_pos),
-           backref_manager.get_cached_backref_removals_in_range(
-             reclaim_state->start_pos, reclaim_state->end_pos),
-           JOURNAL_SEQ_NULL,
-           [this, &reclaimed, &pin_list](
-             auto &backref_extents,
-             auto &backrefs,
-             auto &del_backrefs,
-             auto &seq) {
-           return ecb->with_transaction_intr(
-             Transaction::src_t::CLEANER_RECLAIM,
-             "reclaim_space",
-             [this, &backref_extents, &backrefs, &seq,
-             &del_backrefs, &reclaimed, &pin_list](auto &t) {
-             LOG_PREFIX(SegmentCleaner::gc_reclaim_space);
-             DEBUGT("{} backrefs, {} del_backrefs, {} pins", t,
-               backrefs.size(), del_backrefs.size(), pin_list.size());
-             // seq tracks the largest non-null journal seq seen so far
-             for (auto &br : backrefs) {
-               if (seq == JOURNAL_SEQ_NULL
-                   || (br.seq != JOURNAL_SEQ_NULL && br.seq > seq))
-                 seq = br.seq;
-             }
-             // add the pins retrieved from the backref manager to the cached
-             // backrefs; they are given a default-constructed journal_seq_t
-             for (auto &pin : pin_list) {
-               backrefs.emplace(
-                 pin->get_key(),
-                 pin->get_val(),
-                 pin->get_length(),
-                 pin->get_type(),
-                 journal_seq_t());
-             }
-             // drop every backref whose paddr has a cached removal
-             for (auto &del_backref : del_backrefs) {
-               DEBUGT("del_backref {}~{} {} {}", t,
-                 del_backref.paddr, del_backref.len, del_backref.type, del_backref.seq);
-               auto it = backrefs.find(del_backref.paddr);
-               if (it != backrefs.end())
-                 backrefs.erase(it);
-               if (seq == JOURNAL_SEQ_NULL
-                   || (del_backref.seq != JOURNAL_SEQ_NULL && del_backref.seq > seq))
-                 seq = del_backref.seq;
-             }
-             // extents collects everything that has to be rewritten
-             return seastar::do_with(
-               std::vector<CachedExtentRef>(),
-               [this, &backref_extents, &backrefs, &reclaimed, &t, &seq]
-               (auto &extents) {
-               return backref_manager.retrieve_backref_extents(
-                 t, std::move(backref_extents), extents
-               ).si_then([this, &extents, &t, &backrefs] {
-                 return _retrieve_live_extents(
-                   t, std::move(backrefs), extents);
-               }).si_then([this, &seq, &t](auto nseq) {
-                 if (nseq != JOURNAL_SEQ_NULL &&
-                     (nseq > seq || seq == JOURNAL_SEQ_NULL))
-                   seq = nseq;
-                 // merge cached backrefs only when some journal seq was found
-                 auto fut = BackrefManager::merge_cached_backrefs_iertr::now();
-                 if (seq != JOURNAL_SEQ_NULL) {
-                   fut = backref_manager.merge_cached_backrefs(
-                     t, seq, std::numeric_limits<uint64_t>::max()
-                   ).si_then([](auto) {
-                     return BackrefManager::merge_cached_backrefs_iertr::now();
-                   });
-                 }
-                 return fut;
-               }).si_then([&extents, this, &t, &reclaimed] {
-                 return trans_intr::do_for_each(
-                   extents,
-                   [this, &t, &reclaimed](auto &ext) {
-                   reclaimed += ext->get_length();
-                   return ecb->rewrite_extent(t, ext);
-                 });
-               });
-             }).si_then([this, &t, &seq] {
-               // the last cycle of a segment also schedules its release
-               if (reclaim_state->is_complete()) {
-                 t.mark_segment_to_release(reclaim_state->get_segment_id());
-               }
-               return ecb->submit_transaction_direct(
-                 t, std::make_optional<journal_seq_t>(std::move(seq)));
-             });
-           });
-         });
-       });
-      });
-    }).safe_then(
-      [&reclaimed, this, pavail_ratio, start, &runs] {
-      LOG_PREFIX(SegmentCleaner::gc_reclaim_space);
-#ifndef NDEBUG
-      // debug builds: no cached backref removal may remain in the range
-      auto ndel_backrefs =
-       backref_manager.get_cached_backref_removals_in_range(
-         reclaim_state->start_pos, reclaim_state->end_pos);
-      if (!ndel_backrefs.empty()) {
-       for (auto &del_br : ndel_backrefs) {
-         ERROR("unexpected del_backref {}~{} {} {}",
-           del_br.paddr, del_br.len, del_br.type, del_br.seq);
-       }
-       ceph_abort("impossible");
-      }
-#endif
-      stats.reclaiming_bytes += reclaimed;
-      auto d = seastar::lowres_system_clock::now() - start;
-      DEBUG("duration: {}, pavail_ratio before: {}, repeats: {}", d, pavail_ratio, runs);
-      if (reclaim_state->is_complete()) {
-       // whole segment processed: fold the per-segment counter into the totals
-       INFO("reclaim {} finish, alive/total={}",
-             reclaim_state->get_segment_id(),
-             stats.reclaiming_bytes/(double)segments.get_segment_size());
-       stats.reclaimed_bytes += stats.reclaiming_bytes;
-       stats.reclaimed_segment_bytes += segments.get_segment_size();
-       stats.reclaiming_bytes = 0;
-       reclaim_state.reset();
-      }
-    });
-  });
-}
-
-/**
- * mount
- *
- * Resets the cleaner's in-memory state (stats, journal tail/replay
- * positions, space tracker, segment table, metrics) and then reads the
- * header and tail of every tracked segment:
- *  - header missing (enoent/enodata): nothing is recorded for the segment
- *  - tail missing (enodata) or tail nonce differs from the header nonce:
- *    the segment is handed to scan_nonfull_segment()
- *  - otherwise the tail supplies the modify/rewrite times and, for journal
- *    segments, the journal tail; the segment is then recorded as closed.
- */
-SegmentCleaner::mount_ret SegmentCleaner::mount()
-{
-  LOG_PREFIX(SegmentCleaner::mount);
-  const auto& sms = sm_group->get_segment_managers();
-  INFO("{} segment managers", sms.size());
-  init_complete = false;
-  stats = {};
-  journal_tail_target = JOURNAL_SEQ_NULL;
-  journal_tail_committed = JOURNAL_SEQ_NULL;
-  dirty_extents_replay_from = JOURNAL_SEQ_NULL;
-  alloc_info_replay_from = JOURNAL_SEQ_NULL;
-  
-  // `detailed` selects which SpaceTrackerI implementation is used
-  space_tracker.reset(
-    detailed ?
-    (SpaceTrackerI*)new SpaceTrackerDetailed(
-      sms) :
-    (SpaceTrackerI*)new SpaceTrackerSimple(
-      sms));
-  
-  segments.reset();
-  for (auto sm : sms) {
-    segments.add_segment_manager(*sm);
-  }
-  metrics.clear();
-  register_metrics();
-
-  INFO("{} segments", segments.get_num_segments());
-  // NOTE(review): segment_set is only forwarded to scan_nonfull_segment(),
-  // whose visible body does not reference it
-  return seastar::do_with(
-    std::vector<std::pair<segment_id_t, segment_header_t>>(),
-    [this, FNAME](auto& segment_set) {
-    return crimson::do_for_each(
-      segments.begin(),
-      segments.end(),
-      [this, FNAME, &segment_set](auto& it) {
-       auto segment_id = it.first;
-       return sm_group->read_segment_header(
-         segment_id
-       ).safe_then([segment_id, this, FNAME, &segment_set](auto header) {
-         INFO("segment_id={} -- {}", segment_id, header);
-         auto s_type = header.get_type();
-         if (s_type == segment_type_t::NULL_SEG) {
-           ERROR("got null segment, segment_id={} -- {}", segment_id, header);
-           ceph_abort();
-         }
-         return sm_group->read_segment_tail(
-           segment_id
-         ).safe_then([this, segment_id, &segment_set, header](auto tail)
-           -> scan_extents_ertr::future<> {
-           // a tail whose nonce differs from the header's is not used;
-           // the segment's records are scanned instead
-           if (tail.segment_nonce != header.segment_nonce) {
-             return scan_nonfull_segment(header, segment_set, segment_id);
-           }
-           time_point last_modified(duration(tail.last_modified));
-           time_point last_rewritten(duration(tail.last_rewritten));
-           segments.update_last_modified_rewritten(
-                segment_id, last_modified, last_rewritten);
-           if (tail.get_type() == segment_type_t::JOURNAL) {
-             update_journal_tail_committed(tail.journal_tail);
-             update_journal_tail_target(
-               tail.journal_tail,
-               tail.alloc_replay_from);
-           }
-           init_mark_segment_closed(
-             segment_id,
-             header.segment_seq,
-             header.type);
-           return seastar::now();
-         }).handle_error(
-           // no tail could be read: fall back to scanning the records
-           crimson::ct_error::enodata::handle(
-             [this, header, segment_id, &segment_set](auto) {
-             return scan_nonfull_segment(header, segment_set, segment_id);
-           }),
-           crimson::ct_error::pass_further_all{}
-         );
-       }).handle_error(
-         // a missing header is not an error here; the segment is skipped
-         crimson::ct_error::enoent::handle([](auto) {
-           return mount_ertr::now();
-         }),
-         crimson::ct_error::enodata::handle([](auto) {
-           return mount_ertr::now();
-         }),
-         crimson::ct_error::input_output_error::pass_further{},
-         crimson::ct_error::assert_all{"unexpected error"}
-       );
-      });
-  });
-}
-
-/**
- * scan_nonfull_segment
- *
- * Walks the valid records of segment_id, starting at offset 0 of the
- * segment, and records what the individual records reveal:
- *  - OOL segments: the record headers' commit times update the segment's
- *    last-modified (MODIFY) or last-rewritten (REWRITE) time
- *  - other segments: every ALLOC_TAIL delta is decoded into a
- *    journal_seq_t and passed to update_alloc_info_replay_from()
- * When the scan is done, the segment is recorded as closed using the
- * sequence and type from `header`.
- *
- * NOTE(review): segment_set is not referenced in this body.
- */
-SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment(
-  const segment_header_t& header,
-  scan_extents_ret_bare& segment_set,
-  segment_id_t segment_id)
-{
-  return seastar::do_with(
-    scan_valid_records_cursor({
-      segments[segment_id].seq,
-      paddr_t::make_seg_paddr(segment_id, 0)}),
-    [this, segment_id, segment_header=header](auto& cursor) {
-    return seastar::do_with(
-       SegmentManagerGroup::found_record_handler_t(
-       // NB: inside the handler `header` is the record group header;
-       // the segment header is available as `segment_header`
-       [this, segment_id, segment_header](
-         record_locator_t locator,
-         const record_group_header_t& header,
-         const bufferlist& mdbuf
-       ) mutable -> SegmentManagerGroup::scan_valid_records_ertr::future<> {
-       LOG_PREFIX(SegmentCleaner::scan_nonfull_segment);
-       if (segment_header.get_type() == segment_type_t::OOL) {
-         DEBUG("out-of-line segment {}, decodeing {} records",
-           segment_id,
-           header.records);
-         auto maybe_headers = try_decode_record_headers(header, mdbuf);
-         if (!maybe_headers) {
-           ERROR("unable to decode record headers for record group {}",
-             locator.record_block_base);
-           return crimson::ct_error::input_output_error::make();
-         }
-
-         // `header` below is the per-record header and shadows the
-         // record group header parameter
-         for (auto& header : *maybe_headers) {
-           mod_time_point_t ctime = header.commit_time;
-           auto commit_type = header.commit_type;
-           if (!ctime) {
-             ERROR("SegmentCleaner::scan_nonfull_segment: extent {} 0 commit_time",
-               ctime);
-             ceph_abort("0 commit_time");
-           }
-           time_point commit_time{duration(ctime)};
-           assert(commit_type == record_commit_type_t::MODIFY
-             || commit_type == record_commit_type_t::REWRITE);
-           // a default-constructed time_point leaves the other timestamp
-           // untouched (see segment_info_t::update_last_modified_rewritten)
-           if (commit_type == record_commit_type_t::MODIFY) {
-              segments.update_last_modified_rewritten(segment_id, commit_time, {});
-           }
-           if (commit_type == record_commit_type_t::REWRITE) {
-              segments.update_last_modified_rewritten(segment_id, {}, commit_time);
-           }
-         }
-       } else {
-         DEBUG("inline segment {}, decodeing {} records",
-           segment_id,
-           header.records);
-         auto maybe_record_deltas_list = try_decode_deltas(
-           header, mdbuf, locator.record_block_base);
-         if (!maybe_record_deltas_list) {
-           ERROR("unable to decode deltas for record {} at {}",
-                 header, locator);
-           return crimson::ct_error::input_output_error::make();
-         }
-         // only ALLOC_TAIL deltas are of interest here
-         for (auto &record_deltas : *maybe_record_deltas_list) {
-           for (auto &[ctime, delta] : record_deltas.deltas) {
-             if (delta.type == extent_types_t::ALLOC_TAIL) {
-               journal_seq_t seq;
-               decode(seq, delta.bl);
-               update_alloc_info_replay_from(seq);
-             }
-           }
-         }
-       }
-       return seastar::now();
-      }),
-      [&cursor, segment_header, this](auto& handler) {
-       return sm_group->scan_valid_records(
-         cursor,
-         segment_header.segment_nonce,
-         segments.get_segment_size(),
-         handler);
-      }
-    );
-  }).safe_then([this, segment_id, header](auto) {
-    // the value produced by the scan is ignored; the segment is recorded
-    // as closed
-    init_mark_segment_closed(
-      segment_id,
-      header.segment_seq,
-      header.type);
-    return seastar::now();
-  });
-}
-
-SegmentCleaner::release_ertr::future<>
-SegmentCleaner::maybe_release_segment(Transaction &t)
-{
-  auto to_release = t.get_segment_to_release();
-  if (to_release != NULL_SEG_ID) {
-    LOG_PREFIX(SegmentCleaner::maybe_release_segment);
-    INFOT("releasing segment {}", t, to_release);
-    return sm_group->release_segment(to_release
-    ).safe_then([this, FNAME, &t, to_release] {
-      auto old_usage = calc_utilization(to_release);
-      ceph_assert(old_usage == 0);
-      segments.mark_empty(to_release);
-      auto new_usage = calc_utilization(to_release);
-      adjust_segment_util(old_usage, new_usage);
-      INFOT("released, should_block_on_gc {}, projected_avail_ratio {}, "
-           "reclaim_ratio {}",
-           t,
-           should_block_on_gc(),
-           get_projected_available_ratio(),
-           get_reclaim_ratio());
-      if (space_tracker->get_usage(to_release) != 0) {
-        space_tracker->dump_usage(to_release);
-        ceph_abort();
-      }
-      maybe_wake_gc_blocked_io();
-    });
-  } else {
-    return SegmentManager::release_ertr::now();
-  }
-}
-
-/**
- * complete_init
- *
- * Flags initialization as complete. Unless trimming is disabled, the
- * journal head must be known at this point and the gc process is started.
- */
-void SegmentCleaner::complete_init()
-{
-  LOG_PREFIX(SegmentCleaner::complete_init);
-  if (!disable_trim) {
-    INFO("done, start GC");
-    ceph_assert(segments.get_journal_head() != JOURNAL_SEQ_NULL);
-    init_complete = true;
-    gc_process.start();
-  } else {
-    // trimming disabled: only record that init is done
-    init_complete = true;
-  }
-}
-
-void SegmentCleaner::mark_space_used(
-  paddr_t addr,
-  extent_len_t len,
-  time_point last_modified,
-  time_point last_rewritten,
-  bool init_scan)
-{
-  LOG_PREFIX(SegmentCleaner::mark_space_used);
-  if (addr.get_addr_type() != addr_types_t::SEGMENT) {
-    return;
-  }
-  auto& seg_addr = addr.as_seg_paddr();
-
-  if (!init_scan && !init_complete) {
-    return;
-  }
-
-  stats.used_bytes += len;
-  auto old_usage = calc_utilization(seg_addr.get_segment_id());
-  [[maybe_unused]] auto ret = space_tracker->allocate(
-    seg_addr.get_segment_id(),
-    seg_addr.get_segment_off(),
-    len);
-  auto new_usage = calc_utilization(seg_addr.get_segment_id());
-  adjust_segment_util(old_usage, new_usage);
-
-  // use the last extent's last modified time for the calculation of the projected
-  // time the segments' live extents are to stay unmodified; this is an approximation
-  // of the sprite lfs' segment "age".
-
-  segments.update_last_modified_rewritten(
-      seg_addr.get_segment_id(), last_modified, last_rewritten);
-
-  gc_process.maybe_wake_on_space_used();
-  assert(ret > 0);
-  DEBUG("segment {} new len: {}~{}, live_bytes: {}",
-        seg_addr.get_segment_id(),
-        addr,
-        len,
-        space_tracker->get_usage(seg_addr.get_segment_id()));
-}
-
-void SegmentCleaner::mark_space_free(
-  paddr_t addr,
-  extent_len_t len)
-{
-  LOG_PREFIX(SegmentCleaner::mark_space_free);
-  if (!init_complete) {
-    return;
-  }
-  if (addr.get_addr_type() != addr_types_t::SEGMENT) {
-    return;
-  }
-
-  ceph_assert(stats.used_bytes >= len);
-  stats.used_bytes -= len;
-  auto& seg_addr = addr.as_seg_paddr();
-
-  DEBUG("segment {} free len: {}~{}",
-        seg_addr.get_segment_id(), addr, len);
-  auto old_usage = calc_utilization(seg_addr.get_segment_id());
-  [[maybe_unused]] auto ret = space_tracker->release(
-    seg_addr.get_segment_id(),
-    seg_addr.get_segment_off(),
-    len);
-  auto new_usage = calc_utilization(seg_addr.get_segment_id());
-  adjust_segment_util(old_usage, new_usage);
-  maybe_wake_gc_blocked_io();
-  assert(ret >= 0);
-  DEBUG("segment {} free len: {}~{}, live_bytes: {}",
-        seg_addr.get_segment_id(),
-        addr,
-        len,
-        space_tracker->get_usage(seg_addr.get_segment_id()));
-}
-
-segment_id_t SegmentCleaner::get_next_reclaim_segment() const
-{
-  LOG_PREFIX(SegmentCleaner::get_next_reclaim_segment);
-  segment_id_t id = NULL_SEG_ID;
-  double max_benefit_cost = 0;
-  for (auto& [_id, segment_info] : segments) {
-    if (segment_info.is_closed() &&
-        !segment_info.is_in_journal(journal_tail_committed)) {
-      double benefit_cost = calc_gc_benefit_cost(_id);
-      if (benefit_cost > max_benefit_cost) {
-        id = _id;
-        max_benefit_cost = benefit_cost;
-      }
-    }
-  }
-  if (id != NULL_SEG_ID) {
-    DEBUG("segment {}, benefit_cost {}",
-          id, max_benefit_cost);
-    return id;
-  } else {
-    ceph_assert(get_segments_reclaimable() == 0);
-    // see gc_should_reclaim_space()
-    ceph_abort("impossible!");
-    return NULL_SEG_ID;
-  }
-}
-
-void SegmentCleaner::log_gc_state(const char *caller) const
-{
-  LOG_PREFIX(SegmentCleaner::log_gc_state);
-  if (LOCAL_LOGGER.is_enabled(seastar::log_level::debug) &&
-      !disable_trim) {
-    DEBUG(
-      "caller {}, "
-      "empty {}, "
-      "open {}, "
-      "closed {}, "
-      "in_journal {}, "
-      "total {}B, "
-      "available {}B, "
-      "unavailable {}B, "
-      "unavailable_used {}B, "
-      "unavailable_unused {}B; "
-      "reclaim_ratio {}, "
-      "available_ratio {}, "
-      "should_block_on_gc {}, "
-      "gc_should_reclaim_space {}, "
-      "journal_head {}, "
-      "journal_tail_target {}, "
-      "journal_tail_commit {}, "
-      "dirty_tail {}, "
-      "dirty_tail_limit {}, "
-      "gc_should_trim_journal {}, ",
-      caller,
-      segments.get_num_empty(),
-      segments.get_num_open(),
-      segments.get_num_closed(),
-      get_segments_in_journal(),
-      segments.get_total_bytes(),
-      segments.get_available_bytes(),
-      segments.get_unavailable_bytes(),
-      stats.used_bytes,
-      get_unavailable_unused_bytes(),
-      get_reclaim_ratio(),
-      segments.get_available_ratio(),
-      should_block_on_gc(),
-      gc_should_reclaim_space(),
-      segments.get_journal_head(),
-      journal_tail_target,
-      journal_tail_committed,
-      get_dirty_tail(),
-      get_dirty_tail_limit(),
-      gc_should_trim_journal()
-    );
-  }
-}
-
-seastar::future<>
-SegmentCleaner::reserve_projected_usage(std::size_t projected_usage)
-{
-  if (disable_trim) {
-    return seastar::now();
-  }
-  ceph_assert(init_complete);
-  // The pipeline configuration prevents another IO from entering
-  // prepare until the prior one exits and clears this.
-  ceph_assert(!blocked_io_wake);
-  ++stats.io_count;
-  bool is_blocked = false;
-  if (should_block_on_trim()) {
-    is_blocked = true;
-    ++stats.io_blocked_count_trim;
-  }
-  if (should_block_on_reclaim()) {
-    is_blocked = true;
-    ++stats.io_blocked_count_reclaim;
-  }
-  if (is_blocked) {
-    ++stats.io_blocking_num;
-    ++stats.io_blocked_count;
-    stats.io_blocked_sum += stats.io_blocking_num;
-  }
-  return seastar::do_until(
-    [this] {
-      log_gc_state("await_hard_limits");
-      return !should_block_on_gc();
-    },
-    [this] {
-      blocked_io_wake = seastar::promise<>();
-      return blocked_io_wake->get_future();
-    }
-  ).then([this, projected_usage, is_blocked] {
-    ceph_assert(!blocked_io_wake);
-    stats.projected_used_bytes += projected_usage;
-    ++stats.projected_count;
-    stats.projected_used_bytes_sum += stats.projected_used_bytes;
-    if (is_blocked) {
-      assert(stats.io_blocking_num > 0);
-      --stats.io_blocking_num;
-    }
-  });
-}
-
-/**
- * release_projected_usage
- *
- * Gives back a reservation made by reserve_projected_usage() and wakes
- * blocked IO if possible. Does nothing when trimming is disabled.
- */
-void SegmentCleaner::release_projected_usage(std::size_t projected_usage)
-{
-  if (!disable_trim) {
-    ceph_assert(init_complete);
-    ceph_assert(stats.projected_used_bytes >= projected_usage);
-    stats.projected_used_bytes -= projected_usage;
-    maybe_wake_gc_blocked_io();
-  }
-}
-
-}
diff --git a/src/crimson/os/seastore/segment_cleaner.h b/src/crimson/os/seastore/segment_cleaner.h
deleted file mode 100644 (file)
index c28e7b3..0000000
+++ /dev/null
@@ -1,1296 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <boost/intrusive/set.hpp>
-#include <seastar/core/metrics_types.hh>
-
-#include "common/ceph_time.h"
-
-#include "osd/osd_types.h"
-
-#include "crimson/os/seastore/backref_manager.h"
-#include "crimson/os/seastore/cached_extent.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "crimson/os/seastore/segment_manager.h"
-#include "crimson/os/seastore/segment_manager_group.h"
-#include "crimson/os/seastore/transaction.h"
-#include "crimson/os/seastore/segment_seq_allocator.h"
-
-namespace crimson::os::seastore {
-
-/*
- * segment_info_t
- *
- * Maintains the tracked information for a segment.
- * It is read-only outside segments_info_t.
- */
-struct segment_info_t {
-  using time_point = seastar::lowres_system_clock::time_point;
-
-  // segment_info_t is initiated as set_empty()
-  Segment::segment_state_t state = Segment::segment_state_t::EMPTY;
-
-  // Will be non-null for any segments in the current journal
-  segment_seq_t seq = NULL_SEG_SEQ;
-
-  segment_type_t type = segment_type_t::NULL_SEG;
-
-  time_point last_modified;
-  time_point last_rewritten;
-
-  std::size_t written_to = 0;
-
-  bool is_in_journal(journal_seq_t tail_committed) const {
-    return type == segment_type_t::JOURNAL &&
-           tail_committed.segment_seq <= seq;
-  }
-
-  bool is_empty() const {
-    return state == Segment::segment_state_t::EMPTY;
-  }
-
-  bool is_closed() const {
-    return state == Segment::segment_state_t::CLOSED;
-  }
-
-  bool is_open() const {
-    return state == Segment::segment_state_t::OPEN;
-  }
-
-  void init_closed(segment_seq_t, segment_type_t, std::size_t);
-
-  void set_open(segment_seq_t, segment_type_t);
-
-  void set_empty();
-
-  void set_closed();
-
-  void update_last_modified_rewritten(
-      time_point _last_modified, time_point _last_rewritten) {
-    if (_last_modified != time_point() && last_modified < _last_modified) {
-      last_modified = _last_modified;
-    }
-    if (_last_rewritten != time_point() && last_rewritten < _last_rewritten) {
-      last_rewritten = _last_rewritten;
-    }
-  }
-};
-
-std::ostream& operator<<(std::ostream&, const segment_info_t&);
-
-/*
- * segments_info_t
- *
- * Keep track of all segments and related information.
- */
-class segments_info_t {
-public:
-  using time_point = seastar::lowres_system_clock::time_point;
-
-  segments_info_t() {
-    reset();
-  }
-
-  const segment_info_t& operator[](segment_id_t id) const {
-    return segments[id];
-  }
-
-  auto begin() const {
-    return segments.begin();
-  }
-
-  auto end() const {
-    return segments.end();
-  }
-
-  std::size_t get_num_segments() const {
-    assert(segments.size() > 0);
-    return segments.size();
-  }
-  std::size_t get_segment_size() const {
-    assert(segment_size > 0);
-    return segment_size;
-  }
-  std::size_t get_num_in_journal_open() const {
-    return num_in_journal_open;
-  }
-  std::size_t get_num_type_journal() const {
-    return num_type_journal;
-  }
-  std::size_t get_num_type_ool() const {
-    return num_type_ool;
-  }
-  std::size_t get_num_open() const {
-    return num_open;
-  }
-  std::size_t get_num_empty() const {
-    return num_empty;
-  }
-  std::size_t get_num_closed() const {
-    return num_closed;
-  }
-  std::size_t get_count_open_journal() const {
-    return count_open_journal;
-  }
-  std::size_t get_count_open_ool() const {
-    return count_open_ool;
-  }
-  std::size_t get_count_release_journal() const {
-    return count_release_journal;
-  }
-  std::size_t get_count_release_ool() const {
-    return count_release_ool;
-  }
-  std::size_t get_count_close_journal() const {
-    return count_close_journal;
-  }
-  std::size_t get_count_close_ool() const {
-    return count_close_ool;
-  }
-
-  std::size_t get_total_bytes() const {
-    return total_bytes;
-  }
-  /// the available space that is writable, including in open segments
-  std::size_t get_available_bytes() const {
-    return num_empty * get_segment_size() + avail_bytes_in_open;
-  }
-  /// the unavailable space that is not writable
-  std::size_t get_unavailable_bytes() const {
-    assert(total_bytes >= get_available_bytes());
-    return total_bytes - get_available_bytes();
-  }
-  std::size_t get_available_bytes_in_open() const {
-    return avail_bytes_in_open;
-  }
-  double get_available_ratio() const {
-    return (double)get_available_bytes() / (double)total_bytes;
-  }
-
-  journal_seq_t get_journal_head() const {
-    if (unlikely(journal_segment_id == NULL_SEG_ID)) {
-      return JOURNAL_SEQ_NULL;
-    }
-    auto &segment_info = segments[journal_segment_id];
-    assert(!segment_info.is_empty());
-    assert(segment_info.type == segment_type_t::JOURNAL);
-    assert(segment_info.seq != NULL_SEG_SEQ);
-    return journal_seq_t{
-      segment_info.seq,
-      paddr_t::make_seg_paddr(
-        journal_segment_id,
-        segment_info.written_to)
-    };
-  }
-
-  void reset();
-
-  void add_segment_manager(SegmentManager &segment_manager);
-
-  // initiate non-empty segments, the others are by default empty
-  void init_closed(segment_id_t, segment_seq_t, segment_type_t);
-
-  void mark_open(segment_id_t, segment_seq_t, segment_type_t);
-
-  void mark_empty(segment_id_t);
-
-  void mark_closed(segment_id_t);
-
-  void update_written_to(segment_type_t, paddr_t);
-
-  void update_last_modified_rewritten(
-      segment_id_t id, time_point last_modified, time_point last_rewritten) {
-    segments[id].update_last_modified_rewritten(last_modified, last_rewritten);
-  }
-
-private:
-  // See reset() for member initialization
-  segment_map_t<segment_info_t> segments;
-
-  std::size_t segment_size;
-
-  segment_id_t journal_segment_id;
-  std::size_t num_in_journal_open;
-  std::size_t num_type_journal;
-  std::size_t num_type_ool;
-
-  std::size_t num_open;
-  std::size_t num_empty;
-  std::size_t num_closed;
-
-  std::size_t count_open_journal;
-  std::size_t count_open_ool;
-  std::size_t count_release_journal;
-  std::size_t count_release_ool;
-  std::size_t count_close_journal;
-  std::size_t count_close_ool;
-
-  std::size_t total_bytes;
-  std::size_t avail_bytes_in_open;
-};
-
-/**
- * Callback interface for managing available segments
- *
- * Pure-virtual interface; SegmentCleaner derives from it in this header.
- * The methods fall into three groups: journal/replay position queries,
- * segment allocation and closing, and notifications about the committed
- * journal tail and the bytes available in a segment.
- * NOTE(review): the exact contract of each method is defined by the
- * implementation and its callers; confirm there before relying on it.
- */
-class SegmentProvider {
-public:
-  virtual journal_seq_t get_journal_tail_target() const = 0;
-
-  virtual const segment_info_t& get_seg_info(segment_id_t id) const = 0;
-
-  virtual segment_id_t allocate_segment(
-      segment_seq_t seq, segment_type_t type) = 0;
-
-  virtual journal_seq_t get_dirty_extents_replay_from() const = 0;
-
-  virtual journal_seq_t get_alloc_info_replay_from() const = 0;
-
-  virtual void close_segment(segment_id_t) = 0;
-
-  virtual void update_journal_tail_committed(journal_seq_t tail_committed) = 0;
-
-  virtual void update_segment_avail_bytes(segment_type_t, paddr_t) = 0;
-
-  virtual SegmentManagerGroup* get_segment_manager_group() = 0;
-
-  // virtual so that implementations can be destroyed through this interface
-  virtual ~SegmentProvider() {}
-};
-
-/**
- * SpaceTrackerI
- *
- * Interface for per-segment live-space accounting. Two implementations
- * are declared in this header: SpaceTrackerSimple and
- * SpaceTrackerDetailed.
- */
-class SpaceTrackerI {
-public:
-  // records len bytes at offset of segment as in use; SpaceTrackerSimple
-  // returns the segment's live byte count after the update
-  virtual int64_t allocate(
-    segment_id_t segment,
-    seastore_off_t offset,
-    extent_len_t len) = 0;
-
-  // counterpart of allocate(): records len bytes at offset as released
-  virtual int64_t release(
-    segment_id_t segment,
-    seastore_off_t offset,
-    extent_len_t len) = 0;
-
-  // live byte count currently recorded for segment
-  virtual int64_t get_usage(
-    segment_id_t segment) const = 0;
-
-  virtual bool equals(const SpaceTrackerI &other) const = 0;
-
-  // both implementations in this header return a copy of themselves on
-  // which reset() has been called
-  virtual std::unique_ptr<SpaceTrackerI> make_empty() const = 0;
-
-  virtual void dump_usage(segment_id_t) const = 0;
-
-  // both implementations in this header return live bytes / total bytes
-  // of the segment
-  virtual double calc_utilization(segment_id_t segment) const = 0;
-
-  virtual void reset() = 0;
-
-  virtual ~SpaceTrackerI() = default;
-};
-using SpaceTrackerIRef = std::unique_ptr<SpaceTrackerI>;
-
-class SpaceTrackerSimple : public SpaceTrackerI {
-  struct segment_bytes_t {
-    int64_t live_bytes = 0;
-    seastore_off_t total_bytes = 0;
-  };
-  // Tracks live space for each segment
-  segment_map_t<segment_bytes_t> live_bytes_by_segment;
-
-  int64_t update_usage(segment_id_t segment, int64_t delta) {
-    live_bytes_by_segment[segment].live_bytes += delta;
-    assert(live_bytes_by_segment[segment].live_bytes >= 0);
-    return live_bytes_by_segment[segment].live_bytes;
-  }
-public:
-  SpaceTrackerSimple(const SpaceTrackerSimple &) = default;
-  SpaceTrackerSimple(const std::vector<SegmentManager*> &sms) {
-    for (auto sm : sms) {
-      live_bytes_by_segment.add_device(
-       sm->get_device_id(),
-       sm->get_num_segments(),
-       {0, sm->get_segment_size()});
-    }
-  }
-
-  int64_t allocate(
-    segment_id_t segment,
-    seastore_off_t offset,
-    extent_len_t len) final {
-    return update_usage(segment, len);
-  }
-
-  int64_t release(
-    segment_id_t segment,
-    seastore_off_t offset,
-    extent_len_t len) final {
-    return update_usage(segment, -(int64_t)len);
-  }
-
-  int64_t get_usage(segment_id_t segment) const final {
-    return live_bytes_by_segment[segment].live_bytes;
-  }
-
-  double calc_utilization(segment_id_t segment) const final {
-    auto& seg_bytes = live_bytes_by_segment[segment];
-    return (double)seg_bytes.live_bytes / (double)seg_bytes.total_bytes;
-  }
-
-  void dump_usage(segment_id_t) const final;
-
-  void reset() final {
-    for (auto &i : live_bytes_by_segment) {
-      i.second = {0, 0};
-    }
-  }
-
-  SpaceTrackerIRef make_empty() const final {
-    auto ret = SpaceTrackerIRef(new SpaceTrackerSimple(*this));
-    ret->reset();
-    return ret;
-  }
-
-  bool equals(const SpaceTrackerI &other) const;
-};
-
-class SpaceTrackerDetailed : public SpaceTrackerI {
-  class SegmentMap {
-    int64_t used = 0;
-    seastore_off_t total_bytes = 0;
-    std::vector<bool> bitmap;
-
-  public:
-    SegmentMap(
-      size_t blocks,
-      seastore_off_t total_bytes)
-    : total_bytes(total_bytes),
-      bitmap(blocks, false) {}
-
-    int64_t update_usage(int64_t delta) {
-      used += delta;
-      return used;
-    }
-
-    int64_t allocate(
-      device_segment_id_t segment,
-      seastore_off_t offset,
-      extent_len_t len,
-      const extent_len_t block_size);
-
-    int64_t release(
-      device_segment_id_t segment,
-      seastore_off_t offset,
-      extent_len_t len,
-      const extent_len_t block_size);
-
-    int64_t get_usage() const {
-      return used;
-    }
-
-    void dump_usage(extent_len_t block_size) const;
-
-    double calc_utilization() const {
-      return (double)used / (double)total_bytes;
-    }
-
-    void reset() {
-      used = 0;
-      for (auto &&i: bitmap) {
-       i = false;
-      }
-    }
-  };
-
-  // Tracks live space for each segment
-  segment_map_t<SegmentMap> segment_usage;
-  std::vector<size_t> block_size_by_segment_manager;
-
-public:
-  SpaceTrackerDetailed(const SpaceTrackerDetailed &) = default;
-  SpaceTrackerDetailed(const std::vector<SegmentManager*> &sms)
-  {
-    block_size_by_segment_manager.resize(DEVICE_ID_MAX, 0);
-    for (auto sm : sms) {
-      segment_usage.add_device(
-       sm->get_device_id(),
-       sm->get_num_segments(),
-       SegmentMap(
-         sm->get_segment_size() / sm->get_block_size(),
-         sm->get_segment_size()));
-      block_size_by_segment_manager[sm->get_device_id()] = sm->get_block_size();
-    }
-  }
-
-  int64_t allocate(
-    segment_id_t segment,
-    seastore_off_t offset,
-    extent_len_t len) final {
-    return segment_usage[segment].allocate(
-      segment.device_segment_id(),
-      offset,
-      len,
-      block_size_by_segment_manager[segment.device_id()]);
-  }
-
-  int64_t release(
-    segment_id_t segment,
-    seastore_off_t offset,
-    extent_len_t len) final {
-    return segment_usage[segment].release(
-      segment.device_segment_id(),
-      offset,
-      len,
-      block_size_by_segment_manager[segment.device_id()]);
-  }
-
-  int64_t get_usage(segment_id_t segment) const final {
-    return segment_usage[segment].get_usage();
-  }
-
-  double calc_utilization(segment_id_t segment) const final {
-    return segment_usage[segment].calc_utilization();
-  }
-
-  void dump_usage(segment_id_t seg) const final;
-
-  void reset() final {
-    for (auto &i: segment_usage) {
-      i.second.reset();
-    }
-  }
-
-  SpaceTrackerIRef make_empty() const final {
-    auto ret = SpaceTrackerIRef(new SpaceTrackerDetailed(*this));
-    ret->reset();
-    return ret;
-  }
-
-  bool equals(const SpaceTrackerI &other) const;
-};
-
-
-class SegmentCleaner : public SegmentProvider {
-public:
-  using time_point = seastar::lowres_system_clock::time_point;
-  using duration = seastar::lowres_system_clock::duration;
-
-  /// Config
-  struct config_t {
-    /// Number of minimum journal segments to stop trimming.
-    size_t target_journal_segments = 0;
-    /// Number of maximum journal segments to block user transactions.
-    size_t max_journal_segments = 0;
-
-    /// Number of journal segments the transactions in which can
-    /// have their corresponding backrefs unmerged
-    size_t target_backref_inflight_segments = 0;
-
-    /// Ratio of maximum available space to disable reclaiming.
-    double available_ratio_gc_max = 0;
-    /// Ratio of minimum available space to force reclaiming.
-    double available_ratio_hard_limit = 0;
-
-    /// Ratio of minimum reclaimable space to stop reclaiming.
-    double reclaim_ratio_gc_threshold = 0;
-
-    /// Number of bytes to reclaim per cycle
-    size_t reclaim_bytes_per_cycle = 0;
-
-    /// Number of bytes to rewrite dirty per cycle
-    size_t rewrite_dirty_bytes_per_cycle = 0;
-
-    /// Number of bytes to rewrite backref per cycle
-    size_t rewrite_backref_bytes_per_cycle = 0;
-
-    void validate() const {
-      ceph_assert(max_journal_segments > target_journal_segments);
-      ceph_assert(available_ratio_gc_max > available_ratio_hard_limit);
-      ceph_assert(reclaim_bytes_per_cycle > 0);
-      ceph_assert(rewrite_dirty_bytes_per_cycle > 0);
-      ceph_assert(rewrite_backref_bytes_per_cycle > 0);
-    }
-
-    static config_t get_default() {
-      return config_t{
-         12,   // target_journal_segments
-         16,   // max_journal_segments
-         2,    // target_backref_inflight_segments
-         .1,   // available_ratio_gc_max
-         .05,  // available_ratio_hard_limit
-         .1,   // reclaim_ratio_gc_threshold
-         1<<20,// reclaim_bytes_per_cycle
-         1<<17,// rewrite_dirty_bytes_per_cycle
-         1<<24 // rewrite_backref_bytes_per_cycle
-       };
-    }
-
-    static config_t get_test() {
-      return config_t{
-         2,    // target_journal_segments
-         4,    // max_journal_segments
-         2,    // target_backref_inflight_segments
-         .99,  // available_ratio_gc_max
-         .2,   // available_ratio_hard_limit
-         .6,   // reclaim_ratio_gc_threshold
-         1<<20,// reclaim_bytes_per_cycle
-         1<<17,// rewrite_dirty_bytes_per_cycle
-         1<<24 // rewrite_backref_bytes_per_cycle
-       };
-    }
-  };
-
-  /// Callback interface for querying and operating on segments
-  class ExtentCallbackInterface {
-  public:
-    virtual ~ExtentCallbackInterface() = default;
-
-    virtual TransactionRef create_transaction(
-        Transaction::src_t, const char*) = 0;
-
-    /// Creates empty transaction with interruptible context
-    template <typename Func>
-    auto with_transaction_intr(
-        Transaction::src_t src,
-        const char* name,
-        Func &&f) {
-      return seastar::do_with(
-        create_transaction(src, name),
-        [f=std::forward<Func>(f)](auto &ref_t) mutable {
-          return with_trans_intr(
-            *ref_t,
-            [f=std::forward<Func>(f)](auto& t) mutable {
-              return f(t);
-            }
-          );
-        }
-      );
-    }
-
-    /// See Cache::get_next_dirty_extents
-    using get_next_dirty_extents_iertr = trans_iertr<
-      crimson::errorator<
-        crimson::ct_error::input_output_error>
-      >;
-    using get_next_dirty_extents_ret = get_next_dirty_extents_iertr::future<
-      std::vector<CachedExtentRef>>;
-    virtual get_next_dirty_extents_ret get_next_dirty_extents(
-      Transaction &t,     ///< [in] current transaction
-      journal_seq_t bound,///< [in] return extents with dirty_from < bound
-      size_t max_bytes    ///< [in] return up to max_bytes of extents
-    ) = 0;
-
-    using extent_mapping_ertr = crimson::errorator<
-      crimson::ct_error::input_output_error,
-      crimson::ct_error::eagain>;
-    using extent_mapping_iertr = trans_iertr<
-      crimson::errorator<
-       crimson::ct_error::input_output_error>
-      >;
-
-    /**
-     * rewrite_extent
-     *
-     * Updates t with operations moving the passed extents to a new
-     * segment.  extent may be invalid, implementation must correctly
-     * handle finding the current instance if it is still alive and
-     * otherwise ignore it.
-     */
-    using rewrite_extent_iertr = extent_mapping_iertr;
-    using rewrite_extent_ret = rewrite_extent_iertr::future<>;
-    virtual rewrite_extent_ret rewrite_extent(
-      Transaction &t,
-      CachedExtentRef extent) = 0;
-
-    /**
-     * get_extent_if_live
-     *
-     * Returns extent at specified location if still referenced by
-     * lba_manager and not removed by t.
-     *
-     * See TransactionManager::get_extent_if_live and
-     * LBAManager::get_physical_extent_if_live.
-     */
-    using get_extent_if_live_iertr = extent_mapping_iertr;
-    using get_extent_if_live_ret = get_extent_if_live_iertr::future<
-      CachedExtentRef>;
-    virtual get_extent_if_live_ret get_extent_if_live(
-      Transaction &t,
-      extent_types_t type,
-      paddr_t addr,
-      laddr_t laddr,
-      seastore_off_t len) = 0;
-
-    /**
-     * submit_transaction_direct
-     *
-     * Submits transaction without any space throttling.
-     */
-    using submit_transaction_direct_iertr = trans_iertr<
-      crimson::errorator<
-        crimson::ct_error::input_output_error>
-      >;
-    using submit_transaction_direct_ret =
-      submit_transaction_direct_iertr::future<>;
-    virtual submit_transaction_direct_ret submit_transaction_direct(
-      Transaction &t,
-      std::optional<journal_seq_t> seq_to_trim = std::nullopt) = 0;
-  };
-
-private:
-  const bool detailed;
-  const config_t config;
-
-  SegmentManagerGroupRef sm_group;
-  BackrefManager &backref_manager;
-
-  SpaceTrackerIRef space_tracker;
-  segments_info_t segments;
-  bool init_complete = false;
-
-  struct {
-    /**
-     * used_bytes
-     *
-     * Bytes occupied by live extents
-     */
-    uint64_t used_bytes = 0;
-
-    /**
-     * projected_used_bytes
-     *
-     * Sum of projected bytes used by each transaction between throttle
-     * acquisition and commit completion.  See reserve_projected_usage()
-     */
-    uint64_t projected_used_bytes = 0;
-    uint64_t projected_count = 0;
-    uint64_t projected_used_bytes_sum = 0;
-
-    uint64_t closed_journal_used_bytes = 0;
-    uint64_t closed_journal_total_bytes = 0;
-    uint64_t closed_ool_used_bytes = 0;
-    uint64_t closed_ool_total_bytes = 0;
-
-    uint64_t io_blocking_num = 0;
-    uint64_t io_count = 0;
-    uint64_t io_blocked_count = 0;
-    uint64_t io_blocked_count_trim = 0;
-    uint64_t io_blocked_count_reclaim = 0;
-    uint64_t io_blocked_sum = 0;
-
-    uint64_t reclaiming_bytes = 0;
-    uint64_t reclaimed_bytes = 0;
-    uint64_t reclaimed_segment_bytes = 0;
-
-    seastar::metrics::histogram segment_util;
-  } stats;
-  seastar::metrics::metric_group metrics;
-  void register_metrics();
-
-  /// target journal_tail for next fresh segment
-  journal_seq_t journal_tail_target;
-
-  /// target replay_from for dirty extents
-  journal_seq_t dirty_extents_replay_from;
-
-  /// target replay_from for alloc infos
-  journal_seq_t alloc_info_replay_from;
-
-  /// most recently committed journal_tail
-  journal_seq_t journal_tail_committed;
-
-  ExtentCallbackInterface *ecb = nullptr;
-
-  /// populated if there is an IO blocked on hard limits
-  std::optional<seastar::promise<>> blocked_io_wake;
-
-  SegmentSeqAllocatorRef ool_segment_seq_allocator;
-
-  /**
-   * disable_trim
-   *
-   * added to enable unit testing of CircularBoundedJournal before
-   * proper support is added to SegmentCleaner.
-   * Should be removed once proper support is added. TODO
-   */
-  bool disable_trim = false;
-public:
-  SegmentCleaner(
-    config_t config,
-    SegmentManagerGroupRef&& sm_group,
-    BackrefManager &backref_manager,
-    bool detailed = false);
-
-  SegmentSeqAllocator& get_ool_segment_seq_allocator() {
-    return *ool_segment_seq_allocator;
-  }
-
-  using mount_ertr = crimson::errorator<
-    crimson::ct_error::input_output_error>;
-  using mount_ret = mount_ertr::future<>;
-  mount_ret mount();
-
-  /*
-   * SegmentProvider interfaces
-   */
-  journal_seq_t get_journal_tail_target() const final {
-    return journal_tail_target;
-  }
-
-  const segment_info_t& get_seg_info(segment_id_t id) const final {
-    return segments[id];
-  }
-
-  segment_id_t allocate_segment(
-      segment_seq_t seq, segment_type_t type) final;
-
-  void close_segment(segment_id_t segment) final;
-
-  void update_journal_tail_committed(journal_seq_t committed) final;
-
-  void update_segment_avail_bytes(segment_type_t type, paddr_t offset) final {
-    segments.update_written_to(type, offset);
-    gc_process.maybe_wake_on_space_used();
-  }
-
-  SegmentManagerGroup* get_segment_manager_group() final {
-    return sm_group.get();
-  }
-
-  journal_seq_t get_dirty_extents_replay_from() const final {
-    return dirty_extents_replay_from;
-  }
-
-  journal_seq_t get_alloc_info_replay_from() const final {
-    return alloc_info_replay_from;
-  }
-
-  void update_journal_tail_target(
-    journal_seq_t dirty_replay_from,
-    journal_seq_t alloc_replay_from);
-
-  void update_alloc_info_replay_from(
-    journal_seq_t alloc_replay_from);
-
-  void init_mkfs() {
-    auto journal_head = segments.get_journal_head();
-    ceph_assert(disable_trim || journal_head != JOURNAL_SEQ_NULL);
-    journal_tail_target = journal_head;
-    journal_tail_committed = journal_head;
-  }
-
-  using release_ertr = SegmentManagerGroup::release_ertr;
-  release_ertr::future<> maybe_release_segment(Transaction &t);
-
-  void adjust_segment_util(double old_usage, double new_usage) {
-    auto old_index = get_bucket_index(old_usage);
-    auto new_index = get_bucket_index(new_usage);
-    assert(stats.segment_util.buckets[old_index].count > 0);
-    stats.segment_util.buckets[old_index].count--;
-    stats.segment_util.buckets[new_index].count++;
-  }
-
-  void mark_space_used(
-    paddr_t addr,
-    extent_len_t len,
-    time_point last_modified = time_point(),
-    time_point last_rewritten = time_point(),
-    bool init_scan = false);
-
-  void mark_space_free(
-    paddr_t addr,
-    extent_len_t len);
-
-  SpaceTrackerIRef get_empty_space_tracker() const {
-    return space_tracker->make_empty();
-  }
-
-  void complete_init();
-
-  store_statfs_t stat() const {
-    store_statfs_t st;
-    st.total = segments.get_total_bytes();
-    st.available = segments.get_total_bytes() - stats.used_bytes;
-    st.allocated = stats.used_bytes;
-    st.data_stored = stats.used_bytes;
-
-    // TODO add per extent type counters for omap_allocated and
-    // internal metadata
-    return st;
-  }
-
-  seastar::future<> stop() {
-    return gc_process.stop();
-  }
-
-  seastar::future<> run_until_halt() {
-    return gc_process.run_until_halt();
-  }
-
-  void set_extent_callback(ExtentCallbackInterface *cb) {
-    ecb = cb;
-  }
-
-  bool debug_check_space(const SpaceTrackerI &tracker) {
-    return space_tracker->equals(tracker);
-  }
-
-  void set_disable_trim(bool val) {
-    disable_trim = val;
-  }
-
-  using work_ertr = ExtentCallbackInterface::extent_mapping_ertr;
-  using work_iertr = ExtentCallbackInterface::extent_mapping_iertr;
-
-private:
-  /*
-   * 10 buckets for the number of closed segments by usage
-   * 2 extra buckets for the number of open and empty segments
-   */
-  static constexpr double UTIL_STATE_OPEN = 1.05;
-  static constexpr double UTIL_STATE_EMPTY = 1.15;
-  static constexpr std::size_t UTIL_BUCKETS = 12;
-  static std::size_t get_bucket_index(double util) {
-    auto index = std::floor(util * 10);
-    assert(index < UTIL_BUCKETS);
-    return index;
-  }
-  double calc_utilization(segment_id_t id) const {
-    auto& info = segments[id];
-    if (info.is_open()) {
-      return UTIL_STATE_OPEN;
-    } else if (info.is_empty()) {
-      return UTIL_STATE_EMPTY;
-    } else {
-      auto ret = space_tracker->calc_utilization(id);
-      assert(ret >= 0 && ret < 1);
-      return ret;
-    }
-  }
-
-  // journal status helpers
-
-  double calc_gc_benefit_cost(segment_id_t id) const {
-    double util = calc_utilization(id);
-    ceph_assert(util >= 0 && util < 1);
-    auto cur_time = seastar::lowres_system_clock::now();
-    auto segment = segments[id];
-    assert(cur_time >= segment.last_modified);
-    auto segment_age =
-      cur_time - std::max(segment.last_modified, segment.last_rewritten);
-    uint64_t age = segment_age.count();
-    return (1 - util) * age / (1 + util);
-  }
-
-  segment_id_t get_next_reclaim_segment() const;
-
-  /**
-   * rewrite_dirty
-   *
-   * Writes out dirty blocks dirtied earlier than limit.
-   */
-  using rewrite_dirty_iertr = work_iertr;
-  using rewrite_dirty_ret = rewrite_dirty_iertr::future<>;
-  rewrite_dirty_ret rewrite_dirty(
-    Transaction &t,
-    journal_seq_t limit);
-
-  using trim_backrefs_iertr = work_iertr;
-  using trim_backrefs_ret = trim_backrefs_iertr::future<journal_seq_t>;
-  trim_backrefs_ret trim_backrefs(
-    Transaction &t,
-    journal_seq_t limit);
-
-  journal_seq_t get_dirty_tail() const {
-    auto ret = segments.get_journal_head();
-    ceph_assert(ret != JOURNAL_SEQ_NULL);
-    if (ret.segment_seq >= config.target_journal_segments) {
-      ret.segment_seq -= config.target_journal_segments;
-    } else {
-      ret.segment_seq = 0;
-      ret.offset = P_ADDR_MIN;
-    }
-    return ret;
-  }
-
-  journal_seq_t get_dirty_tail_limit() const {
-    auto ret = segments.get_journal_head();
-    ceph_assert(ret != JOURNAL_SEQ_NULL);
-    if (ret.segment_seq >= config.max_journal_segments) {
-      ret.segment_seq -= config.max_journal_segments;
-    } else {
-      ret.segment_seq = 0;
-      ret.offset = P_ADDR_MIN;
-    }
-    return ret;
-  }
-
-  journal_seq_t get_backref_tail() const {
-    auto ret = segments.get_journal_head();
-    ceph_assert(ret != JOURNAL_SEQ_NULL);
-    if (ret.segment_seq >= config.target_backref_inflight_segments) {
-      ret.segment_seq -= config.target_backref_inflight_segments;
-    } else {
-      ret.segment_seq = 0;
-      ret.offset = P_ADDR_MIN;
-    }
-    return ret;
-  }
-
-  struct reclaim_state_t {
-    std::size_t segment_size;
-    paddr_t start_pos;
-    paddr_t end_pos;
-
-    static reclaim_state_t create(
-        segment_id_t segment_id,
-        std::size_t segment_size) {
-      return {segment_size,
-              P_ADDR_NULL,
-              paddr_t::make_seg_paddr(segment_id, 0)};
-    }
-
-    segment_id_t get_segment_id() const {
-      return end_pos.as_seg_paddr().get_segment_id();
-    }
-
-    bool is_complete() const {
-      return (std::size_t)end_pos.as_seg_paddr().get_segment_off() >= segment_size;
-    }
-
-    void advance(std::size_t bytes) {
-      assert(!is_complete());
-      start_pos = end_pos;
-      auto &end_seg_paddr = end_pos.as_seg_paddr();
-      auto next_off = end_seg_paddr.get_segment_off() + bytes;
-      if (next_off > segment_size) {
-        end_seg_paddr.set_segment_off(segment_size);
-      } else {
-        end_seg_paddr.set_segment_off(next_off);
-      }
-    }
-  };
-  std::optional<reclaim_state_t> reclaim_state;
-
-  /**
-   * GCProcess
-   *
-   * Background gc process.
-   */
-  using gc_cycle_ret = seastar::future<>;
-  class GCProcess {
-    std::optional<gc_cycle_ret> process_join;
-
-    SegmentCleaner &cleaner;
-
-    std::optional<seastar::promise<>> blocking;
-
-    bool is_stopping() const {
-      return !process_join;
-    }
-
-    gc_cycle_ret run();
-
-    void wake() {
-      if (blocking) {
-       blocking->set_value();
-       blocking = std::nullopt;
-      }
-    }
-
-    seastar::future<> maybe_wait_should_run() {
-      return seastar::do_until(
-       [this] {
-         cleaner.log_gc_state("GCProcess::maybe_wait_should_run");
-         return is_stopping() || cleaner.gc_should_run();
-       },
-       [this] {
-         ceph_assert(!blocking);
-         blocking = seastar::promise<>();
-         return blocking->get_future();
-       });
-    }
-  public:
-    GCProcess(SegmentCleaner &cleaner) : cleaner(cleaner) {}
-
-    void start() {
-      ceph_assert(is_stopping());
-      process_join = seastar::now(); // allow run()
-      process_join = run();
-      assert(!is_stopping());
-    }
-
-    gc_cycle_ret stop() {
-      if (is_stopping()) {
-        return seastar::now();
-      }
-      auto ret = std::move(*process_join);
-      process_join.reset();
-      assert(is_stopping());
-      wake();
-      return ret;
-    }
-
-    gc_cycle_ret run_until_halt() {
-      ceph_assert(is_stopping());
-      return seastar::do_until(
-       [this] {
-         cleaner.log_gc_state("GCProcess::run_until_halt");
-         return !cleaner.gc_should_run();
-       },
-       [this] {
-         return cleaner.do_gc_cycle();
-       });
-    }
-
-    void maybe_wake_on_space_used() {
-      if (is_stopping()) {
-        return;
-      }
-      if (cleaner.gc_should_run()) {
-       wake();
-      }
-    }
-  } gc_process;
-
-  using gc_ertr = work_ertr::extend_ertr<
-    SegmentManagerGroup::scan_extents_ertr
-    >;
-
-  gc_cycle_ret do_gc_cycle();
-
-  using gc_trim_journal_ertr = gc_ertr;
-  using gc_trim_journal_ret = gc_trim_journal_ertr::future<>;
-  gc_trim_journal_ret gc_trim_journal();
-
-  using gc_trim_backref_ertr = gc_ertr;
-  using gc_trim_backref_ret = gc_trim_backref_ertr::future<journal_seq_t>;
-  gc_trim_backref_ret gc_trim_backref(journal_seq_t limit);
-
-  using gc_reclaim_space_ertr = gc_ertr;
-  using gc_reclaim_space_ret = gc_reclaim_space_ertr::future<>;
-  gc_reclaim_space_ret gc_reclaim_space();
-
-
-  using retrieve_live_extents_iertr = work_iertr;
-  using retrieve_live_extents_ret =
-    retrieve_live_extents_iertr::future<journal_seq_t>;
-  retrieve_live_extents_ret _retrieve_live_extents(
-    Transaction &t,
-    std::set<
-      backref_buf_entry_t,
-      backref_buf_entry_t::cmp_t> &&backrefs,
-    std::vector<CachedExtentRef> &extents);
-
-  using retrieve_backref_mappings_ertr = work_ertr;
-  using retrieve_backref_mappings_ret =
-    retrieve_backref_mappings_ertr::future<backref_pin_list_t>;
-  retrieve_backref_mappings_ret retrieve_backref_mappings(
-    paddr_t start_paddr,
-    paddr_t end_paddr);
-
-  /*
-   * Segments calculations
-   */
-  std::size_t get_segments_in_journal() const {
-    if (!init_complete) {
-      return 0;
-    }
-    if (journal_tail_committed == JOURNAL_SEQ_NULL) {
-      return segments.get_num_type_journal();
-    }
-    auto journal_head = segments.get_journal_head();
-    assert(journal_head != JOURNAL_SEQ_NULL);
-    assert(journal_head.segment_seq >= journal_tail_committed.segment_seq);
-    return journal_head.segment_seq + 1 - journal_tail_committed.segment_seq;
-  }
-  std::size_t get_segments_in_journal_closed() const {
-    auto in_journal = get_segments_in_journal();
-    auto in_journal_open = segments.get_num_in_journal_open();
-    if (in_journal >= in_journal_open) {
-      return in_journal - in_journal_open;
-    } else {
-      return 0;
-    }
-  }
-  std::size_t get_segments_reclaimable() const {
-    assert(segments.get_num_closed() >= get_segments_in_journal_closed());
-    return segments.get_num_closed() - get_segments_in_journal_closed();
-  }
-
-  /*
-   * Space calculations
-   */
-  /// the unavailable space that is not reclaimable yet
-  std::size_t get_unavailable_unreclaimable_bytes() const {
-    auto ret = (segments.get_num_open() + get_segments_in_journal_closed()) *
-               segments.get_segment_size();
-    assert(ret >= segments.get_available_bytes_in_open());
-    return ret - segments.get_available_bytes_in_open();
-  }
-  /// the unavailable space that can be reclaimed
-  std::size_t get_unavailable_reclaimable_bytes() const {
-    auto ret = get_segments_reclaimable() * segments.get_segment_size();
-    ceph_assert(ret + get_unavailable_unreclaimable_bytes() == segments.get_unavailable_bytes());
-    return ret;
-  }
-  /// the unavailable space that is not alive
-  std::size_t get_unavailable_unused_bytes() const {
-    assert(segments.get_unavailable_bytes() > stats.used_bytes);
-    return segments.get_unavailable_bytes() - stats.used_bytes;
-  }
-  double get_reclaim_ratio() const {
-    if (segments.get_unavailable_bytes() == 0) return 0;
-    return (double)get_unavailable_unused_bytes() / (double)segments.get_unavailable_bytes();
-  }
-
-  /*
-   * Space calculations (projected)
-   */
-  std::size_t get_projected_available_bytes() const {
-    return (segments.get_available_bytes() > stats.projected_used_bytes) ?
-      segments.get_available_bytes() - stats.projected_used_bytes:
-      0;
-  }
-  double get_projected_available_ratio() const {
-    return (double)get_projected_available_bytes() /
-      (double)segments.get_total_bytes();
-  }
-
-  /*
-   * Journal sizes
-   */
-  std::size_t get_dirty_journal_size() const {
-    auto journal_head = segments.get_journal_head();
-    if (journal_head == JOURNAL_SEQ_NULL ||
-        dirty_extents_replay_from == JOURNAL_SEQ_NULL) {
-      return 0;
-    }
-    return (journal_head.segment_seq - dirty_extents_replay_from.segment_seq) *
-           segments.get_segment_size() +
-           journal_head.offset.as_seg_paddr().get_segment_off() -
-           segments.get_segment_size() -
-           dirty_extents_replay_from.offset.as_seg_paddr().get_segment_off();
-  }
-
-  std::size_t get_alloc_journal_size() const {
-    auto journal_head = segments.get_journal_head();
-    if (journal_head == JOURNAL_SEQ_NULL ||
-        alloc_info_replay_from == JOURNAL_SEQ_NULL) {
-      return 0;
-    }
-    return (journal_head.segment_seq - alloc_info_replay_from.segment_seq) *
-           segments.get_segment_size() +
-           journal_head.offset.as_seg_paddr().get_segment_off() -
-           segments.get_segment_size() -
-           alloc_info_replay_from.offset.as_seg_paddr().get_segment_off();
-  }
-
-  /**
-   * should_block_on_gc
-   *
-   * Encapsulates whether block pending gc.
-   */
-  bool should_block_on_trim() const {
-    if (disable_trim) return false;
-    return get_dirty_tail_limit() > journal_tail_target;
-  }
-
-  bool should_block_on_reclaim() const {
-    if (disable_trim) return false;
-    if (get_segments_reclaimable() == 0) {
-      return false;
-    }
-    auto aratio = get_projected_available_ratio();
-    return aratio < config.available_ratio_hard_limit;
-  }
-
-  bool should_block_on_gc() const {
-    return should_block_on_trim() || should_block_on_reclaim();
-  }
-
-  void log_gc_state(const char *caller) const;
-
-public:
-  seastar::future<> reserve_projected_usage(std::size_t projected_usage);
-
-  void release_projected_usage(size_t projected_usage);
-
-private:
-  void maybe_wake_gc_blocked_io() {
-    if (!init_complete) {
-      return;
-    }
-    if (!should_block_on_gc() && blocked_io_wake) {
-      blocked_io_wake->set_value();
-      blocked_io_wake = std::nullopt;
-    }
-  }
-
-  using scan_extents_ret_bare =
-    std::vector<std::pair<segment_id_t, segment_header_t>>;
-  using scan_extents_ertr = SegmentManagerGroup::scan_extents_ertr;
-  using scan_extents_ret = scan_extents_ertr::future<>;
-  scan_extents_ret scan_nonfull_segment(
-    const segment_header_t& header,
-    scan_extents_ret_bare& segment_set,
-    segment_id_t segment_id);
-
-  /**
-   * gc_should_reclaim_space
-   *
-   * Encapsulates logic for whether gc should be reclaiming segment space.
-   */
-  bool gc_should_reclaim_space() const {
-    if (disable_trim) return false;
-    if (get_segments_reclaimable() == 0) {
-      return false;
-    }
-    auto aratio = segments.get_available_ratio();
-    auto rratio = get_reclaim_ratio();
-    return (
-      (aratio < config.available_ratio_hard_limit) ||
-      ((aratio < config.available_ratio_gc_max) &&
-       (rratio > config.reclaim_ratio_gc_threshold))
-    );
-  }
-
-  /**
-   * gc_should_trim_journal
-   *
-   * Encapsulates logic for whether gc should be reclaiming segment space.
-   */
-  bool gc_should_trim_journal() const {
-    return get_dirty_tail() > journal_tail_target;
-  }
-
-  bool gc_should_trim_backref() const {
-    return get_backref_tail() > alloc_info_replay_from;
-  }
-  /**
-   * gc_should_run
-   *
-   * True if gc should be running.
-   */
-  bool gc_should_run() const {
-    if (disable_trim) return false;
-    ceph_assert(init_complete);
-    return gc_should_reclaim_space()
-      || gc_should_trim_journal()
-      || gc_should_trim_backref();
-  }
-
-  void init_mark_segment_closed(
-      segment_id_t segment,
-      segment_seq_t seq,
-      segment_type_t s_type) {
-    ceph_assert(!init_complete);
-    auto old_usage = calc_utilization(segment);
-    segments.init_closed(segment, seq, s_type);
-    auto new_usage = calc_utilization(segment);
-    adjust_segment_util(old_usage, new_usage);
-    if (s_type == segment_type_t::OOL) {
-      ool_segment_seq_allocator->set_next_segment_seq(seq);
-    }
-  }
-};
-using SegmentCleanerRef = std::unique_ptr<SegmentCleaner>;
-
-}
index 7bbca15572a71e057e3bf7270bfca46ddfa3f466..e4a864a7b3b66b5ad8e4372bb23098a7b3eba95c 100644 (file)
@@ -7,7 +7,7 @@
 #include "crimson/os/seastore/seastore_types.h"
 
 namespace crimson::os::seastore {
-class SegmentCleaner;
+class AsyncCleaner;
 }
 
 namespace crimson::os::seastore::journal {
@@ -41,7 +41,7 @@ private:
   segment_seq_t next_segment_seq = 0;
   segment_type_t type = segment_type_t::NULL_SEG;
   friend class journal::SegmentedJournal;
-  friend class SegmentCleaner;
+  friend class AsyncCleaner;
 };
 
 using SegmentSeqAllocatorRef =
index 2b89fe45e19849c0c84c22f7bae1c8df479cc28a..0a7d316235a931b397bf3bddf2e91992348974ae 100644 (file)
@@ -23,23 +23,23 @@ SET_SUBSYS(seastore_tm);
 namespace crimson::os::seastore {
 
 TransactionManager::TransactionManager(
-  SegmentCleanerRef _segment_cleaner,
+  AsyncCleanerRef _async_cleaner,
   JournalRef _journal,
   CacheRef _cache,
   LBAManagerRef _lba_manager,
   ExtentPlacementManagerRef &&epm,
   BackrefManagerRef&& backref_manager,
   tm_make_config_t config)
-  : segment_cleaner(std::move(_segment_cleaner)),
+  : async_cleaner(std::move(_async_cleaner)),
     cache(std::move(_cache)),
     lba_manager(std::move(_lba_manager)),
     journal(std::move(_journal)),
     epm(std::move(epm)),
     backref_manager(std::move(backref_manager)),
-    sm_group(*segment_cleaner->get_segment_manager_group()),
+    sm_group(*async_cleaner->get_segment_manager_group()),
     config(config)
 {
-  segment_cleaner->set_extent_callback(this);
+  async_cleaner->set_extent_callback(this);
   journal->set_write_pipeline(&write_pipeline);
 }
 
@@ -47,11 +47,11 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs()
 {
   LOG_PREFIX(TransactionManager::mkfs);
   INFO("enter");
-  return segment_cleaner->mount(
+  return async_cleaner->mount(
   ).safe_then([this] {
     return journal->open_for_write();
   }).safe_then([this](auto) {
-    segment_cleaner->init_mkfs();
+    async_cleaner->init_mkfs();
     return epm->open();
   }).safe_then([this, FNAME]() {
     return with_transaction_intr(
@@ -88,7 +88,7 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
   LOG_PREFIX(TransactionManager::mount);
   INFO("enter");
   cache->init();
-  return segment_cleaner->mount(
+  return async_cleaner->mount(
   ).safe_then([this] {
     return journal->replay(
       [this](
@@ -98,7 +98,7 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
        auto last_modified)
       {
        auto start_seq = offsets.write_result.start_seq;
-       segment_cleaner->update_journal_tail_target(
+       async_cleaner->update_journal_tail_target(
          cache->get_oldest_dirty_from().value_or(start_seq),
          cache->get_oldest_backref_dirty_from().value_or(start_seq));
        return cache->replay_delta(
@@ -124,8 +124,8 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
              else
                return lba_manager->init_cached_extent(t, e);
            }).si_then([this, FNAME, &t] {
-             assert(segment_cleaner->debug_check_space(
-                      *segment_cleaner->get_empty_space_tracker()));
+             assert(async_cleaner->debug_check_space(
+                      *async_cleaner->get_empty_space_tracker()));
              return backref_manager->scan_mapped_space(
                t,
                [this, FNAME, &t](
@@ -141,7 +141,7 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
                    len);
                  if (addr.is_real() &&
                      !backref_manager->backref_should_be_removed(addr)) {
-                   segment_cleaner->mark_space_used(
+                   async_cleaner->mark_space_used(
                      addr,
                      len ,
                      seastar::lowres_system_clock::time_point(),
@@ -163,7 +163,7 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
                  auto &backrefs = backref_manager->get_cached_backrefs();
                  DEBUG("marking {} backrefs used", backrefs.size());
                  for (auto &backref : backrefs) {
-                   segment_cleaner->mark_space_used(
+                   async_cleaner->mark_space_used(
                      backref.paddr,
                      backref.len,
                      seastar::lowres_system_clock::time_point(),
@@ -179,7 +179,7 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
   }).safe_then([this] {
     return epm->open();
   }).safe_then([FNAME, this] {
-    segment_cleaner->complete_init();
+    async_cleaner->complete_init();
     INFO("completed");
   }).handle_error(
     mount_ertr::pass_further{},
@@ -192,7 +192,7 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
 TransactionManager::close_ertr::future<> TransactionManager::close() {
   LOG_PREFIX(TransactionManager::close);
   INFO("enter");
-  return segment_cleaner->stop(
+  return async_cleaner->stop(
   ).then([this] {
     return cache->close();
   }).safe_then([this] {
@@ -314,12 +314,12 @@ TransactionManager::submit_transaction(
     size_t projected_usage = t.get_allocation_size();
     SUBTRACET(seastore_t, "waiting for projected_usage: {}", t, projected_usage);
     return trans_intr::make_interruptible(
-      segment_cleaner->reserve_projected_usage(projected_usage)
+      async_cleaner->reserve_projected_usage(projected_usage)
     ).then_interruptible([this, &t] {
       return submit_transaction_direct(t);
     }).finally([this, FNAME, projected_usage, &t] {
       SUBTRACET(seastore_t, "releasing projected_usage: {}", t, projected_usage);
-      segment_cleaner->release_projected_usage(projected_usage);
+      async_cleaner->release_projected_usage(projected_usage);
     });
   });
 }
@@ -365,7 +365,7 @@ TransactionManager::submit_transaction_direct(
     if (seq_to_trim && *seq_to_trim != JOURNAL_SEQ_NULL) {
       cache->trim_backref_bufs(*seq_to_trim);
     }
-    auto record = cache->prepare_record(tref, segment_cleaner.get());
+    auto record = cache->prepare_record(tref, async_cleaner.get());
 
     tref.get_handle().maybe_release_collection_lock();
 
@@ -379,7 +379,7 @@ TransactionManager::submit_transaction_direct(
           tref,
           submit_result.record_block_base,
           start_seq,
-          segment_cleaner.get());
+          async_cleaner.get());
 
       std::vector<CachedExtentRef> lba_to_clear;
       std::vector<CachedExtentRef> backref_to_clear;
@@ -409,10 +409,10 @@ TransactionManager::submit_transaction_direct(
       lba_manager->complete_transaction(tref, lba_to_clear, lba_to_link);
       backref_manager->complete_transaction(tref, backref_to_clear, backref_to_link);
 
-      segment_cleaner->update_journal_tail_target(
+      async_cleaner->update_journal_tail_target(
        cache->get_oldest_dirty_from().value_or(start_seq),
        cache->get_oldest_backref_dirty_from().value_or(start_seq));
-      return segment_cleaner->maybe_release_segment(tref);
+      return async_cleaner->maybe_release_segment(tref);
     }).safe_then([FNAME, &tref] {
       SUBTRACET(seastore_t, "completed", tref);
       return tref.get_handle().complete();
@@ -486,7 +486,7 @@ TransactionManager::rewrite_logical_extent(
 
   /* This update_mapping is, strictly speaking, unnecessary for delayed_alloc
    * extents since we're going to do it again once we either do the ool write
-   * or allocate a relative inline addr.  TODO: refactor SegmentCleaner to
+   * or allocate a relative inline addr.  TODO: refactor AsyncCleaner to
    * avoid this complication. */
   return lba_manager->update_mapping(
     t,
@@ -647,15 +647,15 @@ TransactionManagerRef make_transaction_manager(tm_make_config_t config)
   auto backref_manager = create_backref_manager(*sms, *cache);
 
   bool cleaner_is_detailed;
-  SegmentCleaner::config_t cleaner_config;
+  AsyncCleaner::config_t cleaner_config;
   if (config.is_test) {
     cleaner_is_detailed = true;
-    cleaner_config = SegmentCleaner::config_t::get_test();
+    cleaner_config = AsyncCleaner::config_t::get_test();
   } else {
     cleaner_is_detailed = false;
-    cleaner_config = SegmentCleaner::config_t::get_default();
+    cleaner_config = AsyncCleaner::config_t::get_default();
   }
-  auto segment_cleaner = std::make_unique<SegmentCleaner>(
+  auto async_cleaner = std::make_unique<AsyncCleaner>(
     cleaner_config,
     std::move(sms),
     *backref_manager,
@@ -663,20 +663,20 @@ TransactionManagerRef make_transaction_manager(tm_make_config_t config)
 
   JournalRef journal;
   if (config.j_type == journal_type_t::SEGMENT_JOURNAL) {
-    journal = journal::make_segmented(*segment_cleaner);
+    journal = journal::make_segmented(*async_cleaner);
   } else {
     journal = journal::make_circularbounded(
       nullptr, "");
-    segment_cleaner->set_disable_trim(true);
+    async_cleaner->set_disable_trim(true);
     ERROR("disabling journal trimming since support for CircularBoundedJournal\
          hasn't been added yet");
   }
   epm->init_ool_writers(
-      *segment_cleaner,
-      segment_cleaner->get_ool_segment_seq_allocator());
+      *async_cleaner,
+      async_cleaner->get_ool_segment_seq_allocator());
 
   return std::make_unique<TransactionManager>(
-    std::move(segment_cleaner),
+    std::move(async_cleaner),
     std::move(journal),
     std::move(cache),
     std::move(lba_manager),
index a02b8bb21ddab0694e142a2d6e24f054243da0fd..8661297cf4bbbcdf890d14bbf2a75de147b45d82 100644 (file)
@@ -21,7 +21,7 @@
 #include "crimson/osd/exceptions.h"
 
 #include "crimson/os/seastore/logging.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
 #include "crimson/os/seastore/seastore_types.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/lba_manager.h"
@@ -103,13 +103,13 @@ auto repeat_eagain(F &&f) {
  * Abstraction hiding reading and writing to persistence.
  * Exposes transaction based interface with read isolation.
  */
-class TransactionManager : public SegmentCleaner::ExtentCallbackInterface {
+class TransactionManager : public AsyncCleaner::ExtentCallbackInterface {
 public:
   using base_ertr = Cache::base_ertr;
   using base_iertr = Cache::base_iertr;
 
   TransactionManager(
-    SegmentCleanerRef segment_cleaner,
+    AsyncCleanerRef async_cleaner,
     JournalRef journal,
     CacheRef cache,
     LBAManagerRef lba_manager,
@@ -423,8 +423,8 @@ public:
   using submit_transaction_iertr = base_iertr;
   submit_transaction_iertr::future<> submit_transaction(Transaction &);
 
-  /// SegmentCleaner::ExtentCallbackInterface
-  using SegmentCleaner::ExtentCallbackInterface::submit_transaction_direct_ret;
+  /// AsyncCleaner::ExtentCallbackInterface
+  using AsyncCleaner::ExtentCallbackInterface::submit_transaction_direct_ret;
   submit_transaction_direct_ret submit_transaction_direct(
     Transaction &t,
     std::optional<journal_seq_t> seq_to_trim = std::nullopt) final;
@@ -438,18 +438,18 @@ public:
    */
   seastar::future<> flush(OrderingHandle &handle);
 
-  using SegmentCleaner::ExtentCallbackInterface::get_next_dirty_extents_ret;
+  using AsyncCleaner::ExtentCallbackInterface::get_next_dirty_extents_ret;
   get_next_dirty_extents_ret get_next_dirty_extents(
     Transaction &t,
     journal_seq_t seq,
     size_t max_bytes) final;
 
-  using SegmentCleaner::ExtentCallbackInterface::rewrite_extent_ret;
+  using AsyncCleaner::ExtentCallbackInterface::rewrite_extent_ret;
   rewrite_extent_ret rewrite_extent(
     Transaction &t,
     CachedExtentRef extent) final;
 
-  using SegmentCleaner::ExtentCallbackInterface::get_extent_if_live_ret;
+  using AsyncCleaner::ExtentCallbackInterface::get_extent_if_live_ret;
   get_extent_if_live_ret get_extent_if_live(
     Transaction &t,
     extent_types_t type,
@@ -577,7 +577,7 @@ public:
   }
 
   store_statfs_t store_stat() const {
-    return segment_cleaner->stat();
+    return async_cleaner->stat();
   }
 
   void add_device(Device* dev, bool is_primary) {
@@ -598,7 +598,7 @@ public:
 private:
   friend class Transaction;
 
-  SegmentCleanerRef segment_cleaner;
+  AsyncCleanerRef async_cleaner;
   CacheRef cache;
   LBAManagerRef lba_manager;
   JournalRef journal;
@@ -614,8 +614,8 @@ private:
     LogicalCachedExtentRef extent);
 public:
   // Testing interfaces
-  auto get_segment_cleaner() {
-    return segment_cleaner.get();
+  auto get_async_cleaner() {
+    return async_cleaner.get();
   }
 
   auto get_lba_manager() {
index 892e5f780e953c4a6712779ebacdbcb8610176c5..0da0cce18441ba9e82cad04b2fbc261abb040cb3 100644 (file)
@@ -112,7 +112,7 @@ struct fltree_onode_manager_test_t
     auto t = create_mutate_transaction();
     std::invoke(f, *t);
     submit_transaction(std::move(t));
-    segment_cleaner->run_until_halt().get0();
+    async_cleaner->run_until_halt().get0();
   }
 
   template <typename F>
index 3d890c27683a51c8e77973c0af57f48353b61714..b00db5e8a6df33cf75a848d04d9028af078a5cf2 100644 (file)
@@ -1591,7 +1591,7 @@ TEST_F(d_seastore_tm_test_t, 6_random_tree_insert_erase)
       auto t = create_mutate_transaction();
       INTR(tree->bootstrap, *t).unsafe_get();
       submit_transaction(std::move(t));
-      segment_cleaner->run_until_halt().get0();
+      async_cleaner->run_until_halt().get0();
     }
 
     // test insert
@@ -1599,7 +1599,7 @@ TEST_F(d_seastore_tm_test_t, 6_random_tree_insert_erase)
       auto t = create_mutate_transaction();
       INTR(tree->insert, *t).unsafe_get();
       submit_transaction(std::move(t));
-      segment_cleaner->run_until_halt().get0();
+      async_cleaner->run_until_halt().get0();
     }
     {
       auto t = create_read_transaction();
@@ -1623,7 +1623,7 @@ TEST_F(d_seastore_tm_test_t, 6_random_tree_insert_erase)
       auto size = kvs.size() / 4 * 3;
       INTR_R(tree->erase, *t, size).unsafe_get();
       submit_transaction(std::move(t));
-      segment_cleaner->run_until_halt().get0();
+      async_cleaner->run_until_halt().get0();
     }
     {
       auto t = create_read_transaction();
@@ -1646,7 +1646,7 @@ TEST_F(d_seastore_tm_test_t, 6_random_tree_insert_erase)
       auto size = kvs.size();
       INTR_R(tree->erase, *t, size).unsafe_get();
       submit_transaction(std::move(t));
-      segment_cleaner->run_until_halt().get0();
+      async_cleaner->run_until_halt().get0();
     }
     {
       auto t = create_read_transaction();
@@ -1703,7 +1703,7 @@ TEST_F(d_seastore_tm_test_t, 7_tree_insert_erase_eagain)
          });
        });
     }).unsafe_get0();
-    segment_cleaner->run_until_halt().get0();
+    async_cleaner->run_until_halt().get0();
 
     // insert
     logger().warn("start inserting {} kvs ...", kvs.size());
@@ -1723,7 +1723,7 @@ TEST_F(d_seastore_tm_test_t, 7_tree_insert_erase_eagain)
              });
            });
         }).unsafe_get0();
-        segment_cleaner->run_until_halt().get0();
+        async_cleaner->run_until_halt().get0();
         ++iter;
       }
     }
@@ -1769,7 +1769,7 @@ TEST_F(d_seastore_tm_test_t, 7_tree_insert_erase_eagain)
              });
            });
         }).unsafe_get0();
-        segment_cleaner->run_until_halt().get0();
+        async_cleaner->run_until_halt().get0();
         ++iter;
       }
       kvs.erase_from_random(kvs.random_begin(), kvs.random_end());
index 91600ca10b8c16cac7513f9a5138b70e3bfd6c07..a67b0aa70d43df8de4eb6d31f3ff6fc0c330d558 100644 (file)
@@ -6,7 +6,7 @@
 #include <random>
 
 #include "crimson/common/log.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
 #include "crimson/os/seastore/journal.h"
 #include "crimson/os/seastore/segment_manager/ephemeral.h"
 
index f5c564fa4b0db5e4cc4e70b3f505206177cba774..4b6c86e142f55b6f5dcfd9c0848bccd643e7a590 100644 (file)
@@ -8,7 +8,7 @@
 #include "test/crimson/gtest_seastar.h"
 #include "test/crimson/seastore/transaction_manager_test_state.h"
 
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/transaction_manager.h"
 #include "crimson/os/seastore/segment_manager/ephemeral.h"
@@ -396,7 +396,7 @@ struct transaction_manager_test_t :
 
   bool check_usage() {
     auto t = create_weak_test_transaction();
-    SpaceTrackerIRef tracker(segment_cleaner->get_empty_space_tracker());
+    SpaceTrackerIRef tracker(async_cleaner->get_empty_space_tracker());
     with_trans_intr(
       *t.t,
       [this, &tracker](auto &t) {
@@ -427,7 +427,7 @@ struct transaction_manager_test_t :
            return seastar::now();
          });
       }).unsafe_get0();
-    return segment_cleaner->debug_check_space(*tracker);
+    return async_cleaner->debug_check_space(*tracker);
   }
 
   void replay() {
@@ -578,7 +578,7 @@ struct transaction_manager_test_t :
        "try_submit_transaction hit invalid error"
       }
     ).then([this](auto ret) {
-      return segment_cleaner->run_until_halt().then([ret] { return ret; });
+      return async_cleaner->run_until_halt().then([ret] { return ret; });
     }).get0();
 
     if (success) {
@@ -628,7 +628,7 @@ struct transaction_manager_test_t :
            });
        });
     }).safe_then([this]() {
-      return segment_cleaner->run_until_halt();
+      return async_cleaner->run_until_halt();
     }).handle_error(
       crimson::ct_error::assert_all{
        "Invalid error in SeaStore::list_collections"
index b712effaf4be3f08c2e7e1452685551ce5dd62cf..19b5b08cdf3a875f5cac04264d6d72a1f07fe3de 100644 (file)
@@ -6,7 +6,7 @@
 #include <random>
 #include <boost/iterator/counting_iterator.hpp>
 
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/transaction_manager.h"
 #include "crimson/os/seastore/segment_manager/ephemeral.h"
@@ -146,7 +146,7 @@ protected:
   LBAManager *lba_manager;
   BackrefManager *backref_manager;
   Cache* cache;
-  SegmentCleaner *segment_cleaner;
+  AsyncCleaner *async_cleaner;
 
   TMTestState() : EphemeralTestState(1) {}
 
@@ -165,14 +165,14 @@ protected:
         tm->add_device(sec_sm.get(), false);
       }
     }
-    segment_cleaner = tm->get_segment_cleaner();
+    async_cleaner = tm->get_async_cleaner();
     lba_manager = tm->get_lba_manager();
     backref_manager = tm->get_backref_manager();
     cache = tm->get_cache();
   }
 
   virtual void _destroy() override {
-    segment_cleaner = nullptr;
+    async_cleaner = nullptr;
     lba_manager = nullptr;
     tm.reset();
   }
@@ -191,9 +191,9 @@ protected:
     ).handle_error(
       crimson::ct_error::assert_all{"Error in mount"}
     ).then([this] {
-      return segment_cleaner->stop();
+      return async_cleaner->stop();
     }).then([this] {
-      return segment_cleaner->run_until_halt();
+      return async_cleaner->run_until_halt();
     });
   }
 
@@ -251,7 +251,7 @@ protected:
 
   void submit_transaction(TransactionRef t) {
     submit_transaction_fut(*t).unsafe_get0();
-    segment_cleaner->run_until_halt().get0();
+    async_cleaner->run_until_halt().get0();
   }
 };