transaction.cc
cache.cc
lba_manager.cc
- segment_cleaner.cc
+ async_cleaner.cc
backref_manager.cc
backref/backref_tree_node.cc
backref/btree_backref_manager.cc
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <seastar/core/metrics.hh>
+
+#include "crimson/os/seastore/logging.h"
+
+#include "crimson/os/seastore/async_cleaner.h"
+#include "crimson/os/seastore/transaction_manager.h"
+
+SET_SUBSYS(seastore_cleaner);
+
+namespace crimson::os::seastore {
+
+/// Switch this record to the OPEN state for a freshly opened segment.
+/// Neither the sequence nor the type may be the null sentinel.
+void segment_info_t::set_open(segment_seq_t _seq, segment_type_t _type)
+{
+  ceph_assert(_seq != NULL_SEG_SEQ);
+  ceph_assert(_type != segment_type_t::NULL_SEG);
+
+  // the write pointer of an opened segment starts from zero
+  written_to = 0;
+  type = _type;
+  seq = _seq;
+  state = Segment::segment_state_t::OPEN;
+}
+
+/// Return this record to the EMPTY state, clearing every per-segment field.
+void segment_info_t::set_empty()
+{
+  // identity
+  seq = NULL_SEG_SEQ;
+  type = segment_type_t::NULL_SEG;
+
+  // timestamps and write pointer
+  last_rewritten = {};
+  last_modified = {};
+  written_to = 0;
+
+  state = Segment::segment_state_t::EMPTY;
+}
+
+/// Flip the state to CLOSED.
+void segment_info_t::set_closed()
+{
+  // only the state changes; seq, type, timestamps and written_to are kept
+  state = Segment::segment_state_t::CLOSED;
+}
+
+/// Initialise this record directly in the CLOSED state, with the write
+/// pointer set to seg_size.
+/// Neither the sequence nor the type may be the null sentinel.
+void segment_info_t::init_closed(
+  segment_seq_t _seq,
+  segment_type_t _type,
+  std::size_t seg_size)
+{
+  ceph_assert(_seq != NULL_SEG_SEQ);
+  ceph_assert(_type != segment_type_t::NULL_SEG);
+
+  written_to = seg_size;
+  type = _type;
+  seq = _seq;
+  state = Segment::segment_state_t::CLOSED;
+}
+
+/// Print a segment_info_t; the details beyond the state are only printed
+/// for segments that are not empty (i.e. open or closed).
+std::ostream& operator<<(std::ostream &out, const segment_info_t &info)
+{
+  out << "seg_info_t(" << "state=" << info.state;
+  if (!info.is_empty()) {
+    out << ", seq=" << segment_seq_printer_t{info.seq};
+    out << ", type=" << info.type;
+    out << ", last_modified=" << info.last_modified.time_since_epoch();
+    out << ", last_rewritten=" << info.last_rewritten.time_since_epoch();
+    out << ", written_to=" << info.written_to;
+  }
+  out << ")";
+  return out;
+}
+
+/// Drop all tracked segments and zero every counter kept by this object.
+void segments_info_t::reset()
+{
+  segments.clear();
+  segment_size = 0;
+
+  // journal bookkeeping and per-type numbers
+  journal_segment_id = NULL_SEG_ID;
+  num_in_journal_open = 0;
+  num_type_journal = 0;
+  num_type_ool = 0;
+
+  // per-state numbers
+  num_open = 0;
+  num_empty = 0;
+  num_closed = 0;
+
+  // operation counts
+  count_open_journal = 0;
+  count_open_ool = 0;
+  count_release_journal = 0;
+  count_release_ool = 0;
+  count_close_journal = 0;
+  count_close_ool = 0;
+
+  // byte accounting
+  total_bytes = 0;
+  avail_bytes_in_open = 0;
+}
+
+/// Register all segments of one segment manager (device) as empty and add
+/// its size to the total. Every segment manager must report the same
+/// segment size as the first one added.
+void segments_info_t::add_segment_manager(
+  SegmentManager &segment_manager)
+{
+  LOG_PREFIX(segments_info_t::add_segment_manager);
+  const device_id_t dev_id = segment_manager.get_device_id();
+  const auto per_segment_bytes = segment_manager.get_segment_size();
+  const auto segment_count = segment_manager.get_num_segments();
+  const auto device_bytes = segment_manager.get_size();
+  INFO("adding segment manager {}, size={}, ssize={}, segments={}",
+       device_id_printer_t{dev_id}, device_bytes, per_segment_bytes,
+       segment_count);
+  ceph_assert(per_segment_bytes > 0);
+  ceph_assert(segment_count > 0);
+  ceph_assert(device_bytes > 0);
+
+  // add_device() is also relied upon to reject a duplicated device
+  segments.add_device(dev_id, segment_count, segment_info_t{});
+
+  // the segment size is assumed to be shared by all the segment managers
+  // (per_segment_bytes > 0 has already been asserted above)
+  if (segment_size != 0) {
+    ceph_assert(segment_size == (std::size_t)per_segment_bytes);
+  } else {
+    segment_size = per_segment_bytes;
+  }
+
+  // freshly added segments start out empty
+  num_empty += segment_count;
+  total_bytes += device_bytes;
+}
+
+/// Move an empty segment straight to the closed state and update the
+/// empty/closed and per-type numbers. The count_close_* operation counters
+/// are deliberately left untouched.
+void segments_info_t::init_closed(
+  segment_id_t segment, segment_seq_t seq, segment_type_t type)
+{
+  LOG_PREFIX(segments_info_t::init_closed);
+  auto& info = segments[segment];
+  INFO("initiating {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
+       segment, segment_seq_printer_t{seq}, type,
+       info, num_empty, num_open, num_closed);
+  ceph_assert(info.is_empty());
+  info.init_closed(seq, type, get_segment_size());
+
+  ceph_assert(num_empty > 0);
+  --num_empty;
+  ++num_closed;
+
+  if (type != segment_type_t::JOURNAL) {
+    ++num_type_ool;
+  } else {
+    // journal_segment_id is not set by init_closed, so no journal head
+    // may exist at this point
+    ceph_assert(get_journal_head() == JOURNAL_SEQ_NULL);
+    ++num_type_journal;
+  }
+}
+
+/// Open an empty segment with the given sequence and type, updating the
+/// state/type numbers, the open counters and the bytes available in open
+/// segments. For a journal segment, the previously recorded journal
+/// segment (if any) must be a closed journal segment whose seq is seq - 1.
+void segments_info_t::mark_open(
+  segment_id_t segment, segment_seq_t seq, segment_type_t type)
+{
+  LOG_PREFIX(segments_info_t::mark_open);
+  auto& info = segments[segment];
+  INFO("opening {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
+       segment, segment_seq_printer_t{seq}, type,
+       info, num_empty, num_open, num_closed);
+  ceph_assert(info.is_empty());
+  info.set_open(seq, type);
+
+  ceph_assert(num_empty > 0);
+  --num_empty;
+  ++num_open;
+
+  if (type != segment_type_t::JOURNAL) {
+    ++num_type_ool;
+    ++count_open_ool;
+  } else {
+    if (journal_segment_id != NULL_SEG_ID) {
+      auto& previous = segments[journal_segment_id];
+      ceph_assert(previous.is_closed());
+      ceph_assert(previous.type == segment_type_t::JOURNAL);
+      ceph_assert(previous.seq + 1 == seq);
+    }
+    journal_segment_id = segment;
+
+    ++num_in_journal_open;
+    ++num_type_journal;
+    ++count_open_journal;
+  }
+
+  // a just-opened segment is entirely available
+  ceph_assert(info.written_to == 0);
+  avail_bytes_in_open += get_segment_size();
+}
+
+/// Release a closed segment back to the empty state, updating the
+/// closed/empty numbers, the per-type numbers and the release counters.
+void segments_info_t::mark_empty(
+  segment_id_t segment)
+{
+  LOG_PREFIX(segments_info_t::mark_empty);
+  auto& segment_info = segments[segment];
+  INFO("releasing {}, {}, num_segments(empty={}, opened={}, closed={})",
+       segment, segment_info,
+       num_empty, num_open, num_closed);
+  ceph_assert(segment_info.is_closed());
+  // remember the type before set_empty() resets it to NULL_SEG
+  auto type = segment_info.type;
+  // checked with ceph_assert like every other invariant in the mark_*
+  // methods, so it also holds in NDEBUG builds
+  ceph_assert(type != segment_type_t::NULL_SEG);
+  segment_info.set_empty();
+  ceph_assert(num_closed > 0);
+  --num_closed;
+  ++num_empty;
+  if (type == segment_type_t::JOURNAL) {
+    ceph_assert(num_type_journal > 0);
+    --num_type_journal;
+    ++count_release_journal;
+  } else {
+    ceph_assert(num_type_ool > 0);
+    --num_type_ool;
+    ++count_release_ool;
+  }
+}
+
+/// Close an open segment: update the open/closed numbers and the close
+/// counters, and subtract the segment's unwritten remainder from the
+/// bytes available in open segments.
+void segments_info_t::mark_closed(
+  segment_id_t segment)
+{
+  LOG_PREFIX(segments_info_t::mark_closed);
+  auto& info = segments[segment];
+  INFO("closing {}, {}, num_segments(empty={}, opened={}, closed={})",
+       segment, info,
+       num_empty, num_open, num_closed);
+  ceph_assert(info.is_open());
+  info.set_closed();
+
+  ceph_assert(num_open > 0);
+  --num_open;
+  ++num_closed;
+
+  if (info.type != segment_type_t::JOURNAL) {
+    ++count_close_ool;
+  } else {
+    ceph_assert(num_in_journal_open > 0);
+    --num_in_journal_open;
+    ++count_close_journal;
+  }
+
+  // whatever was left unwritten is no longer available
+  ceph_assert(get_segment_size() >= info.written_to);
+  auto unwritten = get_segment_size() - info.written_to;
+  ceph_assert(avail_bytes_in_open >= unwritten);
+  avail_bytes_in_open -= unwritten;
+}
+
+/// Advance the write pointer of the open segment addressed by offset and
+/// deduct the newly written bytes from the bytes available in open
+/// segments. Aborts if the segment is not open or the pointer would move
+/// backwards.
+void segments_info_t::update_written_to(
+  segment_type_t type,
+  paddr_t offset)
+{
+  LOG_PREFIX(segments_info_t::update_written_to);
+  auto& seg_addr = offset.as_seg_paddr();
+  auto& info = segments[seg_addr.get_segment_id()];
+  if (!info.is_open()) {
+    ERROR("segment is not open, not updating, type={}, offset={}, {}",
+          type, offset, info);
+    ceph_abort();
+  }
+
+  auto target = static_cast<std::size_t>(seg_addr.get_segment_off());
+  ceph_assert(target <= get_segment_size());
+  if (info.written_to > target) {
+    ERROR("written_to should not decrease! type={}, offset={}, {}",
+          type, offset, info);
+    ceph_abort();
+  }
+
+  DEBUG("type={}, offset={}, {}", type, offset, info);
+  ceph_assert(type == info.type);
+
+  auto newly_written = target - info.written_to;
+  ceph_assert(avail_bytes_in_open >= newly_written);
+  avail_bytes_in_open -= newly_written;
+  info.written_to = target;
+}
+
+/// Compare the live bytes of every segment against another tracker, which
+/// is cast to SpaceTrackerSimple without a check. Every mismatch is
+/// logged; returns true only if all segments agree.
+bool SpaceTrackerSimple::equals(const SpaceTrackerI &_other) const
+{
+  LOG_PREFIX(SpaceTrackerSimple::equals);
+  const auto &other = static_cast<const SpaceTrackerSimple&>(_other);
+
+  if (other.live_bytes_by_segment.size() != live_bytes_by_segment.size()) {
+    ERROR("different segment counts, bug in test");
+    assert(0 == "segment counts should match");
+    return false;
+  }
+
+  bool all_match = true;
+  auto mine = live_bytes_by_segment.begin();
+  auto theirs = other.live_bytes_by_segment.begin();
+  while (mine != live_bytes_by_segment.end()) {
+    if (mine->second.live_bytes != theirs->second.live_bytes) {
+      all_match = false;
+      DEBUG("segment_id {} live bytes mismatch *this: {}, other: {}",
+            mine->first, mine->second.live_bytes, theirs->second.live_bytes);
+    }
+    ++mine;
+    ++theirs;
+  }
+  return all_match;
+}
+
+/// Mark the blocks covering [offset, offset + len) as allocated in the
+/// bitmap and add len to the usage.
+///
+/// offset and len must be multiples of block_size, and the range must lie
+/// within the bitmap. Blocks that are already set are reported through
+/// the log (one ERROR for the range, one DEBUG per block) and set again.
+///
+/// @return the value returned by update_usage(len)
+int64_t SpaceTrackerDetailed::SegmentMap::allocate(
+  device_segment_id_t segment,
+  seastore_off_t offset,
+  extent_len_t len,
+  const extent_len_t block_size)
+{
+  LOG_PREFIX(SegmentMap::allocate);
+  assert(offset % block_size == 0);
+  assert(len % block_size == 0);
+
+  const auto b = (offset / block_size);
+  const auto e = (offset + len) / block_size;
+  // indexing the bitmap out of range would be undefined behaviour
+  assert(e <= bitmap.size());
+
+  // only emit the ERROR once per call
+  bool error = false;
+  for (auto i = b; i < e; ++i) {
+    if (bitmap[i]) {
+      if (!error) {
+        ERROR("found allocated in {}, {} ~ {}", segment, offset, len);
+        error = true;
+      }
+      DEBUG("block {} allocated", i * block_size);
+    }
+    bitmap[i] = true;
+  }
+  return update_usage(len);
+}
+
+/// Clear the blocks covering [offset, offset + len) in the bitmap and
+/// subtract len from the usage.
+///
+/// offset and len must be multiples of block_size, and the range must lie
+/// within the bitmap. Blocks that are already clear are reported through
+/// the log (one ERROR for the range, one DEBUG per block).
+///
+/// @return the value returned by update_usage(-len)
+int64_t SpaceTrackerDetailed::SegmentMap::release(
+  device_segment_id_t segment,
+  seastore_off_t offset,
+  extent_len_t len,
+  const extent_len_t block_size)
+{
+  LOG_PREFIX(SegmentMap::release);
+  assert(offset % block_size == 0);
+  assert(len % block_size == 0);
+
+  const auto b = (offset / block_size);
+  const auto e = (offset + len) / block_size;
+  // indexing the bitmap out of range would be undefined behaviour
+  assert(e <= bitmap.size());
+
+  // only emit the ERROR once per call
+  bool error = false;
+  for (auto i = b; i < e; ++i) {
+    if (!bitmap[i]) {
+      if (!error) {
+        ERROR("found unallocated in {}, {} ~ {}", segment, offset, len);
+        error = true;
+      }
+      DEBUG("block {} unallocated", i * block_size);
+    }
+    bitmap[i] = false;
+  }
+  return update_usage(-(int64_t)len);
+}
+
+/// Compare the usage of every segment against another tracker, which is
+/// cast to SpaceTrackerDetailed without a check. Every mismatch is
+/// logged; returns true only if all segments agree.
+bool SpaceTrackerDetailed::equals(const SpaceTrackerI &_other) const
+{
+  LOG_PREFIX(SpaceTrackerDetailed::equals);
+  const auto &other = static_cast<const SpaceTrackerDetailed&>(_other);
+
+  if (other.segment_usage.size() != segment_usage.size()) {
+    ERROR("different segment counts, bug in test");
+    assert(0 == "segment counts should match");
+    return false;
+  }
+
+  bool all_match = true;
+  auto mine = segment_usage.begin();
+  auto theirs = other.segment_usage.begin();
+  while (mine != segment_usage.end()) {
+    if (mine->second.get_usage() != theirs->second.get_usage()) {
+      all_match = false;
+      ERROR("segment_id {} live bytes mismatch *this: {}, other: {}",
+            mine->first, mine->second.get_usage(), theirs->second.get_usage());
+    }
+    ++mine;
+    ++theirs;
+  }
+  return all_match;
+}
+
+/// Log the byte offset (index * block_size) of every block that is still
+/// set in the bitmap.
+void SpaceTrackerDetailed::SegmentMap::dump_usage(extent_len_t block_size) const
+{
+  LOG_PREFIX(SegmentMap::dump_usage);
+  INFO("dump start");
+  for (unsigned blk = 0; blk < bitmap.size(); ++blk) {
+    if (!bitmap[blk]) {
+      continue;
+    }
+    LOCAL_LOGGER.info(" {} still live", blk * block_size);
+  }
+}
+
+/// Log the segment id, then dump the live blocks of that segment using
+/// the block size recorded for its device.
+void SpaceTrackerDetailed::dump_usage(segment_id_t id) const
+{
+  LOG_PREFIX(SpaceTrackerDetailed::dump_usage);
+  INFO("{}", id);
+  const auto &usage_map = segment_usage[id];
+  usage_map.dump_usage(block_size_by_segment_manager[id.device_id()]);
+}
+
+/// Log the live bytes recorded for the given segment.
+void SpaceTrackerSimple::dump_usage(segment_id_t id) const
+{
+  LOG_PREFIX(SpaceTrackerSimple::dump_usage);
+  const auto &entry = live_bytes_by_segment[id];
+  INFO("id: {}, live_bytes: {}", id, entry.live_bytes);
+}
+
+AsyncCleaner::AsyncCleaner(
+ config_t config,
+ SegmentManagerGroupRef&& sm_group,
+ BackrefManager &backref_manager,
+ bool detailed)
+ : detailed(detailed), // each member below is initialised from the parameter of the same name
+ config(config),
+ sm_group(std::move(sm_group)), // takes over the segment manager group passed in
+ backref_manager(backref_manager),
+ ool_segment_seq_allocator(
+ new SegmentSeqAllocator(segment_type_t::OOL)), // allocator created for the OOL segment type
+ gc_process(*this) // the GC process is constructed with a reference to this cleaner
+{ // Construct the cleaner from its configuration and collaborators, then validate the configuration.
+ config.validate(); // NOTE: `config` here names the constructor parameter, which shadows the member
+}
+
+/// Set up the segment-utilization histogram buckets and register the
+/// "async_cleaner" metric group.
+///
+/// The histogram has UTIL_BUCKETS buckets with upper bounds (i + 1) / 10.
+/// All segments are initially counted in the bucket that
+/// get_bucket_index(UTIL_STATE_EMPTY) selects, because segments start
+/// out empty.
+void AsyncCleaner::register_metrics()
+{
+  namespace sm = seastar::metrics;
+  stats.segment_util.buckets.resize(UTIL_BUCKETS);
+  std::size_t i;
+  for (i = 0; i < UTIL_BUCKETS; ++i) {
+    stats.segment_util.buckets[i].upper_bound = ((double)(i + 1)) / 10;
+    stats.segment_util.buckets[i].count = 0;
+  }
+  // NOTE: by default the segments are empty
+  i = get_bucket_index(UTIL_STATE_EMPTY);
+  stats.segment_util.buckets[i].count = segments.get_num_segments();
+
+  metrics.add_group("async_cleaner", {
+    sm::make_counter("segments_number",
+                     [this] { return segments.get_num_segments(); },
+                     sm::description("the number of segments")),
+    sm::make_counter("segment_size",
+                     [this] { return segments.get_segment_size(); },
+                     sm::description("the bytes of a segment")),
+    sm::make_counter("segments_in_journal",
+                     [this] { return get_segments_in_journal(); },
+                     sm::description("the number of segments in journal")),
+    sm::make_counter("segments_type_journal",
+                     [this] { return segments.get_num_type_journal(); },
+                     sm::description("the number of segments typed journal")),
+    sm::make_counter("segments_type_ool",
+                     [this] { return segments.get_num_type_ool(); },
+                     sm::description("the number of segments typed out-of-line")),
+    sm::make_counter("segments_open",
+                     [this] { return segments.get_num_open(); },
+                     sm::description("the number of open segments")),
+    sm::make_counter("segments_empty",
+                     [this] { return segments.get_num_empty(); },
+                     sm::description("the number of empty segments")),
+    sm::make_counter("segments_closed",
+                     [this] { return segments.get_num_closed(); },
+                     sm::description("the number of closed segments")),
+
+    sm::make_counter("segments_count_open_journal",
+                     [this] { return segments.get_count_open_journal(); },
+                     sm::description("the count of open journal segment operations")),
+    sm::make_counter("segments_count_open_ool",
+                     [this] { return segments.get_count_open_ool(); },
+                     sm::description("the count of open ool segment operations")),
+    sm::make_counter("segments_count_release_journal",
+                     [this] { return segments.get_count_release_journal(); },
+                     sm::description("the count of release journal segment operations")),
+    sm::make_counter("segments_count_release_ool",
+                     [this] { return segments.get_count_release_ool(); },
+                     sm::description("the count of release ool segment operations")),
+    sm::make_counter("segments_count_close_journal",
+                     [this] { return segments.get_count_close_journal(); },
+                     sm::description("the count of close journal segment operations")),
+    sm::make_counter("segments_count_close_ool",
+                     [this] { return segments.get_count_close_ool(); },
+                     sm::description("the count of close ool segment operations")),
+
+    sm::make_counter("total_bytes",
+                     [this] { return segments.get_total_bytes(); },
+                     sm::description("the size of the space")),
+    sm::make_counter("available_bytes",
+                     [this] { return segments.get_available_bytes(); },
+                     sm::description("the size of the space is available")),
+    sm::make_counter("unavailable_unreclaimable_bytes",
+                     [this] { return get_unavailable_unreclaimable_bytes(); },
+                     sm::description("the size of the space is unavailable and unreclaimable")),
+    sm::make_counter("unavailable_reclaimable_bytes",
+                     [this] { return get_unavailable_reclaimable_bytes(); },
+                     sm::description("the size of the space is unavailable and reclaimable")),
+    sm::make_counter("used_bytes", stats.used_bytes,
+                     sm::description("the size of the space occupied by live extents")),
+    sm::make_counter("unavailable_unused_bytes",
+                     [this] { return get_unavailable_unused_bytes(); },
+                     sm::description("the size of the space is unavailable and not alive")),
+
+    sm::make_counter("dirty_journal_bytes",
+                     [this] { return get_dirty_journal_size(); },
+                     sm::description("the size of the journal for dirty extents")),
+    sm::make_counter("alloc_journal_bytes",
+                     [this] { return get_alloc_journal_size(); },
+                     sm::description("the size of the journal for alloc info")),
+
+    sm::make_counter("projected_count", stats.projected_count,
+                     sm::description("the number of projected usage reservations")),
+    sm::make_counter("projected_used_bytes_sum", stats.projected_used_bytes_sum,
+                     sm::description("the sum of the projected usage in bytes")),
+
+    sm::make_counter("io_count", stats.io_count,
+                     sm::description("the sum of IOs")),
+    sm::make_counter("io_blocked_count", stats.io_blocked_count,
+                     sm::description("IOs that are blocked by gc")),
+    sm::make_counter("io_blocked_count_trim", stats.io_blocked_count_trim,
+                     sm::description("IOs that are blocked by trimming")),
+    sm::make_counter("io_blocked_count_reclaim", stats.io_blocked_count_reclaim,
+                     sm::description("IOs that are blocked by reclaiming")),
+    sm::make_counter("io_blocked_sum", stats.io_blocked_sum,
+                     sm::description("the sum of blocking IOs")),
+
+    sm::make_counter("reclaimed_bytes", stats.reclaimed_bytes,
+                     sm::description("rewritten bytes due to reclaim")),
+    // this counter grows by one segment size per reclaimed segment, so it
+    // must not share the description of reclaimed_bytes
+    sm::make_counter("reclaimed_segment_bytes", stats.reclaimed_segment_bytes,
+                     sm::description("total bytes of the segments that have been reclaimed")),
+    sm::make_counter("closed_journal_used_bytes", stats.closed_journal_used_bytes,
+                     sm::description("used bytes when close a journal segment")),
+    sm::make_counter("closed_journal_total_bytes", stats.closed_journal_total_bytes,
+                     sm::description("total bytes of closed journal segments")),
+    sm::make_counter("closed_ool_used_bytes", stats.closed_ool_used_bytes,
+                     sm::description("used bytes when close a ool segment")),
+    sm::make_counter("closed_ool_total_bytes", stats.closed_ool_total_bytes,
+                     sm::description("total bytes of closed ool segments")),
+
+    sm::make_gauge("available_ratio",
+                   [this] { return segments.get_available_ratio(); },
+                   sm::description("ratio of available space to total space")),
+    sm::make_gauge("reclaim_ratio",
+                   [this] { return get_reclaim_ratio(); },
+                   sm::description("ratio of reclaimable space to unavailable space")),
+
+    sm::make_histogram("segment_utilization_distribution",
+                       [this]() -> seastar::metrics::histogram& {
+                         return stats.segment_util;
+                       },
+                       sm::description("utilization distribution of all segments"))
+  });
+}
+
+/// Open the first empty segment found while iterating over the segments
+/// and return its id. The utilization histogram is adjusted for the state
+/// change. Aborts when no empty segment is left.
+segment_id_t AsyncCleaner::allocate_segment(
+  segment_seq_t seq,
+  segment_type_t type)
+{
+  LOG_PREFIX(AsyncCleaner::allocate_segment);
+  assert(seq != NULL_SEG_SEQ);
+  auto cursor = segments.begin();
+  while (cursor != segments.end()) {
+    auto candidate = cursor->first;
+    auto& candidate_info = cursor->second;
+    if (!candidate_info.is_empty()) {
+      ++cursor;
+      continue;
+    }
+    // found one: open it and move it between the utilization buckets
+    auto util_before = calc_utilization(candidate);
+    segments.mark_open(candidate, seq, type);
+    auto util_after = calc_utilization(candidate);
+    adjust_segment_util(util_before, util_after);
+    INFO("opened, should_block_on_gc {}, projected_avail_ratio {}, "
+         "reclaim_ratio {}",
+         should_block_on_gc(),
+         get_projected_available_ratio(),
+         get_reclaim_ratio());
+    return candidate;
+  }
+  ERROR("out of space with segment_seq={}", segment_seq_printer_t{seq});
+  ceph_abort();
+  return NULL_SEG_ID;
+}
+
+/// Record new replay-from positions for dirty extents and alloc info, and
+/// move journal_tail_target forward to the smaller of the two when that is
+/// an advance. Does nothing if trimming is disabled. Finally wakes the GC
+/// process and any IO blocked on GC.
+void AsyncCleaner::update_journal_tail_target(
+  journal_seq_t dirty_replay_from,
+  journal_seq_t alloc_replay_from)
+{
+  LOG_PREFIX(AsyncCleaner::update_journal_tail_target);
+  if (disable_trim) {
+    return;
+  }
+  assert(dirty_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
+  assert(alloc_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
+
+  const bool advance_dirty =
+    dirty_extents_replay_from == JOURNAL_SEQ_NULL ||
+    dirty_replay_from > dirty_extents_replay_from;
+  if (advance_dirty) {
+    DEBUG("dirty_extents_replay_from={} => {}",
+          dirty_extents_replay_from, dirty_replay_from);
+    dirty_extents_replay_from = dirty_replay_from;
+  }
+
+  update_alloc_info_replay_from(alloc_replay_from);
+
+  // the tail target may not pass either replay position, nor the head
+  const journal_seq_t target = std::min(dirty_replay_from, alloc_replay_from);
+  ceph_assert(target != JOURNAL_SEQ_NULL);
+  const auto head = segments.get_journal_head();
+  ceph_assert(head == JOURNAL_SEQ_NULL ||
+              head >= target);
+
+  const bool advance_target =
+    journal_tail_target == JOURNAL_SEQ_NULL ||
+    target > journal_tail_target;
+  if (advance_target) {
+    // log at INFO only once init is complete and the segment seq changes
+    const bool quiet =
+      !init_complete ||
+      journal_tail_target.segment_seq == target.segment_seq;
+    if (quiet) {
+      DEBUG("journal_tail_target={} => {}", journal_tail_target, target);
+    } else {
+      INFO("journal_tail_target={} => {}", journal_tail_target, target);
+    }
+    journal_tail_target = target;
+  }
+
+  gc_process.maybe_wake_on_space_used();
+  maybe_wake_gc_blocked_io();
+}
+
+/// Move alloc_info_replay_from forward to alloc_replay_from; it is set
+/// when currently null, and otherwise never moved backwards.
+void AsyncCleaner::update_alloc_info_replay_from(
+  journal_seq_t alloc_replay_from)
+{
+  LOG_PREFIX(AsyncCleaner::update_alloc_info_replay_from);
+  const bool is_advance =
+    alloc_info_replay_from == JOURNAL_SEQ_NULL ||
+    alloc_replay_from > alloc_info_replay_from;
+  if (!is_advance) {
+    return;
+  }
+  DEBUG("alloc_info_replay_from={} => {}",
+        alloc_info_replay_from, alloc_replay_from);
+  alloc_info_replay_from = alloc_replay_from;
+}
+
+/// Record a committed journal tail. A null value is ignored. Both
+/// journal_tail_committed and journal_tail_target are moved forward to
+/// committed when they are null or behind it.
+void AsyncCleaner::update_journal_tail_committed(journal_seq_t committed)
+{
+  LOG_PREFIX(AsyncCleaner::update_journal_tail_committed);
+  assert(committed.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
+  if (committed == JOURNAL_SEQ_NULL) {
+    return;
+  }
+
+  // the committed tail cannot be ahead of the journal head
+  const auto head = segments.get_journal_head();
+  ceph_assert(head == JOURNAL_SEQ_NULL ||
+              head >= committed);
+
+  const bool advance_committed =
+    journal_tail_committed == JOURNAL_SEQ_NULL ||
+    committed > journal_tail_committed;
+  if (advance_committed) {
+    DEBUG("update journal_tail_committed={} => {}",
+          journal_tail_committed, committed);
+    journal_tail_committed = committed;
+  }
+
+  const bool advance_target =
+    journal_tail_target == JOURNAL_SEQ_NULL ||
+    committed > journal_tail_target;
+  if (advance_target) {
+    DEBUG("update journal_tail_target={} => {}",
+          journal_tail_target, committed);
+    journal_tail_target = committed;
+  }
+}
+
+/// Close a segment: mark it closed, add its used and total bytes to the
+/// closed_* statistics of its type, and move it between the utilization
+/// buckets.
+void AsyncCleaner::close_segment(segment_id_t segment)
+{
+  LOG_PREFIX(AsyncCleaner::close_segment);
+  auto util_before = calc_utilization(segment);
+  segments.mark_closed(segment);
+
+  auto &closed_info = segments[segment];
+  const bool is_journal = (closed_info.type == segment_type_t::JOURNAL);
+  auto &used_stat = is_journal ?
+    stats.closed_journal_used_bytes : stats.closed_ool_used_bytes;
+  auto &total_stat = is_journal ?
+    stats.closed_journal_total_bytes : stats.closed_ool_total_bytes;
+  used_stat += space_tracker->get_usage(segment);
+  total_stat += segments.get_segment_size();
+
+  auto util_after = calc_utilization(segment);
+  adjust_segment_util(util_before, util_after);
+  INFO("closed, should_block_on_gc {}, projected_avail_ratio {}, "
+       "reclaim_ratio {}",
+       should_block_on_gc(),
+       get_projected_available_ratio(),
+       get_reclaim_ratio());
+}
+
+/// Forward to BackrefManager::merge_cached_backrefs() with the per-cycle
+/// byte budget taken from the configuration.
+AsyncCleaner::trim_backrefs_ret AsyncCleaner::trim_backrefs(
+  Transaction &t,
+  journal_seq_t limit)
+{
+  const auto &bytes_per_cycle = config.rewrite_backref_bytes_per_cycle;
+  return backref_manager.merge_cached_backrefs(t, limit, bytes_per_cycle);
+}
+
+/// Fetch the next batch of dirty extents up to limit, bounded by
+/// config.rewrite_dirty_bytes_per_cycle, and rewrite each of them within
+/// transaction t.
+///
+/// @param t      transaction in which the extents are rewritten; it must
+///               outlive the returned future since it is captured by
+///               reference
+/// @param limit  journal sequence passed to get_next_dirty_extents()
+AsyncCleaner::rewrite_dirty_ret AsyncCleaner::rewrite_dirty(
+  Transaction &t,
+  journal_seq_t limit)
+{
+  return ecb->get_next_dirty_extents(
+    t,
+    limit,
+    config.rewrite_dirty_bytes_per_cycle
+  // capture `this` explicitly: implicit capture through [=] is deprecated
+  // since C++20, and nothing else from the enclosing scope is needed
+  ).si_then([this, &t](auto dirty_list) {
+    LOG_PREFIX(AsyncCleaner::rewrite_dirty);
+    DEBUGT("rewrite {} dirty extents", t, dirty_list.size());
+    // keep the list alive until every extent has been rewritten
+    return seastar::do_with(
+      std::move(dirty_list),
+      [this, FNAME, &t](auto &dirty_list) {
+        return trans_intr::do_for_each(
+          dirty_list,
+          [this, FNAME, &t](auto &e) {
+            DEBUGT("cleaning {}", t, *e);
+            return ecb->rewrite_extent(t, e);
+          });
+      });
+  });
+}
+
+/// Main loop of the GC process: until stopping, wait for
+/// maybe_wait_should_run() to resolve, log the GC state and run one GC
+/// cycle.
+AsyncCleaner::gc_cycle_ret AsyncCleaner::GCProcess::run()
+{
+  auto should_stop = [this] { return is_stopping(); };
+  auto one_round = [this] {
+    return maybe_wait_should_run().then([this] {
+      cleaner.log_gc_state("GCProcess::run");
+
+      // stopping may have been requested while waiting
+      if (is_stopping()) {
+        return seastar::now();
+      }
+      return cleaner.do_gc_cycle();
+    });
+  };
+  return seastar::do_until(std::move(should_stop), std::move(one_round));
+}
+
+/// Run at most one kind of GC work, checked in this order: trim the
+/// journal, trim the backrefs, reclaim space. Any error from the chosen
+/// step is treated as fatal. Resolves immediately if nothing is needed.
+AsyncCleaner::gc_cycle_ret AsyncCleaner::do_gc_cycle()
+{
+  if (gc_should_trim_journal()) {
+    return gc_trim_journal(
+    ).handle_error(
+      crimson::ct_error::assert_all{
+        "GCProcess::run encountered invalid error in gc_trim_journal"
+      }
+    );
+  }
+
+  if (gc_should_trim_backref()) {
+    // the trimmed-to sequence is not needed here
+    return gc_trim_backref(get_backref_tail()
+    ).safe_then([](auto) {
+      return seastar::now();
+    }).handle_error(
+      crimson::ct_error::assert_all{
+        "GCProcess::run encountered invalid error in gc_trim_backref"
+      }
+    );
+  }
+
+  if (gc_should_reclaim_space()) {
+    return gc_reclaim_space(
+    ).handle_error(
+      crimson::ct_error::assert_all{
+        "GCProcess::run encountered invalid error in gc_reclaim_space"
+      }
+    );
+  }
+
+  return seastar::now();
+}
+
+AsyncCleaner::gc_trim_backref_ret
+AsyncCleaner::gc_trim_backref(journal_seq_t limit) { /*
+ * Trim cached backrefs up to `limit` inside a TRIM_BACKREF transaction,
+ * driven through repeat_eagain().
+ *
+ * If trim_backrefs() yields a non-null sequence, the transaction is
+ * submitted with that sequence and the sequence is the result; otherwise
+ * nothing is submitted and `limit` itself is the result.
+ */
+ return seastar::do_with(
+ journal_seq_t(), // `seq`: holds the result across the asynchronous chain
+ [this, limit=std::move(limit)](auto &seq) mutable {
+ return repeat_eagain([this, limit=std::move(limit), &seq] {
+ return ecb->with_transaction_intr(
+ Transaction::src_t::TRIM_BACKREF,
+ "trim_backref",
+ [this, limit](auto &t) {
+ return trim_backrefs(
+ t,
+ limit
+ ).si_then([this, &t, limit](auto trim_backrefs_to)
+ -> ExtentCallbackInterface::submit_transaction_direct_iertr::future<
+ journal_seq_t> {
+ if (trim_backrefs_to != JOURNAL_SEQ_NULL) { // something was merged: submit it
+ return ecb->submit_transaction_direct(
+ t, std::make_optional<journal_seq_t>(trim_backrefs_to)
+ ).si_then([trim_backrefs_to=std::move(trim_backrefs_to)]() mutable {
+ return seastar::make_ready_future<
+ journal_seq_t>(std::move(trim_backrefs_to));
+ });
+ }
+ return seastar::make_ready_future<journal_seq_t>(std::move(limit)); // nothing to submit
+ });
+ }).safe_then([&seq](auto trim_backrefs_to) {
+ seq = std::move(trim_backrefs_to); // remember the result of this attempt
+ });
+ }).safe_then([&seq] {
+ return gc_trim_backref_ertr::make_ready_future<
+ journal_seq_t>(std::move(seq));
+ });
+ });
+}
+
+/// Trim the journal: first trim the backrefs up to the dirty tail, then
+/// rewrite dirty extents up to the sequence that trimming returned, in a
+/// CLEANER_TRIM transaction driven through repeat_eagain().
+AsyncCleaner::gc_trim_journal_ret AsyncCleaner::gc_trim_journal()
+{
+  return gc_trim_backref(get_dirty_tail()
+  ).safe_then([this](auto seq) {
+    // repeat_eagain() may invoke this lambda more than once, so seq is
+    // captured by copy at every level and never moved out of a capture
+    // that a later attempt would read again
+    return repeat_eagain([this, seq] {
+      return ecb->with_transaction_intr(
+        Transaction::src_t::CLEANER_TRIM,
+        "trim_journal",
+        [this, seq](auto& t)
+      {
+        return rewrite_dirty(t, seq
+        ).si_then([this, &t] {
+          return ecb->submit_transaction_direct(t);
+        });
+      });
+    });
+  });
+}
+
+AsyncCleaner::retrieve_live_extents_ret
+AsyncCleaner::_retrieve_live_extents(
+ Transaction &t,
+ std::set<
+ backref_buf_entry_t,
+ backref_buf_entry_t::cmp_t> &&backrefs,
+ std::vector<CachedExtentRef> &extents)
+{ /*
+ * For every backref entry, ask the extent callback whether the extent is
+ * still live. Live extents are appended to `extents`. For dead ones the
+ * cached backref removal of that paddr is looked up and the largest
+ * removal sequence seen is tracked.
+ *
+ * Resolves to that sequence, or JOURNAL_SEQ_NULL when no dead entry was
+ * encountered. `t` and `extents` are captured by reference and must
+ * outlive the returned future.
+ */
+ return seastar::do_with(
+ JOURNAL_SEQ_NULL, // `seq`: largest removal sequence seen so far
+ std::move(backrefs), // keep the entries alive for the whole iteration
+ [this, &t, &extents](auto &seq, auto &backrefs) {
+ return trans_intr::parallel_for_each(
+ backrefs,
+ [this, &extents, &t, &seq](auto &ent) {
+ LOG_PREFIX(AsyncCleaner::_retrieve_live_extents);
+ DEBUGT("getting extent of type {} at {}~{}",
+ t,
+ ent.type,
+ ent.paddr,
+ ent.len);
+ return ecb->get_extent_if_live(
+ t, ent.type, ent.paddr, ent.laddr, ent.len
+ ).si_then([this, FNAME, &extents, &ent, &seq, &t](auto ext) {
+ if (!ext) { // a null result means the extent is no longer live
+ DEBUGT("addr {} dead, skipping", t, ent.paddr);
+ auto backref = backref_manager.get_cached_backref_removal(ent.paddr);
+ if (seq == JOURNAL_SEQ_NULL || seq < backref.seq) {
+ seq = backref.seq;
+ }
+ } else {
+ extents.emplace_back(std::move(ext));
+ }
+ return ExtentCallbackInterface::rewrite_extent_iertr::now();
+ });
+ }).si_then([&seq] {
+ return retrieve_live_extents_iertr::make_ready_future<
+ journal_seq_t>(std::move(seq));
+ });
+ });
+}
+
+AsyncCleaner::retrieve_backref_mappings_ret
+AsyncCleaner::retrieve_backref_mappings(
+ paddr_t start_paddr,
+ paddr_t end_paddr)
+{ /*
+ * Read the backref mappings for the range given by start_paddr and
+ * end_paddr from the backref manager, inside a READ transaction driven
+ * through repeat_eagain(), and resolve to the resulting pin list.
+ */
+ return seastar::do_with(
+ backref_pin_list_t(), // receives the list from the last attempt
+ [this, start_paddr, end_paddr](auto &pin_list) {
+ return repeat_eagain([this, start_paddr, end_paddr, &pin_list] {
+ return ecb->with_transaction_intr(
+ Transaction::src_t::READ,
+ "get_backref_mappings",
+ [this, start_paddr, end_paddr](auto &t) {
+ return backref_manager.get_mappings(
+ t, start_paddr, end_paddr
+ );
+ }).safe_then([&pin_list](auto&& list) {
+ pin_list = std::move(list); // overwritten on every attempt
+ });
+ }).safe_then([&pin_list] {
+ return seastar::make_ready_future<backref_pin_list_t>(std::move(pin_list));
+ });
+ });
+}
+
+AsyncCleaner::gc_reclaim_space_ret AsyncCleaner::gc_reclaim_space()
+{ /*
+ * Run one reclaim cycle on the segment currently being reclaimed,
+ * selecting a new closed segment first when no reclaim is in progress.
+ *
+ * Steps, as performed below:
+ * 1. advance reclaim_state by config.reclaim_bytes_per_cycle and read the
+ * backref mappings (pins) for [start_pos, end_pos);
+ * 2. in a CLEANER_RECLAIM transaction driven through repeat_eagain(),
+ * combine the pins with the cached backrefs of that range, drop the
+ * entries that have a cached removal, collect the extents that are
+ * still live, merge cached backrefs up to the largest sequence seen,
+ * and rewrite every collected extent;
+ * 3. when reclaim_state reports completion, mark the segment for release
+ * in that transaction; then submit it;
+ * 4. update the reclaim statistics and reset reclaim_state once complete.
+ */
+ LOG_PREFIX(AsyncCleaner::gc_reclaim_space);
+ if (!reclaim_state) { // no reclaim in progress: start on the next candidate segment
+ segment_id_t seg_id = get_next_reclaim_segment();
+ auto &segment_info = segments[seg_id];
+ INFO("reclaim {} {} start", seg_id, segment_info);
+ ceph_assert(segment_info.is_closed());
+ reclaim_state = reclaim_state_t::create(
+ seg_id, segments.get_segment_size());
+ }
+ reclaim_state->advance(config.reclaim_bytes_per_cycle); // presumably moves start_pos/end_pos forward; reclaim_state_t is defined elsewhere
+
+ DEBUG("reclaiming {}~{}",
+ reclaim_state->start_pos,
+ reclaim_state->end_pos);
+ double pavail_ratio = get_projected_available_ratio(); // sampled before the cycle, only used for logging
+ seastar::lowres_system_clock::time_point start = seastar::lowres_system_clock::now();
+
+ return seastar::do_with(
+ (size_t)0, // `reclaimed`: bytes rewritten by the last attempt
+ (size_t)0, // `runs`: number of attempts made by repeat_eagain()
+ [this, pavail_ratio, start](
+ auto &reclaimed,
+ auto &runs) {
+ return retrieve_backref_mappings(
+ reclaim_state->start_pos,
+ reclaim_state->end_pos
+ ).safe_then([this, &reclaimed, &runs](auto pin_list) {
+ return seastar::do_with(
+ std::move(pin_list),
+ [this, &reclaimed, &runs](auto &pin_list) {
+ return repeat_eagain(
+ [this, &reclaimed, &runs, &pin_list]() mutable {
+ reclaimed = 0; // restart the byte count on every attempt
+ runs++;
+ return seastar::do_with(
+ backref_manager.get_cached_backref_extents_in_range(
+ reclaim_state->start_pos, reclaim_state->end_pos),
+ backref_manager.get_cached_backrefs_in_range(
+ reclaim_state->start_pos, reclaim_state->end_pos),
+ backref_manager.get_cached_backref_removals_in_range(
+ reclaim_state->start_pos, reclaim_state->end_pos),
+ JOURNAL_SEQ_NULL, // `seq`: largest journal sequence seen in this attempt
+ [this, &reclaimed, &pin_list](
+ auto &backref_extents,
+ auto &backrefs,
+ auto &del_backrefs,
+ auto &seq) {
+ return ecb->with_transaction_intr(
+ Transaction::src_t::CLEANER_RECLAIM,
+ "reclaim_space",
+ [this, &backref_extents, &backrefs, &seq,
+ &del_backrefs, &reclaimed, &pin_list](auto &t) {
+ LOG_PREFIX(AsyncCleaner::gc_reclaim_space);
+ DEBUGT("{} backrefs, {} del_backrefs, {} pins", t,
+ backrefs.size(), del_backrefs.size(), pin_list.size());
+ for (auto &br : backrefs) { // track the largest sequence among the cached backrefs
+ if (seq == JOURNAL_SEQ_NULL
+ || (br.seq != JOURNAL_SEQ_NULL && br.seq > seq))
+ seq = br.seq;
+ }
+ for (auto &pin : pin_list) { // add the mappings read from the backref manager, with a default-constructed sequence
+ backrefs.emplace(
+ pin->get_key(),
+ pin->get_val(),
+ pin->get_length(),
+ pin->get_type(),
+ journal_seq_t());
+ }
+ for (auto &del_backref : del_backrefs) { // entries with a cached removal are not candidates
+ DEBUGT("del_backref {}~{} {} {}", t,
+ del_backref.paddr, del_backref.len, del_backref.type, del_backref.seq);
+ auto it = backrefs.find(del_backref.paddr);
+ if (it != backrefs.end())
+ backrefs.erase(it);
+ if (seq == JOURNAL_SEQ_NULL
+ || (del_backref.seq != JOURNAL_SEQ_NULL && del_backref.seq > seq))
+ seq = del_backref.seq;
+ }
+ return seastar::do_with(
+ std::vector<CachedExtentRef>(), // `extents`: everything that must be rewritten
+ [this, &backref_extents, &backrefs, &reclaimed, &t, &seq]
+ (auto &extents) {
+ return backref_manager.retrieve_backref_extents(
+ t, std::move(backref_extents), extents
+ ).si_then([this, &extents, &t, &backrefs] {
+ return _retrieve_live_extents(
+ t, std::move(backrefs), extents);
+ }).si_then([this, &seq, &t](auto nseq) {
+ if (nseq != JOURNAL_SEQ_NULL &&
+ (nseq > seq || seq == JOURNAL_SEQ_NULL))
+ seq = nseq;
+ auto fut = BackrefManager::merge_cached_backrefs_iertr::now();
+ if (seq != JOURNAL_SEQ_NULL) { // merge cached backrefs up to seq, without a byte limit
+ fut = backref_manager.merge_cached_backrefs(
+ t, seq, std::numeric_limits<uint64_t>::max()
+ ).si_then([](auto) {
+ return BackrefManager::merge_cached_backrefs_iertr::now();
+ });
+ }
+ return fut;
+ }).si_then([&extents, this, &t, &reclaimed] {
+ return trans_intr::do_for_each(
+ extents,
+ [this, &t, &reclaimed](auto &ext) {
+ reclaimed += ext->get_length();
+ return ecb->rewrite_extent(t, ext);
+ });
+ });
+ }).si_then([this, &t, &seq] {
+ if (reclaim_state->is_complete()) { // the segment is only marked for release once reclaim_state reports completion
+ t.mark_segment_to_release(reclaim_state->get_segment_id());
+ }
+ return ecb->submit_transaction_direct(
+ t, std::make_optional<journal_seq_t>(std::move(seq)));
+ });
+ });
+ });
+ });
+ });
+ }).safe_then(
+ [&reclaimed, this, pavail_ratio, start, &runs] {
+ LOG_PREFIX(AsyncCleaner::gc_reclaim_space);
+#ifndef NDEBUG
+ auto ndel_backrefs = // debug-only check: no cached removal may remain in the range at this point
+ backref_manager.get_cached_backref_removals_in_range(
+ reclaim_state->start_pos, reclaim_state->end_pos);
+ if (!ndel_backrefs.empty()) {
+ for (auto &del_br : ndel_backrefs) {
+ ERROR("unexpected del_backref {}~{} {} {}",
+ del_br.paddr, del_br.len, del_br.type, del_br.seq);
+ }
+ ceph_abort("impossible");
+ }
+#endif
+ stats.reclaiming_bytes += reclaimed; // accumulated over the cycles of one segment
+ auto d = seastar::lowres_system_clock::now() - start;
+ DEBUG("duration: {}, pavail_ratio before: {}, repeats: {}", d, pavail_ratio, runs);
+ if (reclaim_state->is_complete()) { // segment done: fold the totals and clear the state
+ INFO("reclaim {} finish, alive/total={}",
+ reclaim_state->get_segment_id(),
+ stats.reclaiming_bytes/(double)segments.get_segment_size());
+ stats.reclaimed_bytes += stats.reclaiming_bytes;
+ stats.reclaimed_segment_bytes += segments.get_segment_size();
+ stats.reclaiming_bytes = 0;
+ reclaim_state.reset();
+ }
+ });
+ });
+}
+
+// Mount the cleaner.
+//
+// Resets the in-memory bookkeeping (stats, journal tail/replay positions,
+// space tracker, segment table, metrics), registers every segment of every
+// segment manager, and then reads each segment's header and tail to decide
+// how the segment is recorded.
+//
+// Returns a mount_ertr future; input_output_error from the header read is
+// passed on to the caller, any other unexpected error asserts.
+AsyncCleaner::mount_ret AsyncCleaner::mount()
+{
+ LOG_PREFIX(AsyncCleaner::mount);
+ const auto& sms = sm_group->get_segment_managers();
+ INFO("{} segment managers", sms.size());
+ // drop whatever state a previous mount may have left behind
+ init_complete = false;
+ stats = {};
+ journal_tail_target = JOURNAL_SEQ_NULL;
+ journal_tail_committed = JOURNAL_SEQ_NULL;
+ dirty_extents_replay_from = JOURNAL_SEQ_NULL;
+ alloc_info_replay_from = JOURNAL_SEQ_NULL;
+
+ // the `detailed` flag selects which SpaceTrackerI implementation is used
+ space_tracker.reset(
+ detailed ?
+ (SpaceTrackerI*)new SpaceTrackerDetailed(
+ sms) :
+ (SpaceTrackerI*)new SpaceTrackerSimple(
+ sms));
+
+ segments.reset();
+ for (auto sm : sms) {
+ segments.add_segment_manager(*sm);
+ }
+ metrics.clear();
+ register_metrics();
+
+ INFO("{} segments", segments.get_num_segments());
+ // segment_set is kept alive for the whole scan and only forwarded to
+ // scan_nonfull_segment()
+ return seastar::do_with(
+ std::vector<std::pair<segment_id_t, segment_header_t>>(),
+ [this, FNAME](auto& segment_set) {
+ return crimson::do_for_each(
+ segments.begin(),
+ segments.end(),
+ [this, FNAME, &segment_set](auto& it) {
+ auto segment_id = it.first;
+ return sm_group->read_segment_header(
+ segment_id
+ ).safe_then([segment_id, this, FNAME, &segment_set](auto header) {
+ INFO("segment_id={} -- {}", segment_id, header);
+ auto s_type = header.get_type();
+ // a readable header with a NULL_SEG type is treated as fatal
+ if (s_type == segment_type_t::NULL_SEG) {
+ ERROR("got null segment, segment_id={} -- {}", segment_id, header);
+ ceph_abort();
+ }
+ return sm_group->read_segment_tail(
+ segment_id
+ ).safe_then([this, segment_id, &segment_set, header](auto tail)
+ -> scan_extents_ertr::future<> {
+ // the tail does not carry this header's nonce: do not use it,
+ // scan the segment's records instead
+ if (tail.segment_nonce != header.segment_nonce) {
+ return scan_nonfull_segment(header, segment_set, segment_id);
+ }
+ // matching tail: take the modification/rewrite times from it
+ time_point last_modified(duration(tail.last_modified));
+ time_point last_rewritten(duration(tail.last_rewritten));
+ segments.update_last_modified_rewritten(
+ segment_id, last_modified, last_rewritten);
+ // journal tails additionally carry the journal tail positions
+ if (tail.get_type() == segment_type_t::JOURNAL) {
+ update_journal_tail_committed(tail.journal_tail);
+ update_journal_tail_target(
+ tail.journal_tail,
+ tail.alloc_replay_from);
+ }
+ init_mark_segment_closed(
+ segment_id,
+ header.segment_seq,
+ header.type);
+ return seastar::now();
+ }).handle_error(
+ // no tail data could be read: scan the segment's records instead
+ crimson::ct_error::enodata::handle(
+ [this, header, segment_id, &segment_set](auto) {
+ return scan_nonfull_segment(header, segment_set, segment_id);
+ }),
+ crimson::ct_error::pass_further_all{}
+ );
+ }).handle_error(
+ // enoent/enodata while reading the header are not failures; nothing
+ // is recorded for the segment in that case
+ crimson::ct_error::enoent::handle([](auto) {
+ return mount_ertr::now();
+ }),
+ crimson::ct_error::enodata::handle([](auto) {
+ return mount_ertr::now();
+ }),
+ crimson::ct_error::input_output_error::pass_further{},
+ crimson::ct_error::assert_all{"unexpected error"}
+ );
+ });
+ });
+}
+
+// Scan the valid records of a segment whose tail could not be used and
+// then mark the segment closed.
+//
+// header:      the segment header read by the caller (copied into the
+//              continuations)
+// segment_set: NOTE(review): not referenced anywhere in this body --
+//              confirm whether callers still need to pass it
+// segment_id:  the segment to scan
+//
+// For OOL segments the record headers are decoded and the segment's
+// last_modified / last_rewritten times are updated from the commit times.
+// For any other segment type the deltas are decoded and ALLOC_TAIL deltas
+// are fed to update_alloc_info_replay_from().
+AsyncCleaner::scan_extents_ret AsyncCleaner::scan_nonfull_segment(
+ const segment_header_t& header,
+ scan_extents_ret_bare& segment_set,
+ segment_id_t segment_id)
+{
+ return seastar::do_with(
+ // the cursor starts at offset 0 of the segment and uses the sequence
+ // currently stored in the segment table for this segment
+ scan_valid_records_cursor({
+ segments[segment_id].seq,
+ paddr_t::make_seg_paddr(segment_id, 0)}),
+ [this, segment_id, segment_header=header](auto& cursor) {
+ return seastar::do_with(
+ // handler invoked for each record group passed in by scan_valid_records()
+ SegmentManagerGroup::found_record_handler_t(
+ [this, segment_id, segment_header](
+ record_locator_t locator,
+ const record_group_header_t& header,
+ const bufferlist& mdbuf
+ ) mutable -> SegmentManagerGroup::scan_valid_records_ertr::future<> {
+ LOG_PREFIX(AsyncCleaner::scan_nonfull_segment);
+ if (segment_header.get_type() == segment_type_t::OOL) {
+ DEBUG("out-of-line segment {}, decodeing {} records",
+ segment_id,
+ header.records);
+ auto maybe_headers = try_decode_record_headers(header, mdbuf);
+ if (!maybe_headers) {
+ ERROR("unable to decode record headers for record group {}",
+ locator.record_block_base);
+ return crimson::ct_error::input_output_error::make();
+ }
+
+ // NB: this loop variable shadows the record group `header` parameter
+ for (auto& header : *maybe_headers) {
+ mod_time_point_t ctime = header.commit_time;
+ auto commit_type = header.commit_type;
+ // a zero commit time is treated as fatal
+ if (!ctime) {
+ ERROR("AsyncCleaner::scan_nonfull_segment: extent {} 0 commit_time",
+ ctime);
+ ceph_abort("0 commit_time");
+ }
+ time_point commit_time{duration(ctime)};
+ assert(commit_type == record_commit_type_t::MODIFY
+ || commit_type == record_commit_type_t::REWRITE);
+ // MODIFY records advance last_modified, REWRITE records advance
+ // last_rewritten; the other timestamp is passed default-constructed
+ if (commit_type == record_commit_type_t::MODIFY) {
+ segments.update_last_modified_rewritten(segment_id, commit_time, {});
+ }
+ if (commit_type == record_commit_type_t::REWRITE) {
+ segments.update_last_modified_rewritten(segment_id, {}, commit_time);
+ }
+ }
+ } else {
+ DEBUG("inline segment {}, decodeing {} records",
+ segment_id,
+ header.records);
+ auto maybe_record_deltas_list = try_decode_deltas(
+ header, mdbuf, locator.record_block_base);
+ if (!maybe_record_deltas_list) {
+ ERROR("unable to decode deltas for record {} at {}",
+ header, locator);
+ return crimson::ct_error::input_output_error::make();
+ }
+ // only ALLOC_TAIL deltas are of interest here: each one carries an
+ // encoded journal_seq_t
+ for (auto &record_deltas : *maybe_record_deltas_list) {
+ for (auto &[ctime, delta] : record_deltas.deltas) {
+ if (delta.type == extent_types_t::ALLOC_TAIL) {
+ journal_seq_t seq;
+ decode(seq, delta.bl);
+ update_alloc_info_replay_from(seq);
+ }
+ }
+ }
+ }
+ return seastar::now();
+ }),
+ [&cursor, segment_header, this](auto& handler) {
+ return sm_group->scan_valid_records(
+ cursor,
+ segment_header.segment_nonce,
+ segments.get_segment_size(),
+ handler);
+ }
+ );
+ }).safe_then([this, segment_id, header](auto) {
+ // scanning finished: record the segment as closed
+ init_mark_segment_closed(
+ segment_id,
+ header.segment_seq,
+ header.type);
+ return seastar::now();
+ });
+}
+
+// Release the segment that transaction `t` has marked for release, if any.
+//
+// When `t` carries NULL_SEG_ID the returned future is already resolved.
+// Otherwise the segment is released through the segment manager group and,
+// once that succeeds, marked empty in the local segment table; IO blocked
+// on gc is then given a chance to proceed.
+AsyncCleaner::release_ertr::future<>
+AsyncCleaner::maybe_release_segment(Transaction &t)
+{
+  auto victim = t.get_segment_to_release();
+  if (victim == NULL_SEG_ID) {
+    // this transaction did not schedule any segment for release
+    return SegmentManager::release_ertr::now();
+  }
+
+  LOG_PREFIX(AsyncCleaner::maybe_release_segment);
+  INFOT("releasing segment {}", t, victim);
+  return sm_group->release_segment(
+    victim
+  ).safe_then([this, FNAME, &t, victim] {
+    // a segment being released must not hold live data any more
+    auto util_before = calc_utilization(victim);
+    ceph_assert(util_before == 0);
+    segments.mark_empty(victim);
+    auto util_after = calc_utilization(victim);
+    adjust_segment_util(util_before, util_after);
+    INFOT("released, should_block_on_gc {}, projected_avail_ratio {}, "
+          "reclaim_ratio {}",
+          t,
+          should_block_on_gc(),
+          get_projected_available_ratio(),
+          get_reclaim_ratio());
+    // cross-check against the space tracker; dump its view before aborting
+    if (space_tracker->get_usage(victim) != 0) {
+      space_tracker->dump_usage(victim);
+      ceph_abort();
+    }
+    maybe_wake_gc_blocked_io();
+  });
+}
+
+// Mark initialization as complete and, unless trimming is disabled,
+// start the background gc process.
+void AsyncCleaner::complete_init()
+{
+  LOG_PREFIX(AsyncCleaner::complete_init);
+  if (!disable_trim) {
+    INFO("done, start GC");
+    // gc needs a valid journal head before it can be started
+    ceph_assert(segments.get_journal_head() != JOURNAL_SEQ_NULL);
+    init_complete = true;
+    gc_process.start();
+  } else {
+    // with trimming disabled only the flag is set, gc is never started
+    init_complete = true;
+  }
+}
+
+// Account `len` bytes at `addr` as live.
+//
+// Addresses that are not of SEGMENT type are ignored. Before
+// initialization has completed only calls made with init_scan == true are
+// honoured. Updates the used-bytes counter, the space tracker, the
+// utilization histogram and the segment's last_modified/last_rewritten
+// times, then gives the gc process a chance to wake up.
+void AsyncCleaner::mark_space_used(
+  paddr_t addr,
+  extent_len_t len,
+  time_point last_modified,
+  time_point last_rewritten,
+  bool init_scan)
+{
+  LOG_PREFIX(AsyncCleaner::mark_space_used);
+  if (addr.get_addr_type() != addr_types_t::SEGMENT) {
+    return;
+  }
+  if (!(init_scan || init_complete)) {
+    return;
+  }
+
+  auto& seg_addr = addr.as_seg_paddr();
+  const auto seg_id = seg_addr.get_segment_id();
+
+  stats.used_bytes += len;
+
+  // move the segment between utilization buckets around the allocation
+  const auto util_before = calc_utilization(seg_id);
+  [[maybe_unused]] auto live_bytes = space_tracker->allocate(
+    seg_id,
+    seg_addr.get_segment_off(),
+    len);
+  const auto util_after = calc_utilization(seg_id);
+  adjust_segment_util(util_before, util_after);
+
+  // The last extent's modification time is used to project how long the
+  // segment's live extents will stay unmodified; this approximates the
+  // segment "age" of sprite lfs.
+  segments.update_last_modified_rewritten(
+    seg_id, last_modified, last_rewritten);
+
+  gc_process.maybe_wake_on_space_used();
+  assert(live_bytes > 0);
+  DEBUG("segment {} new len: {}~{}, live_bytes: {}",
+        seg_id,
+        addr,
+        len,
+        space_tracker->get_usage(seg_id));
+}
+
+// Account `len` bytes at `addr` as no longer live.
+//
+// Does nothing before initialization has completed or for addresses that
+// are not of SEGMENT type. Updates the used-bytes counter, the space
+// tracker and the utilization histogram, then wakes IO blocked on gc if
+// possible.
+void AsyncCleaner::mark_space_free(
+  paddr_t addr,
+  extent_len_t len)
+{
+  LOG_PREFIX(AsyncCleaner::mark_space_free);
+  if (!init_complete ||
+      addr.get_addr_type() != addr_types_t::SEGMENT) {
+    return;
+  }
+
+  // freeing more than is accounted as used is a bookkeeping error
+  ceph_assert(stats.used_bytes >= len);
+  stats.used_bytes -= len;
+
+  auto& seg_addr = addr.as_seg_paddr();
+  const auto seg_id = seg_addr.get_segment_id();
+
+  DEBUG("segment {} free len: {}~{}",
+        seg_id, addr, len);
+
+  // move the segment between utilization buckets around the release
+  const auto util_before = calc_utilization(seg_id);
+  [[maybe_unused]] auto live_bytes = space_tracker->release(
+    seg_id,
+    seg_addr.get_segment_off(),
+    len);
+  const auto util_after = calc_utilization(seg_id);
+  adjust_segment_util(util_before, util_after);
+
+  maybe_wake_gc_blocked_io();
+  assert(live_bytes >= 0);
+  DEBUG("segment {} free len: {}~{}, live_bytes: {}",
+        seg_id,
+        addr,
+        len,
+        space_tracker->get_usage(seg_id));
+}
+
+// Pick the segment to reclaim next: among closed segments that are not
+// part of the journal (relative to journal_tail_committed), the one with
+// the highest gc benefit/cost. Aborts if no candidate is found.
+segment_id_t AsyncCleaner::get_next_reclaim_segment() const
+{
+  LOG_PREFIX(AsyncCleaner::get_next_reclaim_segment);
+  segment_id_t best = NULL_SEG_ID;
+  double best_score = 0;
+  for (auto& [seg_id, info] : segments) {
+    // only closed segments outside of the journal are candidates
+    if (!info.is_closed() ||
+        info.is_in_journal(journal_tail_committed)) {
+      continue;
+    }
+    double score = calc_gc_benefit_cost(seg_id);
+    if (score > best_score) {
+      best = seg_id;
+      best_score = score;
+    }
+  }
+
+  if (best == NULL_SEG_ID) {
+    ceph_assert(get_segments_reclaimable() == 0);
+    // see gc_should_reclaim_space()
+    ceph_abort("impossible!");
+    return NULL_SEG_ID;
+  }
+
+  DEBUG("segment {}, benefit_cost {}",
+        best, best_score);
+  return best;
+}
+
+// Emit one debug line summarising the gc-relevant state.
+//
+// caller: free-form tag identifying the call site, printed first.
+//
+// Nothing is evaluated unless debug logging is enabled and trimming is not
+// disabled. The arguments below are positional and must stay in the same
+// order as the placeholders in the format string.
+void AsyncCleaner::log_gc_state(const char *caller) const
+{
+ LOG_PREFIX(AsyncCleaner::log_gc_state);
+ if (LOCAL_LOGGER.is_enabled(seastar::log_level::debug) &&
+ !disable_trim) {
+ DEBUG(
+ "caller {}, "
+ "empty {}, "
+ "open {}, "
+ "closed {}, "
+ "in_journal {}, "
+ "total {}B, "
+ "available {}B, "
+ "unavailable {}B, "
+ "unavailable_used {}B, "
+ "unavailable_unused {}B; "
+ "reclaim_ratio {}, "
+ "available_ratio {}, "
+ "should_block_on_gc {}, "
+ "gc_should_reclaim_space {}, "
+ "journal_head {}, "
+ "journal_tail_target {}, "
+ "journal_tail_commit {}, "
+ "dirty_tail {}, "
+ "dirty_tail_limit {}, "
+ "gc_should_trim_journal {}, ",
+ caller,
+ segments.get_num_empty(),
+ segments.get_num_open(),
+ segments.get_num_closed(),
+ get_segments_in_journal(),
+ segments.get_total_bytes(),
+ segments.get_available_bytes(),
+ segments.get_unavailable_bytes(),
+ // "unavailable_used" is reported from stats.used_bytes
+ stats.used_bytes,
+ get_unavailable_unused_bytes(),
+ get_reclaim_ratio(),
+ segments.get_available_ratio(),
+ should_block_on_gc(),
+ gc_should_reclaim_space(),
+ segments.get_journal_head(),
+ journal_tail_target,
+ journal_tail_committed,
+ get_dirty_tail(),
+ get_dirty_tail_limit(),
+ gc_should_trim_journal()
+ );
+ }
+}
+
+// Reserve `projected_usage` bytes for an IO about to be prepared.
+//
+// Resolves immediately when trimming is disabled. Otherwise the returned
+// future resolves once should_block_on_gc() is false, at which point the
+// projected usage is added to the statistics. Blocking statistics are
+// updated along the way.
+seastar::future<>
+AsyncCleaner::reserve_projected_usage(std::size_t projected_usage)
+{
+  if (disable_trim) {
+    return seastar::now();
+  }
+  ceph_assert(init_complete);
+  // The pipeline configuration prevents another IO from entering
+  // prepare until the prior one exits and clears this.
+  ceph_assert(!blocked_io_wake);
+  ++stats.io_count;
+
+  // record why (if at all) this IO starts out blocked
+  const bool trim_blocked = should_block_on_trim();
+  if (trim_blocked) {
+    ++stats.io_blocked_count_trim;
+  }
+  const bool reclaim_blocked = should_block_on_reclaim();
+  if (reclaim_blocked) {
+    ++stats.io_blocked_count_reclaim;
+  }
+  const bool is_blocked = trim_blocked || reclaim_blocked;
+  if (is_blocked) {
+    ++stats.io_blocking_num;
+    ++stats.io_blocked_count;
+    stats.io_blocked_sum += stats.io_blocking_num;
+  }
+
+  return seastar::do_until(
+    [this] {
+      // stop waiting as soon as gc no longer requires blocking
+      log_gc_state("await_hard_limits");
+      return !should_block_on_gc();
+    },
+    [this] {
+      // park on a fresh promise until someone wakes blocked IO
+      blocked_io_wake = seastar::promise<>();
+      return blocked_io_wake->get_future();
+    }
+  ).then([this, projected_usage, is_blocked] {
+    ceph_assert(!blocked_io_wake);
+    stats.projected_used_bytes += projected_usage;
+    ++stats.projected_count;
+    stats.projected_used_bytes_sum += stats.projected_used_bytes;
+    if (is_blocked) {
+      assert(stats.io_blocking_num > 0);
+      --stats.io_blocking_num;
+    }
+  });
+}
+
+// Give back `projected_usage` bytes previously reserved with
+// reserve_projected_usage() and wake blocked IO if possible.
+// No-op when trimming is disabled.
+void AsyncCleaner::release_projected_usage(std::size_t projected_usage)
+{
+  if (disable_trim) {
+    return;
+  }
+  ceph_assert(init_complete);
+  // cannot release more than is currently reserved
+  ceph_assert(stats.projected_used_bytes >= projected_usage);
+  stats.projected_used_bytes -= projected_usage;
+  maybe_wake_gc_blocked_io();
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <boost/intrusive/set.hpp>
+#include <seastar/core/metrics_types.hh>
+
+#include "common/ceph_time.h"
+
+#include "osd/osd_types.h"
+
+#include "crimson/os/seastore/backref_manager.h"
+#include "crimson/os/seastore/cached_extent.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/segment_manager_group.h"
+#include "crimson/os/seastore/transaction.h"
+#include "crimson/os/seastore/segment_seq_allocator.h"
+
+namespace crimson::os::seastore {
+
+/*
+ * segment_info_t
+ *
+ * Per-segment bookkeeping: state, sequence, type, timestamps and the
+ * write position. Intended to be modified only through segments_info_t
+ * and treated as read-only everywhere else.
+ */
+struct segment_info_t {
+  using time_point = seastar::lowres_system_clock::time_point;
+
+  // the defaults below match what set_empty() establishes
+  Segment::segment_state_t state = Segment::segment_state_t::EMPTY;
+
+  // Will be non-null for any segments in the current journal
+  segment_seq_t seq = NULL_SEG_SEQ;
+
+  segment_type_t type = segment_type_t::NULL_SEG;
+
+  time_point last_modified;
+  time_point last_rewritten;
+
+  std::size_t written_to = 0;
+
+  /// true for a JOURNAL segment whose seq is not older than tail_committed
+  bool is_in_journal(journal_seq_t tail_committed) const {
+    if (type != segment_type_t::JOURNAL) {
+      return false;
+    }
+    return tail_committed.segment_seq <= seq;
+  }
+
+  bool is_empty() const {
+    return state == Segment::segment_state_t::EMPTY;
+  }
+
+  bool is_closed() const {
+    return state == Segment::segment_state_t::CLOSED;
+  }
+
+  bool is_open() const {
+    return state == Segment::segment_state_t::OPEN;
+  }
+
+  void init_closed(segment_seq_t, segment_type_t, std::size_t);
+
+  void set_open(segment_seq_t, segment_type_t);
+
+  void set_empty();
+
+  void set_closed();
+
+  /// Advance the stored timestamps. A default-constructed argument means
+  /// "no update"; a stored timestamp is only ever moved forward.
+  void update_last_modified_rewritten(
+    time_point _last_modified, time_point _last_rewritten) {
+    const time_point unset{};
+    auto advance = [&unset](time_point &stored, time_point candidate) {
+      if (candidate != unset && stored < candidate) {
+        stored = candidate;
+      }
+    };
+    advance(last_modified, _last_modified);
+    advance(last_rewritten, _last_rewritten);
+  }
+};
+
+std::ostream& operator<<(std::ostream&, const segment_info_t&);
+
+/*
+ * segments_info_t
+ *
+ * The table of all known segments together with aggregate counters
+ * (per state, per type, open/close/release events) and byte totals.
+ */
+class segments_info_t {
+public:
+  using time_point = seastar::lowres_system_clock::time_point;
+
+  segments_info_t() {
+    // every member is given its initial value by reset()
+    reset();
+  }
+
+  /// read-only access to a single segment's info
+  const segment_info_t& operator[](segment_id_t id) const {
+    return segments[id];
+  }
+
+  auto begin() const {
+    return segments.begin();
+  }
+
+  auto end() const {
+    return segments.end();
+  }
+
+  // --- sizes ---
+
+  std::size_t get_num_segments() const {
+    assert(segments.size() > 0);
+    return segments.size();
+  }
+  std::size_t get_segment_size() const {
+    assert(segment_size > 0);
+    return segment_size;
+  }
+
+  // --- current number of segments by type / state ---
+
+  std::size_t get_num_in_journal_open() const {
+    return num_in_journal_open;
+  }
+  std::size_t get_num_type_journal() const {
+    return num_type_journal;
+  }
+  std::size_t get_num_type_ool() const {
+    return num_type_ool;
+  }
+  std::size_t get_num_open() const {
+    return num_open;
+  }
+  std::size_t get_num_empty() const {
+    return num_empty;
+  }
+  std::size_t get_num_closed() const {
+    return num_closed;
+  }
+
+  // --- event counters ---
+
+  std::size_t get_count_open_journal() const {
+    return count_open_journal;
+  }
+  std::size_t get_count_open_ool() const {
+    return count_open_ool;
+  }
+  std::size_t get_count_release_journal() const {
+    return count_release_journal;
+  }
+  std::size_t get_count_release_ool() const {
+    return count_release_ool;
+  }
+  std::size_t get_count_close_journal() const {
+    return count_close_journal;
+  }
+  std::size_t get_count_close_ool() const {
+    return count_close_ool;
+  }
+
+  // --- byte accounting ---
+
+  std::size_t get_total_bytes() const {
+    return total_bytes;
+  }
+  /// the available space that is writable, including in open segments
+  std::size_t get_available_bytes() const {
+    return num_empty * get_segment_size() + avail_bytes_in_open;
+  }
+  /// the unavailable space that is not writable
+  std::size_t get_unavailable_bytes() const {
+    const auto available = get_available_bytes();
+    assert(total_bytes >= available);
+    return total_bytes - available;
+  }
+  std::size_t get_available_bytes_in_open() const {
+    return avail_bytes_in_open;
+  }
+  /// fraction of total_bytes that is currently available
+  double get_available_ratio() const {
+    return static_cast<double>(get_available_bytes()) /
+           static_cast<double>(total_bytes);
+  }
+
+  /// The current journal write position, or JOURNAL_SEQ_NULL while no
+  /// journal segment is recorded.
+  journal_seq_t get_journal_head() const {
+    if (unlikely(journal_segment_id == NULL_SEG_ID)) {
+      return JOURNAL_SEQ_NULL;
+    }
+    const auto &info = segments[journal_segment_id];
+    assert(!info.is_empty());
+    assert(info.type == segment_type_t::JOURNAL);
+    assert(info.seq != NULL_SEG_SEQ);
+    auto head_addr = paddr_t::make_seg_paddr(
+      journal_segment_id,
+      info.written_to);
+    return journal_seq_t{info.seq, head_addr};
+  }
+
+  void reset();
+
+  void add_segment_manager(SegmentManager &segment_manager);
+
+  // initiate non-empty segments, the others are by default empty
+  void init_closed(segment_id_t, segment_seq_t, segment_type_t);
+
+  void mark_open(segment_id_t, segment_seq_t, segment_type_t);
+
+  void mark_empty(segment_id_t);
+
+  void mark_closed(segment_id_t);
+
+  void update_written_to(segment_type_t, paddr_t);
+
+  /// forwards to segment_info_t::update_last_modified_rewritten()
+  void update_last_modified_rewritten(
+    segment_id_t id, time_point last_modified, time_point last_rewritten) {
+    auto &info = segments[id];
+    info.update_last_modified_rewritten(last_modified, last_rewritten);
+  }
+
+private:
+  // See reset() for member initialization
+  segment_map_t<segment_info_t> segments;
+
+  std::size_t segment_size;
+
+  segment_id_t journal_segment_id;
+  std::size_t num_in_journal_open;
+  std::size_t num_type_journal;
+  std::size_t num_type_ool;
+
+  std::size_t num_open;
+  std::size_t num_empty;
+  std::size_t num_closed;
+
+  std::size_t count_open_journal;
+  std::size_t count_open_ool;
+  std::size_t count_release_journal;
+  std::size_t count_release_ool;
+  std::size_t count_close_journal;
+  std::size_t count_close_ool;
+
+  std::size_t total_bytes;
+  std::size_t avail_bytes_in_open;
+};
+
+/**
+ * SegmentProvider
+ *
+ * Callback interface for managing available segments: allocating and
+ * closing segments, querying per-segment info and the journal tail /
+ * replay positions, and reporting write progress.
+ */
+class SegmentProvider {
+public:
+  /// target journal tail
+  virtual journal_seq_t get_journal_tail_target() const = 0;
+
+  /// tracked information of segment `id`
+  virtual const segment_info_t& get_seg_info(segment_id_t id) const = 0;
+
+  /// allocate a segment for the given sequence and type, returning its id
+  virtual segment_id_t allocate_segment(
+    segment_seq_t seq, segment_type_t type) = 0;
+
+  virtual journal_seq_t get_dirty_extents_replay_from() const = 0;
+
+  virtual journal_seq_t get_alloc_info_replay_from() const = 0;
+
+  virtual void close_segment(segment_id_t) = 0;
+
+  virtual void update_journal_tail_committed(journal_seq_t tail_committed) = 0;
+
+  virtual void update_segment_avail_bytes(segment_type_t, paddr_t) = 0;
+
+  virtual SegmentManagerGroup* get_segment_manager_group() = 0;
+
+  // defaulted, matching the other interfaces declared in this header
+  virtual ~SegmentProvider() = default;
+};
+
+// Interface for tracking how many bytes of each segment are live.
+// Implemented in this header by SpaceTrackerSimple and SpaceTrackerDetailed.
+class SpaceTrackerI {
+public:
+ // Record `len` bytes at `offset` of `segment` as used.
+ // The implementations in this header return the segment's resulting usage.
+ virtual int64_t allocate(
+ segment_id_t segment,
+ seastore_off_t offset,
+ extent_len_t len) = 0;
+
+ // Record `len` bytes at `offset` of `segment` as no longer used.
+ // The implementations in this header return the segment's resulting usage.
+ virtual int64_t release(
+ segment_id_t segment,
+ seastore_off_t offset,
+ extent_len_t len) = 0;
+
+ // Current usage of `segment`.
+ virtual int64_t get_usage(
+ segment_id_t segment) const = 0;
+
+ // Compare this tracker's contents with another tracker.
+ virtual bool equals(const SpaceTrackerI &other) const = 0;
+
+ // Create a tracker covering the same segments with cleared usage.
+ virtual std::unique_ptr<SpaceTrackerI> make_empty() const = 0;
+
+ virtual void dump_usage(segment_id_t) const = 0;
+
+ // Usage of `segment` relative to its size.
+ virtual double calc_utilization(segment_id_t segment) const = 0;
+
+ // Clear the tracked usage of all segments.
+ virtual void reset() = 0;
+
+ virtual ~SpaceTrackerI() = default;
+};
+using SpaceTrackerIRef = std::unique_ptr<SpaceTrackerI>;
+
+/**
+ * SpaceTrackerSimple
+ *
+ * SpaceTrackerI implementation keeping a single live-byte counter (plus
+ * the segment capacity) per segment. Offsets passed to allocate()/release()
+ * are ignored; only the lengths are accumulated.
+ */
+class SpaceTrackerSimple : public SpaceTrackerI {
+  struct segment_bytes_t {
+    int64_t live_bytes = 0;
+    seastore_off_t total_bytes = 0;
+  };
+  // Tracks live space for each segment
+  segment_map_t<segment_bytes_t> live_bytes_by_segment;
+
+  /// Apply `delta` to the segment's live bytes and return the new value.
+  int64_t update_usage(segment_id_t segment, int64_t delta) {
+    auto &entry = live_bytes_by_segment[segment];
+    entry.live_bytes += delta;
+    assert(entry.live_bytes >= 0);
+    return entry.live_bytes;
+  }
+public:
+  SpaceTrackerSimple(const SpaceTrackerSimple &) = default;
+
+  /// Register every segment of every segment manager with zero live bytes
+  /// and the manager's segment size as capacity.
+  SpaceTrackerSimple(const std::vector<SegmentManager*> &sms) {
+    for (auto sm : sms) {
+      live_bytes_by_segment.add_device(
+        sm->get_device_id(),
+        sm->get_num_segments(),
+        {0, sm->get_segment_size()});
+    }
+  }
+
+  int64_t allocate(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) final {
+    return update_usage(segment, len);
+  }
+
+  int64_t release(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) final {
+    return update_usage(segment, -static_cast<int64_t>(len));
+  }
+
+  int64_t get_usage(segment_id_t segment) const final {
+    return live_bytes_by_segment[segment].live_bytes;
+  }
+
+  /// live bytes relative to the segment's capacity
+  double calc_utilization(segment_id_t segment) const final {
+    auto& seg_bytes = live_bytes_by_segment[segment];
+    return static_cast<double>(seg_bytes.live_bytes) /
+           static_cast<double>(seg_bytes.total_bytes);
+  }
+
+  void dump_usage(segment_id_t) const final;
+
+  /// Clear the usage of every segment. The capacity recorded at
+  /// construction is kept (as SpaceTrackerDetailed does), so that
+  /// calc_utilization() stays well defined on a reset tracker instead of
+  /// dividing by zero.
+  void reset() final {
+    for (auto &i : live_bytes_by_segment) {
+      i.second.live_bytes = 0;
+    }
+  }
+
+  /// A copy of this tracker with all usage cleared.
+  SpaceTrackerIRef make_empty() const final {
+    auto ret = SpaceTrackerIRef(new SpaceTrackerSimple(*this));
+    ret->reset();
+    return ret;
+  }
+
+  bool equals(const SpaceTrackerI &other) const final;
+};
+
+/**
+ * SpaceTrackerDetailed
+ *
+ * SpaceTrackerI implementation keeping, per segment, a usage counter and a
+ * per-block bitmap. The block size is looked up per device.
+ */
+class SpaceTrackerDetailed : public SpaceTrackerI {
+  /// usage counter, capacity and block bitmap of a single segment
+  class SegmentMap {
+    int64_t used = 0;
+    seastore_off_t total_bytes = 0;
+    std::vector<bool> bitmap;
+
+  public:
+    /// `blocks` bitmap entries, all initially clear
+    SegmentMap(
+      size_t blocks,
+      seastore_off_t total_bytes)
+      : total_bytes(total_bytes),
+        bitmap(blocks, false) {}
+
+    /// Apply `delta` to the usage counter and return the new value.
+    int64_t update_usage(int64_t delta) {
+      used += delta;
+      return used;
+    }
+
+    int64_t allocate(
+      device_segment_id_t segment,
+      seastore_off_t offset,
+      extent_len_t len,
+      const extent_len_t block_size);
+
+    int64_t release(
+      device_segment_id_t segment,
+      seastore_off_t offset,
+      extent_len_t len,
+      const extent_len_t block_size);
+
+    int64_t get_usage() const {
+      return used;
+    }
+
+    void dump_usage(extent_len_t block_size) const;
+
+    /// used bytes relative to the segment's capacity
+    double calc_utilization() const {
+      return static_cast<double>(used) / static_cast<double>(total_bytes);
+    }
+
+    /// Clear the usage counter and every bitmap entry; the capacity and
+    /// the number of blocks are kept.
+    void reset() {
+      used = 0;
+      bitmap.assign(bitmap.size(), false);
+    }
+  };
+
+  // Tracks live space for each segment
+  segment_map_t<SegmentMap> segment_usage;
+  // block size of each device, indexed by device id (0 if not registered)
+  std::vector<size_t> block_size_by_segment_manager;
+
+public:
+  SpaceTrackerDetailed(const SpaceTrackerDetailed &) = default;
+
+  /// Register every segment of every segment manager and remember each
+  /// manager's block size.
+  SpaceTrackerDetailed(const std::vector<SegmentManager*> &sms)
+  {
+    block_size_by_segment_manager.resize(DEVICE_ID_MAX, 0);
+    for (auto sm : sms) {
+      segment_usage.add_device(
+        sm->get_device_id(),
+        sm->get_num_segments(),
+        SegmentMap(
+          sm->get_segment_size() / sm->get_block_size(),
+          sm->get_segment_size()));
+      block_size_by_segment_manager[sm->get_device_id()] = sm->get_block_size();
+    }
+  }
+
+  int64_t allocate(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) final {
+    return segment_usage[segment].allocate(
+      segment.device_segment_id(),
+      offset,
+      len,
+      block_size_by_segment_manager[segment.device_id()]);
+  }
+
+  int64_t release(
+    segment_id_t segment,
+    seastore_off_t offset,
+    extent_len_t len) final {
+    return segment_usage[segment].release(
+      segment.device_segment_id(),
+      offset,
+      len,
+      block_size_by_segment_manager[segment.device_id()]);
+  }
+
+  int64_t get_usage(segment_id_t segment) const final {
+    return segment_usage[segment].get_usage();
+  }
+
+  double calc_utilization(segment_id_t segment) const final {
+    return segment_usage[segment].calc_utilization();
+  }
+
+  void dump_usage(segment_id_t seg) const final;
+
+  /// Clear the usage of every segment.
+  void reset() final {
+    for (auto &i: segment_usage) {
+      i.second.reset();
+    }
+  }
+
+  /// A copy of this tracker with all usage cleared.
+  SpaceTrackerIRef make_empty() const final {
+    auto ret = SpaceTrackerIRef(new SpaceTrackerDetailed(*this));
+    ret->reset();
+    return ret;
+  }
+
+  bool equals(const SpaceTrackerI &other) const final;
+};
+
+
+class AsyncCleaner : public SegmentProvider {
+public:
+ using time_point = seastar::lowres_system_clock::time_point;
+ using duration = seastar::lowres_system_clock::duration;
+
+ /// Tunables for journal trimming and space reclamation.
+ ///
+ /// NOTE: get_default() and get_test() initialise this struct positionally,
+ /// so the member declaration order below must not change without updating
+ /// both of them.
+ struct config_t {
+ /// Number of minimum journal segments to stop trimming.
+ size_t target_journal_segments = 0;
+ /// Number of maximum journal segments to block user transactions.
+ size_t max_journal_segments = 0;
+
+ /// Number of journal segments the transactions in which can
+ /// have their corresponding backrefs unmerged
+ size_t target_backref_inflight_segments = 0;
+
+ /// Ratio of maximum available space to disable reclaiming.
+ double available_ratio_gc_max = 0;
+ /// Ratio of minimum available space to force reclaiming.
+ double available_ratio_hard_limit = 0;
+
+ /// Ratio of minimum reclaimable space to stop reclaiming.
+ double reclaim_ratio_gc_threshold = 0;
+
+ /// Number of bytes to reclaim per cycle
+ size_t reclaim_bytes_per_cycle = 0;
+
+ /// Number of bytes to rewrite dirty per cycle
+ size_t rewrite_dirty_bytes_per_cycle = 0;
+
+ /// Number of bytes to rewrite backref per cycle
+ size_t rewrite_backref_bytes_per_cycle = 0;
+
+ /// Sanity-check the configuration; any violation trips a ceph_assert.
+ /// The journal and available-ratio limits must each be strictly above
+ /// their respective targets and all per-cycle byte budgets must be
+ /// non-zero.
+ void validate() const {
+ ceph_assert(max_journal_segments > target_journal_segments);
+ ceph_assert(available_ratio_gc_max > available_ratio_hard_limit);
+ ceph_assert(reclaim_bytes_per_cycle > 0);
+ ceph_assert(rewrite_dirty_bytes_per_cycle > 0);
+ ceph_assert(rewrite_backref_bytes_per_cycle > 0);
+ }
+
+ /// Default configuration (values are positional, see the note above).
+ static config_t get_default() {
+ return config_t{
+ 12, // target_journal_segments
+ 16, // max_journal_segments
+ 2, // target_backref_inflight_segments
+ .1, // available_ratio_gc_max
+ .05, // available_ratio_hard_limit
+ .1, // reclaim_ratio_gc_threshold
+ 1<<20,// reclaim_bytes_per_cycle (1 MiB)
+ 1<<17,// rewrite_dirty_bytes_per_cycle (128 KiB)
+ 1<<24 // rewrite_backref_bytes_per_cycle (16 MiB)
+ };
+ }
+
+ /// Configuration variant returned for tests: fewer journal segments and
+ /// different ratios than get_default(), same per-cycle byte budgets.
+ static config_t get_test() {
+ return config_t{
+ 2, // target_journal_segments
+ 4, // max_journal_segments
+ 2, // target_backref_inflight_segments
+ .99, // available_ratio_gc_max
+ .2, // available_ratio_hard_limit
+ .6, // reclaim_ratio_gc_threshold
+ 1<<20,// reclaim_bytes_per_cycle (1 MiB)
+ 1<<17,// rewrite_dirty_bytes_per_cycle (128 KiB)
+ 1<<24 // rewrite_backref_bytes_per_cycle (16 MiB)
+ };
+ }
+ };
+
+ /// Callback interface for querying and operating on segments
+ ///
+ /// The cleaner calls back through this interface to create transactions,
+ /// look up dirty or live extents, rewrite extents and submit transactions.
+ class ExtentCallbackInterface {
+ public:
+ virtual ~ExtentCallbackInterface() = default;
+
+ /// Creates a transaction for the given source; `name` is passed through
+ /// to the implementation.
+ virtual TransactionRef create_transaction(
+ Transaction::src_t, const char*) = 0;
+
+ /// Creates empty transaction with interruptible context
+ ///
+ /// The transaction obtained from create_transaction() is kept alive by
+ /// seastar::do_with for the duration of the returned future, and `f` is
+ /// invoked with a reference to it inside with_trans_intr().
+ template <typename Func>
+ auto with_transaction_intr(
+ Transaction::src_t src,
+ const char* name,
+ Func &&f) {
+ return seastar::do_with(
+ create_transaction(src, name),
+ [f=std::forward<Func>(f)](auto &ref_t) mutable {
+ return with_trans_intr(
+ *ref_t,
+ // hand the callable over to the inner lambda
+ [f=std::forward<Func>(f)](auto& t) mutable {
+ return f(t);
+ }
+ );
+ }
+ );
+ }
+
+ /// See Cache::get_next_dirty_extents
+ using get_next_dirty_extents_iertr = trans_iertr<
+ crimson::errorator<
+ crimson::ct_error::input_output_error>
+ >;
+ using get_next_dirty_extents_ret = get_next_dirty_extents_iertr::future<
+ std::vector<CachedExtentRef>>;
+ virtual get_next_dirty_extents_ret get_next_dirty_extents(
+ Transaction &t, ///< [in] current transaction
+ journal_seq_t bound,///< [in] return extents with dirty_from < bound
+ size_t max_bytes ///< [in] return up to max_bytes of extents
+ ) = 0;
+
+ /// Error sets shared by the extent lookup/rewrite callbacks below;
+ /// the interruptible variant has no eagain entry of its own.
+ using extent_mapping_ertr = crimson::errorator<
+ crimson::ct_error::input_output_error,
+ crimson::ct_error::eagain>;
+ using extent_mapping_iertr = trans_iertr<
+ crimson::errorator<
+ crimson::ct_error::input_output_error>
+ >;
+
+ /**
+ * rewrite_extent
+ *
+ * Updates t with operations moving the passed extents to a new
+ * segment. extent may be invalid, implementation must correctly
+ * handle finding the current instance if it is still alive and
+ * otherwise ignore it.
+ */
+ using rewrite_extent_iertr = extent_mapping_iertr;
+ using rewrite_extent_ret = rewrite_extent_iertr::future<>;
+ virtual rewrite_extent_ret rewrite_extent(
+ Transaction &t,
+ CachedExtentRef extent) = 0;
+
+ /**
+ * get_extent_if_live
+ *
+ * Returns extent at specified location if still referenced by
+ * lba_manager and not removed by t.
+ *
+ * See TransactionManager::get_extent_if_live and
+ * LBAManager::get_physical_extent_if_live.
+ */
+ using get_extent_if_live_iertr = extent_mapping_iertr;
+ using get_extent_if_live_ret = get_extent_if_live_iertr::future<
+ CachedExtentRef>;
+ virtual get_extent_if_live_ret get_extent_if_live(
+ Transaction &t,
+ extent_types_t type,
+ paddr_t addr,
+ laddr_t laddr,
+ seastore_off_t len) = 0;
+
+ /**
+ * submit_transaction_direct
+ *
+ * Submits transaction without any space throttling.
+ *
+ * seq_to_trim is optional and defaults to std::nullopt.
+ */
+ using submit_transaction_direct_iertr = trans_iertr<
+ crimson::errorator<
+ crimson::ct_error::input_output_error>
+ >;
+ using submit_transaction_direct_ret =
+ submit_transaction_direct_iertr::future<>;
+ virtual submit_transaction_direct_ret submit_transaction_direct(
+ Transaction &t,
+ std::optional<journal_seq_t> seq_to_trim = std::nullopt) = 0;
+ };
+
+private:
+ const bool detailed;
+ const config_t config;
+
+ SegmentManagerGroupRef sm_group;
+ BackrefManager &backref_manager;
+
+ SpaceTrackerIRef space_tracker;
+ segments_info_t segments;
+ bool init_complete = false;
+
+ struct {
+ /**
+ * used_bytes
+ *
+ * Bytes occupied by live extents
+ */
+ uint64_t used_bytes = 0;
+
+ /**
+ * projected_used_bytes
+ *
+ * Sum of projected bytes used by each transaction between throttle
+ * acquisition and commit completion. See reserve_projected_usage()
+ */
+ uint64_t projected_used_bytes = 0;
+ uint64_t projected_count = 0;
+ uint64_t projected_used_bytes_sum = 0;
+
+ uint64_t closed_journal_used_bytes = 0;
+ uint64_t closed_journal_total_bytes = 0;
+ uint64_t closed_ool_used_bytes = 0;
+ uint64_t closed_ool_total_bytes = 0;
+
+ uint64_t io_blocking_num = 0;
+ uint64_t io_count = 0;
+ uint64_t io_blocked_count = 0;
+ uint64_t io_blocked_count_trim = 0;
+ uint64_t io_blocked_count_reclaim = 0;
+ uint64_t io_blocked_sum = 0;
+
+ uint64_t reclaiming_bytes = 0;
+ uint64_t reclaimed_bytes = 0;
+ uint64_t reclaimed_segment_bytes = 0;
+
+ seastar::metrics::histogram segment_util;
+ } stats;
+ seastar::metrics::metric_group metrics;
+ void register_metrics();
+
+ /// target journal_tail for next fresh segment
+ journal_seq_t journal_tail_target;
+
+ /// target replay_from for dirty extents
+ journal_seq_t dirty_extents_replay_from;
+
+ /// target replay_from for alloc infos
+ journal_seq_t alloc_info_replay_from;
+
+ /// most recently committed journal_tail
+ journal_seq_t journal_tail_committed;
+
+ ExtentCallbackInterface *ecb = nullptr;
+
+ /// populated if there is an IO blocked on hard limits
+ std::optional<seastar::promise<>> blocked_io_wake;
+
+ SegmentSeqAllocatorRef ool_segment_seq_allocator;
+
+ /**
+ * disable_trim
+ *
+ * added to enable unit testing of CircularBoundedJournal before
+ * proper support is added to AsyncCleaner.
+ * Should be removed once proper support is added. TODO
+ */
+ bool disable_trim = false;
+public:
+ AsyncCleaner(
+ config_t config,
+ SegmentManagerGroupRef&& sm_group,
+ BackrefManager &backref_manager,
+ bool detailed = false);
+
+ SegmentSeqAllocator& get_ool_segment_seq_allocator() {
+ return *ool_segment_seq_allocator;
+ }
+
+ using mount_ertr = crimson::errorator<
+ crimson::ct_error::input_output_error>;
+ using mount_ret = mount_ertr::future<>;
+ mount_ret mount();
+
+ /*
+ * SegmentProvider interfaces
+ */
+ journal_seq_t get_journal_tail_target() const final {
+ return journal_tail_target;
+ }
+
+ const segment_info_t& get_seg_info(segment_id_t id) const final {
+ return segments[id];
+ }
+
+ segment_id_t allocate_segment(
+ segment_seq_t seq, segment_type_t type) final;
+
+ void close_segment(segment_id_t segment) final;
+
+ void update_journal_tail_committed(journal_seq_t committed) final;
+
+ /// Forwards the new write position to segments.update_written_to() and then
+ /// lets the gc process re-check whether it should be woken.
+ void update_segment_avail_bytes(segment_type_t type, paddr_t offset) final {
+ segments.update_written_to(type, offset);
+ gc_process.maybe_wake_on_space_used();
+ }
+
+ /// Returns the raw pointer held by sm_group; ownership stays with this object.
+ SegmentManagerGroup* get_segment_manager_group() final {
+ return sm_group.get();
+ }
+
+ /// Returns dirty_extents_replay_from, the target replay_from for dirty extents.
+ journal_seq_t get_dirty_extents_replay_from() const final {
+ return dirty_extents_replay_from;
+ }
+
+ /// Returns alloc_info_replay_from, the target replay_from for alloc infos.
+ journal_seq_t get_alloc_info_replay_from() const final {
+ return alloc_info_replay_from;
+ }
+
+ void update_journal_tail_target(
+ journal_seq_t dirty_replay_from,
+ journal_seq_t alloc_replay_from);
+
+ void update_alloc_info_replay_from(
+ journal_seq_t alloc_replay_from);
+
+ /// Seeds both journal tail markers (committed and target) from the current
+ /// journal head. The head must be valid unless trimming is disabled.
+ void init_mkfs() {
+   auto head = segments.get_journal_head();
+   ceph_assert(disable_trim || head != JOURNAL_SEQ_NULL);
+   journal_tail_committed = head;
+   journal_tail_target = head;
+ }
+
+ using release_ertr = SegmentManagerGroup::release_ertr;
+ release_ertr::future<> maybe_release_segment(Transaction &t);
+
+ /// Moves one segment from the histogram bucket of `old_usage` to the bucket
+ /// of `new_usage` in stats.segment_util.
+ void adjust_segment_util(double old_usage, double new_usage) {
+   auto &buckets = stats.segment_util.buckets;
+   const auto from = get_bucket_index(old_usage);
+   const auto to = get_bucket_index(new_usage);
+   // the source bucket must currently account for at least one segment
+   assert(buckets[from].count > 0);
+   --buckets[from].count;
+   ++buckets[to].count;
+ }
+
+ void mark_space_used(
+ paddr_t addr,
+ extent_len_t len,
+ time_point last_modified = time_point(),
+ time_point last_rewritten = time_point(),
+ bool init_scan = false);
+
+ void mark_space_free(
+ paddr_t addr,
+ extent_len_t len);
+
+ /// Returns whatever space_tracker->make_empty() produces.
+ SpaceTrackerIRef get_empty_space_tracker() const {
+ return space_tracker->make_empty();
+ }
+
+ void complete_init();
+
+ /// Builds a store_statfs_t from the total byte count of all segments and the
+ /// live (used) byte counter; `allocated` and `data_stored` both report the
+ /// used bytes.
+ store_statfs_t stat() const {
+   const auto total = segments.get_total_bytes();
+   const auto used = stats.used_bytes;
+
+   store_statfs_t result;
+   result.total = total;
+   result.available = total - used;
+   result.allocated = used;
+   result.data_stored = used;
+
+   // TODO add per extent type counters for omap_allocated and
+   // internal metadata
+   return result;
+ }
+
+ /// Delegates to gc_process.stop() and returns its future.
+ seastar::future<> stop() {
+ return gc_process.stop();
+ }
+
+ /// Delegates to gc_process.run_until_halt() and returns its future.
+ seastar::future<> run_until_halt() {
+ return gc_process.run_until_halt();
+ }
+
+ /// Stores `cb` in ecb; the pointer is kept as-is (non-owning raw pointer).
+ void set_extent_callback(ExtentCallbackInterface *cb) {
+ ecb = cb;
+ }
+
+ /// Returns the result of comparing the internal space tracker with `tracker`
+ /// via SpaceTrackerI::equals().
+ bool debug_check_space(const SpaceTrackerI &tracker) {
+ return space_tracker->equals(tracker);
+ }
+
+ /// Sets the disable_trim flag (see the disable_trim member for its purpose).
+ void set_disable_trim(bool val) {
+ disable_trim = val;
+ }
+
+ using work_ertr = ExtentCallbackInterface::extent_mapping_ertr;
+ using work_iertr = ExtentCallbackInterface::extent_mapping_iertr;
+
+private:
+ /*
+ * 10 buckets for the number of closed segments by usage
+ * 2 extra buckets for the number of open and empty segments
+ */
+ static constexpr double UTIL_STATE_OPEN = 1.05;
+ static constexpr double UTIL_STATE_EMPTY = 1.15;
+ static constexpr std::size_t UTIL_BUCKETS = 12;
+ /// Maps a utilization value to its histogram bucket index, floor(util * 10).
+ /// Values in [0, 1) land in buckets 0..9, UTIL_STATE_OPEN in bucket 10 and
+ /// UTIL_STATE_EMPTY in bucket 11.
+ static std::size_t get_bucket_index(double util) {
+   // A negative value would make the double -> size_t conversion undefined,
+   // so reject it before converting.
+   assert(util >= 0);
+   const auto index = static_cast<std::size_t>(std::floor(util * 10));
+   assert(index < UTIL_BUCKETS);
+   return index;
+ }
+ /// Returns the utilization used for histogram bucketing: the sentinel
+ /// UTIL_STATE_OPEN / UTIL_STATE_EMPTY for open / empty segments, otherwise
+ /// the value reported by the space tracker, which must lie in [0, 1).
+ double calc_utilization(segment_id_t id) const {
+   const auto &seg = segments[id];
+   if (seg.is_open()) {
+     return UTIL_STATE_OPEN;
+   }
+   if (seg.is_empty()) {
+     return UTIL_STATE_EMPTY;
+   }
+   // neither open nor empty
+   const auto util = space_tracker->calc_utilization(id);
+   assert(util >= 0 && util < 1);
+   return util;
+ }
+
+ // journal status helpers
+
+ /// Scores segment `id` as (1 - util) * age / (1 + util), where util is the
+ /// segment utilization and age is the raw tick count of the time elapsed
+ /// since the later of last_modified / last_rewritten.
+ /// The utilization must lie in [0, 1), which excludes the open/empty
+ /// sentinel values returned by calc_utilization().
+ double calc_gc_benefit_cost(segment_id_t id) const {
+   double util = calc_utilization(id);
+   ceph_assert(util >= 0 && util < 1);
+   auto cur_time = seastar::lowres_system_clock::now();
+   // bind by reference: the record is only read here, so avoid copying it
+   const auto &segment = segments[id];
+   assert(cur_time >= segment.last_modified);
+   auto segment_age =
+     cur_time - std::max(segment.last_modified, segment.last_rewritten);
+   uint64_t age = segment_age.count();
+   return (1 - util) * age / (1 + util);
+ }
+
+ segment_id_t get_next_reclaim_segment() const;
+
+ /**
+ * rewrite_dirty
+ *
+ * Writes out dirty blocks dirtied earlier than limit.
+ */
+ using rewrite_dirty_iertr = work_iertr;
+ using rewrite_dirty_ret = rewrite_dirty_iertr::future<>;
+ rewrite_dirty_ret rewrite_dirty(
+ Transaction &t,
+ journal_seq_t limit);
+
+ using trim_backrefs_iertr = work_iertr;
+ using trim_backrefs_ret = trim_backrefs_iertr::future<journal_seq_t>;
+ trim_backrefs_ret trim_backrefs(
+ Transaction &t,
+ journal_seq_t limit);
+
+ /// Returns the journal head moved back by `segments_behind` whole segments
+ /// (the offset is kept). When the head's segment_seq is smaller than
+ /// `segments_behind`, the result is clamped to segment_seq 0 with offset
+ /// P_ADDR_MIN. The journal head must be valid.
+ /// Templated so that each caller's config field keeps its own type in the
+ /// comparison and subtraction.
+ template <typename CountT>
+ journal_seq_t get_journal_head_minus(CountT segments_behind) const {
+   auto ret = segments.get_journal_head();
+   ceph_assert(ret != JOURNAL_SEQ_NULL);
+   if (ret.segment_seq >= segments_behind) {
+     ret.segment_seq -= segments_behind;
+   } else {
+     ret.segment_seq = 0;
+     ret.offset = P_ADDR_MIN;
+   }
+   return ret;
+ }
+
+ /// Journal head minus config.target_journal_segments segments.
+ journal_seq_t get_dirty_tail() const {
+   return get_journal_head_minus(config.target_journal_segments);
+ }
+
+ /// Journal head minus config.max_journal_segments segments.
+ journal_seq_t get_dirty_tail_limit() const {
+   return get_journal_head_minus(config.max_journal_segments);
+ }
+
+ /// Journal head minus config.target_backref_inflight_segments segments.
+ journal_seq_t get_backref_tail() const {
+   return get_journal_head_minus(config.target_backref_inflight_segments);
+ }
+
+ /// Cursor over one segment: [start_pos, end_pos) is the most recently
+ /// selected range, and end_pos also identifies the segment.
+ struct reclaim_state_t {
+   std::size_t segment_size;
+   paddr_t start_pos;
+   paddr_t end_pos;
+
+   /// Initial state for `segment_id`: no start position yet, end at offset 0.
+   static reclaim_state_t create(
+     segment_id_t segment_id,
+     std::size_t segment_size) {
+     return reclaim_state_t{
+       segment_size,
+       P_ADDR_NULL,
+       paddr_t::make_seg_paddr(segment_id, 0)};
+   }
+
+   /// Segment id taken from end_pos.
+   segment_id_t get_segment_id() const {
+     const auto &seg_addr = end_pos.as_seg_paddr();
+     return seg_addr.get_segment_id();
+   }
+
+   /// True once end_pos has reached (or passed) segment_size.
+   bool is_complete() const {
+     const auto &seg_addr = end_pos.as_seg_paddr();
+     return (std::size_t)seg_addr.get_segment_off() >= segment_size;
+   }
+
+   /// Slides the range forward: the old end becomes the new start and the end
+   /// moves by `bytes`, capped at segment_size.
+   void advance(std::size_t bytes) {
+     assert(!is_complete());
+     start_pos = end_pos;
+     auto &seg_addr = end_pos.as_seg_paddr();
+     auto wanted_off = seg_addr.get_segment_off() + bytes;
+     const bool fits = !(wanted_off > segment_size);
+     if (fits) {
+       seg_addr.set_segment_off(wanted_off);
+     } else {
+       seg_addr.set_segment_off(segment_size);
+     }
+   }
+ };
+ std::optional<reclaim_state_t> reclaim_state;
+
+ /**
+ * GCProcess
+ *
+ * Background gc process.
+ */
+ using gc_cycle_ret = seastar::future<>;
+ // Holds the join future of the gc loop and the promise used to park and
+ // wake it; all gc decisions are delegated to the owning AsyncCleaner.
+ class GCProcess {
+ // future returned by run(); empty before start() and after stop()
+ std::optional<gc_cycle_ret> process_join;
+
+ AsyncCleaner &cleaner;
+
+ // engaged only while maybe_wait_should_run() is waiting for wake()
+ std::optional<seastar::promise<>> blocking;
+
+ // "stopping" simply means there is no join future stored
+ bool is_stopping() const {
+ return !process_join;
+ }
+
+ gc_cycle_ret run();
+
+ // resolves and clears the pending `blocking` promise, if any
+ void wake() {
+ if (blocking) {
+ blocking->set_value();
+ blocking = std::nullopt;
+ }
+ }
+
+ // Loops until either the process is stopping or the cleaner reports that gc
+ // should run; each iteration installs a fresh promise and waits on it.
+ seastar::future<> maybe_wait_should_run() {
+ return seastar::do_until(
+ [this] {
+ cleaner.log_gc_state("GCProcess::maybe_wait_should_run");
+ return is_stopping() || cleaner.gc_should_run();
+ },
+ [this] {
+ ceph_assert(!blocking);
+ blocking = seastar::promise<>();
+ return blocking->get_future();
+ });
+ }
+ public:
+ GCProcess(AsyncCleaner &cleaner) : cleaner(cleaner) {}
+
+ // Must only be called while no join future is stored. A ready placeholder
+ // future is stored first so that is_stopping() is already false when run()
+ // is invoked; it is then replaced by the future run() returns.
+ void start() {
+ ceph_assert(is_stopping());
+ process_join = seastar::now(); // allow run()
+ process_join = run();
+ assert(!is_stopping());
+ }
+
+ // Returns a ready future if nothing is running. Otherwise takes the join
+ // future out of process_join (which flips is_stopping() to true), wakes a
+ // parked waiter so it can observe that, and returns the join future.
+ gc_cycle_ret stop() {
+ if (is_stopping()) {
+ return seastar::now();
+ }
+ auto ret = std::move(*process_join);
+ process_join.reset();
+ assert(is_stopping());
+ wake();
+ return ret;
+ }
+
+ // Requires that start() is not in effect; repeatedly calls
+ // cleaner.do_gc_cycle() until cleaner.gc_should_run() returns false.
+ gc_cycle_ret run_until_halt() {
+ ceph_assert(is_stopping());
+ return seastar::do_until(
+ [this] {
+ cleaner.log_gc_state("GCProcess::run_until_halt");
+ return !cleaner.gc_should_run();
+ },
+ [this] {
+ return cleaner.do_gc_cycle();
+ });
+ }
+
+ // No-op while stopping; otherwise wakes a parked waiter when the cleaner
+ // reports that gc should run.
+ void maybe_wake_on_space_used() {
+ if (is_stopping()) {
+ return;
+ }
+ if (cleaner.gc_should_run()) {
+ wake();
+ }
+ }
+ } gc_process;
+
+ using gc_ertr = work_ertr::extend_ertr<
+ SegmentManagerGroup::scan_extents_ertr
+ >;
+
+ gc_cycle_ret do_gc_cycle();
+
+ using gc_trim_journal_ertr = gc_ertr;
+ using gc_trim_journal_ret = gc_trim_journal_ertr::future<>;
+ gc_trim_journal_ret gc_trim_journal();
+
+ using gc_trim_backref_ertr = gc_ertr;
+ using gc_trim_backref_ret = gc_trim_backref_ertr::future<journal_seq_t>;
+ gc_trim_backref_ret gc_trim_backref(journal_seq_t limit);
+
+ using gc_reclaim_space_ertr = gc_ertr;
+ using gc_reclaim_space_ret = gc_reclaim_space_ertr::future<>;
+ gc_reclaim_space_ret gc_reclaim_space();
+
+
+ using retrieve_live_extents_iertr = work_iertr;
+ using retrieve_live_extents_ret =
+ retrieve_live_extents_iertr::future<journal_seq_t>;
+ retrieve_live_extents_ret _retrieve_live_extents(
+ Transaction &t,
+ std::set<
+ backref_buf_entry_t,
+ backref_buf_entry_t::cmp_t> &&backrefs,
+ std::vector<CachedExtentRef> &extents);
+
+ using retrieve_backref_mappings_ertr = work_ertr;
+ using retrieve_backref_mappings_ret =
+ retrieve_backref_mappings_ertr::future<backref_pin_list_t>;
+ retrieve_backref_mappings_ret retrieve_backref_mappings(
+ paddr_t start_paddr,
+ paddr_t end_paddr);
+
+ /*
+ * Segments calculations
+ */
+ /// Number of segments spanned by the journal: 0 before init completes, the
+ /// count of journal-typed segments while no tail has been committed, and
+ /// otherwise head.segment_seq + 1 - committed_tail.segment_seq.
+ std::size_t get_segments_in_journal() const {
+   if (!init_complete) {
+     return 0;
+   }
+   const bool no_committed_tail = (journal_tail_committed == JOURNAL_SEQ_NULL);
+   if (no_committed_tail) {
+     return segments.get_num_type_journal();
+   }
+   auto head = segments.get_journal_head();
+   assert(head != JOURNAL_SEQ_NULL);
+   assert(head.segment_seq >= journal_tail_committed.segment_seq);
+   return head.segment_seq + 1 - journal_tail_committed.segment_seq;
+ }
+ /// Segments in the journal minus the open journal segments, floored at 0.
+ std::size_t get_segments_in_journal_closed() const {
+   auto total_in_journal = get_segments_in_journal();
+   auto open_in_journal = segments.get_num_in_journal_open();
+   if (total_in_journal < open_in_journal) {
+     return 0;
+   }
+   return total_in_journal - open_in_journal;
+ }
+ /// Closed segments that are not closed journal segments.
+ std::size_t get_segments_reclaimable() const {
+   const auto closed = segments.get_num_closed();
+   const auto closed_in_journal = get_segments_in_journal_closed();
+   assert(closed >= closed_in_journal);
+   return closed - closed_in_journal;
+ }
+
+ /*
+ * Space calculations
+ */
+ /// the unavailable space that is not reclaimable yet
+ /// Bytes of all open segments plus closed journal segments, minus the bytes
+ /// still available inside the open segments.
+ std::size_t get_unavailable_unreclaimable_bytes() const {
+   const auto segment_count =
+     segments.get_num_open() + get_segments_in_journal_closed();
+   auto total = segment_count * segments.get_segment_size();
+   const auto avail_in_open = segments.get_available_bytes_in_open();
+   assert(total >= avail_in_open);
+   return total - avail_in_open;
+ }
+ /// the unavailable space that can be reclaimed
+ /// Bytes of all reclaimable segments. Together with the unreclaimable part
+ /// this must add up to the total unavailable bytes.
+ std::size_t get_unavailable_reclaimable_bytes() const {
+   auto reclaimable = get_segments_reclaimable() * segments.get_segment_size();
+   const auto accounted = reclaimable + get_unavailable_unreclaimable_bytes();
+   ceph_assert(accounted == segments.get_unavailable_bytes());
+   return reclaimable;
+ }
+ /// the unavailable space that is not alive
+ /// Unavailable bytes minus the live (used) bytes.
+ /// Equality is a valid state (nothing is dead, including the case where
+ /// both values are 0) and yields 0; only used > unavailable would make the
+ /// unsigned subtraction wrap, so that is what the assertion rules out.
+ std::size_t get_unavailable_unused_bytes() const {
+   assert(segments.get_unavailable_bytes() >= stats.used_bytes);
+   return segments.get_unavailable_bytes() - stats.used_bytes;
+ }
+ /// Fraction of the unavailable bytes that is not live; 0 when nothing is
+ /// unavailable.
+ double get_reclaim_ratio() const {
+   if (segments.get_unavailable_bytes() == 0) {
+     return 0;
+   }
+   const auto unused = static_cast<double>(get_unavailable_unused_bytes());
+   const auto unavailable =
+     static_cast<double>(segments.get_unavailable_bytes());
+   return unused / unavailable;
+ }
+
+ /*
+ * Space calculations (projected)
+ */
+ /// Available bytes minus the projected used bytes, floored at 0.
+ std::size_t get_projected_available_bytes() const {
+   if (segments.get_available_bytes() > stats.projected_used_bytes) {
+     return segments.get_available_bytes() - stats.projected_used_bytes;
+   }
+   return 0;
+ }
+ /// Projected available bytes as a fraction of the total bytes.
+ double get_projected_available_ratio() const {
+   const auto projected = static_cast<double>(get_projected_available_bytes());
+   const auto total = static_cast<double>(segments.get_total_bytes());
+   return projected / total;
+ }
+
+ /*
+ * Journal sizes
+ */
+ /// Shared computation behind get_dirty_journal_size() and
+ /// get_alloc_journal_size(). Returns 0 when either the journal head or
+ /// `replay_from` is JOURNAL_SEQ_NULL; otherwise
+ ///   (head.seq - from.seq) * segment_size + head.off - segment_size - from.off
+ /// NOTE(review): one segment_size is subtracted from the sum; the expression
+ /// is kept exactly as it was — confirm it matches the intended definition.
+ std::size_t get_journal_size_since(const journal_seq_t &replay_from) const {
+   auto journal_head = segments.get_journal_head();
+   if (journal_head == JOURNAL_SEQ_NULL ||
+       replay_from == JOURNAL_SEQ_NULL) {
+     return 0;
+   }
+   return (journal_head.segment_seq - replay_from.segment_seq) *
+     segments.get_segment_size() +
+     journal_head.offset.as_seg_paddr().get_segment_off() -
+     segments.get_segment_size() -
+     replay_from.offset.as_seg_paddr().get_segment_off();
+ }
+
+ /// Journal size measured from dirty_extents_replay_from.
+ std::size_t get_dirty_journal_size() const {
+   return get_journal_size_since(dirty_extents_replay_from);
+ }
+
+ /// Journal size measured from alloc_info_replay_from.
+ std::size_t get_alloc_journal_size() const {
+   return get_journal_size_since(alloc_info_replay_from);
+ }
+
+ /**
+ * should_block_on_gc
+ *
+ * Encapsulates whether IO should block pending gc.
+ */
+ /// True when trimming is enabled and the dirty tail limit is ahead of
+ /// journal_tail_target.
+ bool should_block_on_trim() const {
+   return !disable_trim &&
+     get_dirty_tail_limit() > journal_tail_target;
+ }
+
+ /// True when trimming is enabled, at least one segment is reclaimable and
+ /// the projected available ratio is below the configured hard limit.
+ bool should_block_on_reclaim() const {
+   if (disable_trim || get_segments_reclaimable() == 0) {
+     return false;
+   }
+   return get_projected_available_ratio() < config.available_ratio_hard_limit;
+ }
+
+ /// True when either the trim condition or the reclaim condition asks to block.
+ bool should_block_on_gc() const {
+ return should_block_on_trim() || should_block_on_reclaim();
+ }
+
+ void log_gc_state(const char *caller) const;
+
+public:
+ seastar::future<> reserve_projected_usage(std::size_t projected_usage);
+
+ void release_projected_usage(size_t projected_usage);
+
+private:
+ /// Once init is complete and gc no longer asks to block, resolves and clears
+ /// the pending blocked_io_wake promise (if there is one).
+ void maybe_wake_gc_blocked_io() {
+   if (init_complete && !should_block_on_gc() && blocked_io_wake) {
+     blocked_io_wake->set_value();
+     blocked_io_wake.reset();
+   }
+ }
+
+ using scan_extents_ret_bare =
+ std::vector<std::pair<segment_id_t, segment_header_t>>;
+ using scan_extents_ertr = SegmentManagerGroup::scan_extents_ertr;
+ using scan_extents_ret = scan_extents_ertr::future<>;
+ scan_extents_ret scan_nonfull_segment(
+ const segment_header_t& header,
+ scan_extents_ret_bare& segment_set,
+ segment_id_t segment_id);
+
+ /**
+ * gc_should_reclaim_space
+ *
+ * Encapsulates logic for whether gc should be reclaiming segment space.
+ */
+ /// Never reclaims when trimming is disabled or nothing is reclaimable.
+ /// Otherwise reclaims when the available ratio is below the hard limit, or
+ /// when it is below available_ratio_gc_max while the reclaim ratio exceeds
+ /// reclaim_ratio_gc_threshold.
+ bool gc_should_reclaim_space() const {
+   if (disable_trim || get_segments_reclaimable() == 0) {
+     return false;
+   }
+   const auto avail_ratio = segments.get_available_ratio();
+   const auto reclaim_ratio = get_reclaim_ratio();
+   if (avail_ratio < config.available_ratio_hard_limit) {
+     return true;
+   }
+   return (avail_ratio < config.available_ratio_gc_max) &&
+     (reclaim_ratio > config.reclaim_ratio_gc_threshold);
+ }
+
+ /**
+ * gc_should_trim_journal
+ *
+ * Encapsulates logic for whether gc should be trimming the journal.
+ */
+ // true when get_dirty_tail() is ahead of the current journal_tail_target
+ bool gc_should_trim_journal() const {
+ return get_dirty_tail() > journal_tail_target;
+ }
+
+ /// True when get_backref_tail() is ahead of alloc_info_replay_from.
+ bool gc_should_trim_backref() const {
+ return get_backref_tail() > alloc_info_replay_from;
+ }
+ /**
+ * gc_should_run
+ *
+ * True if gc should be running.
+ */
+ /// Always false when trimming is disabled. Otherwise init must be complete,
+ /// and the result is true when any of the reclaim-space, trim-journal or
+ /// trim-backref conditions holds (checked in that order).
+ bool gc_should_run() const {
+   if (disable_trim) {
+     return false;
+   }
+   ceph_assert(init_complete);
+   if (gc_should_reclaim_space()) {
+     return true;
+   }
+   if (gc_should_trim_journal()) {
+     return true;
+   }
+   return gc_should_trim_backref();
+ }
+
+ /// Init-time only: records `segment` as closed with the given seq and type,
+ /// moves it to the matching utilization bucket and, for OOL segments, feeds
+ /// `seq` to the OOL segment sequence allocator.
+ void init_mark_segment_closed(
+   segment_id_t segment,
+   segment_seq_t seq,
+   segment_type_t s_type) {
+   ceph_assert(!init_complete);
+   const auto util_before = calc_utilization(segment);
+   segments.init_closed(segment, seq, s_type);
+   const auto util_after = calc_utilization(segment);
+   adjust_segment_util(util_before, util_after);
+   const bool is_ool = (s_type == segment_type_t::OOL);
+   if (is_ool) {
+     ool_segment_seq_allocator->set_next_segment_seq(seq);
+   }
+ }
+};
+using AsyncCleanerRef = std::unique_ptr<AsyncCleaner>;
+
+}
#include "crimson/os/seastore/logging.h"
#include "crimson/common/config_proxy.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
// included for get_extent_by_type
#include "crimson/os/seastore/collection_manager/collection_flat_node.h"
Transaction &t,
paddr_t final_block_start,
journal_seq_t seq,
- SegmentCleaner *cleaner)
+ AsyncCleaner *cleaner)
{
LOG_PREFIX(Cache::complete_commit);
SUBTRACET(seastore_t, "final_block_start={}, seq={}",
namespace crimson::os::seastore {
class BackrefManager;
-class SegmentCleaner;
+class AsyncCleaner;
struct backref_buf_entry_t {
backref_buf_entry_t(
Transaction &t, ///< [in, out] current transaction
paddr_t final_block_start, ///< [in] offset of initial block
journal_seq_t seq, ///< [in] journal commit seq
- SegmentCleaner *cleaner=nullptr ///< [out] optional segment stat listener
+ AsyncCleaner *cleaner=nullptr ///< [out] optional segment stat listener
);
/**
#include <fmt/format.h>
#include "crimson/os/seastore/logging.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
SET_SUBSYS(seastore_journal);
#include "include/buffer.h"
#include "include/denc.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
#include "crimson/os/seastore/journal.h"
#include "crimson/os/seastore/segment_manager_group.h"
#include "crimson/os/seastore/ordering_handle.h"
#include "crimson/os/futurized_collection.h"
#include "crimson/os/seastore/backref_manager.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
#include "crimson/os/seastore/collection_manager/flat_collection_manager.h"
#include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
#include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h"
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include <seastar/core/metrics.hh>
-
-#include "crimson/os/seastore/logging.h"
-
-#include "crimson/os/seastore/segment_cleaner.h"
-#include "crimson/os/seastore/transaction_manager.h"
-
-SET_SUBSYS(seastore_cleaner);
-
-namespace crimson::os::seastore {
-
-void segment_info_t::set_open(
- segment_seq_t _seq, segment_type_t _type)
-{
- ceph_assert(_seq != NULL_SEG_SEQ);
- ceph_assert(_type != segment_type_t::NULL_SEG);
- state = Segment::segment_state_t::OPEN;
- seq = _seq;
- type = _type;
- written_to = 0;
-}
-
-void segment_info_t::set_empty()
-{
- state = Segment::segment_state_t::EMPTY;
- seq = NULL_SEG_SEQ;
- type = segment_type_t::NULL_SEG;
- last_modified = {};
- last_rewritten = {};
- written_to = 0;
-}
-
-void segment_info_t::set_closed()
-{
- state = Segment::segment_state_t::CLOSED;
- // the rest of information is unchanged
-}
-
-void segment_info_t::init_closed(
- segment_seq_t _seq, segment_type_t _type, std::size_t seg_size)
-{
- ceph_assert(_seq != NULL_SEG_SEQ);
- ceph_assert(_type != segment_type_t::NULL_SEG);
- state = Segment::segment_state_t::CLOSED;
- seq = _seq;
- type = _type;
- written_to = seg_size;
-}
-
-std::ostream& operator<<(std::ostream &out, const segment_info_t &info)
-{
- out << "seg_info_t("
- << "state=" << info.state;
- if (info.is_empty()) {
- // pass
- } else { // open or closed
- out << ", seq=" << segment_seq_printer_t{info.seq}
- << ", type=" << info.type
- << ", last_modified=" << info.last_modified.time_since_epoch()
- << ", last_rewritten=" << info.last_rewritten.time_since_epoch()
- << ", written_to=" << info.written_to;
- }
- return out << ")";
-}
-
-void segments_info_t::reset()
-{
- segments.clear();
-
- segment_size = 0;
-
- journal_segment_id = NULL_SEG_ID;
- num_in_journal_open = 0;
- num_type_journal = 0;
- num_type_ool = 0;
-
- num_open = 0;
- num_empty = 0;
- num_closed = 0;
-
- count_open_journal = 0;
- count_open_ool = 0;
- count_release_journal = 0;
- count_release_ool = 0;
- count_close_journal = 0;
- count_close_ool = 0;
-
- total_bytes = 0;
- avail_bytes_in_open = 0;
-}
-
-void segments_info_t::add_segment_manager(
- SegmentManager &segment_manager)
-{
- LOG_PREFIX(segments_info_t::add_segment_manager);
- device_id_t d_id = segment_manager.get_device_id();
- auto ssize = segment_manager.get_segment_size();
- auto nsegments = segment_manager.get_num_segments();
- auto sm_size = segment_manager.get_size();
- INFO("adding segment manager {}, size={}, ssize={}, segments={}",
- device_id_printer_t{d_id}, sm_size, ssize, nsegments);
- ceph_assert(ssize > 0);
- ceph_assert(nsegments > 0);
- ceph_assert(sm_size > 0);
-
- // also validate if the device is duplicated
- segments.add_device(d_id, nsegments, segment_info_t{});
-
- // assume all the segment managers share the same settings as follows.
- if (segment_size == 0) {
- ceph_assert(ssize > 0);
- segment_size = ssize;
- } else {
- ceph_assert(segment_size == (std::size_t)ssize);
- }
-
- // NOTE: by default the segments are empty
- num_empty += nsegments;
-
- total_bytes += sm_size;
-}
-
-void segments_info_t::init_closed(
- segment_id_t segment, segment_seq_t seq, segment_type_t type)
-{
- LOG_PREFIX(segments_info_t::init_closed);
- auto& segment_info = segments[segment];
- INFO("initiating {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
- segment, segment_seq_printer_t{seq}, type,
- segment_info, num_empty, num_open, num_closed);
- ceph_assert(segment_info.is_empty());
- segment_info.init_closed(seq, type, get_segment_size());
- ceph_assert(num_empty > 0);
- --num_empty;
- ++num_closed;
- if (type == segment_type_t::JOURNAL) {
- // init_closed won't initialize journal_segment_id
- ceph_assert(get_journal_head() == JOURNAL_SEQ_NULL);
- ++num_type_journal;
- } else {
- ++num_type_ool;
- }
- // do not increment count_close_*;
-}
-
-void segments_info_t::mark_open(
- segment_id_t segment, segment_seq_t seq, segment_type_t type)
-{
- LOG_PREFIX(segments_info_t::mark_open);
- auto& segment_info = segments[segment];
- INFO("opening {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
- segment, segment_seq_printer_t{seq}, type,
- segment_info, num_empty, num_open, num_closed);
- ceph_assert(segment_info.is_empty());
- segment_info.set_open(seq, type);
- ceph_assert(num_empty > 0);
- --num_empty;
- ++num_open;
- if (type == segment_type_t::JOURNAL) {
- if (journal_segment_id != NULL_SEG_ID) {
- auto& last_journal_segment = segments[journal_segment_id];
- ceph_assert(last_journal_segment.is_closed());
- ceph_assert(last_journal_segment.type == segment_type_t::JOURNAL);
- ceph_assert(last_journal_segment.seq + 1 == seq);
- }
- journal_segment_id = segment;
-
- ++num_in_journal_open;
- ++num_type_journal;
- ++count_open_journal;
- } else {
- ++num_type_ool;
- ++count_open_ool;
- }
- ceph_assert(segment_info.written_to == 0);
- avail_bytes_in_open += get_segment_size();
-}
-
-void segments_info_t::mark_empty(
- segment_id_t segment)
-{
- LOG_PREFIX(segments_info_t::mark_empty);
- auto& segment_info = segments[segment];
- INFO("releasing {}, {}, num_segments(empty={}, opened={}, closed={})",
- segment, segment_info,
- num_empty, num_open, num_closed);
- ceph_assert(segment_info.is_closed());
- auto type = segment_info.type;
- assert(type != segment_type_t::NULL_SEG);
- segment_info.set_empty();
- ceph_assert(num_closed > 0);
- --num_closed;
- ++num_empty;
- if (type == segment_type_t::JOURNAL) {
- ceph_assert(num_type_journal > 0);
- --num_type_journal;
- ++count_release_journal;
- } else {
- ceph_assert(num_type_ool > 0);
- --num_type_ool;
- ++count_release_ool;
- }
-}
-
-void segments_info_t::mark_closed(
- segment_id_t segment)
-{
- LOG_PREFIX(segments_info_t::mark_closed);
- auto& segment_info = segments[segment];
- INFO("closing {}, {}, num_segments(empty={}, opened={}, closed={})",
- segment, segment_info,
- num_empty, num_open, num_closed);
- ceph_assert(segment_info.is_open());
- segment_info.set_closed();
- ceph_assert(num_open > 0);
- --num_open;
- ++num_closed;
- if (segment_info.type == segment_type_t::JOURNAL) {
- ceph_assert(num_in_journal_open > 0);
- --num_in_journal_open;
- ++count_close_journal;
- } else {
- ++count_close_ool;
- }
- ceph_assert(get_segment_size() >= segment_info.written_to);
- auto seg_avail_bytes = get_segment_size() - segment_info.written_to;
- ceph_assert(avail_bytes_in_open >= seg_avail_bytes);
- avail_bytes_in_open -= seg_avail_bytes;
-}
-
-void segments_info_t::update_written_to(
- segment_type_t type,
- paddr_t offset)
-{
- LOG_PREFIX(segments_info_t::update_written_to);
- auto& saddr = offset.as_seg_paddr();
- auto& segment_info = segments[saddr.get_segment_id()];
- if (!segment_info.is_open()) {
- ERROR("segment is not open, not updating, type={}, offset={}, {}",
- type, offset, segment_info);
- ceph_abort();
- }
-
- auto new_written_to = static_cast<std::size_t>(saddr.get_segment_off());
- ceph_assert(new_written_to <= get_segment_size());
- if (segment_info.written_to > new_written_to) {
- ERROR("written_to should not decrease! type={}, offset={}, {}",
- type, offset, segment_info);
- ceph_abort();
- }
-
- DEBUG("type={}, offset={}, {}", type, offset, segment_info);
- ceph_assert(type == segment_info.type);
- auto avail_deduction = new_written_to - segment_info.written_to;
- ceph_assert(avail_bytes_in_open >= avail_deduction);
- avail_bytes_in_open -= avail_deduction;
- segment_info.written_to = new_written_to;
-}
-
-bool SpaceTrackerSimple::equals(const SpaceTrackerI &_other) const
-{
- LOG_PREFIX(SpaceTrackerSimple::equals);
- const auto &other = static_cast<const SpaceTrackerSimple&>(_other);
-
- if (other.live_bytes_by_segment.size() != live_bytes_by_segment.size()) {
- ERROR("different segment counts, bug in test");
- assert(0 == "segment counts should match");
- return false;
- }
-
- bool all_match = true;
- for (auto i = live_bytes_by_segment.begin(), j = other.live_bytes_by_segment.begin();
- i != live_bytes_by_segment.end(); ++i, ++j) {
- if (i->second.live_bytes != j->second.live_bytes) {
- all_match = false;
- DEBUG("segment_id {} live bytes mismatch *this: {}, other: {}",
- i->first, i->second.live_bytes, j->second.live_bytes);
- }
- }
- return all_match;
-}
-
-int64_t SpaceTrackerDetailed::SegmentMap::allocate(
- device_segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len,
- const extent_len_t block_size)
-{
- LOG_PREFIX(SegmentMap::allocate);
- assert(offset % block_size == 0);
- assert(len % block_size == 0);
-
- const auto b = (offset / block_size);
- const auto e = (offset + len) / block_size;
-
- bool error = false;
- for (auto i = b; i < e; ++i) {
- if (bitmap[i]) {
- if (!error) {
- ERROR("found allocated in {}, {} ~ {}", segment, offset, len);
- error = true;
- }
- DEBUG("block {} allocated", i * block_size);
- }
- bitmap[i] = true;
- }
- return update_usage(len);
-}
-
-int64_t SpaceTrackerDetailed::SegmentMap::release(
- device_segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len,
- const extent_len_t block_size)
-{
- LOG_PREFIX(SegmentMap::release);
- assert(offset % block_size == 0);
- assert(len % block_size == 0);
-
- const auto b = (offset / block_size);
- const auto e = (offset + len) / block_size;
-
- bool error = false;
- for (auto i = b; i < e; ++i) {
- if (!bitmap[i]) {
- if (!error) {
- ERROR("found unallocated in {}, {} ~ {}", segment, offset, len);
- error = true;
- }
- DEBUG("block {} unallocated", i * block_size);
- }
- bitmap[i] = false;
- }
- return update_usage(-(int64_t)len);
-}
-
-bool SpaceTrackerDetailed::equals(const SpaceTrackerI &_other) const
-{
- LOG_PREFIX(SpaceTrackerDetailed::equals);
- const auto &other = static_cast<const SpaceTrackerDetailed&>(_other);
-
- if (other.segment_usage.size() != segment_usage.size()) {
- ERROR("different segment counts, bug in test");
- assert(0 == "segment counts should match");
- return false;
- }
-
- bool all_match = true;
- for (auto i = segment_usage.begin(), j = other.segment_usage.begin();
- i != segment_usage.end(); ++i, ++j) {
- if (i->second.get_usage() != j->second.get_usage()) {
- all_match = false;
- ERROR("segment_id {} live bytes mismatch *this: {}, other: {}",
- i->first, i->second.get_usage(), j->second.get_usage());
- }
- }
- return all_match;
-}
-
-void SpaceTrackerDetailed::SegmentMap::dump_usage(extent_len_t block_size) const
-{
- LOG_PREFIX(SegmentMap::dump_usage);
- INFO("dump start");
- for (unsigned i = 0; i < bitmap.size(); ++i) {
- if (bitmap[i]) {
- LOCAL_LOGGER.info(" {} still live", i * block_size);
- }
- }
-}
-
-void SpaceTrackerDetailed::dump_usage(segment_id_t id) const
-{
- LOG_PREFIX(SpaceTrackerDetailed::dump_usage);
- INFO("{}", id);
- segment_usage[id].dump_usage(
- block_size_by_segment_manager[id.device_id()]);
-}
-
-void SpaceTrackerSimple::dump_usage(segment_id_t id) const
-{
- LOG_PREFIX(SpaceTrackerSimple::dump_usage);
- INFO("id: {}, live_bytes: {}",
- id, live_bytes_by_segment[id].live_bytes);
-}
-
-SegmentCleaner::SegmentCleaner(
- config_t config,
- SegmentManagerGroupRef&& sm_group,
- BackrefManager &backref_manager,
- bool detailed)
- : detailed(detailed),
- config(config),
- sm_group(std::move(sm_group)),
- backref_manager(backref_manager),
- ool_segment_seq_allocator(
- new SegmentSeqAllocator(segment_type_t::OOL)),
- gc_process(*this)
-{
- config.validate();
-}
-
-void SegmentCleaner::register_metrics()
-{
- namespace sm = seastar::metrics;
- stats.segment_util.buckets.resize(UTIL_BUCKETS);
- std::size_t i;
- for (i = 0; i < UTIL_BUCKETS; ++i) {
- stats.segment_util.buckets[i].upper_bound = ((double)(i + 1)) / 10;
- stats.segment_util.buckets[i].count = 0;
- }
- // NOTE: by default the segments are empty
- i = get_bucket_index(UTIL_STATE_EMPTY);
- stats.segment_util.buckets[i].count = segments.get_num_segments();
-
- metrics.add_group("segment_cleaner", {
- sm::make_counter("segments_number",
- [this] { return segments.get_num_segments(); },
- sm::description("the number of segments")),
- sm::make_counter("segment_size",
- [this] { return segments.get_segment_size(); },
- sm::description("the bytes of a segment")),
- sm::make_counter("segments_in_journal",
- [this] { return get_segments_in_journal(); },
- sm::description("the number of segments in journal")),
- sm::make_counter("segments_type_journal",
- [this] { return segments.get_num_type_journal(); },
- sm::description("the number of segments typed journal")),
- sm::make_counter("segments_type_ool",
- [this] { return segments.get_num_type_ool(); },
- sm::description("the number of segments typed out-of-line")),
- sm::make_counter("segments_open",
- [this] { return segments.get_num_open(); },
- sm::description("the number of open segments")),
- sm::make_counter("segments_empty",
- [this] { return segments.get_num_empty(); },
- sm::description("the number of empty segments")),
- sm::make_counter("segments_closed",
- [this] { return segments.get_num_closed(); },
- sm::description("the number of closed segments")),
-
- sm::make_counter("segments_count_open_journal",
- [this] { return segments.get_count_open_journal(); },
- sm::description("the count of open journal segment operations")),
- sm::make_counter("segments_count_open_ool",
- [this] { return segments.get_count_open_ool(); },
- sm::description("the count of open ool segment operations")),
- sm::make_counter("segments_count_release_journal",
- [this] { return segments.get_count_release_journal(); },
- sm::description("the count of release journal segment operations")),
- sm::make_counter("segments_count_release_ool",
- [this] { return segments.get_count_release_ool(); },
- sm::description("the count of release ool segment operations")),
- sm::make_counter("segments_count_close_journal",
- [this] { return segments.get_count_close_journal(); },
- sm::description("the count of close journal segment operations")),
- sm::make_counter("segments_count_close_ool",
- [this] { return segments.get_count_close_ool(); },
- sm::description("the count of close ool segment operations")),
-
- sm::make_counter("total_bytes",
- [this] { return segments.get_total_bytes(); },
- sm::description("the size of the space")),
- sm::make_counter("available_bytes",
- [this] { return segments.get_available_bytes(); },
- sm::description("the size of the space is available")),
- sm::make_counter("unavailable_unreclaimable_bytes",
- [this] { return get_unavailable_unreclaimable_bytes(); },
- sm::description("the size of the space is unavailable and unreclaimable")),
- sm::make_counter("unavailable_reclaimable_bytes",
- [this] { return get_unavailable_reclaimable_bytes(); },
- sm::description("the size of the space is unavailable and reclaimable")),
- sm::make_counter("used_bytes", stats.used_bytes,
- sm::description("the size of the space occupied by live extents")),
- sm::make_counter("unavailable_unused_bytes",
- [this] { return get_unavailable_unused_bytes(); },
- sm::description("the size of the space is unavailable and not alive")),
-
- sm::make_counter("dirty_journal_bytes",
- [this] { return get_dirty_journal_size(); },
- sm::description("the size of the journal for dirty extents")),
- sm::make_counter("alloc_journal_bytes",
- [this] { return get_alloc_journal_size(); },
- sm::description("the size of the journal for alloc info")),
-
- sm::make_counter("projected_count", stats.projected_count,
- sm::description("the number of projected usage reservations")),
- sm::make_counter("projected_used_bytes_sum", stats.projected_used_bytes_sum,
- sm::description("the sum of the projected usage in bytes")),
-
- sm::make_counter("io_count", stats.io_count,
- sm::description("the sum of IOs")),
- sm::make_counter("io_blocked_count", stats.io_blocked_count,
- sm::description("IOs that are blocked by gc")),
- sm::make_counter("io_blocked_count_trim", stats.io_blocked_count_trim,
- sm::description("IOs that are blocked by trimming")),
- sm::make_counter("io_blocked_count_reclaim", stats.io_blocked_count_reclaim,
- sm::description("IOs that are blocked by reclaimming")),
- sm::make_counter("io_blocked_sum", stats.io_blocked_sum,
- sm::description("the sum of blocking IOs")),
-
- sm::make_counter("reclaimed_bytes", stats.reclaimed_bytes,
- sm::description("rewritten bytes due to reclaim")),
- sm::make_counter("reclaimed_segment_bytes", stats.reclaimed_segment_bytes,
- sm::description("rewritten bytes due to reclaim")),
- sm::make_counter("closed_journal_used_bytes", stats.closed_journal_used_bytes,
- sm::description("used bytes when close a journal segment")),
- sm::make_counter("closed_journal_total_bytes", stats.closed_journal_total_bytes,
- sm::description("total bytes of closed journal segments")),
- sm::make_counter("closed_ool_used_bytes", stats.closed_ool_used_bytes,
- sm::description("used bytes when close a ool segment")),
- sm::make_counter("closed_ool_total_bytes", stats.closed_ool_total_bytes,
- sm::description("total bytes of closed ool segments")),
-
- sm::make_gauge("available_ratio",
- [this] { return segments.get_available_ratio(); },
- sm::description("ratio of available space to total space")),
- sm::make_gauge("reclaim_ratio",
- [this] { return get_reclaim_ratio(); },
- sm::description("ratio of reclaimable space to unavailable space")),
-
- sm::make_histogram("segment_utilization_distribution",
- [this]() -> seastar::metrics::histogram& {
- return stats.segment_util;
- },
- sm::description("utilization distribution of all segments"))
- });
-}
-
-segment_id_t SegmentCleaner::allocate_segment(
- segment_seq_t seq,
- segment_type_t type)
-{
- LOG_PREFIX(SegmentCleaner::allocate_segment);
- assert(seq != NULL_SEG_SEQ);
- for (auto it = segments.begin();
- it != segments.end();
- ++it) {
- auto seg_id = it->first;
- auto& segment_info = it->second;
- if (segment_info.is_empty()) {
- auto old_usage = calc_utilization(seg_id);
- segments.mark_open(seg_id, seq, type);
- auto new_usage = calc_utilization(seg_id);
- adjust_segment_util(old_usage, new_usage);
- INFO("opened, should_block_on_gc {}, projected_avail_ratio {}, "
- "reclaim_ratio {}",
- should_block_on_gc(),
- get_projected_available_ratio(),
- get_reclaim_ratio());
- return seg_id;
- }
- }
- ERROR("out of space with segment_seq={}", segment_seq_printer_t{seq});
- ceph_abort();
- return NULL_SEG_ID;
-}
-
-void SegmentCleaner::update_journal_tail_target(
- journal_seq_t dirty_replay_from,
- journal_seq_t alloc_replay_from)
-{
- LOG_PREFIX(SegmentCleaner::update_journal_tail_target);
- if (disable_trim) return;
- assert(dirty_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
- assert(alloc_replay_from.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
- if (dirty_extents_replay_from == JOURNAL_SEQ_NULL
- || dirty_replay_from > dirty_extents_replay_from) {
- DEBUG("dirty_extents_replay_from={} => {}",
- dirty_extents_replay_from, dirty_replay_from);
- dirty_extents_replay_from = dirty_replay_from;
- }
-
- update_alloc_info_replay_from(alloc_replay_from);
-
- journal_seq_t target = std::min(dirty_replay_from, alloc_replay_from);
- ceph_assert(target != JOURNAL_SEQ_NULL);
- auto journal_head = segments.get_journal_head();
- ceph_assert(journal_head == JOURNAL_SEQ_NULL ||
- journal_head >= target);
- if (journal_tail_target == JOURNAL_SEQ_NULL ||
- target > journal_tail_target) {
- if (!init_complete ||
- journal_tail_target.segment_seq == target.segment_seq) {
- DEBUG("journal_tail_target={} => {}", journal_tail_target, target);
- } else {
- INFO("journal_tail_target={} => {}", journal_tail_target, target);
- }
- journal_tail_target = target;
- }
- gc_process.maybe_wake_on_space_used();
- maybe_wake_gc_blocked_io();
-}
-
-void SegmentCleaner::update_alloc_info_replay_from(
- journal_seq_t alloc_replay_from)
-{
- LOG_PREFIX(SegmentCleaner::update_alloc_info_replay_from);
- if (alloc_info_replay_from == JOURNAL_SEQ_NULL
- || alloc_replay_from > alloc_info_replay_from) {
- DEBUG("alloc_info_replay_from={} => {}",
- alloc_info_replay_from, alloc_replay_from);
- alloc_info_replay_from = alloc_replay_from;
- }
-}
-
-void SegmentCleaner::update_journal_tail_committed(journal_seq_t committed)
-{
- LOG_PREFIX(SegmentCleaner::update_journal_tail_committed);
- assert(committed.offset.get_addr_type() != addr_types_t::RANDOM_BLOCK);
- if (committed == JOURNAL_SEQ_NULL) {
- return;
- }
- auto journal_head = segments.get_journal_head();
- ceph_assert(journal_head == JOURNAL_SEQ_NULL ||
- journal_head >= committed);
-
- if (journal_tail_committed == JOURNAL_SEQ_NULL ||
- committed > journal_tail_committed) {
- DEBUG("update journal_tail_committed={} => {}",
- journal_tail_committed, committed);
- journal_tail_committed = committed;
- }
- if (journal_tail_target == JOURNAL_SEQ_NULL ||
- committed > journal_tail_target) {
- DEBUG("update journal_tail_target={} => {}",
- journal_tail_target, committed);
- journal_tail_target = committed;
- }
-}
-
-void SegmentCleaner::close_segment(segment_id_t segment)
-{
- LOG_PREFIX(SegmentCleaner::close_segment);
- auto old_usage = calc_utilization(segment);
- segments.mark_closed(segment);
- auto &seg_info = segments[segment];
- if (seg_info.type == segment_type_t::JOURNAL) {
- stats.closed_journal_used_bytes += space_tracker->get_usage(segment);
- stats.closed_journal_total_bytes += segments.get_segment_size();
- } else {
- stats.closed_ool_used_bytes += space_tracker->get_usage(segment);
- stats.closed_ool_total_bytes += segments.get_segment_size();
- }
- auto new_usage = calc_utilization(segment);
- adjust_segment_util(old_usage, new_usage);
- INFO("closed, should_block_on_gc {}, projected_avail_ratio {}, "
- "reclaim_ratio {}",
- should_block_on_gc(),
- get_projected_available_ratio(),
- get_reclaim_ratio());
-}
-
-SegmentCleaner::trim_backrefs_ret SegmentCleaner::trim_backrefs(
- Transaction &t,
- journal_seq_t limit)
-{
- return backref_manager.merge_cached_backrefs(
- t,
- limit,
- config.rewrite_backref_bytes_per_cycle
- );
-}
-
-SegmentCleaner::rewrite_dirty_ret SegmentCleaner::rewrite_dirty(
- Transaction &t,
- journal_seq_t limit)
-{
- return ecb->get_next_dirty_extents(
- t,
- limit,
- config.rewrite_dirty_bytes_per_cycle
- ).si_then([=, &t](auto dirty_list) {
- LOG_PREFIX(SegmentCleaner::rewrite_dirty);
- DEBUGT("rewrite {} dirty extents", t, dirty_list.size());
- return seastar::do_with(
- std::move(dirty_list),
- [this, FNAME, &t](auto &dirty_list) {
- return trans_intr::do_for_each(
- dirty_list,
- [this, FNAME, &t](auto &e) {
- DEBUGT("cleaning {}", t, *e);
- return ecb->rewrite_extent(t, e);
- });
- });
- });
-}
-
-SegmentCleaner::gc_cycle_ret SegmentCleaner::GCProcess::run()
-{
- return seastar::do_until(
- [this] { return is_stopping(); },
- [this] {
- return maybe_wait_should_run(
- ).then([this] {
- cleaner.log_gc_state("GCProcess::run");
-
- if (is_stopping()) {
- return seastar::now();
- } else {
- return cleaner.do_gc_cycle();
- }
- });
- });
-}
-
-SegmentCleaner::gc_cycle_ret SegmentCleaner::do_gc_cycle()
-{
- if (gc_should_trim_journal()) {
- return gc_trim_journal(
- ).handle_error(
- crimson::ct_error::assert_all{
- "GCProcess::run encountered invalid error in gc_trim_journal"
- }
- );
- } else if (gc_should_trim_backref()) {
- return gc_trim_backref(get_backref_tail()
- ).safe_then([](auto) {
- return seastar::now();
- }).handle_error(
- crimson::ct_error::assert_all{
- "GCProcess::run encountered invalid error in gc_trim_backref"
- }
- );
- } else if (gc_should_reclaim_space()) {
- return gc_reclaim_space(
- ).handle_error(
- crimson::ct_error::assert_all{
- "GCProcess::run encountered invalid error in gc_reclaim_space"
- }
- );
- } else {
- return seastar::now();
- }
-}
-
-SegmentCleaner::gc_trim_backref_ret
-SegmentCleaner::gc_trim_backref(journal_seq_t limit) {
- return seastar::do_with(
- journal_seq_t(),
- [this, limit=std::move(limit)](auto &seq) mutable {
- return repeat_eagain([this, limit=std::move(limit), &seq] {
- return ecb->with_transaction_intr(
- Transaction::src_t::TRIM_BACKREF,
- "trim_backref",
- [this, limit](auto &t) {
- return trim_backrefs(
- t,
- limit
- ).si_then([this, &t, limit](auto trim_backrefs_to)
- -> ExtentCallbackInterface::submit_transaction_direct_iertr::future<
- journal_seq_t> {
- if (trim_backrefs_to != JOURNAL_SEQ_NULL) {
- return ecb->submit_transaction_direct(
- t, std::make_optional<journal_seq_t>(trim_backrefs_to)
- ).si_then([trim_backrefs_to=std::move(trim_backrefs_to)]() mutable {
- return seastar::make_ready_future<
- journal_seq_t>(std::move(trim_backrefs_to));
- });
- }
- return seastar::make_ready_future<journal_seq_t>(std::move(limit));
- });
- }).safe_then([&seq](auto trim_backrefs_to) {
- seq = std::move(trim_backrefs_to);
- });
- }).safe_then([&seq] {
- return gc_trim_backref_ertr::make_ready_future<
- journal_seq_t>(std::move(seq));
- });
- });
-}
-
-SegmentCleaner::gc_trim_journal_ret SegmentCleaner::gc_trim_journal()
-{
- return gc_trim_backref(get_dirty_tail()
- ).safe_then([this](auto seq) {
- return repeat_eagain([this, seq=std::move(seq)]() mutable {
- return ecb->with_transaction_intr(
- Transaction::src_t::CLEANER_TRIM,
- "trim_journal",
- [this, seq=std::move(seq)](auto& t)
- {
- return rewrite_dirty(t, seq
- ).si_then([this, &t] {
- return ecb->submit_transaction_direct(t);
- });
- });
- });
- });
-}
-
-SegmentCleaner::retrieve_live_extents_ret
-SegmentCleaner::_retrieve_live_extents(
- Transaction &t,
- std::set<
- backref_buf_entry_t,
- backref_buf_entry_t::cmp_t> &&backrefs,
- std::vector<CachedExtentRef> &extents)
-{
- return seastar::do_with(
- JOURNAL_SEQ_NULL,
- std::move(backrefs),
- [this, &t, &extents](auto &seq, auto &backrefs) {
- return trans_intr::parallel_for_each(
- backrefs,
- [this, &extents, &t, &seq](auto &ent) {
- LOG_PREFIX(SegmentCleaner::_retrieve_live_extents);
- DEBUGT("getting extent of type {} at {}~{}",
- t,
- ent.type,
- ent.paddr,
- ent.len);
- return ecb->get_extent_if_live(
- t, ent.type, ent.paddr, ent.laddr, ent.len
- ).si_then([this, FNAME, &extents, &ent, &seq, &t](auto ext) {
- if (!ext) {
- DEBUGT("addr {} dead, skipping", t, ent.paddr);
- auto backref = backref_manager.get_cached_backref_removal(ent.paddr);
- if (seq == JOURNAL_SEQ_NULL || seq < backref.seq) {
- seq = backref.seq;
- }
- } else {
- extents.emplace_back(std::move(ext));
- }
- return ExtentCallbackInterface::rewrite_extent_iertr::now();
- });
- }).si_then([&seq] {
- return retrieve_live_extents_iertr::make_ready_future<
- journal_seq_t>(std::move(seq));
- });
- });
-}
-
-SegmentCleaner::retrieve_backref_mappings_ret
-SegmentCleaner::retrieve_backref_mappings(
- paddr_t start_paddr,
- paddr_t end_paddr)
-{
- return seastar::do_with(
- backref_pin_list_t(),
- [this, start_paddr, end_paddr](auto &pin_list) {
- return repeat_eagain([this, start_paddr, end_paddr, &pin_list] {
- return ecb->with_transaction_intr(
- Transaction::src_t::READ,
- "get_backref_mappings",
- [this, start_paddr, end_paddr](auto &t) {
- return backref_manager.get_mappings(
- t, start_paddr, end_paddr
- );
- }).safe_then([&pin_list](auto&& list) {
- pin_list = std::move(list);
- });
- }).safe_then([&pin_list] {
- return seastar::make_ready_future<backref_pin_list_t>(std::move(pin_list));
- });
- });
-}
-
-SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space()
-{
- LOG_PREFIX(SegmentCleaner::gc_reclaim_space);
- if (!reclaim_state) {
- segment_id_t seg_id = get_next_reclaim_segment();
- auto &segment_info = segments[seg_id];
- INFO("reclaim {} {} start", seg_id, segment_info);
- ceph_assert(segment_info.is_closed());
- reclaim_state = reclaim_state_t::create(
- seg_id, segments.get_segment_size());
- }
- reclaim_state->advance(config.reclaim_bytes_per_cycle);
-
- DEBUG("reclaiming {}~{}",
- reclaim_state->start_pos,
- reclaim_state->end_pos);
- double pavail_ratio = get_projected_available_ratio();
- seastar::lowres_system_clock::time_point start = seastar::lowres_system_clock::now();
-
- return seastar::do_with(
- (size_t)0,
- (size_t)0,
- [this, pavail_ratio, start](
- auto &reclaimed,
- auto &runs) {
- return retrieve_backref_mappings(
- reclaim_state->start_pos,
- reclaim_state->end_pos
- ).safe_then([this, &reclaimed, &runs](auto pin_list) {
- return seastar::do_with(
- std::move(pin_list),
- [this, &reclaimed, &runs](auto &pin_list) {
- return repeat_eagain(
- [this, &reclaimed, &runs, &pin_list]() mutable {
- reclaimed = 0;
- runs++;
- return seastar::do_with(
- backref_manager.get_cached_backref_extents_in_range(
- reclaim_state->start_pos, reclaim_state->end_pos),
- backref_manager.get_cached_backrefs_in_range(
- reclaim_state->start_pos, reclaim_state->end_pos),
- backref_manager.get_cached_backref_removals_in_range(
- reclaim_state->start_pos, reclaim_state->end_pos),
- JOURNAL_SEQ_NULL,
- [this, &reclaimed, &pin_list](
- auto &backref_extents,
- auto &backrefs,
- auto &del_backrefs,
- auto &seq) {
- return ecb->with_transaction_intr(
- Transaction::src_t::CLEANER_RECLAIM,
- "reclaim_space",
- [this, &backref_extents, &backrefs, &seq,
- &del_backrefs, &reclaimed, &pin_list](auto &t) {
- LOG_PREFIX(SegmentCleaner::gc_reclaim_space);
- DEBUGT("{} backrefs, {} del_backrefs, {} pins", t,
- backrefs.size(), del_backrefs.size(), pin_list.size());
- for (auto &br : backrefs) {
- if (seq == JOURNAL_SEQ_NULL
- || (br.seq != JOURNAL_SEQ_NULL && br.seq > seq))
- seq = br.seq;
- }
- for (auto &pin : pin_list) {
- backrefs.emplace(
- pin->get_key(),
- pin->get_val(),
- pin->get_length(),
- pin->get_type(),
- journal_seq_t());
- }
- for (auto &del_backref : del_backrefs) {
- DEBUGT("del_backref {}~{} {} {}", t,
- del_backref.paddr, del_backref.len, del_backref.type, del_backref.seq);
- auto it = backrefs.find(del_backref.paddr);
- if (it != backrefs.end())
- backrefs.erase(it);
- if (seq == JOURNAL_SEQ_NULL
- || (del_backref.seq != JOURNAL_SEQ_NULL && del_backref.seq > seq))
- seq = del_backref.seq;
- }
- return seastar::do_with(
- std::vector<CachedExtentRef>(),
- [this, &backref_extents, &backrefs, &reclaimed, &t, &seq]
- (auto &extents) {
- return backref_manager.retrieve_backref_extents(
- t, std::move(backref_extents), extents
- ).si_then([this, &extents, &t, &backrefs] {
- return _retrieve_live_extents(
- t, std::move(backrefs), extents);
- }).si_then([this, &seq, &t](auto nseq) {
- if (nseq != JOURNAL_SEQ_NULL &&
- (nseq > seq || seq == JOURNAL_SEQ_NULL))
- seq = nseq;
- auto fut = BackrefManager::merge_cached_backrefs_iertr::now();
- if (seq != JOURNAL_SEQ_NULL) {
- fut = backref_manager.merge_cached_backrefs(
- t, seq, std::numeric_limits<uint64_t>::max()
- ).si_then([](auto) {
- return BackrefManager::merge_cached_backrefs_iertr::now();
- });
- }
- return fut;
- }).si_then([&extents, this, &t, &reclaimed] {
- return trans_intr::do_for_each(
- extents,
- [this, &t, &reclaimed](auto &ext) {
- reclaimed += ext->get_length();
- return ecb->rewrite_extent(t, ext);
- });
- });
- }).si_then([this, &t, &seq] {
- if (reclaim_state->is_complete()) {
- t.mark_segment_to_release(reclaim_state->get_segment_id());
- }
- return ecb->submit_transaction_direct(
- t, std::make_optional<journal_seq_t>(std::move(seq)));
- });
- });
- });
- });
- });
- }).safe_then(
- [&reclaimed, this, pavail_ratio, start, &runs] {
- LOG_PREFIX(SegmentCleaner::gc_reclaim_space);
-#ifndef NDEBUG
- auto ndel_backrefs =
- backref_manager.get_cached_backref_removals_in_range(
- reclaim_state->start_pos, reclaim_state->end_pos);
- if (!ndel_backrefs.empty()) {
- for (auto &del_br : ndel_backrefs) {
- ERROR("unexpected del_backref {}~{} {} {}",
- del_br.paddr, del_br.len, del_br.type, del_br.seq);
- }
- ceph_abort("impossible");
- }
-#endif
- stats.reclaiming_bytes += reclaimed;
- auto d = seastar::lowres_system_clock::now() - start;
- DEBUG("duration: {}, pavail_ratio before: {}, repeats: {}", d, pavail_ratio, runs);
- if (reclaim_state->is_complete()) {
- INFO("reclaim {} finish, alive/total={}",
- reclaim_state->get_segment_id(),
- stats.reclaiming_bytes/(double)segments.get_segment_size());
- stats.reclaimed_bytes += stats.reclaiming_bytes;
- stats.reclaimed_segment_bytes += segments.get_segment_size();
- stats.reclaiming_bytes = 0;
- reclaim_state.reset();
- }
- });
- });
-}
-
-SegmentCleaner::mount_ret SegmentCleaner::mount()
-{
- LOG_PREFIX(SegmentCleaner::mount);
- const auto& sms = sm_group->get_segment_managers();
- INFO("{} segment managers", sms.size());
- init_complete = false;
- stats = {};
- journal_tail_target = JOURNAL_SEQ_NULL;
- journal_tail_committed = JOURNAL_SEQ_NULL;
- dirty_extents_replay_from = JOURNAL_SEQ_NULL;
- alloc_info_replay_from = JOURNAL_SEQ_NULL;
-
- space_tracker.reset(
- detailed ?
- (SpaceTrackerI*)new SpaceTrackerDetailed(
- sms) :
- (SpaceTrackerI*)new SpaceTrackerSimple(
- sms));
-
- segments.reset();
- for (auto sm : sms) {
- segments.add_segment_manager(*sm);
- }
- metrics.clear();
- register_metrics();
-
- INFO("{} segments", segments.get_num_segments());
- return seastar::do_with(
- std::vector<std::pair<segment_id_t, segment_header_t>>(),
- [this, FNAME](auto& segment_set) {
- return crimson::do_for_each(
- segments.begin(),
- segments.end(),
- [this, FNAME, &segment_set](auto& it) {
- auto segment_id = it.first;
- return sm_group->read_segment_header(
- segment_id
- ).safe_then([segment_id, this, FNAME, &segment_set](auto header) {
- INFO("segment_id={} -- {}", segment_id, header);
- auto s_type = header.get_type();
- if (s_type == segment_type_t::NULL_SEG) {
- ERROR("got null segment, segment_id={} -- {}", segment_id, header);
- ceph_abort();
- }
- return sm_group->read_segment_tail(
- segment_id
- ).safe_then([this, segment_id, &segment_set, header](auto tail)
- -> scan_extents_ertr::future<> {
- if (tail.segment_nonce != header.segment_nonce) {
- return scan_nonfull_segment(header, segment_set, segment_id);
- }
- time_point last_modified(duration(tail.last_modified));
- time_point last_rewritten(duration(tail.last_rewritten));
- segments.update_last_modified_rewritten(
- segment_id, last_modified, last_rewritten);
- if (tail.get_type() == segment_type_t::JOURNAL) {
- update_journal_tail_committed(tail.journal_tail);
- update_journal_tail_target(
- tail.journal_tail,
- tail.alloc_replay_from);
- }
- init_mark_segment_closed(
- segment_id,
- header.segment_seq,
- header.type);
- return seastar::now();
- }).handle_error(
- crimson::ct_error::enodata::handle(
- [this, header, segment_id, &segment_set](auto) {
- return scan_nonfull_segment(header, segment_set, segment_id);
- }),
- crimson::ct_error::pass_further_all{}
- );
- }).handle_error(
- crimson::ct_error::enoent::handle([](auto) {
- return mount_ertr::now();
- }),
- crimson::ct_error::enodata::handle([](auto) {
- return mount_ertr::now();
- }),
- crimson::ct_error::input_output_error::pass_further{},
- crimson::ct_error::assert_all{"unexpected error"}
- );
- });
- });
-}
-
-SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment(
- const segment_header_t& header,
- scan_extents_ret_bare& segment_set,
- segment_id_t segment_id)
-{
- return seastar::do_with(
- scan_valid_records_cursor({
- segments[segment_id].seq,
- paddr_t::make_seg_paddr(segment_id, 0)}),
- [this, segment_id, segment_header=header](auto& cursor) {
- return seastar::do_with(
- SegmentManagerGroup::found_record_handler_t(
- [this, segment_id, segment_header](
- record_locator_t locator,
- const record_group_header_t& header,
- const bufferlist& mdbuf
- ) mutable -> SegmentManagerGroup::scan_valid_records_ertr::future<> {
- LOG_PREFIX(SegmentCleaner::scan_nonfull_segment);
- if (segment_header.get_type() == segment_type_t::OOL) {
- DEBUG("out-of-line segment {}, decodeing {} records",
- segment_id,
- header.records);
- auto maybe_headers = try_decode_record_headers(header, mdbuf);
- if (!maybe_headers) {
- ERROR("unable to decode record headers for record group {}",
- locator.record_block_base);
- return crimson::ct_error::input_output_error::make();
- }
-
- for (auto& header : *maybe_headers) {
- mod_time_point_t ctime = header.commit_time;
- auto commit_type = header.commit_type;
- if (!ctime) {
- ERROR("SegmentCleaner::scan_nonfull_segment: extent {} 0 commit_time",
- ctime);
- ceph_abort("0 commit_time");
- }
- time_point commit_time{duration(ctime)};
- assert(commit_type == record_commit_type_t::MODIFY
- || commit_type == record_commit_type_t::REWRITE);
- if (commit_type == record_commit_type_t::MODIFY) {
- segments.update_last_modified_rewritten(segment_id, commit_time, {});
- }
- if (commit_type == record_commit_type_t::REWRITE) {
- segments.update_last_modified_rewritten(segment_id, {}, commit_time);
- }
- }
- } else {
- DEBUG("inline segment {}, decodeing {} records",
- segment_id,
- header.records);
- auto maybe_record_deltas_list = try_decode_deltas(
- header, mdbuf, locator.record_block_base);
- if (!maybe_record_deltas_list) {
- ERROR("unable to decode deltas for record {} at {}",
- header, locator);
- return crimson::ct_error::input_output_error::make();
- }
- for (auto &record_deltas : *maybe_record_deltas_list) {
- for (auto &[ctime, delta] : record_deltas.deltas) {
- if (delta.type == extent_types_t::ALLOC_TAIL) {
- journal_seq_t seq;
- decode(seq, delta.bl);
- update_alloc_info_replay_from(seq);
- }
- }
- }
- }
- return seastar::now();
- }),
- [&cursor, segment_header, this](auto& handler) {
- return sm_group->scan_valid_records(
- cursor,
- segment_header.segment_nonce,
- segments.get_segment_size(),
- handler);
- }
- );
- }).safe_then([this, segment_id, header](auto) {
- init_mark_segment_closed(
- segment_id,
- header.segment_seq,
- header.type);
- return seastar::now();
- });
-}
-
-SegmentCleaner::release_ertr::future<>
-SegmentCleaner::maybe_release_segment(Transaction &t)
-{
- auto to_release = t.get_segment_to_release();
- if (to_release != NULL_SEG_ID) {
- LOG_PREFIX(SegmentCleaner::maybe_release_segment);
- INFOT("releasing segment {}", t, to_release);
- return sm_group->release_segment(to_release
- ).safe_then([this, FNAME, &t, to_release] {
- auto old_usage = calc_utilization(to_release);
- ceph_assert(old_usage == 0);
- segments.mark_empty(to_release);
- auto new_usage = calc_utilization(to_release);
- adjust_segment_util(old_usage, new_usage);
- INFOT("released, should_block_on_gc {}, projected_avail_ratio {}, "
- "reclaim_ratio {}",
- t,
- should_block_on_gc(),
- get_projected_available_ratio(),
- get_reclaim_ratio());
- if (space_tracker->get_usage(to_release) != 0) {
- space_tracker->dump_usage(to_release);
- ceph_abort();
- }
- maybe_wake_gc_blocked_io();
- });
- } else {
- return SegmentManager::release_ertr::now();
- }
-}
-
-void SegmentCleaner::complete_init()
-{
- LOG_PREFIX(SegmentCleaner::complete_init);
- if (disable_trim) {
- init_complete = true;
- return;
- }
- INFO("done, start GC");
- ceph_assert(segments.get_journal_head() != JOURNAL_SEQ_NULL);
- init_complete = true;
- gc_process.start();
-}
-
-void SegmentCleaner::mark_space_used(
- paddr_t addr,
- extent_len_t len,
- time_point last_modified,
- time_point last_rewritten,
- bool init_scan)
-{
- LOG_PREFIX(SegmentCleaner::mark_space_used);
- if (addr.get_addr_type() != addr_types_t::SEGMENT) {
- return;
- }
- auto& seg_addr = addr.as_seg_paddr();
-
- if (!init_scan && !init_complete) {
- return;
- }
-
- stats.used_bytes += len;
- auto old_usage = calc_utilization(seg_addr.get_segment_id());
- [[maybe_unused]] auto ret = space_tracker->allocate(
- seg_addr.get_segment_id(),
- seg_addr.get_segment_off(),
- len);
- auto new_usage = calc_utilization(seg_addr.get_segment_id());
- adjust_segment_util(old_usage, new_usage);
-
- // use the last extent's last modified time for the calculation of the projected
- // time the segments' live extents are to stay unmodified; this is an approximation
- // of the sprite lfs' segment "age".
-
- segments.update_last_modified_rewritten(
- seg_addr.get_segment_id(), last_modified, last_rewritten);
-
- gc_process.maybe_wake_on_space_used();
- assert(ret > 0);
- DEBUG("segment {} new len: {}~{}, live_bytes: {}",
- seg_addr.get_segment_id(),
- addr,
- len,
- space_tracker->get_usage(seg_addr.get_segment_id()));
-}
-
-void SegmentCleaner::mark_space_free(
- paddr_t addr,
- extent_len_t len)
-{
- LOG_PREFIX(SegmentCleaner::mark_space_free);
- if (!init_complete) {
- return;
- }
- if (addr.get_addr_type() != addr_types_t::SEGMENT) {
- return;
- }
-
- ceph_assert(stats.used_bytes >= len);
- stats.used_bytes -= len;
- auto& seg_addr = addr.as_seg_paddr();
-
- DEBUG("segment {} free len: {}~{}",
- seg_addr.get_segment_id(), addr, len);
- auto old_usage = calc_utilization(seg_addr.get_segment_id());
- [[maybe_unused]] auto ret = space_tracker->release(
- seg_addr.get_segment_id(),
- seg_addr.get_segment_off(),
- len);
- auto new_usage = calc_utilization(seg_addr.get_segment_id());
- adjust_segment_util(old_usage, new_usage);
- maybe_wake_gc_blocked_io();
- assert(ret >= 0);
- DEBUG("segment {} free len: {}~{}, live_bytes: {}",
- seg_addr.get_segment_id(),
- addr,
- len,
- space_tracker->get_usage(seg_addr.get_segment_id()));
-}
-
-segment_id_t SegmentCleaner::get_next_reclaim_segment() const
-{
- LOG_PREFIX(SegmentCleaner::get_next_reclaim_segment);
- segment_id_t id = NULL_SEG_ID;
- double max_benefit_cost = 0;
- for (auto& [_id, segment_info] : segments) {
- if (segment_info.is_closed() &&
- !segment_info.is_in_journal(journal_tail_committed)) {
- double benefit_cost = calc_gc_benefit_cost(_id);
- if (benefit_cost > max_benefit_cost) {
- id = _id;
- max_benefit_cost = benefit_cost;
- }
- }
- }
- if (id != NULL_SEG_ID) {
- DEBUG("segment {}, benefit_cost {}",
- id, max_benefit_cost);
- return id;
- } else {
- ceph_assert(get_segments_reclaimable() == 0);
- // see gc_should_reclaim_space()
- ceph_abort("impossible!");
- return NULL_SEG_ID;
- }
-}
-
-void SegmentCleaner::log_gc_state(const char *caller) const
-{
- LOG_PREFIX(SegmentCleaner::log_gc_state);
- if (LOCAL_LOGGER.is_enabled(seastar::log_level::debug) &&
- !disable_trim) {
- DEBUG(
- "caller {}, "
- "empty {}, "
- "open {}, "
- "closed {}, "
- "in_journal {}, "
- "total {}B, "
- "available {}B, "
- "unavailable {}B, "
- "unavailable_used {}B, "
- "unavailable_unused {}B; "
- "reclaim_ratio {}, "
- "available_ratio {}, "
- "should_block_on_gc {}, "
- "gc_should_reclaim_space {}, "
- "journal_head {}, "
- "journal_tail_target {}, "
- "journal_tail_commit {}, "
- "dirty_tail {}, "
- "dirty_tail_limit {}, "
- "gc_should_trim_journal {}, ",
- caller,
- segments.get_num_empty(),
- segments.get_num_open(),
- segments.get_num_closed(),
- get_segments_in_journal(),
- segments.get_total_bytes(),
- segments.get_available_bytes(),
- segments.get_unavailable_bytes(),
- stats.used_bytes,
- get_unavailable_unused_bytes(),
- get_reclaim_ratio(),
- segments.get_available_ratio(),
- should_block_on_gc(),
- gc_should_reclaim_space(),
- segments.get_journal_head(),
- journal_tail_target,
- journal_tail_committed,
- get_dirty_tail(),
- get_dirty_tail_limit(),
- gc_should_trim_journal()
- );
- }
-}
-
-seastar::future<>
-SegmentCleaner::reserve_projected_usage(std::size_t projected_usage)
-{
- if (disable_trim) {
- return seastar::now();
- }
- ceph_assert(init_complete);
- // The pipeline configuration prevents another IO from entering
- // prepare until the prior one exits and clears this.
- ceph_assert(!blocked_io_wake);
- ++stats.io_count;
- bool is_blocked = false;
- if (should_block_on_trim()) {
- is_blocked = true;
- ++stats.io_blocked_count_trim;
- }
- if (should_block_on_reclaim()) {
- is_blocked = true;
- ++stats.io_blocked_count_reclaim;
- }
- if (is_blocked) {
- ++stats.io_blocking_num;
- ++stats.io_blocked_count;
- stats.io_blocked_sum += stats.io_blocking_num;
- }
- return seastar::do_until(
- [this] {
- log_gc_state("await_hard_limits");
- return !should_block_on_gc();
- },
- [this] {
- blocked_io_wake = seastar::promise<>();
- return blocked_io_wake->get_future();
- }
- ).then([this, projected_usage, is_blocked] {
- ceph_assert(!blocked_io_wake);
- stats.projected_used_bytes += projected_usage;
- ++stats.projected_count;
- stats.projected_used_bytes_sum += stats.projected_used_bytes;
- if (is_blocked) {
- assert(stats.io_blocking_num > 0);
- --stats.io_blocking_num;
- }
- });
-}
-
-void SegmentCleaner::release_projected_usage(std::size_t projected_usage)
-{
- if (disable_trim) return;
- ceph_assert(init_complete);
- ceph_assert(stats.projected_used_bytes >= projected_usage);
- stats.projected_used_bytes -= projected_usage;
- return maybe_wake_gc_blocked_io();
-}
-
-}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <boost/intrusive/set.hpp>
-#include <seastar/core/metrics_types.hh>
-
-#include "common/ceph_time.h"
-
-#include "osd/osd_types.h"
-
-#include "crimson/os/seastore/backref_manager.h"
-#include "crimson/os/seastore/cached_extent.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "crimson/os/seastore/segment_manager.h"
-#include "crimson/os/seastore/segment_manager_group.h"
-#include "crimson/os/seastore/transaction.h"
-#include "crimson/os/seastore/segment_seq_allocator.h"
-
-namespace crimson::os::seastore {
-
-/*
- * segment_info_t
- *
- * Maintains the tracked information for a segment.
- * It is read-only outside segments_info_t.
- */
-struct segment_info_t {
- using time_point = seastar::lowres_system_clock::time_point;
-
- // segment_info_t is initiated as set_empty()
- Segment::segment_state_t state = Segment::segment_state_t::EMPTY;
-
- // Will be non-null for any segments in the current journal
- segment_seq_t seq = NULL_SEG_SEQ;
-
- segment_type_t type = segment_type_t::NULL_SEG;
-
- time_point last_modified;
- time_point last_rewritten;
-
- std::size_t written_to = 0;
-
- bool is_in_journal(journal_seq_t tail_committed) const {
- return type == segment_type_t::JOURNAL &&
- tail_committed.segment_seq <= seq;
- }
-
- bool is_empty() const {
- return state == Segment::segment_state_t::EMPTY;
- }
-
- bool is_closed() const {
- return state == Segment::segment_state_t::CLOSED;
- }
-
- bool is_open() const {
- return state == Segment::segment_state_t::OPEN;
- }
-
- void init_closed(segment_seq_t, segment_type_t, std::size_t);
-
- void set_open(segment_seq_t, segment_type_t);
-
- void set_empty();
-
- void set_closed();
-
- void update_last_modified_rewritten(
- time_point _last_modified, time_point _last_rewritten) {
- if (_last_modified != time_point() && last_modified < _last_modified) {
- last_modified = _last_modified;
- }
- if (_last_rewritten != time_point() && last_rewritten < _last_rewritten) {
- last_rewritten = _last_rewritten;
- }
- }
-};
-
-std::ostream& operator<<(std::ostream&, const segment_info_t&);
-
-/*
- * segments_info_t
- *
- * Keep track of all segments and related information.
- */
-class segments_info_t {
-public:
- using time_point = seastar::lowres_system_clock::time_point;
-
- segments_info_t() {
- reset();
- }
-
- const segment_info_t& operator[](segment_id_t id) const {
- return segments[id];
- }
-
- auto begin() const {
- return segments.begin();
- }
-
- auto end() const {
- return segments.end();
- }
-
- std::size_t get_num_segments() const {
- assert(segments.size() > 0);
- return segments.size();
- }
- std::size_t get_segment_size() const {
- assert(segment_size > 0);
- return segment_size;
- }
- std::size_t get_num_in_journal_open() const {
- return num_in_journal_open;
- }
- std::size_t get_num_type_journal() const {
- return num_type_journal;
- }
- std::size_t get_num_type_ool() const {
- return num_type_ool;
- }
- std::size_t get_num_open() const {
- return num_open;
- }
- std::size_t get_num_empty() const {
- return num_empty;
- }
- std::size_t get_num_closed() const {
- return num_closed;
- }
- std::size_t get_count_open_journal() const {
- return count_open_journal;
- }
- std::size_t get_count_open_ool() const {
- return count_open_ool;
- }
- std::size_t get_count_release_journal() const {
- return count_release_journal;
- }
- std::size_t get_count_release_ool() const {
- return count_release_ool;
- }
- std::size_t get_count_close_journal() const {
- return count_close_journal;
- }
- std::size_t get_count_close_ool() const {
- return count_close_ool;
- }
-
- std::size_t get_total_bytes() const {
- return total_bytes;
- }
- /// the available space that is writable, including in open segments
- std::size_t get_available_bytes() const {
- return num_empty * get_segment_size() + avail_bytes_in_open;
- }
- /// the unavailable space that is not writable
- std::size_t get_unavailable_bytes() const {
- assert(total_bytes >= get_available_bytes());
- return total_bytes - get_available_bytes();
- }
- std::size_t get_available_bytes_in_open() const {
- return avail_bytes_in_open;
- }
- double get_available_ratio() const {
- return (double)get_available_bytes() / (double)total_bytes;
- }
-
- journal_seq_t get_journal_head() const {
- if (unlikely(journal_segment_id == NULL_SEG_ID)) {
- return JOURNAL_SEQ_NULL;
- }
- auto &segment_info = segments[journal_segment_id];
- assert(!segment_info.is_empty());
- assert(segment_info.type == segment_type_t::JOURNAL);
- assert(segment_info.seq != NULL_SEG_SEQ);
- return journal_seq_t{
- segment_info.seq,
- paddr_t::make_seg_paddr(
- journal_segment_id,
- segment_info.written_to)
- };
- }
-
- void reset();
-
- void add_segment_manager(SegmentManager &segment_manager);
-
- // initiate non-empty segments, the others are by default empty
- void init_closed(segment_id_t, segment_seq_t, segment_type_t);
-
- void mark_open(segment_id_t, segment_seq_t, segment_type_t);
-
- void mark_empty(segment_id_t);
-
- void mark_closed(segment_id_t);
-
- void update_written_to(segment_type_t, paddr_t);
-
- void update_last_modified_rewritten(
- segment_id_t id, time_point last_modified, time_point last_rewritten) {
- segments[id].update_last_modified_rewritten(last_modified, last_rewritten);
- }
-
-private:
- // See reset() for member initialization
- segment_map_t<segment_info_t> segments;
-
- std::size_t segment_size;
-
- segment_id_t journal_segment_id;
- std::size_t num_in_journal_open;
- std::size_t num_type_journal;
- std::size_t num_type_ool;
-
- std::size_t num_open;
- std::size_t num_empty;
- std::size_t num_closed;
-
- std::size_t count_open_journal;
- std::size_t count_open_ool;
- std::size_t count_release_journal;
- std::size_t count_release_ool;
- std::size_t count_close_journal;
- std::size_t count_close_ool;
-
- std::size_t total_bytes;
- std::size_t avail_bytes_in_open;
-};
-
-/**
- * Callback interface for managing available segments
- */
-class SegmentProvider {
-public:
- virtual journal_seq_t get_journal_tail_target() const = 0;
-
- virtual const segment_info_t& get_seg_info(segment_id_t id) const = 0;
-
- virtual segment_id_t allocate_segment(
- segment_seq_t seq, segment_type_t type) = 0;
-
- virtual journal_seq_t get_dirty_extents_replay_from() const = 0;
-
- virtual journal_seq_t get_alloc_info_replay_from() const = 0;
-
- virtual void close_segment(segment_id_t) = 0;
-
- virtual void update_journal_tail_committed(journal_seq_t tail_committed) = 0;
-
- virtual void update_segment_avail_bytes(segment_type_t, paddr_t) = 0;
-
- virtual SegmentManagerGroup* get_segment_manager_group() = 0;
-
- virtual ~SegmentProvider() {}
-};
-
-class SpaceTrackerI {
-public:
- virtual int64_t allocate(
- segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len) = 0;
-
- virtual int64_t release(
- segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len) = 0;
-
- virtual int64_t get_usage(
- segment_id_t segment) const = 0;
-
- virtual bool equals(const SpaceTrackerI &other) const = 0;
-
- virtual std::unique_ptr<SpaceTrackerI> make_empty() const = 0;
-
- virtual void dump_usage(segment_id_t) const = 0;
-
- virtual double calc_utilization(segment_id_t segment) const = 0;
-
- virtual void reset() = 0;
-
- virtual ~SpaceTrackerI() = default;
-};
-using SpaceTrackerIRef = std::unique_ptr<SpaceTrackerI>;
-
-class SpaceTrackerSimple : public SpaceTrackerI {
- struct segment_bytes_t {
- int64_t live_bytes = 0;
- seastore_off_t total_bytes = 0;
- };
- // Tracks live space for each segment
- segment_map_t<segment_bytes_t> live_bytes_by_segment;
-
- int64_t update_usage(segment_id_t segment, int64_t delta) {
- live_bytes_by_segment[segment].live_bytes += delta;
- assert(live_bytes_by_segment[segment].live_bytes >= 0);
- return live_bytes_by_segment[segment].live_bytes;
- }
-public:
- SpaceTrackerSimple(const SpaceTrackerSimple &) = default;
- SpaceTrackerSimple(const std::vector<SegmentManager*> &sms) {
- for (auto sm : sms) {
- live_bytes_by_segment.add_device(
- sm->get_device_id(),
- sm->get_num_segments(),
- {0, sm->get_segment_size()});
- }
- }
-
- int64_t allocate(
- segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len) final {
- return update_usage(segment, len);
- }
-
- int64_t release(
- segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len) final {
- return update_usage(segment, -(int64_t)len);
- }
-
- int64_t get_usage(segment_id_t segment) const final {
- return live_bytes_by_segment[segment].live_bytes;
- }
-
- double calc_utilization(segment_id_t segment) const final {
- auto& seg_bytes = live_bytes_by_segment[segment];
- return (double)seg_bytes.live_bytes / (double)seg_bytes.total_bytes;
- }
-
- void dump_usage(segment_id_t) const final;
-
- void reset() final {
- for (auto &i : live_bytes_by_segment) {
- i.second = {0, 0};
- }
- }
-
- SpaceTrackerIRef make_empty() const final {
- auto ret = SpaceTrackerIRef(new SpaceTrackerSimple(*this));
- ret->reset();
- return ret;
- }
-
- bool equals(const SpaceTrackerI &other) const;
-};
-
-class SpaceTrackerDetailed : public SpaceTrackerI {
- class SegmentMap {
- int64_t used = 0;
- seastore_off_t total_bytes = 0;
- std::vector<bool> bitmap;
-
- public:
- SegmentMap(
- size_t blocks,
- seastore_off_t total_bytes)
- : total_bytes(total_bytes),
- bitmap(blocks, false) {}
-
- int64_t update_usage(int64_t delta) {
- used += delta;
- return used;
- }
-
- int64_t allocate(
- device_segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len,
- const extent_len_t block_size);
-
- int64_t release(
- device_segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len,
- const extent_len_t block_size);
-
- int64_t get_usage() const {
- return used;
- }
-
- void dump_usage(extent_len_t block_size) const;
-
- double calc_utilization() const {
- return (double)used / (double)total_bytes;
- }
-
- void reset() {
- used = 0;
- for (auto &&i: bitmap) {
- i = false;
- }
- }
- };
-
- // Tracks live space for each segment
- segment_map_t<SegmentMap> segment_usage;
- std::vector<size_t> block_size_by_segment_manager;
-
-public:
- SpaceTrackerDetailed(const SpaceTrackerDetailed &) = default;
- SpaceTrackerDetailed(const std::vector<SegmentManager*> &sms)
- {
- block_size_by_segment_manager.resize(DEVICE_ID_MAX, 0);
- for (auto sm : sms) {
- segment_usage.add_device(
- sm->get_device_id(),
- sm->get_num_segments(),
- SegmentMap(
- sm->get_segment_size() / sm->get_block_size(),
- sm->get_segment_size()));
- block_size_by_segment_manager[sm->get_device_id()] = sm->get_block_size();
- }
- }
-
- int64_t allocate(
- segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len) final {
- return segment_usage[segment].allocate(
- segment.device_segment_id(),
- offset,
- len,
- block_size_by_segment_manager[segment.device_id()]);
- }
-
- int64_t release(
- segment_id_t segment,
- seastore_off_t offset,
- extent_len_t len) final {
- return segment_usage[segment].release(
- segment.device_segment_id(),
- offset,
- len,
- block_size_by_segment_manager[segment.device_id()]);
- }
-
- int64_t get_usage(segment_id_t segment) const final {
- return segment_usage[segment].get_usage();
- }
-
- double calc_utilization(segment_id_t segment) const final {
- return segment_usage[segment].calc_utilization();
- }
-
- void dump_usage(segment_id_t seg) const final;
-
- void reset() final {
- for (auto &i: segment_usage) {
- i.second.reset();
- }
- }
-
- SpaceTrackerIRef make_empty() const final {
- auto ret = SpaceTrackerIRef(new SpaceTrackerDetailed(*this));
- ret->reset();
- return ret;
- }
-
- bool equals(const SpaceTrackerI &other) const;
-};
-
-
-class SegmentCleaner : public SegmentProvider {
-public:
- using time_point = seastar::lowres_system_clock::time_point;
- using duration = seastar::lowres_system_clock::duration;
-
- /// Config
- struct config_t {
- /// Number of minimum journal segments to stop trimming.
- size_t target_journal_segments = 0;
- /// Number of maximum journal segments to block user transactions.
- size_t max_journal_segments = 0;
-
- /// Number of journal segments the transactions in which can
- /// have their corresponding backrefs unmerged
- size_t target_backref_inflight_segments = 0;
-
- /// Ratio of maximum available space to disable reclaiming.
- double available_ratio_gc_max = 0;
- /// Ratio of minimum available space to force reclaiming.
- double available_ratio_hard_limit = 0;
-
- /// Ratio of minimum reclaimable space to stop reclaiming.
- double reclaim_ratio_gc_threshold = 0;
-
- /// Number of bytes to reclaim per cycle
- size_t reclaim_bytes_per_cycle = 0;
-
- /// Number of bytes to rewrite dirty per cycle
- size_t rewrite_dirty_bytes_per_cycle = 0;
-
- /// Number of bytes to rewrite backref per cycle
- size_t rewrite_backref_bytes_per_cycle = 0;
-
- void validate() const {
- ceph_assert(max_journal_segments > target_journal_segments);
- ceph_assert(available_ratio_gc_max > available_ratio_hard_limit);
- ceph_assert(reclaim_bytes_per_cycle > 0);
- ceph_assert(rewrite_dirty_bytes_per_cycle > 0);
- ceph_assert(rewrite_backref_bytes_per_cycle > 0);
- }
-
- static config_t get_default() {
- return config_t{
- 12, // target_journal_segments
- 16, // max_journal_segments
- 2, // target_backref_inflight_segments
- .1, // available_ratio_gc_max
- .05, // available_ratio_hard_limit
- .1, // reclaim_ratio_gc_threshold
- 1<<20,// reclaim_bytes_per_cycle
- 1<<17,// rewrite_dirty_bytes_per_cycle
- 1<<24 // rewrite_backref_bytes_per_cycle
- };
- }
-
- static config_t get_test() {
- return config_t{
- 2, // target_journal_segments
- 4, // max_journal_segments
- 2, // target_backref_inflight_segments
- .99, // available_ratio_gc_max
- .2, // available_ratio_hard_limit
- .6, // reclaim_ratio_gc_threshold
- 1<<20,// reclaim_bytes_per_cycle
- 1<<17,// rewrite_dirty_bytes_per_cycle
- 1<<24 // rewrite_backref_bytes_per_cycle
- };
- }
- };
-
- /// Callback interface for querying and operating on segments
- class ExtentCallbackInterface {
- public:
- virtual ~ExtentCallbackInterface() = default;
-
- virtual TransactionRef create_transaction(
- Transaction::src_t, const char*) = 0;
-
- /// Creates empty transaction with interruptible context
- template <typename Func>
- auto with_transaction_intr(
- Transaction::src_t src,
- const char* name,
- Func &&f) {
- return seastar::do_with(
- create_transaction(src, name),
- [f=std::forward<Func>(f)](auto &ref_t) mutable {
- return with_trans_intr(
- *ref_t,
- [f=std::forward<Func>(f)](auto& t) mutable {
- return f(t);
- }
- );
- }
- );
- }
-
- /// See Cache::get_next_dirty_extents
- using get_next_dirty_extents_iertr = trans_iertr<
- crimson::errorator<
- crimson::ct_error::input_output_error>
- >;
- using get_next_dirty_extents_ret = get_next_dirty_extents_iertr::future<
- std::vector<CachedExtentRef>>;
- virtual get_next_dirty_extents_ret get_next_dirty_extents(
- Transaction &t, ///< [in] current transaction
- journal_seq_t bound,///< [in] return extents with dirty_from < bound
- size_t max_bytes ///< [in] return up to max_bytes of extents
- ) = 0;
-
- using extent_mapping_ertr = crimson::errorator<
- crimson::ct_error::input_output_error,
- crimson::ct_error::eagain>;
- using extent_mapping_iertr = trans_iertr<
- crimson::errorator<
- crimson::ct_error::input_output_error>
- >;
-
- /**
- * rewrite_extent
- *
- * Updates t with operations moving the passed extents to a new
- * segment. extent may be invalid, implementation must correctly
- * handle finding the current instance if it is still alive and
- * otherwise ignore it.
- */
- using rewrite_extent_iertr = extent_mapping_iertr;
- using rewrite_extent_ret = rewrite_extent_iertr::future<>;
- virtual rewrite_extent_ret rewrite_extent(
- Transaction &t,
- CachedExtentRef extent) = 0;
-
- /**
- * get_extent_if_live
- *
- * Returns extent at specified location if still referenced by
- * lba_manager and not removed by t.
- *
- * See TransactionManager::get_extent_if_live and
- * LBAManager::get_physical_extent_if_live.
- */
- using get_extent_if_live_iertr = extent_mapping_iertr;
- using get_extent_if_live_ret = get_extent_if_live_iertr::future<
- CachedExtentRef>;
- virtual get_extent_if_live_ret get_extent_if_live(
- Transaction &t,
- extent_types_t type,
- paddr_t addr,
- laddr_t laddr,
- seastore_off_t len) = 0;
-
- /**
- * submit_transaction_direct
- *
- * Submits transaction without any space throttling.
- */
- using submit_transaction_direct_iertr = trans_iertr<
- crimson::errorator<
- crimson::ct_error::input_output_error>
- >;
- using submit_transaction_direct_ret =
- submit_transaction_direct_iertr::future<>;
- virtual submit_transaction_direct_ret submit_transaction_direct(
- Transaction &t,
- std::optional<journal_seq_t> seq_to_trim = std::nullopt) = 0;
- };
-
-private:
- const bool detailed;
- const config_t config;
-
- SegmentManagerGroupRef sm_group;
- BackrefManager &backref_manager;
-
- SpaceTrackerIRef space_tracker;
- segments_info_t segments;
- bool init_complete = false;
-
- struct {
- /**
- * used_bytes
- *
- * Bytes occupied by live extents
- */
- uint64_t used_bytes = 0;
-
- /**
- * projected_used_bytes
- *
- * Sum of projected bytes used by each transaction between throttle
- * acquisition and commit completion. See reserve_projected_usage()
- */
- uint64_t projected_used_bytes = 0;
- uint64_t projected_count = 0;
- uint64_t projected_used_bytes_sum = 0;
-
- uint64_t closed_journal_used_bytes = 0;
- uint64_t closed_journal_total_bytes = 0;
- uint64_t closed_ool_used_bytes = 0;
- uint64_t closed_ool_total_bytes = 0;
-
- uint64_t io_blocking_num = 0;
- uint64_t io_count = 0;
- uint64_t io_blocked_count = 0;
- uint64_t io_blocked_count_trim = 0;
- uint64_t io_blocked_count_reclaim = 0;
- uint64_t io_blocked_sum = 0;
-
- uint64_t reclaiming_bytes = 0;
- uint64_t reclaimed_bytes = 0;
- uint64_t reclaimed_segment_bytes = 0;
-
- seastar::metrics::histogram segment_util;
- } stats;
- seastar::metrics::metric_group metrics;
- void register_metrics();
-
- /// target journal_tail for next fresh segment
- journal_seq_t journal_tail_target;
-
- /// target replay_from for dirty extents
- journal_seq_t dirty_extents_replay_from;
-
- /// target replay_from for alloc infos
- journal_seq_t alloc_info_replay_from;
-
- /// most recently committed journal_tail
- journal_seq_t journal_tail_committed;
-
- ExtentCallbackInterface *ecb = nullptr;
-
- /// populated if there is an IO blocked on hard limits
- std::optional<seastar::promise<>> blocked_io_wake;
-
- SegmentSeqAllocatorRef ool_segment_seq_allocator;
-
- /**
- * disable_trim
- *
- * added to enable unit testing of CircularBoundedJournal before
- * proper support is added to SegmentCleaner.
- * Should be removed once proper support is added. TODO
- */
- bool disable_trim = false;
-public:
- SegmentCleaner(
- config_t config,
- SegmentManagerGroupRef&& sm_group,
- BackrefManager &backref_manager,
- bool detailed = false);
-
- SegmentSeqAllocator& get_ool_segment_seq_allocator() {
- return *ool_segment_seq_allocator;
- }
-
- using mount_ertr = crimson::errorator<
- crimson::ct_error::input_output_error>;
- using mount_ret = mount_ertr::future<>;
- mount_ret mount();
-
- /*
- * SegmentProvider interfaces
- */
- journal_seq_t get_journal_tail_target() const final {
- return journal_tail_target;
- }
-
- const segment_info_t& get_seg_info(segment_id_t id) const final {
- return segments[id];
- }
-
- segment_id_t allocate_segment(
- segment_seq_t seq, segment_type_t type) final;
-
- void close_segment(segment_id_t segment) final;
-
- void update_journal_tail_committed(journal_seq_t committed) final;
-
- void update_segment_avail_bytes(segment_type_t type, paddr_t offset) final {
- segments.update_written_to(type, offset);
- gc_process.maybe_wake_on_space_used();
- }
-
- SegmentManagerGroup* get_segment_manager_group() final {
- return sm_group.get();
- }
-
- journal_seq_t get_dirty_extents_replay_from() const final {
- return dirty_extents_replay_from;
- }
-
- journal_seq_t get_alloc_info_replay_from() const final {
- return alloc_info_replay_from;
- }
-
- void update_journal_tail_target(
- journal_seq_t dirty_replay_from,
- journal_seq_t alloc_replay_from);
-
- void update_alloc_info_replay_from(
- journal_seq_t alloc_replay_from);
-
- void init_mkfs() {
- auto journal_head = segments.get_journal_head();
- ceph_assert(disable_trim || journal_head != JOURNAL_SEQ_NULL);
- journal_tail_target = journal_head;
- journal_tail_committed = journal_head;
- }
-
- using release_ertr = SegmentManagerGroup::release_ertr;
- release_ertr::future<> maybe_release_segment(Transaction &t);
-
- void adjust_segment_util(double old_usage, double new_usage) {
- auto old_index = get_bucket_index(old_usage);
- auto new_index = get_bucket_index(new_usage);
- assert(stats.segment_util.buckets[old_index].count > 0);
- stats.segment_util.buckets[old_index].count--;
- stats.segment_util.buckets[new_index].count++;
- }
-
- void mark_space_used(
- paddr_t addr,
- extent_len_t len,
- time_point last_modified = time_point(),
- time_point last_rewritten = time_point(),
- bool init_scan = false);
-
- void mark_space_free(
- paddr_t addr,
- extent_len_t len);
-
- SpaceTrackerIRef get_empty_space_tracker() const {
- return space_tracker->make_empty();
- }
-
- void complete_init();
-
- store_statfs_t stat() const {
- store_statfs_t st;
- st.total = segments.get_total_bytes();
- st.available = segments.get_total_bytes() - stats.used_bytes;
- st.allocated = stats.used_bytes;
- st.data_stored = stats.used_bytes;
-
- // TODO add per extent type counters for omap_allocated and
- // internal metadata
- return st;
- }
-
- seastar::future<> stop() {
- return gc_process.stop();
- }
-
- seastar::future<> run_until_halt() {
- return gc_process.run_until_halt();
- }
-
- void set_extent_callback(ExtentCallbackInterface *cb) {
- ecb = cb;
- }
-
- bool debug_check_space(const SpaceTrackerI &tracker) {
- return space_tracker->equals(tracker);
- }
-
- void set_disable_trim(bool val) {
- disable_trim = val;
- }
-
- using work_ertr = ExtentCallbackInterface::extent_mapping_ertr;
- using work_iertr = ExtentCallbackInterface::extent_mapping_iertr;
-
-private:
- /*
- * 10 buckets for the number of closed segments by usage
- * 2 extra buckets for the number of open and empty segments
- */
- static constexpr double UTIL_STATE_OPEN = 1.05;
- static constexpr double UTIL_STATE_EMPTY = 1.15;
- static constexpr std::size_t UTIL_BUCKETS = 12;
- static std::size_t get_bucket_index(double util) {
- auto index = std::floor(util * 10);
- assert(index < UTIL_BUCKETS);
- return index;
- }
- double calc_utilization(segment_id_t id) const {
- auto& info = segments[id];
- if (info.is_open()) {
- return UTIL_STATE_OPEN;
- } else if (info.is_empty()) {
- return UTIL_STATE_EMPTY;
- } else {
- auto ret = space_tracker->calc_utilization(id);
- assert(ret >= 0 && ret < 1);
- return ret;
- }
- }
-
- // journal status helpers
-
- double calc_gc_benefit_cost(segment_id_t id) const {
- double util = calc_utilization(id);
- ceph_assert(util >= 0 && util < 1);
- auto cur_time = seastar::lowres_system_clock::now();
- auto segment = segments[id];
- assert(cur_time >= segment.last_modified);
- auto segment_age =
- cur_time - std::max(segment.last_modified, segment.last_rewritten);
- uint64_t age = segment_age.count();
- return (1 - util) * age / (1 + util);
- }
-
- segment_id_t get_next_reclaim_segment() const;
-
- /**
- * rewrite_dirty
- *
- * Writes out dirty blocks dirtied earlier than limit.
- */
- using rewrite_dirty_iertr = work_iertr;
- using rewrite_dirty_ret = rewrite_dirty_iertr::future<>;
- rewrite_dirty_ret rewrite_dirty(
- Transaction &t,
- journal_seq_t limit);
-
- using trim_backrefs_iertr = work_iertr;
- using trim_backrefs_ret = trim_backrefs_iertr::future<journal_seq_t>;
- trim_backrefs_ret trim_backrefs(
- Transaction &t,
- journal_seq_t limit);
-
- journal_seq_t get_dirty_tail() const {
- auto ret = segments.get_journal_head();
- ceph_assert(ret != JOURNAL_SEQ_NULL);
- if (ret.segment_seq >= config.target_journal_segments) {
- ret.segment_seq -= config.target_journal_segments;
- } else {
- ret.segment_seq = 0;
- ret.offset = P_ADDR_MIN;
- }
- return ret;
- }
-
- journal_seq_t get_dirty_tail_limit() const {
- auto ret = segments.get_journal_head();
- ceph_assert(ret != JOURNAL_SEQ_NULL);
- if (ret.segment_seq >= config.max_journal_segments) {
- ret.segment_seq -= config.max_journal_segments;
- } else {
- ret.segment_seq = 0;
- ret.offset = P_ADDR_MIN;
- }
- return ret;
- }
-
- journal_seq_t get_backref_tail() const {
- auto ret = segments.get_journal_head();
- ceph_assert(ret != JOURNAL_SEQ_NULL);
- if (ret.segment_seq >= config.target_backref_inflight_segments) {
- ret.segment_seq -= config.target_backref_inflight_segments;
- } else {
- ret.segment_seq = 0;
- ret.offset = P_ADDR_MIN;
- }
- return ret;
- }
-
- struct reclaim_state_t {
- std::size_t segment_size;
- paddr_t start_pos;
- paddr_t end_pos;
-
- static reclaim_state_t create(
- segment_id_t segment_id,
- std::size_t segment_size) {
- return {segment_size,
- P_ADDR_NULL,
- paddr_t::make_seg_paddr(segment_id, 0)};
- }
-
- segment_id_t get_segment_id() const {
- return end_pos.as_seg_paddr().get_segment_id();
- }
-
- bool is_complete() const {
- return (std::size_t)end_pos.as_seg_paddr().get_segment_off() >= segment_size;
- }
-
- void advance(std::size_t bytes) {
- assert(!is_complete());
- start_pos = end_pos;
- auto &end_seg_paddr = end_pos.as_seg_paddr();
- auto next_off = end_seg_paddr.get_segment_off() + bytes;
- if (next_off > segment_size) {
- end_seg_paddr.set_segment_off(segment_size);
- } else {
- end_seg_paddr.set_segment_off(next_off);
- }
- }
- };
- std::optional<reclaim_state_t> reclaim_state;
-
- /**
- * GCProcess
- *
- * Background gc process.
- */
- using gc_cycle_ret = seastar::future<>;
- class GCProcess {
- std::optional<gc_cycle_ret> process_join;
-
- SegmentCleaner &cleaner;
-
- std::optional<seastar::promise<>> blocking;
-
- bool is_stopping() const {
- return !process_join;
- }
-
- gc_cycle_ret run();
-
- void wake() {
- if (blocking) {
- blocking->set_value();
- blocking = std::nullopt;
- }
- }
-
- seastar::future<> maybe_wait_should_run() {
- return seastar::do_until(
- [this] {
- cleaner.log_gc_state("GCProcess::maybe_wait_should_run");
- return is_stopping() || cleaner.gc_should_run();
- },
- [this] {
- ceph_assert(!blocking);
- blocking = seastar::promise<>();
- return blocking->get_future();
- });
- }
- public:
- GCProcess(SegmentCleaner &cleaner) : cleaner(cleaner) {}
-
- void start() {
- ceph_assert(is_stopping());
- process_join = seastar::now(); // allow run()
- process_join = run();
- assert(!is_stopping());
- }
-
- gc_cycle_ret stop() {
- if (is_stopping()) {
- return seastar::now();
- }
- auto ret = std::move(*process_join);
- process_join.reset();
- assert(is_stopping());
- wake();
- return ret;
- }
-
- gc_cycle_ret run_until_halt() {
- ceph_assert(is_stopping());
- return seastar::do_until(
- [this] {
- cleaner.log_gc_state("GCProcess::run_until_halt");
- return !cleaner.gc_should_run();
- },
- [this] {
- return cleaner.do_gc_cycle();
- });
- }
-
- void maybe_wake_on_space_used() {
- if (is_stopping()) {
- return;
- }
- if (cleaner.gc_should_run()) {
- wake();
- }
- }
- } gc_process;
-
- using gc_ertr = work_ertr::extend_ertr<
- SegmentManagerGroup::scan_extents_ertr
- >;
-
- gc_cycle_ret do_gc_cycle();
-
- using gc_trim_journal_ertr = gc_ertr;
- using gc_trim_journal_ret = gc_trim_journal_ertr::future<>;
- gc_trim_journal_ret gc_trim_journal();
-
- using gc_trim_backref_ertr = gc_ertr;
- using gc_trim_backref_ret = gc_trim_backref_ertr::future<journal_seq_t>;
- gc_trim_backref_ret gc_trim_backref(journal_seq_t limit);
-
- using gc_reclaim_space_ertr = gc_ertr;
- using gc_reclaim_space_ret = gc_reclaim_space_ertr::future<>;
- gc_reclaim_space_ret gc_reclaim_space();
-
-
- using retrieve_live_extents_iertr = work_iertr;
- using retrieve_live_extents_ret =
- retrieve_live_extents_iertr::future<journal_seq_t>;
- retrieve_live_extents_ret _retrieve_live_extents(
- Transaction &t,
- std::set<
- backref_buf_entry_t,
- backref_buf_entry_t::cmp_t> &&backrefs,
- std::vector<CachedExtentRef> &extents);
-
- using retrieve_backref_mappings_ertr = work_ertr;
- using retrieve_backref_mappings_ret =
- retrieve_backref_mappings_ertr::future<backref_pin_list_t>;
- retrieve_backref_mappings_ret retrieve_backref_mappings(
- paddr_t start_paddr,
- paddr_t end_paddr);
-
- /*
- * Segments calculations
- */
- std::size_t get_segments_in_journal() const {
- if (!init_complete) {
- return 0;
- }
- if (journal_tail_committed == JOURNAL_SEQ_NULL) {
- return segments.get_num_type_journal();
- }
- auto journal_head = segments.get_journal_head();
- assert(journal_head != JOURNAL_SEQ_NULL);
- assert(journal_head.segment_seq >= journal_tail_committed.segment_seq);
- return journal_head.segment_seq + 1 - journal_tail_committed.segment_seq;
- }
- std::size_t get_segments_in_journal_closed() const {
- auto in_journal = get_segments_in_journal();
- auto in_journal_open = segments.get_num_in_journal_open();
- if (in_journal >= in_journal_open) {
- return in_journal - in_journal_open;
- } else {
- return 0;
- }
- }
- std::size_t get_segments_reclaimable() const {
- assert(segments.get_num_closed() >= get_segments_in_journal_closed());
- return segments.get_num_closed() - get_segments_in_journal_closed();
- }
-
- /*
- * Space calculations
- */
- /// the unavailable space that is not reclaimable yet
- std::size_t get_unavailable_unreclaimable_bytes() const {
- auto ret = (segments.get_num_open() + get_segments_in_journal_closed()) *
- segments.get_segment_size();
- assert(ret >= segments.get_available_bytes_in_open());
- return ret - segments.get_available_bytes_in_open();
- }
- /// the unavailable space that can be reclaimed
- std::size_t get_unavailable_reclaimable_bytes() const {
- auto ret = get_segments_reclaimable() * segments.get_segment_size();
- ceph_assert(ret + get_unavailable_unreclaimable_bytes() == segments.get_unavailable_bytes());
- return ret;
- }
- /// the unavailable space that is not alive
- std::size_t get_unavailable_unused_bytes() const {
- assert(segments.get_unavailable_bytes() > stats.used_bytes);
- return segments.get_unavailable_bytes() - stats.used_bytes;
- }
- double get_reclaim_ratio() const {
- if (segments.get_unavailable_bytes() == 0) return 0;
- return (double)get_unavailable_unused_bytes() / (double)segments.get_unavailable_bytes();
- }
-
- /*
- * Space calculations (projected)
- */
- std::size_t get_projected_available_bytes() const {
- return (segments.get_available_bytes() > stats.projected_used_bytes) ?
- segments.get_available_bytes() - stats.projected_used_bytes:
- 0;
- }
- double get_projected_available_ratio() const {
- return (double)get_projected_available_bytes() /
- (double)segments.get_total_bytes();
- }
-
- /*
- * Journal sizes
- */
- std::size_t get_dirty_journal_size() const {
- auto journal_head = segments.get_journal_head();
- if (journal_head == JOURNAL_SEQ_NULL ||
- dirty_extents_replay_from == JOURNAL_SEQ_NULL) {
- return 0;
- }
- return (journal_head.segment_seq - dirty_extents_replay_from.segment_seq) *
- segments.get_segment_size() +
- journal_head.offset.as_seg_paddr().get_segment_off() -
- segments.get_segment_size() -
- dirty_extents_replay_from.offset.as_seg_paddr().get_segment_off();
- }
-
- std::size_t get_alloc_journal_size() const {
- auto journal_head = segments.get_journal_head();
- if (journal_head == JOURNAL_SEQ_NULL ||
- alloc_info_replay_from == JOURNAL_SEQ_NULL) {
- return 0;
- }
- return (journal_head.segment_seq - alloc_info_replay_from.segment_seq) *
- segments.get_segment_size() +
- journal_head.offset.as_seg_paddr().get_segment_off() -
- segments.get_segment_size() -
- alloc_info_replay_from.offset.as_seg_paddr().get_segment_off();
- }
-
- /**
- * should_block_on_gc
- *
- * Encapsulates whether block pending gc.
- */
- bool should_block_on_trim() const {
- if (disable_trim) return false;
- return get_dirty_tail_limit() > journal_tail_target;
- }
-
- bool should_block_on_reclaim() const {
- if (disable_trim) return false;
- if (get_segments_reclaimable() == 0) {
- return false;
- }
- auto aratio = get_projected_available_ratio();
- return aratio < config.available_ratio_hard_limit;
- }
-
- bool should_block_on_gc() const {
- return should_block_on_trim() || should_block_on_reclaim();
- }
-
- void log_gc_state(const char *caller) const;
-
-public:
- seastar::future<> reserve_projected_usage(std::size_t projected_usage);
-
- void release_projected_usage(size_t projected_usage);
-
-private:
- void maybe_wake_gc_blocked_io() {
- if (!init_complete) {
- return;
- }
- if (!should_block_on_gc() && blocked_io_wake) {
- blocked_io_wake->set_value();
- blocked_io_wake = std::nullopt;
- }
- }
-
- using scan_extents_ret_bare =
- std::vector<std::pair<segment_id_t, segment_header_t>>;
- using scan_extents_ertr = SegmentManagerGroup::scan_extents_ertr;
- using scan_extents_ret = scan_extents_ertr::future<>;
- scan_extents_ret scan_nonfull_segment(
- const segment_header_t& header,
- scan_extents_ret_bare& segment_set,
- segment_id_t segment_id);
-
- /**
- * gc_should_reclaim_space
- *
- * Encapsulates logic for whether gc should be reclaiming segment space.
- */
- bool gc_should_reclaim_space() const {
- if (disable_trim) return false;
- if (get_segments_reclaimable() == 0) {
- return false;
- }
- auto aratio = segments.get_available_ratio();
- auto rratio = get_reclaim_ratio();
- return (
- (aratio < config.available_ratio_hard_limit) ||
- ((aratio < config.available_ratio_gc_max) &&
- (rratio > config.reclaim_ratio_gc_threshold))
- );
- }
-
- /**
- * gc_should_trim_journal
- *
- * Encapsulates logic for whether gc should be reclaiming segment space.
- */
- bool gc_should_trim_journal() const {
- return get_dirty_tail() > journal_tail_target;
- }
-
- bool gc_should_trim_backref() const {
- return get_backref_tail() > alloc_info_replay_from;
- }
- /**
- * gc_should_run
- *
- * True if gc should be running.
- */
- bool gc_should_run() const {
- if (disable_trim) return false;
- ceph_assert(init_complete);
- return gc_should_reclaim_space()
- || gc_should_trim_journal()
- || gc_should_trim_backref();
- }
-
- void init_mark_segment_closed(
- segment_id_t segment,
- segment_seq_t seq,
- segment_type_t s_type) {
- ceph_assert(!init_complete);
- auto old_usage = calc_utilization(segment);
- segments.init_closed(segment, seq, s_type);
- auto new_usage = calc_utilization(segment);
- adjust_segment_util(old_usage, new_usage);
- if (s_type == segment_type_t::OOL) {
- ool_segment_seq_allocator->set_next_segment_seq(seq);
- }
- }
-};
-using SegmentCleanerRef = std::unique_ptr<SegmentCleaner>;
-
-}
#include "crimson/os/seastore/seastore_types.h"
namespace crimson::os::seastore {
-class SegmentCleaner;
+class AsyncCleaner;
}
namespace crimson::os::seastore::journal {
segment_seq_t next_segment_seq = 0;
segment_type_t type = segment_type_t::NULL_SEG;
friend class journal::SegmentedJournal;
- friend class SegmentCleaner;
+ friend class AsyncCleaner;
};
using SegmentSeqAllocatorRef =
namespace crimson::os::seastore {
TransactionManager::TransactionManager( // moves each component reference into the matching member
- SegmentCleanerRef _segment_cleaner,
+ AsyncCleanerRef _async_cleaner,
JournalRef _journal,
CacheRef _cache,
LBAManagerRef _lba_manager,
ExtentPlacementManagerRef &&epm,
BackrefManagerRef&& backref_manager,
tm_make_config_t config)
- : segment_cleaner(std::move(_segment_cleaner)),
+ : async_cleaner(std::move(_async_cleaner)),
cache(std::move(_cache)),
lba_manager(std::move(_lba_manager)),
journal(std::move(_journal)),
epm(std::move(epm)),
backref_manager(std::move(backref_manager)),
- sm_group(*segment_cleaner->get_segment_manager_group()),
+ sm_group(*async_cleaner->get_segment_manager_group()), // reads the member, not the moved-from _async_cleaner parameter; NOTE(review): relies on async_cleaner being declared before sm_group - confirm in the header
config(config)
{
- segment_cleaner->set_extent_callback(this);
+ async_cleaner->set_extent_callback(this); // this object is registered as the cleaner's extent callback
journal->set_write_pipeline(&write_pipeline); // the journal is handed a pointer to write_pipeline
}
{
LOG_PREFIX(TransactionManager::mkfs);
INFO("enter");
- return segment_cleaner->mount(
+ return async_cleaner->mount(
).safe_then([this] {
return journal->open_for_write();
}).safe_then([this](auto) {
- segment_cleaner->init_mkfs();
+ async_cleaner->init_mkfs();
return epm->open();
}).safe_then([this, FNAME]() {
return with_transaction_intr(
LOG_PREFIX(TransactionManager::mount);
INFO("enter");
cache->init();
- return segment_cleaner->mount(
+ return async_cleaner->mount(
).safe_then([this] {
return journal->replay(
[this](
auto last_modified)
{
auto start_seq = offsets.write_result.start_seq;
- segment_cleaner->update_journal_tail_target(
+ async_cleaner->update_journal_tail_target(
cache->get_oldest_dirty_from().value_or(start_seq),
cache->get_oldest_backref_dirty_from().value_or(start_seq));
return cache->replay_delta(
else
return lba_manager->init_cached_extent(t, e);
}).si_then([this, FNAME, &t] {
- assert(segment_cleaner->debug_check_space(
- *segment_cleaner->get_empty_space_tracker()));
+ assert(async_cleaner->debug_check_space(
+ *async_cleaner->get_empty_space_tracker()));
return backref_manager->scan_mapped_space(
t,
[this, FNAME, &t](
len);
if (addr.is_real() &&
!backref_manager->backref_should_be_removed(addr)) {
- segment_cleaner->mark_space_used(
+ async_cleaner->mark_space_used(
addr,
len ,
seastar::lowres_system_clock::time_point(),
auto &backrefs = backref_manager->get_cached_backrefs();
DEBUG("marking {} backrefs used", backrefs.size());
for (auto &backref : backrefs) {
- segment_cleaner->mark_space_used(
+ async_cleaner->mark_space_used(
backref.paddr,
backref.len,
seastar::lowres_system_clock::time_point(),
}).safe_then([this] {
return epm->open();
}).safe_then([FNAME, this] {
- segment_cleaner->complete_init();
+ async_cleaner->complete_init();
INFO("completed");
}).handle_error(
mount_ertr::pass_further{},
TransactionManager::close_ertr::future<> TransactionManager::close() {
LOG_PREFIX(TransactionManager::close);
INFO("enter");
- return segment_cleaner->stop(
+ return async_cleaner->stop(
).then([this] {
return cache->close();
}).safe_then([this] {
size_t projected_usage = t.get_allocation_size();
SUBTRACET(seastore_t, "waiting for projected_usage: {}", t, projected_usage);
return trans_intr::make_interruptible(
- segment_cleaner->reserve_projected_usage(projected_usage)
+ async_cleaner->reserve_projected_usage(projected_usage)
).then_interruptible([this, &t] {
return submit_transaction_direct(t);
}).finally([this, FNAME, projected_usage, &t] {
SUBTRACET(seastore_t, "releasing projected_usage: {}", t, projected_usage);
- segment_cleaner->release_projected_usage(projected_usage);
+ async_cleaner->release_projected_usage(projected_usage);
});
});
}
if (seq_to_trim && *seq_to_trim != JOURNAL_SEQ_NULL) {
cache->trim_backref_bufs(*seq_to_trim);
}
- auto record = cache->prepare_record(tref, segment_cleaner.get());
+ auto record = cache->prepare_record(tref, async_cleaner.get());
tref.get_handle().maybe_release_collection_lock();
tref,
submit_result.record_block_base,
start_seq,
- segment_cleaner.get());
+ async_cleaner.get());
std::vector<CachedExtentRef> lba_to_clear;
std::vector<CachedExtentRef> backref_to_clear;
lba_manager->complete_transaction(tref, lba_to_clear, lba_to_link);
backref_manager->complete_transaction(tref, backref_to_clear, backref_to_link);
- segment_cleaner->update_journal_tail_target(
+ async_cleaner->update_journal_tail_target(
cache->get_oldest_dirty_from().value_or(start_seq),
cache->get_oldest_backref_dirty_from().value_or(start_seq));
- return segment_cleaner->maybe_release_segment(tref);
+ return async_cleaner->maybe_release_segment(tref);
}).safe_then([FNAME, &tref] {
SUBTRACET(seastore_t, "completed", tref);
return tref.get_handle().complete();
/* This update_mapping is, strictly speaking, unnecessary for delayed_alloc
* extents since we're going to do it again once we either do the ool write
- * or allocate a relative inline addr. TODO: refactor SegmentCleaner to
+ * or allocate a relative inline addr. TODO: refactor AsyncCleaner to
* avoid this complication. */
return lba_manager->update_mapping(
t,
auto backref_manager = create_backref_manager(*sms, *cache);
bool cleaner_is_detailed;
- SegmentCleaner::config_t cleaner_config;
+ AsyncCleaner::config_t cleaner_config;
if (config.is_test) {
cleaner_is_detailed = true;
- cleaner_config = SegmentCleaner::config_t::get_test();
+ cleaner_config = AsyncCleaner::config_t::get_test();
} else {
cleaner_is_detailed = false;
- cleaner_config = SegmentCleaner::config_t::get_default();
+ cleaner_config = AsyncCleaner::config_t::get_default();
}
- auto segment_cleaner = std::make_unique<SegmentCleaner>(
+ auto async_cleaner = std::make_unique<AsyncCleaner>(
cleaner_config,
std::move(sms),
*backref_manager,
JournalRef journal;
if (config.j_type == journal_type_t::SEGMENT_JOURNAL) {
- journal = journal::make_segmented(*segment_cleaner);
+ journal = journal::make_segmented(*async_cleaner);
} else {
journal = journal::make_circularbounded(
nullptr, "");
- segment_cleaner->set_disable_trim(true);
+ async_cleaner->set_disable_trim(true);
ERROR("disabling journal trimming since support for CircularBoundedJournal\
hasn't been added yet");
}
epm->init_ool_writers(
- *segment_cleaner,
- segment_cleaner->get_ool_segment_seq_allocator());
+ *async_cleaner,
+ async_cleaner->get_ool_segment_seq_allocator());
return std::make_unique<TransactionManager>(
- std::move(segment_cleaner),
+ std::move(async_cleaner),
std::move(journal),
std::move(cache),
std::move(lba_manager),
#include "crimson/osd/exceptions.h"
#include "crimson/os/seastore/logging.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/lba_manager.h"
* Abstraction hiding reading and writing to persistence.
* Exposes transaction based interface with read isolation.
*/
-class TransactionManager : public SegmentCleaner::ExtentCallbackInterface {
+class TransactionManager : public AsyncCleaner::ExtentCallbackInterface {
public:
using base_ertr = Cache::base_ertr;
using base_iertr = Cache::base_iertr;
TransactionManager(
- SegmentCleanerRef segment_cleaner,
+ AsyncCleanerRef async_cleaner,
JournalRef journal,
CacheRef cache,
LBAManagerRef lba_manager,
using submit_transaction_iertr = base_iertr;
submit_transaction_iertr::future<> submit_transaction(Transaction &);
- /// SegmentCleaner::ExtentCallbackInterface
- using SegmentCleaner::ExtentCallbackInterface::submit_transaction_direct_ret;
+ /// AsyncCleaner::ExtentCallbackInterface
+ using AsyncCleaner::ExtentCallbackInterface::submit_transaction_direct_ret;
submit_transaction_direct_ret submit_transaction_direct(
Transaction &t,
std::optional<journal_seq_t> seq_to_trim = std::nullopt) final;
*/
seastar::future<> flush(OrderingHandle &handle);
- using SegmentCleaner::ExtentCallbackInterface::get_next_dirty_extents_ret;
+ using AsyncCleaner::ExtentCallbackInterface::get_next_dirty_extents_ret;
get_next_dirty_extents_ret get_next_dirty_extents(
Transaction &t,
journal_seq_t seq,
size_t max_bytes) final;
- using SegmentCleaner::ExtentCallbackInterface::rewrite_extent_ret;
+ using AsyncCleaner::ExtentCallbackInterface::rewrite_extent_ret;
rewrite_extent_ret rewrite_extent(
Transaction &t,
CachedExtentRef extent) final;
- using SegmentCleaner::ExtentCallbackInterface::get_extent_if_live_ret;
+ using AsyncCleaner::ExtentCallbackInterface::get_extent_if_live_ret;
get_extent_if_live_ret get_extent_if_live(
Transaction &t,
extent_types_t type,
}
store_statfs_t store_stat() const { // store statistics come straight from the cleaner's stat()
- return segment_cleaner->stat();
+ return async_cleaner->stat();
}
void add_device(Device* dev, bool is_primary) {
private:
friend class Transaction;
- SegmentCleanerRef segment_cleaner;
+ AsyncCleanerRef async_cleaner;
CacheRef cache;
LBAManagerRef lba_manager;
JournalRef journal;
LogicalCachedExtentRef extent);
public:
// Testing interfaces
- auto get_segment_cleaner() {
- return segment_cleaner.get();
+ auto get_async_cleaner() { // testing accessor: returns the result of async_cleaner.get(), the member itself is left untouched
+ return async_cleaner.get();
}
auto get_lba_manager() {
auto t = create_mutate_transaction();
std::invoke(f, *t);
submit_transaction(std::move(t));
- segment_cleaner->run_until_halt().get0();
+ async_cleaner->run_until_halt().get0();
}
template <typename F>
auto t = create_mutate_transaction();
INTR(tree->bootstrap, *t).unsafe_get();
submit_transaction(std::move(t));
- segment_cleaner->run_until_halt().get0();
+ async_cleaner->run_until_halt().get0();
}
// test insert
auto t = create_mutate_transaction();
INTR(tree->insert, *t).unsafe_get();
submit_transaction(std::move(t));
- segment_cleaner->run_until_halt().get0();
+ async_cleaner->run_until_halt().get0();
}
{
auto t = create_read_transaction();
auto size = kvs.size() / 4 * 3;
INTR_R(tree->erase, *t, size).unsafe_get();
submit_transaction(std::move(t));
- segment_cleaner->run_until_halt().get0();
+ async_cleaner->run_until_halt().get0();
}
{
auto t = create_read_transaction();
auto size = kvs.size();
INTR_R(tree->erase, *t, size).unsafe_get();
submit_transaction(std::move(t));
- segment_cleaner->run_until_halt().get0();
+ async_cleaner->run_until_halt().get0();
}
{
auto t = create_read_transaction();
});
});
}).unsafe_get0();
- segment_cleaner->run_until_halt().get0();
+ async_cleaner->run_until_halt().get0();
// insert
logger().warn("start inserting {} kvs ...", kvs.size());
});
});
}).unsafe_get0();
- segment_cleaner->run_until_halt().get0();
+ async_cleaner->run_until_halt().get0();
++iter;
}
}
});
});
}).unsafe_get0();
- segment_cleaner->run_until_halt().get0();
+ async_cleaner->run_until_halt().get0();
++iter;
}
kvs.erase_from_random(kvs.random_begin(), kvs.random_end());
#include <random>
#include "crimson/common/log.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
#include "crimson/os/seastore/journal.h"
#include "crimson/os/seastore/segment_manager/ephemeral.h"
#include "test/crimson/gtest_seastar.h"
#include "test/crimson/seastore/transaction_manager_test_state.h"
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/transaction_manager.h"
#include "crimson/os/seastore/segment_manager/ephemeral.h"
bool check_usage() {
auto t = create_weak_test_transaction();
- SpaceTrackerIRef tracker(segment_cleaner->get_empty_space_tracker());
+ SpaceTrackerIRef tracker(async_cleaner->get_empty_space_tracker());
with_trans_intr(
*t.t,
[this, &tracker](auto &t) {
return seastar::now();
});
}).unsafe_get0();
- return segment_cleaner->debug_check_space(*tracker);
+ return async_cleaner->debug_check_space(*tracker);
}
void replay() {
"try_submit_transaction hit invalid error"
}
).then([this](auto ret) {
- return segment_cleaner->run_until_halt().then([ret] { return ret; });
+ return async_cleaner->run_until_halt().then([ret] { return ret; });
}).get0();
if (success) {
});
});
}).safe_then([this]() {
- return segment_cleaner->run_until_halt();
+ return async_cleaner->run_until_halt();
}).handle_error(
crimson::ct_error::assert_all{
"Invalid error in SeaStore::list_collections"
#include <random>
#include <boost/iterator/counting_iterator.hpp>
-#include "crimson/os/seastore/segment_cleaner.h"
+#include "crimson/os/seastore/async_cleaner.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/transaction_manager.h"
#include "crimson/os/seastore/segment_manager/ephemeral.h"
LBAManager *lba_manager;
BackrefManager *backref_manager;
Cache* cache;
- SegmentCleaner *segment_cleaner;
+ AsyncCleaner *async_cleaner;
TMTestState() : EphemeralTestState(1) {}
tm->add_device(sec_sm.get(), false);
}
}
- segment_cleaner = tm->get_segment_cleaner();
+ async_cleaner = tm->get_async_cleaner();
lba_manager = tm->get_lba_manager();
backref_manager = tm->get_backref_manager();
cache = tm->get_cache();
}
virtual void _destroy() override { // clears cached raw pointers, then resets tm
- segment_cleaner = nullptr;
+ async_cleaner = nullptr;
lba_manager = nullptr;
tm.reset(); // NOTE(review): backref_manager and cache are not nulled in the lines visible here - confirm whether that is intentional
}
).handle_error(
crimson::ct_error::assert_all{"Error in mount"}
).then([this] {
- return segment_cleaner->stop();
+ return async_cleaner->stop();
}).then([this] {
- return segment_cleaner->run_until_halt();
+ return async_cleaner->run_until_halt();
});
}
void submit_transaction(TransactionRef t) { // test helper: synchronous submit followed by a cleaner drain
submit_transaction_fut(*t).unsafe_get0(); // blocks on the submit future
- segment_cleaner->run_until_halt().get0();
+ async_cleaner->run_until_halt().get0(); // then blocks on the cleaner's run_until_halt() future
}
};