#include "crimson/common/log.h"
+#include "crimson/os/seastore/transaction.h"
#include "crimson/os/seastore/segment_cleaner.h"
namespace {
__func__,
journal_tail_target,
next_target);
- if (journal_tail_target > next_target) {
- return do_immediate_work_ertr::now();
+
+ logger().debug(
+ "SegmentCleaner::do_immediate_work gc total {}, available {}, unavailable {}, used {} available_ratio {}, reclaim_ratio {}, bytes_to_gc_for_available {}, bytes_to_gc_for_reclaim {}",
+ get_total_bytes(),
+ get_available_bytes(),
+ get_unavailable_bytes(),
+ get_used_bytes(),
+ get_available_ratio(),
+ get_reclaim_ratio(),
+ get_immediate_bytes_to_gc_for_available(),
+ get_immediate_bytes_to_gc_for_reclaim());
+
+ auto dirty_fut = do_immediate_work_ertr::now();
+ if (journal_tail_target < next_target) {
+ dirty_fut = rewrite_dirty(t, next_target);
}
+ return dirty_fut.safe_then([=, &t] {
+ return do_gc(t, get_immediate_bytes_to_gc());
+ }).handle_error(
+ do_immediate_work_ertr::pass_further{},
+ crimson::ct_error::assert_all{}
+ );
+}
+
+/**
+ * do_deferred_work
+ *
+ * Performs no work on t: hands back an already-resolved future holding a
+ * value-initialized ceph::timespan.
+ */
+SegmentCleaner::do_deferred_work_ret SegmentCleaner::do_deferred_work(
+  Transaction &t)
+{
+  using ready_marker = do_deferred_work_ertr::ready_future_marker;
+  return do_deferred_work_ret(ready_marker{}, ceph::timespan());
+}
+SegmentCleaner::rewrite_dirty_ret SegmentCleaner::rewrite_dirty(
+ Transaction &t,
+ journal_seq_t limit)
+{
return ecb->get_next_dirty_extents(
- get_dirty_tail_limit()
+ limit
).then([=, &t](auto dirty_list) {
if (dirty_list.empty()) {
return do_immediate_work_ertr::now();
});
}
-SegmentCleaner::do_deferred_work_ret SegmentCleaner::do_deferred_work(
- Transaction &t)
+/**
+ * do_gc
+ *
+ * Runs one increment of segment gc on transaction t.
+ *
+ * Returns immediately when bytes is 0 or when get_next_gc_target() yields no
+ * segment. Otherwise scans from gc_current_pos via ecb->scan_extents(),
+ * rewrites every extent that ecb->get_extent_if_live() still returns, then
+ * advances gc_current_pos to the position reported by the scan. When that
+ * position is P_ADDR_NULL the segment just scanned is marked for release
+ * on t.
+ */
+SegmentCleaner::do_gc_ret SegmentCleaner::do_gc(
+ Transaction &t,
+ size_t bytes)
{
- return do_deferred_work_ret(
- do_deferred_work_ertr::ready_future_marker{},
- ceph::timespan());
+ // No gc requested: resolve without touching gc state.
+ if (bytes == 0) {
+ return do_gc_ertr::now();
+ }
+
+ // No segment is currently being collected: choose one and start at offset 0.
+ if (gc_current_pos == P_ADDR_NULL) {
+ gc_current_pos.segment = get_next_gc_target();
+ if (gc_current_pos == P_ADDR_NULL) {
+ // apparently there are no segments to gc
+ logger().debug(
+ "SegmentCleaner::do_gc: no segments to gc");
+ return do_gc_ertr::now();
+ }
+ logger().debug(
+ "SegmentCleaner::do_gc: starting gc on segment {}",
+ gc_current_pos.segment);
+ gc_current_pos.offset = 0;
+ }
+
+ // Scan from the cursor; the result is unpacked below into the position to
+ // resume from (next) and the list of extents found (addr_list).
+ return ecb->scan_extents(
+ gc_current_pos,
+ bytes
+ ).safe_then([=, &t](auto addrs) {
+ // do_with keeps the scan result alive for the continuations below.
+ return seastar::do_with(
+ std::move(addrs),
+ [=, &t](auto &addrs) {
+ auto &[next, addr_list] = addrs;
+ return crimson::do_for_each(
+ addr_list,
+ [=, &t](auto &addr_pair) {
+ auto &[addr, info] = addr_pair;
+ logger().debug(
+ "SegmentCleaner::do_gc: checking addr {}",
+ addr);
+ return ecb->get_extent_if_live(
+ t,
+ info.type,
+ addr,
+ info.addr,
+ info.len
+ ).safe_then([=, &t](CachedExtentRef ext) {
+ // A null ref is treated as a dead extent: nothing to rewrite.
+ if (!ext) {
+ logger().debug(
+ "SegmentCleaner::do_gc: addr {} dead, skipping",
+ addr);
+ return ExtentCallbackInterface::rewrite_extent_ertr::now();
+ } else {
+ logger().debug(
+ "SegmentCleaner::do_gc: addr {} alive, gc'ing {}",
+ addr,
+ *ext);
+ }
+ return ecb->rewrite_extent(
+ t,
+ ext);
+ });
+ }).safe_then([=, &t] {
+ // Every extent in this batch has been handled: advance the cursor.
+ // A P_ADDR_NULL cursor means the scan reported no further position,
+ // so the segment we just left is queued for release on t.
+ auto old_pos = std::exchange(gc_current_pos, next);
+ if (gc_current_pos == P_ADDR_NULL) {
+ t.mark_segment_to_release(old_pos.segment);
+ }
+ return ExtentCallbackInterface::release_segment_ertr::now();
+ });
+ });
+ });
}
}
// Will be non-null for any segments in the current journal
segment_seq_t journal_segment_seq = NULL_SEG_SEQ;
+
+  /**
+   * True when this segment carries a journal sequence number and that
+   * number is not older than tail_committed.segment_seq.
+   */
+  bool is_in_journal(journal_seq_t tail_committed) const {
+    if (journal_segment_seq == NULL_SEG_SEQ) {
+      return false;
+    }
+    return journal_segment_seq >= tail_committed.segment_seq;
+  }
+
bool is_empty() const {
return state == Segment::segment_state_t::EMPTY;
}
size_t target_journal_segments = 0;
size_t max_journal_segments = 0;
+ double reclaim_ratio_hard_limit = 0;
+ // don't apply reclaim ratio with available space below this
+ double reclaim_ratio_usage_min = 0;
+
+ double available_ratio_hard_limit = 0;
+
// Builds a config_t from manager's segment size and block size plus fixed
// defaults for the remaining fields.
// NOTE(review): the initializers are positional; the trailing values appear to
// line up with target_journal_segments, max_journal_segments,
// reclaim_ratio_hard_limit, reclaim_ratio_usage_min and
// available_ratio_hard_limit — confirm against the field declaration order.
static config_t default_from_segment_manager(
SegmentManager &manager) {
return config_t{
static_cast<size_t>(manager.get_segment_size()),
(size_t)manager.get_block_size(),
2, // presumably target_journal_segments
- 4};
+ 4, // presumably max_journal_segments
+ .5, // presumably reclaim_ratio_hard_limit
+ .95, // presumably reclaim_ratio_usage_min
+ .2 // presumably available_ratio_hard_limit
+ };
}
};
paddr_t addr,
laddr_t laddr,
segment_off_t len) = 0;
+
+ /**
+ * scan_extents
+ *
+ * Interface shim for Journal::scan_extents; the return type is
+ * Journal::scan_extents_ret.
+ *
+ * SegmentCleaner::do_gc calls this with its current gc position and byte
+ * budget, and unpacks the result into the address to resume from plus a
+ * list of (address, extent info) pairs.
+ *
+ * @param addr position to start scanning from
+ * @param bytes_to_read amount to scan, in bytes
+ */
+ using scan_extents_ret = Journal::scan_extents_ret;
+ virtual scan_extents_ret scan_extents(
+ paddr_t addr,
+ extent_len_t bytes_to_read) = 0;
+
+ /**
+ * release_segment
+ *
+ * Release segment.
+ *
+ * Errors are reported through SegmentManager::release_ertr; the returned
+ * future carries no value.
+ *
+ * @param id segment to release
+ */
+ using release_segment_ertr = SegmentManager::release_ertr;
+ using release_segment_ret = release_segment_ertr::future<>;
+ virtual release_segment_ret release_segment(
+ segment_id_t id) = 0;
};
private:
segments[segment].journal_segment_seq = seq;
}
+  /// Returns the journal sequence number recorded for segment id.
+  segment_seq_t get_seq(segment_id_t id) final {
+    const auto &entry = segments[id];
+    return entry.journal_segment_seq;
+  }
+
+  /// Records the release of segment by marking it empty.
+  void mark_segment_released(segment_id_t segment) {
+    mark_empty(segment);
+  }
+
void mark_space_used(
paddr_t addr,
extent_len_t len,
assert(ret >= 0);
}
+  /**
+   * get_next_gc_target
+   *
+   * Picks the segment to gc next: among segments that are closed and not
+   * part of the journal (relative to journal_tail_committed), the one for
+   * which the space tracker reports the lowest usage. The lowest-numbered
+   * segment wins ties.
+   *
+   * @return the chosen segment id, or NULL_SEG_ID when no segment qualifies
+   */
+  segment_id_t get_next_gc_target() const {
+    segment_id_t ret = NULL_SEG_ID;
+    int64_t least_live_bytes = std::numeric_limits<int64_t>::max();
+    for (segment_id_t i = 0; i < segments.size(); ++i) {
+      const auto &segment = segments[i];
+      if (!segment.is_closed() ||
+          segment.is_in_journal(journal_tail_committed)) {
+        continue;
+      }
+      // Query the tracker once per candidate instead of once for the
+      // comparison and again for the assignment.
+      const auto live_bytes = space_tracker->get_usage(i);
+      if (live_bytes < least_live_bytes) {
+        ret = i;
+        least_live_bytes = live_bytes;
+      }
+    }
+    if (ret != NULL_SEG_ID) {
+      crimson::get_logger(ceph_subsys_filestore).debug(
+        "SegmentCleaner::get_next_gc_target: segment {} seq {}",
+        ret,
+        segments[ret].journal_segment_seq);
+    }
+    return ret;
+  }
+
/// Returns whatever the active space tracker's make_empty() produces.
SpaceTrackerIRef get_empty_space_tracker() const {
  SpaceTrackerIRef fresh = space_tracker->make_empty();
  return fresh;
}
// journal status helpers
+ /**
+ * rewrite_dirty
+ *
+ * Writes out dirty blocks dirtied earlier than limit.
+ *
+ * The dirty extents are obtained from ecb->get_next_dirty_extents(limit);
+ * an empty list resolves immediately.
+ *
+ * @param t transaction the work is performed on
+ * @param limit journal sequence bound passed to get_next_dirty_extents
+ * @return future that may fail with input_output_error
+ */
+ using rewrite_dirty_ertr = crimson::errorator<
+ crimson::ct_error::input_output_error>;
+ using rewrite_dirty_ret = rewrite_dirty_ertr::future<>;
+ rewrite_dirty_ret rewrite_dirty(
+ Transaction &t,
+ journal_seq_t limit);
+
journal_seq_t get_dirty_tail() const {
auto ret = journal_head;
ret.segment_seq -= std::min(
config.max_journal_segments);
return ret;
}
+
+ // GC status helpers
+ paddr_t gc_current_pos = P_ADDR_NULL;
+
+ /**
+ * do_gc
+ *
+ * Performs bytes worth of gc work on t.
+ *
+ * A bytes value of 0 is a no-op. Progress through the segment being
+ * collected is kept in gc_current_pos across calls; a new segment is
+ * chosen with get_next_gc_target() whenever gc_current_pos is P_ADDR_NULL.
+ *
+ * @param t transaction that receives the rewritten extents and, once a
+ *          segment has been fully scanned, the segment to release
+ * @param bytes amount to scan, in bytes
+ */
+ using do_gc_ertr = SegmentManager::read_ertr;
+ using do_gc_ret = do_gc_ertr::future<>;
+ do_gc_ret do_gc(
+ Transaction &t,
+ size_t bytes);
+
+  /// Offset of journal_head within its segment; journal_head must have been set.
+  size_t get_bytes_used_current_segment() const {
+    assert(journal_head != journal_seq_t());
+    const auto &head_addr = journal_head.offset;
+    return head_addr.offset;
+  }
+
+  /// Bytes left in the current journal segment: segment size minus bytes used.
+  size_t get_bytes_available_current_segment() const {
+    const size_t consumed = get_bytes_used_current_segment();
+    return config.segment_size - consumed;
+  }
+
+  /**
+   * get_bytes_scanned_current_segment
+   *
+   * Offset of the gc cursor within the segment being collected, or 0
+   * while no gc is in progress (gc_current_pos == P_ADDR_NULL).
+   */
+  size_t get_bytes_scanned_current_segment() const {
+    const bool gc_idle = (gc_current_pos == P_ADDR_NULL);
+    return gc_idle ? size_t{0} : static_cast<size_t>(gc_current_pos.offset);
+  }
+
+  /**
+   * Bytes counted as available: all empty segments, the unwritten remainder
+   * of the current journal segment, and the already-scanned prefix of the
+   * segment being collected.
+   */
+  size_t get_available_bytes() const {
+    const size_t in_empty_segments = empty_segments * config.segment_size;
+    const size_t in_journal_segment = get_bytes_available_current_segment();
+    const size_t in_gc_segment = get_bytes_scanned_current_segment();
+    return in_empty_segments + in_journal_segment + in_gc_segment;
+  }
+
+  /// Total capacity: configured segment count times configured segment size.
+  size_t get_total_bytes() const {
+    return config.num_segments * config.segment_size;
+  }
+
+  /// Bytes not counted as available: total bytes minus available bytes.
+  size_t get_unavailable_bytes() const {
+    const size_t total = get_total_bytes();
+    const size_t available = get_available_bytes();
+    return total - available;
+  }
+
+  /**
+   * get_used_bytes
+   *
+   * Bytes currently held by live extents; journal space is not included.
+   */
+  size_t get_used_bytes() const {
+    return used_bytes;
+  }
+
+  /**
+   * get_reclaimable_bytes
+   *
+   * Returns the number of bytes in unavailable segments that are not live.
+   *
+   * Clamped at 0: should the live byte count ever exceed the unavailable
+   * byte count, a plain unsigned subtraction would wrap around to a huge
+   * value and distort every ratio derived from it.
+   */
+  size_t get_reclaimable_bytes() const {
+    const size_t unavailable = get_unavailable_bytes();
+    const size_t live = get_used_bytes();
+    return unavailable > live ? unavailable - live : 0;
+  }
+
+  /**
+   * get_reclaim_ratio
+   *
+   * Fraction of the unavailable bytes that is reclaimable (not live).
+   * Yields 0 when there are no unavailable bytes at all.
+   */
+  double get_reclaim_ratio() const {
+    const size_t unavailable = get_unavailable_bytes();
+    if (unavailable == 0) {
+      return 0;
+    }
+    const auto reclaimable = static_cast<double>(get_reclaimable_bytes());
+    return reclaimable / static_cast<double>(unavailable);
+  }
+
+  /**
+   * get_available_ratio
+   *
+   * Available bytes expressed as a fraction of total bytes.
+   */
+  double get_available_ratio() const {
+    const auto available = static_cast<double>(get_available_bytes());
+    const auto total = static_cast<double>(get_total_bytes());
+    return available / total;
+  }
+
+  /**
+   * get_immediate_bytes_to_gc_for_reclaim
+   *
+   * Returns the number of bytes to gc in order to bring the
+   * reclaim ratio below config.reclaim_ratio_hard_limit.
+   *
+   * Returns 0 when the reclaim ratio is already below the limit, when
+   * nothing is reclaimable, or when the unavailable byte count is already
+   * under the target. The target is never lower than
+   * (1 - config.reclaim_ratio_usage_min) of the total bytes.
+   */
+  size_t get_immediate_bytes_to_gc_for_reclaim() const {
+    const double reclaim_ratio = get_reclaim_ratio();
+    if (reclaim_ratio < config.reclaim_ratio_hard_limit)
+      return 0;
+
+    // With nothing reclaimable gc cannot help, and the final division
+    // would be by zero (converting inf/NaN to size_t is undefined).
+    if (reclaim_ratio <= 0)
+      return 0;
+
+    // used / (1 - limit) is the unavailable size at which the reclaim
+    // ratio equals the limit. A limit of 1 can only be reached here with
+    // no live bytes, for which that size is 0; handling it explicitly
+    // avoids dividing by zero.
+    const double live_fraction = 1.0 - config.reclaim_ratio_hard_limit;
+    const double target_for_limit = live_fraction > 0
+      ? get_used_bytes() / live_fraction
+      : 0.0;
+    const double target_floor =
+      (1 - config.reclaim_ratio_usage_min) * get_total_bytes();
+    const auto unavailable_target = static_cast<size_t>(
+      std::max(target_for_limit, target_floor));
+
+    const size_t unavailable = get_unavailable_bytes();
+    if (unavailable_target > unavailable)
+      return 0;
+
+    // Only reclaim_ratio of every scanned byte is freed, so scale up.
+    return static_cast<size_t>(
+      (unavailable - unavailable_target) / reclaim_ratio);
+  }
+
+  /**
+   * get_immediate_bytes_to_gc_for_available
+   *
+   * Returns the number of bytes to gc in order to bring the
+   * ratio of available disk space to total disk space above
+   * config.available_ratio_hard_limit.
+   *
+   * Returns 0 when the available ratio already exceeds the limit, and
+   * also when nothing is reclaimable: gc cannot free space in that case,
+   * and dividing by a zero reclaim ratio would produce inf/NaN, whose
+   * conversion to size_t is undefined.
+   */
+  size_t get_immediate_bytes_to_gc_for_available() const {
+    const double available_ratio = get_available_ratio();
+    if (available_ratio > config.available_ratio_hard_limit) {
+      return 0;
+    }
+
+    const double reclaim_ratio = get_reclaim_ratio();
+    if (reclaim_ratio <= 0) {
+      return 0;
+    }
+
+    const double ratio_to_make_available =
+      config.available_ratio_hard_limit - available_ratio;
+    // Only reclaim_ratio of every scanned byte is freed, so scale up.
+    return static_cast<size_t>(
+      ratio_to_make_available * static_cast<double>(get_total_bytes())
+      / reclaim_ratio);
+  }
+
+  /**
+   * get_immediate_bytes_to_gc
+   *
+   * Bytes of gc needed right now to satisfy both hard limits: the larger
+   * of the amount needed for the reclaim ratio and the amount needed for
+   * the available ratio.
+   */
+  size_t get_immediate_bytes_to_gc() const {
+    return std::max(
+      get_immediate_bytes_to_gc_for_reclaim(),
+      get_immediate_bytes_to_gc_for_available());
+  }
+
void mark_closed(segment_id_t segment) {
assert(segments.size() > segment);
if (init_complete) {
return crimson::ct_error::eagain::make();
}
- return journal.submit_record(std::move(*record)).safe_then(
- [this, t=std::move(t)](auto p) mutable {
- auto [addr, journal_seq] = p;
- segment_cleaner.set_journal_head(journal_seq);
- cache.complete_commit(*t, addr, journal_seq, &segment_cleaner);
- lba_manager.complete_transaction(*t);
- },
+ return journal.submit_record(std::move(*record)
+ ).safe_then([this, t=std::move(t)](auto p) mutable {
+ auto [addr, journal_seq] = p;
+ segment_cleaner.set_journal_head(journal_seq);
+ cache.complete_commit(*t, addr, journal_seq, &segment_cleaner);
+ lba_manager.complete_transaction(*t);
+ auto to_release = t->get_segment_to_release();
+ if (to_release != NULL_SEG_ID) {
+ segment_cleaner.mark_segment_released(to_release);
+ return segment_manager.release(to_release);
+ } else {
+ return SegmentManager::release_ertr::now();
+ }
+ }).handle_error(
submit_transaction_ertr::pass_further{},
crimson::ct_error::all_same_way([](auto e) {
ceph_assert(0 == "Hit error submitting to journal");