]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: make segment cleaner do gc based on gc benefit cost ratio 43474/head
author: Xuehan Xu <xxhdx1985126@gmail.com>
Tue, 28 Sep 2021 09:35:30 +0000 (17:35 +0800)
committer: Xuehan Xu <xxhdx1985126@gmail.com>
Mon, 7 Mar 2022 03:44:16 +0000 (11:44 +0800)
A segment's benefit-cost ratio is calculated as follows:
segment_last_mod_time = max{last modification time of all extents in the segment}
segment_last_rewrite_time = max{last rewrite time of all extents in the segment}
segment_utilization = segment_live_bytes / segment_size
segment_age = current_time - max{segment_last_mod_time, segment_last_rewrite_time}
benefit_cost_ratio = (1 - segment_utilization) * segment_age / (1 + segment_utilization)

The segment cleaner always reclaims the segment with the largest benefit-cost ratio.

Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
src/crimson/os/seastore/segment_cleaner.h

index 6b10c63d9ca7bc20ab5c69f6fe41185c35bcd213..aed0c4e8ff135d762d718de9d21a80051e68851e 100644 (file)
@@ -895,28 +895,41 @@ private:
 
   // journal status helpers
 
+  double calc_gc_benefit_cost(segment_id_t id) const {  // GC score of segment `id`: (1 - util) * age / (1 + util)
+    double util = space_tracker->calc_utilization(id);  // presumably live_bytes / segment_size in [0, 1] -- confirm against space_tracker
+    auto cur_time = seastar::lowres_system_clock::now();
+    auto segment = segments[id];  // NOTE(review): plain `auto` takes a copy of the entry; a const reference would likely do
+    assert(cur_time >= segment.last_modified);  // NOTE(review): last_rewritten is not checked, yet it may be the value subtracted below
+    auto segment_age =
+      cur_time - std::max(segment.last_modified, segment.last_rewritten);  // time elapsed since the later of the two timestamps
+    uint64_t age = segment_age.count();  // raw tick count of the duration; NOTE(review): a negative count would wrap in uint64_t
+    return (1 - util) * age / (1 + util);  // for util in [0, 1]: grows with age, shrinks as utilization rises
+  }
+
   journal_seq_t get_next_gc_target() const {
     segment_id_t id = NULL_SEG_ID;
     segment_seq_t seq = NULL_SEG_SEQ;
-    int64_t least_live_bytes = std::numeric_limits<int64_t>::max();
+    double max_benefit_cost = 0;
     for (auto it = segments.begin();
         it != segments.end();
         ++it) {
       auto _id = it->first;
       const auto& segment_info = it->second;
+      double benefit_cost = calc_gc_benefit_cost(_id);
       if (segment_info.is_closed() &&
          !segment_info.is_in_journal(journal_tail_committed) &&
-         space_tracker->get_usage(_id) < least_live_bytes) {
+         benefit_cost > max_benefit_cost) {
        id = _id;
        seq = segment_info.journal_segment_seq;
-       least_live_bytes = space_tracker->get_usage(id);
+       max_benefit_cost = benefit_cost;
       }
     }
     if (id != NULL_SEG_ID) {
       crimson::get_logger(ceph_subsys_seastore_cleaner).debug(
-       "SegmentCleaner::get_next_gc_target: segment {} seq {}",
+       "SegmentCleaner::get_next_gc_target: segment {} seq {}, benefit_cost {}",
        id,
-       seq);
+       seq,
+       max_benefit_cost);
       return journal_seq_t{seq, paddr_t::make_seg_paddr(id, 0)};
     } else {
       return JOURNAL_SEQ_NULL;