git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/segment_cleaner: add online gc
author Samuel Just <sjust@redhat.com>
Thu, 17 Sep 2020 23:40:50 +0000 (16:40 -0700)
committer Samuel Just <sjust@redhat.com>
Tue, 20 Oct 2020 19:27:08 +0000 (12:27 -0700)
Signed-off-by: Samuel Just <sjust@redhat.com>
src/crimson/os/seastore/segment_cleaner.cc
src/crimson/os/seastore/segment_cleaner.h
src/crimson/os/seastore/transaction.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h

index fd39c229138755f7c070adb501b00aa8fbe827cc..21e669f5614334483c9a2159e1eb0abffb62f676 100644 (file)
@@ -3,6 +3,7 @@
 
 #include "crimson/common/log.h"
 
+#include "crimson/os/seastore/transaction.h"
 #include "crimson/os/seastore/segment_cleaner.h"
 
 namespace {
@@ -205,12 +206,44 @@ SegmentCleaner::do_immediate_work_ret SegmentCleaner::do_immediate_work(
     __func__,
     journal_tail_target,
     next_target);
-  if (journal_tail_target > next_target) {
-    return do_immediate_work_ertr::now();
+
+  logger().debug(
+    "SegmentCleaner::do_immediate_work gc total {}, available {}, unavailable {}, used {}  available_ratio {}, reclaim_ratio {}, bytes_to_gc_for_available {}, bytes_to_gc_for_reclaim {}",
+    get_total_bytes(),
+    get_available_bytes(),
+    get_unavailable_bytes(),
+    get_used_bytes(),
+    get_available_ratio(),
+    get_reclaim_ratio(),
+    get_immediate_bytes_to_gc_for_available(),
+    get_immediate_bytes_to_gc_for_reclaim());
+
+  auto dirty_fut = do_immediate_work_ertr::now();
+  if (journal_tail_target < next_target) {
+    dirty_fut = rewrite_dirty(t, next_target);
   }
+  return dirty_fut.safe_then([=, &t] {
+    return do_gc(t, get_immediate_bytes_to_gc());
+  }).handle_error(
+    do_immediate_work_ertr::pass_further{},
+    crimson::ct_error::assert_all{}
+  );
+}
+
+SegmentCleaner::do_deferred_work_ret SegmentCleaner::do_deferred_work(
+  Transaction &t)
+{
+  return do_deferred_work_ret(
+    do_deferred_work_ertr::ready_future_marker{},
+    ceph::timespan());
+}
 
+SegmentCleaner::rewrite_dirty_ret SegmentCleaner::rewrite_dirty(
+  Transaction &t,
+  journal_seq_t limit)
+{
   return ecb->get_next_dirty_extents(
-    get_dirty_tail_limit()
+    limit
   ).then([=, &t](auto dirty_list) {
     if (dirty_list.empty()) {
       return do_immediate_work_ertr::now();
@@ -232,12 +265,74 @@ SegmentCleaner::do_immediate_work_ret SegmentCleaner::do_immediate_work(
   });
 }
 
-SegmentCleaner::do_deferred_work_ret SegmentCleaner::do_deferred_work(
-  Transaction &t)
+SegmentCleaner::do_gc_ret SegmentCleaner::do_gc(
+  Transaction &t,
+  size_t bytes)
 {
-  return do_deferred_work_ret(
-    do_deferred_work_ertr::ready_future_marker{},
-    ceph::timespan());
+  if (bytes == 0) {
+    return do_gc_ertr::now();
+  }
+
+  if (gc_current_pos == P_ADDR_NULL) {
+    gc_current_pos.segment = get_next_gc_target();
+    if (gc_current_pos == P_ADDR_NULL) {
+      // apparently there are no segments to gc
+      logger().debug(
+       "SegmentCleaner::do_gc: no segments to gc");
+      return do_gc_ertr::now();
+    }
+    logger().debug(
+      "SegmentCleaner::do_gc: starting gc on segment {}",
+      gc_current_pos.segment);
+    gc_current_pos.offset = 0;
+  }
+
+  return ecb->scan_extents(
+    gc_current_pos,
+    bytes
+  ).safe_then([=, &t](auto addrs) {
+    return seastar::do_with(
+      std::move(addrs),
+      [=, &t](auto &addrs) {
+       auto &[next, addr_list] = addrs;
+       return crimson::do_for_each(
+         addr_list,
+         [=, &t](auto &addr_pair) {
+           auto &[addr, info] = addr_pair;
+           logger().debug(
+             "SegmentCleaner::do_gc: checking addr {}",
+             addr);
+           return ecb->get_extent_if_live(
+             t,
+             info.type,
+             addr,
+             info.addr,
+             info.len
+           ).safe_then([=, &t](CachedExtentRef ext) {
+             if (!ext) {
+               logger().debug(
+                 "SegmentCleaner::do_gc: addr {} dead, skipping",
+                 addr);
+               return ExtentCallbackInterface::rewrite_extent_ertr::now();
+             } else {
+               logger().debug(
+                 "SegmentCleaner::do_gc: addr {} alive, gc'ing {}",
+                 addr,
+                 *ext);
+             }
+             return ecb->rewrite_extent(
+               t,
+               ext);
+           });
+         }).safe_then([=, &t] {
+           auto old_pos = std::exchange(gc_current_pos, next);
+           if (gc_current_pos == P_ADDR_NULL) {
+             t.mark_segment_to_release(old_pos.segment);
+           }
+           return ExtentCallbackInterface::release_segment_ertr::now();
+         });
+      });
+  });
 }
 
 }
index caa903e97529de921f0b32df0efaa8695af7968b..14e2a04f56272dd6d17314d0bba37127de9a8bf2 100644 (file)
@@ -21,6 +21,12 @@ struct segment_info_t {
   // Will be non-null for any segments in the current journal
   segment_seq_t journal_segment_seq = NULL_SEG_SEQ;
 
+
+  bool is_in_journal(journal_seq_t tail_committed) const {
+    return journal_segment_seq != NULL_SEG_SEQ &&
+      tail_committed.segment_seq <= journal_segment_seq;
+  }
+
   bool is_empty() const {
     return state == Segment::segment_state_t::EMPTY;
   }
@@ -209,6 +215,12 @@ public:
     size_t target_journal_segments = 0;
     size_t max_journal_segments = 0;
 
+    double reclaim_ratio_hard_limit = 0;
+    // don't apply reclaim ratio while the available ratio is above this
+    double reclaim_ratio_usage_min = 0;
+
+    double available_ratio_hard_limit = 0;
+
     static config_t default_from_segment_manager(
       SegmentManager &manager) {
       return config_t{
@@ -216,7 +228,11 @@ public:
        static_cast<size_t>(manager.get_segment_size()),
        (size_t)manager.get_block_size(),
        2,
-       4};
+       4,
+       .5,
+       .95,
+       .2
+       };
     }
   };
 
@@ -270,6 +286,26 @@ public:
       paddr_t addr,
       laddr_t laddr,
       segment_off_t len) = 0;
+
+    /**
+     * scan_extents
+     *
+     * Interface shim for Journal::scan_extents
+     */
+    using scan_extents_ret = Journal::scan_extents_ret;
+    virtual scan_extents_ret scan_extents(
+      paddr_t addr,
+      extent_len_t bytes_to_read) = 0;
+
+    /**
+     * release_segment
+     *
+     * Release segment.
+     */
+    using release_segment_ertr = SegmentManager::release_ertr;
+    using release_segment_ret = release_segment_ertr::future<>;
+    virtual release_segment_ret release_segment(
+      segment_id_t id) = 0;
   };
 
 private:
@@ -338,6 +374,14 @@ public:
     segments[segment].journal_segment_seq = seq;
   }
 
+  segment_seq_t get_seq(segment_id_t id) final {
+    return segments[id].journal_segment_seq;
+  }
+
+  void mark_segment_released(segment_id_t segment) {
+    return mark_empty(segment);
+  }
+
   void mark_space_used(
     paddr_t addr,
     extent_len_t len,
@@ -375,6 +419,26 @@ public:
     assert(ret >= 0);
   }
 
+  segment_id_t get_next_gc_target() const {
+    segment_id_t ret = NULL_SEG_ID;
+    int64_t least_live_bytes = std::numeric_limits<int64_t>::max();
+    for (segment_id_t i = 0; i < segments.size(); ++i) {
+      if (segments[i].is_closed() &&
+         !segments[i].is_in_journal(journal_tail_committed) &&
+         space_tracker->get_usage(i) < least_live_bytes) {
+       ret = i;
+       least_live_bytes = space_tracker->get_usage(i);
+      }
+    }
+    if (ret != NULL_SEG_ID) {
+      crimson::get_logger(ceph_subsys_filestore).debug(
+       "SegmentCleaner::get_next_gc_target: segment {} seq {}",
+       ret,
+       segments[ret].journal_segment_seq);
+    }
+    return ret;
+  }
+
   SpaceTrackerIRef get_empty_space_tracker() const {
     return space_tracker->make_empty();
   }
@@ -425,6 +489,18 @@ private:
 
   // journal status helpers
 
+  /**
+   * rewrite_dirty
+   *
+   * Writes out dirty blocks dirtied earlier than limit.
+   */
+  using rewrite_dirty_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error>;
+  using rewrite_dirty_ret = rewrite_dirty_ertr::future<>;
+  rewrite_dirty_ret rewrite_dirty(
+    Transaction &t,
+    journal_seq_t limit);
+
   journal_seq_t get_dirty_tail() const {
     auto ret = journal_head;
     ret.segment_seq -= std::min(
@@ -440,6 +516,140 @@ private:
       config.max_journal_segments);
     return ret;
   }
+
+  // GC status helpers
+  paddr_t gc_current_pos = P_ADDR_NULL;
+
+  /**
+   * do_gc
+   *
+   * Performs bytes worth of gc work on t.
+   */
+  using do_gc_ertr = SegmentManager::read_ertr;
+  using do_gc_ret = do_gc_ertr::future<>;
+  do_gc_ret do_gc(
+    Transaction &t,
+    size_t bytes);
+
+  size_t get_bytes_used_current_segment() const {
+    assert(journal_head != journal_seq_t());
+    return journal_head.offset.offset;
+  }
+
+  size_t get_bytes_available_current_segment() const {
+    return config.segment_size - get_bytes_used_current_segment();
+  }
+
+  /**
+   * get_bytes_scanned_current_segment
+   *
+   * Returns the number of bytes from the current gc segment that
+   * have been scanned.
+   */
+  size_t get_bytes_scanned_current_segment() const {
+    if (gc_current_pos == P_ADDR_NULL)
+      return 0;
+
+    return gc_current_pos.offset;
+  }
+
+  size_t get_available_bytes() const {
+    return (empty_segments * config.segment_size) +
+      get_bytes_available_current_segment() +
+      get_bytes_scanned_current_segment();
+  }
+
+  size_t get_total_bytes() const {
+    return config.segment_size * config.num_segments;
+  }
+
+  size_t get_unavailable_bytes() const {
+    return get_total_bytes() - get_available_bytes();
+  }
+
+  /// Returns bytes currently occupied by live extents (not journal)
+  size_t get_used_bytes() const {
+    return used_bytes;
+  }
+
+  /// Returns the number of bytes in unavailable segments that are not live
+  size_t get_reclaimable_bytes() const {
+    return get_unavailable_bytes() - get_used_bytes();
+  }
+
+  /**
+   * get_reclaim_ratio
+   *
+   * Returns the ratio of unavailable space that is not currently used.
+   */
+  double get_reclaim_ratio() const {
+    if (get_unavailable_bytes() == 0) return 0;
+    return (double)get_reclaimable_bytes() / (double)get_unavailable_bytes();
+  }
+
+  /**
+   * get_available_ratio
+   *
+   * Returns ratio of available space to write to total space
+   */
+  double get_available_ratio() const {
+    return (double)get_available_bytes() / (double)get_total_bytes();
+  }
+
+  /**
+   * get_immediate_bytes_to_gc_for_reclaim
+   *
+   * Returns the number of bytes to gc in order to bring the
+   * reclaim ratio below reclaim_ratio_hard_limit.
+   */
+  size_t get_immediate_bytes_to_gc_for_reclaim() const {
+    if (get_reclaim_ratio() < config.reclaim_ratio_hard_limit)
+      return 0;
+
+    const size_t unavailable_target = std::max(
+      get_used_bytes() / (1.0 - config.reclaim_ratio_hard_limit),
+      (1 - config.reclaim_ratio_usage_min) * get_total_bytes());
+
+    if (unavailable_target > get_unavailable_bytes())
+      return 0;
+
+    return (get_unavailable_bytes() - unavailable_target) / get_reclaim_ratio();
+  }
+
+  /**
+   * get_immediate_bytes_to_gc_for_available
+   *
+   * Returns the number of bytes to gc in order to bring the
+   * ratio of available disk space to total disk space above
+   * available_ratio_hard_limit.
+   */
+  size_t get_immediate_bytes_to_gc_for_available() const {
+    if (get_available_ratio() > config.available_ratio_hard_limit) {
+      return 0;
+    }
+
+    const double ratio_to_make_available = config.available_ratio_hard_limit -
+      get_available_ratio();
+    return ratio_to_make_available * (double)get_total_bytes()
+      / get_reclaim_ratio();
+  }
+
+  /**
+   * get_immediate_bytes_to_gc
+   *
+   * Returns number of bytes to gc in order to restore any strict
+   * limits.
+   */
+  size_t get_immediate_bytes_to_gc() const {
+    // number of bytes to gc in order to correct reclaim ratio
+    size_t for_reclaim = get_immediate_bytes_to_gc_for_reclaim();
+
+    // number of bytes to gc in order to correct available_ratio
+    size_t for_available = get_immediate_bytes_to_gc_for_available();
+
+    return std::max(for_reclaim, for_available);
+  }
+
   void mark_closed(segment_id_t segment) {
     assert(segments.size() > segment);
     if (init_complete) {
index 3c6d77520f14d508f4d276f56cee017c31f3d02f..e189d1d32da03dfb550358d8d1b532ebf13835d2 100644 (file)
@@ -79,6 +79,15 @@ public:
     write_set.insert(*ref);
   }
 
+  void mark_segment_to_release(segment_id_t segment) {
+    assert(to_release == NULL_SEG_ID);
+    to_release = segment;
+  }
+
+  segment_id_t get_segment_to_release() const {
+    return to_release;
+  }
+
   const auto &get_fresh_block_list() {
     return fresh_block_list;
   }
@@ -118,6 +127,9 @@ private:
 
   pextent_set_t retired_set; ///< list of extents mutated by this transaction
 
+  /// if != NULL_SEG_ID, release this segment after completion
+  segment_id_t to_release = NULL_SEG_ID;
+
   Transaction(bool weak) : weak(weak) {}
 };
 using TransactionRef = Transaction::Ref;
index 298cc00f54110d14612f67448a8642aa3dbe53a7..76517055fd3e14de8d10578e0d78c04eddacc8c3 100644 (file)
@@ -186,13 +186,20 @@ TransactionManager::submit_transaction(
       return crimson::ct_error::eagain::make();
     }
 
-    return journal.submit_record(std::move(*record)).safe_then(
-      [this, t=std::move(t)](auto p) mutable {
-       auto [addr, journal_seq] = p;
-       segment_cleaner.set_journal_head(journal_seq);
-       cache.complete_commit(*t, addr, journal_seq, &segment_cleaner);
-       lba_manager.complete_transaction(*t);
-      },
+    return journal.submit_record(std::move(*record)
+    ).safe_then([this, t=std::move(t)](auto p) mutable {
+      auto [addr, journal_seq] = p;
+      segment_cleaner.set_journal_head(journal_seq);
+      cache.complete_commit(*t, addr, journal_seq, &segment_cleaner);
+      lba_manager.complete_transaction(*t);
+      auto to_release = t->get_segment_to_release();
+      if (to_release != NULL_SEG_ID) {
+       segment_cleaner.mark_segment_released(to_release);
+       return segment_manager.release(to_release);
+      } else {
+       return SegmentManager::release_ertr::now();
+      }
+    }).handle_error(
       submit_transaction_ertr::pass_further{},
       crimson::ct_error::all_same_way([](auto e) {
        ceph_assert(0 == "Hit error submitting to journal");
index 4a93a6b156d2c3bdadc2feef45aa1cf5d397f343..4ddb191aae3ded57060719faa9d10e6ed93efd58 100644 (file)
@@ -235,6 +235,21 @@ public:
     laddr_t laddr,
     segment_off_t len) final;
 
+  using scan_extents_ret =
+    SegmentCleaner::ExtentCallbackInterface::scan_extents_ret;
+  scan_extents_ret scan_extents(
+    paddr_t addr,
+    extent_len_t bytes_to_read) final {
+    return journal.scan_extents(addr, bytes_to_read);
+  }
+
+  using release_segment_ret =
+    SegmentCleaner::ExtentCallbackInterface::release_segment_ret;
+  release_segment_ret release_segment(
+    segment_id_t id) {
+    return segment_manager.release(id);
+  }
+
   ~TransactionManager();
 
 private: