git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: generalize journal tail calculations
author Yingxin Cheng <yingxin.cheng@intel.com>
Fri, 19 Aug 2022 07:09:41 +0000 (15:09 +0800)
committer Yingxin Cheng <yingxin.cheng@intel.com>
Wed, 24 Aug 2022 02:59:59 +0000 (10:59 +0800)
Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
Signed-off-by: Myoungwon Oh <myoungwon.oh@samsung.com>
src/crimson/os/seastore/async_cleaner.cc
src/crimson/os/seastore/async_cleaner.h
src/crimson/os/seastore/seastore_types.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction_manager.cc

index 3ead6ff012770cd8e39de2466214058cd752a78a..6a5dfe7f049204ec76d600ed06b98bf9f066573d 100644 (file)
@@ -467,16 +467,24 @@ AsyncCleaner::AsyncCleaner(
   config_t config,
   SegmentManagerGroupRef&& sm_group,
   BackrefManager &backref_manager,
-  bool detailed)
+  bool detailed,
+  journal_type_t type,
+  seastore_off_t roll_start,
+  seastore_off_t roll_size)
   : detailed(detailed),
     config(config),
     sm_group(std::move(sm_group)),
     backref_manager(backref_manager),
+    journal_type(type),
+    roll_start(roll_start),
+    roll_size(roll_size),
     ool_segment_seq_allocator(
       new SegmentSeqAllocator(segment_type_t::OOL)),
     gc_process(*this)
 {
   config.validate();
+  ceph_assert(roll_start >= 0);
+  ceph_assert(roll_size > 0);
 }
 
 void AsyncCleaner::register_metrics()
index 3d0cbf55ca9b261adb2a4d359afef04c5862c1ad..2ca0ba974921a0bf0c2246d0aab4da8c01ab7834 100644 (file)
@@ -539,14 +539,13 @@ class AsyncCleaner : public SegmentProvider, public JournalTrimmer {
 public:
   /// Config
   struct config_t {
-    /// Number of minimum journal segments to stop trimming dirty.
-    size_t target_journal_dirty_segments = 0;
-    /// Number of maximum journal segments to block user transactions.
-    size_t max_journal_segments = 0;
-
-    /// Number of minimum journal segments to stop trimming allocation
+    /// Number of minimum bytes to stop trimming dirty.
+    std::size_t target_journal_dirty_bytes = 0;
+    /// Number of minimum bytes to stop trimming allocation
     /// (having the corresponding backrefs unmerged)
-    size_t target_journal_alloc_segments = 0;
+    std::size_t target_journal_alloc_bytes = 0;
+    /// Number of maximum bytes to block user transactions.
+    std::size_t max_journal_bytes = 0;
 
     /// Ratio of maximum available space to disable reclaiming.
     double available_ratio_gc_max = 0;
@@ -557,28 +556,44 @@ public:
     double reclaim_ratio_gc_threshold = 0;
 
     /// Number of bytes to reclaim per cycle
-    size_t reclaim_bytes_per_cycle = 0;
+    std::size_t reclaim_bytes_per_cycle = 0;
 
     /// Number of bytes to rewrite dirty per cycle
-    size_t rewrite_dirty_bytes_per_cycle = 0;
+    std::size_t rewrite_dirty_bytes_per_cycle = 0;
 
     /// Number of bytes to rewrite backref per cycle
-    size_t rewrite_backref_bytes_per_cycle = 0;
+    std::size_t rewrite_backref_bytes_per_cycle = 0;
 
     void validate() const {
-      ceph_assert(max_journal_segments > target_journal_dirty_segments);
-      ceph_assert(max_journal_segments > target_journal_alloc_segments);
+      ceph_assert(max_journal_bytes <= MAX_SEG_OFF);
+      ceph_assert(max_journal_bytes > target_journal_dirty_bytes);
+      ceph_assert(max_journal_bytes > target_journal_alloc_bytes);
       ceph_assert(available_ratio_gc_max > available_ratio_hard_limit);
       ceph_assert(reclaim_bytes_per_cycle > 0);
       ceph_assert(rewrite_dirty_bytes_per_cycle > 0);
       ceph_assert(rewrite_backref_bytes_per_cycle > 0);
     }
 
-    static config_t get_default() {
+    static config_t get_default(
+        std::size_t roll_size, journal_type_t type) {
+      assert(roll_size);
+      std::size_t target_dirty_bytes = 0;
+      std::size_t target_alloc_bytes = 0;
+      std::size_t max_journal_bytes = 0;
+      if (type == journal_type_t::SEGMENTED) {
+        target_dirty_bytes = 12 * roll_size;
+        target_alloc_bytes = 2 * roll_size;
+        max_journal_bytes = 16 * roll_size;
+      } else {
+        assert(type == journal_type_t::CIRCULAR);
+        target_dirty_bytes = roll_size / 4;
+        target_alloc_bytes = roll_size / 4;
+        max_journal_bytes = roll_size / 2;
+      }
       return config_t{
-         12,   // target_journal_dirty_segments
-         16,   // max_journal_segments
-         2,    // target_journal_alloc_segments
+         target_dirty_bytes,
+         target_alloc_bytes,
+         max_journal_bytes,
          .15,  // available_ratio_gc_max
          .1,   // available_ratio_hard_limit
          .1,   // reclaim_ratio_gc_threshold
@@ -588,11 +603,26 @@ public:
        };
     }
 
-    static config_t get_test() {
+    static config_t get_test(
+        std::size_t roll_size, journal_type_t type) {
+      assert(roll_size);
+      std::size_t target_dirty_bytes = 0;
+      std::size_t target_alloc_bytes = 0;
+      std::size_t max_journal_bytes = 0;
+      if (type == journal_type_t::SEGMENTED) {
+        target_dirty_bytes = 2 * roll_size;
+        target_alloc_bytes = 2 * roll_size;
+        max_journal_bytes = 4 * roll_size;
+      } else {
+        assert(type == journal_type_t::CIRCULAR);
+        target_dirty_bytes = roll_size / 4;
+        target_alloc_bytes = roll_size / 4;
+        max_journal_bytes = roll_size / 2;
+      }
       return config_t{
-         2,    // target_journal_dirty_segments
-         4,    // max_journal_segments
-         2,    // target_journal_alloc_segments
+         target_dirty_bytes,
+         target_alloc_bytes,
+         max_journal_bytes,
          .99,  // available_ratio_gc_max
          .2,   // available_ratio_hard_limit
          .6,   // reclaim_ratio_gc_threshold
@@ -777,6 +807,11 @@ private:
   seastar::metrics::metric_group metrics;
   void register_metrics();
 
+  journal_type_t journal_type;
+
+  seastore_off_t roll_start;
+  seastore_off_t roll_size;
+
   journal_seq_t journal_alloc_tail;
 
   journal_seq_t journal_dirty_tail;
@@ -805,7 +840,10 @@ public:
     config_t config,
     SegmentManagerGroupRef&& sm_group,
     BackrefManager &backref_manager,
-    bool detailed = false);
+    bool detailed,
+    journal_type_t type,
+    seastore_off_t roll_start,
+    seastore_off_t roll_size);
 
   SegmentSeqAllocator& get_ool_segment_seq_allocator() {
     return *ool_segment_seq_allocator;
@@ -977,40 +1015,34 @@ private:
 
   journal_seq_t get_dirty_tail_target() const {
     assert(is_ready());
-    auto ret = journal_head;
-    ceph_assert(ret != JOURNAL_SEQ_NULL);
-    if (ret.segment_seq >= config.target_journal_dirty_segments) {
-      ret.segment_seq -= config.target_journal_dirty_segments;
-    } else {
-      ret.segment_seq = 0;
-      ret.offset = P_ADDR_MIN;
-    }
+    ceph_assert(journal_head != JOURNAL_SEQ_NULL);
+    auto ret = journal_head.add_offset(
+        journal_type,
+        -static_cast<seastore_off_t>(config.target_journal_dirty_bytes),
+        roll_start,
+        roll_size);
     return ret;
   }
 
-  journal_seq_t get_tail_limit() const {
+  journal_seq_t get_alloc_tail_target() const {
     assert(is_ready());
-    auto ret = journal_head;
-    ceph_assert(ret != JOURNAL_SEQ_NULL);
-    if (ret.segment_seq >= config.max_journal_segments) {
-      ret.segment_seq -= config.max_journal_segments;
-    } else {
-      ret.segment_seq = 0;
-      ret.offset = P_ADDR_MIN;
-    }
+    ceph_assert(journal_head != JOURNAL_SEQ_NULL);
+    auto ret = journal_head.add_offset(
+        journal_type,
+        -static_cast<seastore_off_t>(config.target_journal_alloc_bytes),
+        roll_start,
+        roll_size);
     return ret;
   }
 
-  journal_seq_t get_alloc_tail_target() const {
+  journal_seq_t get_tail_limit() const {
     assert(is_ready());
-    auto ret = journal_head;
-    ceph_assert(ret != JOURNAL_SEQ_NULL);
-    if (ret.segment_seq >= config.target_journal_alloc_segments) {
-      ret.segment_seq -= config.target_journal_alloc_segments;
-    } else {
-      ret.segment_seq = 0;
-      ret.offset = P_ADDR_MIN;
-    }
+    ceph_assert(journal_head != JOURNAL_SEQ_NULL);
+    auto ret = journal_head.add_offset(
+        journal_type,
+        -static_cast<seastore_off_t>(config.max_journal_bytes),
+        roll_start,
+        roll_size);
     return ret;
   }
 
@@ -1251,11 +1283,13 @@ private:
         journal_dirty_tail == JOURNAL_SEQ_NULL) {
       return 0;
     }
-    return (journal_head.segment_seq - journal_dirty_tail.segment_seq) *
-           segments.get_segment_size() +
-           journal_head.offset.as_seg_paddr().get_segment_off() -
-           segments.get_segment_size() -
-           journal_dirty_tail.offset.as_seg_paddr().get_segment_off();
+    auto ret = journal_head.relative_to(
+        journal_type,
+        journal_dirty_tail,
+        roll_start,
+        roll_size);
+    ceph_assert(ret >= 0);
+    return static_cast<std::size_t>(ret);
   }
 
   std::size_t get_alloc_journal_size() const {
@@ -1263,11 +1297,13 @@ private:
         journal_alloc_tail == JOURNAL_SEQ_NULL) {
       return 0;
     }
-    return (journal_head.segment_seq - journal_alloc_tail.segment_seq) *
-           segments.get_segment_size() +
-           journal_head.offset.as_seg_paddr().get_segment_off() -
-           segments.get_segment_size() -
-           journal_alloc_tail.offset.as_seg_paddr().get_segment_off();
+    auto ret = journal_head.relative_to(
+        journal_type,
+        journal_alloc_tail,
+        roll_start,
+        roll_size);
+    ceph_assert(ret >= 0);
+    return static_cast<std::size_t>(ret);
   }
 
   /**
index b97630cd9b96b06efebe3f5ea6b23855cc03792b..e14cab14555561a03dee91e5c72ec2036c04ece5 100644 (file)
@@ -129,6 +129,81 @@ std::ostream &operator<<(std::ostream &out, const paddr_t &rhs)
   return out << ">";
 }
 
+journal_seq_t journal_seq_t::add_offset(
+      journal_type_t type,
+      seastore_off_t off,
+      seastore_off_t roll_start,
+      seastore_off_t roll_size) const
+{
+  assert(offset.is_absolute());
+  assert(off != MIN_SEG_OFF);
+  assert(roll_start >= 0);
+  assert(roll_size > 0);
+
+  segment_seq_t jseq = segment_seq;
+  seastore_off_t joff;
+  if (type == journal_type_t::SEGMENTED) {
+    joff = offset.as_seg_paddr().get_segment_off();
+  } else {
+    assert(type == journal_type_t::CIRCULAR);
+    auto boff = offset.as_blk_paddr().get_block_off();
+    assert(boff <= MAX_SEG_OFF);
+    joff = boff;
+  }
+  auto roll_end = roll_start + roll_size;
+  assert(joff >= roll_start);
+  assert(joff <= roll_end);
+
+  if (off >= 0) {
+    jseq += (off / roll_size);
+    joff += (off % roll_size);
+    if (joff >= roll_end) {
+      ++jseq;
+      joff -= roll_size;
+    }
+  } else {
+    auto mod = static_cast<segment_seq_t>((-off) / roll_size);
+    joff -= ((-off) % roll_size);
+    if (joff < roll_start) {
+      ++mod;
+      joff += roll_size;
+    }
+    if (jseq >= mod) {
+      jseq -= mod;
+    } else {
+      return JOURNAL_SEQ_MIN;
+    }
+  }
+  assert(joff >= roll_start);
+  assert(joff < roll_end);
+  return journal_seq_t{jseq, make_block_relative_paddr(joff)};
+}
+
+seastore_off_t journal_seq_t::relative_to(
+      journal_type_t type,
+      const journal_seq_t& r,
+      seastore_off_t roll_start,
+      seastore_off_t roll_size) const
+{
+  assert(offset.is_absolute());
+  assert(r.offset.is_absolute());
+  assert(roll_start >= 0);
+  assert(roll_size > 0);
+
+  int64_t ret = static_cast<int64_t>(segment_seq) - r.segment_seq;
+  ret *= roll_size;
+  if (type == journal_type_t::SEGMENTED) {
+    ret += (static_cast<int64_t>(offset.as_seg_paddr().get_segment_off()) -
+            static_cast<int64_t>(r.offset.as_seg_paddr().get_segment_off()));
+  } else {
+    assert(type == journal_type_t::CIRCULAR);
+    ret += (static_cast<int64_t>(offset.as_blk_paddr().get_block_off()) -
+            static_cast<int64_t>(r.offset.as_blk_paddr().get_block_off()));
+  }
+  assert(ret <= MAX_SEG_OFF && ret > MIN_SEG_OFF);
+  return static_cast<seastore_off_t>(ret);
+}
+
 std::ostream &operator<<(std::ostream &out, const journal_seq_t &seq)
 {
   if (seq == JOURNAL_SEQ_NULL) {
index 3f8c0ce16a589f9e5c81cd30d2f1bd7b33304372..5962cdcfabf113c6c134848ab4fbd81417f8e570 100644 (file)
@@ -420,6 +420,8 @@ using seastore_off_t = int32_t;
 using u_seastore_off_t = uint32_t;
 constexpr seastore_off_t MAX_SEG_OFF =
   std::numeric_limits<seastore_off_t>::max();
+constexpr seastore_off_t MIN_SEG_OFF =
+  std::numeric_limits<seastore_off_t>::min();
 constexpr seastore_off_t NULL_SEG_OFF = MAX_SEG_OFF;
 constexpr auto SEGMENT_OFF_BITS = std::numeric_limits<u_seastore_off_t>::digits;
 
@@ -877,9 +879,18 @@ struct journal_seq_t {
   segment_seq_t segment_seq = NULL_SEG_SEQ;
   paddr_t offset = P_ADDR_NULL;
 
-  journal_seq_t add_offset(seastore_off_t o) const {
-    return {segment_seq, offset.add_offset(o)};
-  }
+  // produces a pseudo journal_seq_t relative to this by offset
+  journal_seq_t add_offset(
+      journal_type_t type,
+      seastore_off_t off,
+      seastore_off_t roll_start,
+      seastore_off_t roll_size) const;
+
+  seastore_off_t relative_to(
+      journal_type_t type,
+      const journal_seq_t& r,
+      seastore_off_t roll_start,
+      seastore_off_t roll_size) const;
 
   DENC(journal_seq_t, v, p) {
     DENC_START(1, 1, p);
@@ -1923,7 +1934,9 @@ struct write_result_t {
   seastore_off_t length;
 
   journal_seq_t get_end_seq() const {
-    return start_seq.add_offset(length);
+    return journal_seq_t{
+      start_seq.segment_seq,
+      start_seq.offset.add_offset(length)};
   }
 };
 std::ostream& operator<<(std::ostream&, const write_result_t&);
index da95f5351a417b2f4188dfb783f50cd3665b0ccb..ef644a0796668f3298ffd3e8e8dca2eb784f0c90 100644 (file)
@@ -7,6 +7,7 @@
 #include "crimson/os/seastore/logging.h"
 #include "crimson/os/seastore/transaction_manager.h"
 #include "crimson/os/seastore/journal.h"
+#include "crimson/os/seastore/journal/circular_bounded_journal.h"
 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
 #include "crimson/os/seastore/random_block_manager/rbm_device.h"
 
@@ -630,7 +631,11 @@ TransactionManagerRef make_transaction_manager(
   auto sms = std::make_unique<SegmentManagerGroup>();
   auto backref_manager = create_backref_manager(*cache);
 
-  if (primary_device->get_device_type() == device_type_t::SEGMENTED) {
+  auto p_device_type = primary_device->get_device_type();
+  ceph_assert(p_device_type == device_type_t::SEGMENTED ||
+              p_device_type == device_type_t::RANDOM_BLOCK);
+
+  if (p_device_type == device_type_t::SEGMENTED) {
     sms->add_segment_manager(static_cast<SegmentManager*>(primary_device));
   }
   for (auto &p_dev : secondary_devices) {
@@ -638,31 +643,52 @@ TransactionManagerRef make_transaction_manager(
     sms->add_segment_manager(static_cast<SegmentManager*>(p_dev));
   }
 
+  auto journal_type = (p_device_type == device_type_t::SEGMENTED ?
+                       journal_type_t::SEGMENTED : journal_type_t::CIRCULAR);
+  seastore_off_t roll_size;
+  seastore_off_t roll_start;
+  if (journal_type == journal_type_t::SEGMENTED) {
+    roll_size = static_cast<SegmentManager*>(primary_device)->get_segment_size();
+    roll_start = 0;
+  } else {
+    // FIXME: get from runtime configuration instead of static defaults
+    roll_size = journal::CircularBoundedJournal::mkfs_config_t
+                       ::get_default().total_size;
+    // see CircularBoundedJournal::get_start_addr()
+    roll_start = journal::CBJOURNAL_START_ADDRESS +
+                 primary_device->get_block_size();
+  }
+  ceph_assert(roll_size % primary_device->get_block_size() == 0);
+  ceph_assert(roll_start % primary_device->get_block_size() == 0);
+
   bool cleaner_is_detailed;
   AsyncCleaner::config_t cleaner_config;
   if (is_test) {
     cleaner_is_detailed = true;
-    cleaner_config = AsyncCleaner::config_t::get_test();
+    cleaner_config = AsyncCleaner::config_t::get_test(
+        roll_size, journal_type);
   } else {
     cleaner_is_detailed = false;
-    cleaner_config = AsyncCleaner::config_t::get_default();
+    cleaner_config = AsyncCleaner::config_t::get_default(
+        roll_size, journal_type);
   }
   auto async_cleaner = std::make_unique<AsyncCleaner>(
     cleaner_config,
     std::move(sms),
     *backref_manager,
-    cleaner_is_detailed);
+    cleaner_is_detailed,
+    journal_type,
+    roll_start,
+    roll_size);
 
-  if (primary_device->get_device_type() == device_type_t::SEGMENTED) {
+  if (journal_type == journal_type_t::SEGMENTED) {
     cache->set_segment_provider(*async_cleaner);
   }
 
-  auto p_device_type = primary_device->get_device_type();
   JournalRef journal;
-  if (p_device_type == device_type_t::SEGMENTED) {
+  if (journal_type == journal_type_t::SEGMENTED) {
     journal = journal::make_segmented(*async_cleaner, *async_cleaner);
   } else {
-    ceph_assert(p_device_type == device_type_t::RANDOM_BLOCK);
     journal = journal::make_circularbounded(
       *async_cleaner,
       static_cast<random_block_device::RBMDevice*>(primary_device),