git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/segment_cleaner: correct available space calculation 44141/head
author Xuehan Xu <xxhdx1985126@gmail.com>
Fri, 26 Nov 2021 06:39:06 +0000 (14:39 +0800)
committer Xuehan Xu <xxhdx1985126@gmail.com>
Tue, 14 Dec 2021 02:30:59 +0000 (10:30 +0800)
The current available-space calculation is wrong: it only counts the space
occupied by extents; deltas and other on-disk data are not taken into account.

Fixes: https://tracker.ceph.com/issues/53409
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/segment_cleaner.cc
src/crimson/os/seastore/segment_cleaner.h
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_seastore_journal.cc

index fde7de1c5504176ea184cb80483259c56570dc86..c0e6d0a38c843b554f3fbd62b0050401bec7730e 100644 (file)
@@ -73,6 +73,10 @@ SegmentedAllocator::Writer::_write(
 {
   auto record_size = record.get_encoded_record_length();
   allocated_to += record_size.get_encoded_length();
+  segment_provider.update_segment_avail_bytes(
+    paddr_t::make_seg_paddr(
+      current_segment->segment->get_segment_id(),
+      allocated_to));
   bufferlist bl = record.encode(
       current_segment->segment->get_segment_id(),
       0);
index b00bea374701f50c16b7dc4fb4ea9391f4efbac9..c3c803e425e88ae7dfb8dd415f52527e104073ea 100644 (file)
@@ -231,6 +231,15 @@ public:
     return device_to_segments[id.device_id()][id.device_segment_id()];
   }
 
+  bool contains(segment_id_t id) {
+    bool b = id.device_id() < device_to_segments.size();
+    if (!b) {
+      return b;
+    }
+    b = id.device_segment_id() < device_to_segments[id.device_id()].size();
+    return b;
+  }
+
   auto begin() {
     return iterator<false>::lower_bound(*this, 0, 0);
   }
index 5bb24d6ec670c64f830d90c2a45e2404288de47b..10c15849918d61f5a6ed1c32b16096ca8ce6fdf8 100644 (file)
@@ -193,7 +193,12 @@ void SegmentCleaner::register_metrics()
                    [this] {
                      return segments.get_available_bytes();
                    },
-                   sm::description("the size of the space not occupied"))
+                   sm::description("the size of the space not occupied")),
+    sm::make_derive("opened_segments",
+                   [this] {
+                     return segments.get_opened_segments();
+                   },
+                   sm::description("the number of segments whose state is open"))
   });
 }
 
index 47eef1046dac5b04742f0b8c217720fb754f7bb2..57cc54859e340d7348e14782a56248d0802d224f 100644 (file)
@@ -48,6 +48,7 @@ class segment_info_set_t {
     size_t empty_segments = 0;
     size_t size = 0;
     size_t avail_bytes = 0;
+    std::map<segment_id_t, segment_off_t> open_segment_avails;
   };
 
   struct segment_info_t {
@@ -110,6 +111,7 @@ public:
     total_bytes = 0;
     journal_segments = 0;
     avail_bytes = 0;
+    opened_segments = 0;
   }
 
   void add_segment_manager(SegmentManager& segment_manager)
@@ -158,12 +160,15 @@ public:
 
   // the following methods are used for keeping track of
   // seastore disk space usage
-  void segment_used(segment_id_t segment, bool full_unavail = false) {
+  void segment_opened(segment_id_t segment) {
     auto& sm_info = sm_infos[segment.device_id()];
     sm_info->empty_segments--;
-    if (full_unavail)
-      sm_info->avail_bytes -= sm_info->segment_size;
-    avail_bytes -= sm_info->segment_size;
+    ceph_assert(segments[segment].is_empty());
+    // must be opening a new segment
+    auto [iter, inserted] = sm_info->open_segment_avails.emplace(
+      segment, sm_info->segment_size);
+    opened_segments++;
+    ceph_assert(inserted);
   }
   void segment_emptied(segment_id_t segment) {
     auto& sm_info = sm_infos[segment.device_id()];
@@ -171,14 +176,65 @@ public:
     sm_info->avail_bytes += sm_info->segment_size;
     avail_bytes += sm_info->segment_size;
   }
-  void space_used(paddr_t addr, extent_len_t len) {
-    auto& sm_info = sm_infos[addr.get_device_id()];
-    sm_info->avail_bytes -= len;
-    avail_bytes -= len;
+  void segment_closed(segment_id_t segment) {
+    assert(segments.contains(segment));
+    auto& segment_info = segments[segment];
+    auto& sm_info = sm_infos[segment.device_id()];
+    if (segment_info.is_open()) {
+      auto iter = sm_info->open_segment_avails.find(segment);
+      ceph_assert(iter != sm_info->open_segment_avails.end());
+      assert(sm_info->avail_bytes >= (size_t)iter->second);
+      assert(avail_bytes >= (size_t)iter->second);
+      sm_info->avail_bytes -= iter->second;
+      avail_bytes -= iter->second;
+      sm_info->open_segment_avails.erase(iter);
+      opened_segments--;
+    } else {
+      ceph_assert(segment_info.is_empty());
+      assert(sm_info->avail_bytes >= (size_t)sm_info->segment_size);
+      assert(avail_bytes >= (size_t)sm_info->segment_size);
+      assert(sm_info->empty_segments > 0);
+      sm_info->avail_bytes -= sm_info->segment_size;
+      avail_bytes -= sm_info->segment_size;
+      sm_info->empty_segments--;
+    }
+    segment_info.set_closed();
+  }
+  void update_segment_avail_bytes(paddr_t offset) {
+    auto segment_id = offset.as_seg_paddr().get_segment_id();
+    auto& sm_info = sm_infos[segment_id.device_id()];
+    auto iter = sm_info->open_segment_avails.find(segment_id);
+    if (iter == sm_info->open_segment_avails.end()) {
+      crimson::get_logger(ceph_subsys_seastore).error(
+       "SegmentCleaner::update_segment_avail_bytes:"
+       ":segment closed {}, not updating",
+       offset);
+      return;
+    }
+    auto new_avail_bytes = sm_info->segment_size - offset.as_seg_paddr().get_segment_off();
+    if (iter->second < new_avail_bytes) {
+      crimson::get_logger(ceph_subsys_seastore).error(
+       "SegmentCleaner::update_segment_avail_bytes:"
+       " avail_bytes increased? , {}, {}",
+       iter->second,
+       new_avail_bytes);
+      ceph_assert(iter->second >= new_avail_bytes);
+    }
+    assert(sm_info->avail_bytes >= (size_t)(iter->second - new_avail_bytes));
+    assert(avail_bytes >= (size_t)(iter->second - new_avail_bytes));
+    sm_info->avail_bytes -= iter->second - new_avail_bytes;
+    avail_bytes -= iter->second - new_avail_bytes;
+    iter->second = new_avail_bytes;
   }
   size_t get_empty_segments(device_id_t d_id) {
     return sm_infos[d_id]->empty_segments;
   }
+  size_t get_opened_segments(device_id_t d_id) {
+    return sm_infos[d_id]->open_segment_avails.size();
+  }
+  size_t get_opened_segments() {
+    return opened_segments;
+  }
   size_t get_total_bytes() const {
     return total_bytes;
   }
@@ -205,6 +261,7 @@ private:
   device_segment_id_t journal_segments = 0;
   size_t total_bytes = 0;
   size_t avail_bytes = 0;
+  size_t opened_segments = 0;
 
   friend class SegmentCleaner;
 };
@@ -235,6 +292,8 @@ public:
 
   virtual segment_seq_t get_seq(segment_id_t id) { return 0; }
 
+  virtual void update_segment_avail_bytes(paddr_t offset) = 0;
+
   virtual ~SegmentProvider() {}
 };
 
@@ -702,9 +761,14 @@ public:
   void set_journal_head(journal_seq_t head) {
     assert(journal_head == journal_seq_t() || head >= journal_head);
     journal_head = head;
+    segments.update_segment_avail_bytes(head.offset);
     gc_process.maybe_wake_on_space_used();
   }
 
+  void update_segment_avail_bytes(paddr_t offset) final {
+    segments.update_segment_avail_bytes(offset);
+  }
+
   journal_seq_t get_journal_head() const {
     return journal_head;
   }
@@ -754,7 +818,6 @@ public:
       seg_addr.get_segment_id(),
       seg_addr.get_segment_off(),
       len);
-    segments.space_used(addr, len);
     gc_process.maybe_wake_on_space_used();
     assert(ret > 0);
   }
@@ -1237,14 +1300,20 @@ private:
     } else {
       assert(segments[segment].is_empty());
       assert(segments.get_empty_segments(segment.device_id()) > 0);
-      segments.segment_used(segment, true);
+      assert(stats.empty_segments > 0);
       stats.empty_segments--;
     }
-    crimson::get_logger(ceph_subsys_seastore).debug(
-      "mark_closed: device: {} empty_segments: {}",
-      segment.device_id(),
-      segments.get_empty_segments(segment.device_id()));
-    segments[segment].set_closed();
+    segments.segment_closed(segment);
+    crimson::get_logger(ceph_subsys_seastore).info(
+      "mark closed: {} empty_segments: {}"
+      ", opened_segments {}, should_block_on_gc {}"
+      ", projected_avail_ratio {}, projected_reclaim_ratio {}",
+      segment,
+      segments.get_empty_segments(segment.device_id()),
+      segments.get_opened_segments(),
+      should_block_on_gc(),
+      get_projected_available_ratio(),
+      get_projected_reclaim_ratio());
   }
 
   void mark_empty(segment_id_t segment) {
@@ -1262,7 +1331,15 @@ private:
     segment_info.set_empty();
     stats.empty_segments++;
     crimson::get_logger(ceph_subsys_seastore
-      ).info("mark empty: {}, empty_segments {}", segment, stats.empty_segments);
+      ).info("mark empty: {}, empty_segments {}"
+       ", opened_segments {}, should_block_on_gc {}"
+       ", projected_avail_ratio {}, projected_reclaim_ratio {}",
+       segment,
+       stats.empty_segments,
+       segments.get_opened_segments(),
+       should_block_on_gc(),
+       get_projected_available_ratio(),
+       get_projected_reclaim_ratio());
     if (!segment_info.out_of_line) {
       segments.journal_segment_emptied();
     }
@@ -1276,11 +1353,20 @@ private:
       segments[segment.device_id()]->num_segments);
     assert(segments[segment].is_empty());
     assert(segments.get_empty_segments(segment.device_id()) > 0);
-    segments.segment_used(segment);
+    segments.segment_opened(segment);
     segments[segment].set_open();
+    assert(stats.empty_segments > 0);
     stats.empty_segments--;
     crimson::get_logger(ceph_subsys_seastore
-      ).info("mark open: {}, empty_segments {}", segment, stats.empty_segments);
+      ).info("mark open: {}, empty_segments {}"
+       ", opened_segments {}, should_block_on_gc {}"
+       ", projected_avail_ratio {}, projected_reclaim_ratio {}",
+       segment,
+       stats.empty_segments,
+       segments.get_opened_segments(),
+       should_block_on_gc(),
+       get_projected_available_ratio(),
+       get_projected_reclaim_ratio());
   }
 };
 using SegmentCleanerRef = std::unique_ptr<SegmentCleaner>;
index f54bac24506da9336b4f6df19c89502601b6a79d..19c36edb492c78539ac6120fdeff87c0ad5df23d 100644 (file)
@@ -40,6 +40,8 @@ struct btree_test_base :
 
   btree_test_base() = default;
 
+  void update_segment_avail_bytes(paddr_t offset) final {}
+
   get_segment_ret get_segment(device_id_t id) final {
     auto ret = next;
     next = segment_id_t{
index 0186a59c405c17882f94dd2f56ff9acd8b0edcd5..eaeb8cd2d0cf23f996f5b76656f9c78097ebc835 100644 (file)
@@ -82,6 +82,8 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
 
   journal_test_t() = default;
 
+  void update_segment_avail_bytes(paddr_t offset) final {}
+
   get_segment_ret get_segment(device_id_t id) final {
     auto ret = next;
     next = segment_id_t{