From 6d142533ae85598e23d02ad960e7e5f8d7bbe332 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Fri, 26 Nov 2021 14:39:06 +0800 Subject: [PATCH] crimson/os/seastore/segment_cleaner: correct available space calculation Current available space calculation is wrong, it just counts the space occupied by extents, deltas and other stuff are not taken into account. Fixes: https://tracker.ceph.com/issues/53409 Signed-off-by: Xuehan Xu --- .../os/seastore/extent_placement_manager.cc | 4 + src/crimson/os/seastore/seastore_types.h | 9 ++ src/crimson/os/seastore/segment_cleaner.cc | 7 +- src/crimson/os/seastore/segment_cleaner.h | 122 +++++++++++++++--- .../seastore/test_btree_lba_manager.cc | 2 + .../crimson/seastore/test_seastore_journal.cc | 2 + 6 files changed, 127 insertions(+), 19 deletions(-) diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index fde7de1c55041..c0e6d0a38c843 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -73,6 +73,10 @@ SegmentedAllocator::Writer::_write( { auto record_size = record.get_encoded_record_length(); allocated_to += record_size.get_encoded_length(); + segment_provider.update_segment_avail_bytes( + paddr_t::make_seg_paddr( + current_segment->segment->get_segment_id(), + allocated_to)); bufferlist bl = record.encode( current_segment->segment->get_segment_id(), 0); diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index b00bea374701f..c3c803e425e88 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -231,6 +231,15 @@ public: return device_to_segments[id.device_id()][id.device_segment_id()]; } + bool contains(segment_id_t id) { + bool b = id.device_id() < device_to_segments.size(); + if (!b) { + return b; + } + b = id.device_segment_id() < device_to_segments[id.device_id()].size(); + return b; + } + auto begin() { return iterator::lower_bound(*this, 0, 0); } diff --git a/src/crimson/os/seastore/segment_cleaner.cc b/src/crimson/os/seastore/segment_cleaner.cc index 5bb24d6ec670c..10c15849918d6 100644 --- a/src/crimson/os/seastore/segment_cleaner.cc +++ b/src/crimson/os/seastore/segment_cleaner.cc @@ -193,7 +193,12 @@ void SegmentCleaner::register_metrics() [this] { return segments.get_available_bytes(); }, - sm::description("the size of the space not occupied")) + sm::description("the size of the space not occupied")), + sm::make_derive("opened_segments", + [this] { + return segments.get_opened_segments(); + }, + sm::description("the number of segments whose state is open")) }); } diff --git a/src/crimson/os/seastore/segment_cleaner.h b/src/crimson/os/seastore/segment_cleaner.h index 47eef1046dac5..57cc54859e340 100644 --- a/src/crimson/os/seastore/segment_cleaner.h +++ b/src/crimson/os/seastore/segment_cleaner.h @@ -48,6 +48,7 @@ class segment_info_set_t { size_t empty_segments = 0; size_t size = 0; size_t avail_bytes = 0; + std::map open_segment_avails; }; struct segment_info_t { @@ -110,6 +111,7 @@ public: total_bytes = 0; journal_segments = 0; avail_bytes = 0; + opened_segments = 0; } void add_segment_manager(SegmentManager& segment_manager) @@ -158,12 +160,15 @@ public: // the following methods are used for keeping track of // seastore disk space usage - void segment_used(segment_id_t segment, bool full_unavail = false) { + void segment_opened(segment_id_t segment) { auto& sm_info = sm_infos[segment.device_id()]; sm_info->empty_segments--; - if (full_unavail) - sm_info->avail_bytes -= sm_info->segment_size; - avail_bytes -= sm_info->segment_size; + ceph_assert(segments[segment].is_empty()); + // must be opening a new segment + auto [iter, inserted] = sm_info->open_segment_avails.emplace( + segment, sm_info->segment_size); + opened_segments++; + ceph_assert(inserted); } void segment_emptied(segment_id_t segment) { auto& sm_info = sm_infos[segment.device_id()]; @@ -171,14 +176,65 @@ public: sm_info->avail_bytes += sm_info->segment_size; avail_bytes += sm_info->segment_size; } - void space_used(paddr_t addr, extent_len_t len) { - auto& sm_info = sm_infos[addr.get_device_id()]; - sm_info->avail_bytes -= len; - avail_bytes -= len; + void segment_closed(segment_id_t segment) { + assert(segments.contains(segment)); + auto& segment_info = segments[segment]; + auto& sm_info = sm_infos[segment.device_id()]; + if (segment_info.is_open()) { + auto iter = sm_info->open_segment_avails.find(segment); + ceph_assert(iter != sm_info->open_segment_avails.end()); + assert(sm_info->avail_bytes >= (size_t)iter->second); + assert(avail_bytes >= (size_t)iter->second); + sm_info->avail_bytes -= iter->second; + avail_bytes -= iter->second; + sm_info->open_segment_avails.erase(iter); + opened_segments--; + } else { + ceph_assert(segment_info.is_empty()); + assert(sm_info->avail_bytes >= (size_t)sm_info->segment_size); + assert(avail_bytes >= (size_t)sm_info->segment_size); + assert(sm_info->empty_segments > 0); + sm_info->avail_bytes -= sm_info->segment_size; + avail_bytes -= sm_info->segment_size; + sm_info->empty_segments--; + } + segment_info.set_closed(); + } + void update_segment_avail_bytes(paddr_t offset) { + auto segment_id = offset.as_seg_paddr().get_segment_id(); + auto& sm_info = sm_infos[segment_id.device_id()]; + auto iter = sm_info->open_segment_avails.find(segment_id); + if (iter == sm_info->open_segment_avails.end()) { + crimson::get_logger(ceph_subsys_seastore).error( + "SegmentCleaner::update_segment_avail_bytes:" + ":segment closed {}, not updating", + offset); + return; + } + auto new_avail_bytes = sm_info->segment_size - offset.as_seg_paddr().get_segment_off(); + if (iter->second < new_avail_bytes) { + crimson::get_logger(ceph_subsys_seastore).error( + "SegmentCleaner::update_segment_avail_bytes:" + " avail_bytes increased? , {}, {}", + iter->second, + new_avail_bytes); + ceph_assert(iter->second >= new_avail_bytes); + } + assert(sm_info->avail_bytes >= (size_t)(iter->second - new_avail_bytes)); + assert(avail_bytes >= (size_t)(iter->second - new_avail_bytes)); + sm_info->avail_bytes -= iter->second - new_avail_bytes; + avail_bytes -= iter->second - new_avail_bytes; + iter->second = new_avail_bytes; } size_t get_empty_segments(device_id_t d_id) { return sm_infos[d_id]->empty_segments; } + size_t get_opened_segments(device_id_t d_id) { + return sm_infos[d_id]->open_segment_avails.size(); + } + size_t get_opened_segments() { + return opened_segments; + } size_t get_total_bytes() const { return total_bytes; } @@ -205,6 +261,7 @@ private: device_segment_id_t journal_segments = 0; size_t total_bytes = 0; size_t avail_bytes = 0; + size_t opened_segments = 0; friend class SegmentCleaner; }; @@ -235,6 +292,8 @@ public: virtual segment_seq_t get_seq(segment_id_t id) { return 0; } + virtual void update_segment_avail_bytes(paddr_t offset) = 0; + virtual ~SegmentProvider() {} }; @@ -702,9 +761,14 @@ public: void set_journal_head(journal_seq_t head) { assert(journal_head == journal_seq_t() || head >= journal_head); journal_head = head; + segments.update_segment_avail_bytes(head.offset); gc_process.maybe_wake_on_space_used(); } + void update_segment_avail_bytes(paddr_t offset) final { + segments.update_segment_avail_bytes(offset); + } + journal_seq_t get_journal_head() const { return journal_head; } @@ -754,7 +818,6 @@ public: seg_addr.get_segment_id(), seg_addr.get_segment_off(), len); - segments.space_used(addr, len); gc_process.maybe_wake_on_space_used(); assert(ret > 0); } @@ -1237,14 +1300,20 @@ private: } else { assert(segments[segment].is_empty()); assert(segments.get_empty_segments(segment.device_id()) > 0); - segments.segment_used(segment, true); + assert(stats.empty_segments > 0); stats.empty_segments--; } - crimson::get_logger(ceph_subsys_seastore).debug( - "mark_closed: device: {} empty_segments: {}", - segment.device_id(), - segments.get_empty_segments(segment.device_id())); - segments[segment].set_closed(); + segments.segment_closed(segment); + crimson::get_logger(ceph_subsys_seastore).info( + "mark closed: {} empty_segments: {}" + ", opened_segments {}, should_block_on_gc {}" + ", projected_avail_ratio {}, projected_reclaim_ratio {}", + segment, + segments.get_empty_segments(segment.device_id()), + segments.get_opened_segments(), + should_block_on_gc(), + get_projected_available_ratio(), + get_projected_reclaim_ratio()); } void mark_empty(segment_id_t segment) { @@ -1262,7 +1331,15 @@ private: segment_info.set_empty(); stats.empty_segments++; crimson::get_logger(ceph_subsys_seastore - ).info("mark empty: {}, empty_segments {}", segment, stats.empty_segments); + ).info("mark empty: {}, empty_segments {}" + ", opened_segments {}, should_block_on_gc {}" + ", projected_avail_ratio {}, projected_reclaim_ratio {}", + segment, + stats.empty_segments, + segments.get_opened_segments(), + should_block_on_gc(), + get_projected_available_ratio(), + get_projected_reclaim_ratio()); if (!segment_info.out_of_line) { segments.journal_segment_emptied(); } @@ -1276,11 +1353,20 @@ private: segments[segment.device_id()]->num_segments); assert(segments[segment].is_empty()); assert(segments.get_empty_segments(segment.device_id()) > 0); - segments.segment_used(segment); + segments.segment_opened(segment); segments[segment].set_open(); + assert(stats.empty_segments > 0); stats.empty_segments--; crimson::get_logger(ceph_subsys_seastore - ).info("mark open: {}, empty_segments {}", segment, stats.empty_segments); + ).info("mark open: {}, empty_segments {}" + ", opened_segments {}, should_block_on_gc {}" + ", projected_avail_ratio {}, projected_reclaim_ratio {}", + segment, + stats.empty_segments, + segments.get_opened_segments(), + should_block_on_gc(), + get_projected_available_ratio(), + get_projected_reclaim_ratio()); } }; using SegmentCleanerRef = std::unique_ptr; diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index f54bac24506da..19c36edb492c7 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -40,6 +40,8 @@ struct btree_test_base : btree_test_base() = default; + void update_segment_avail_bytes(paddr_t offset) final {} + get_segment_ret get_segment(device_id_t id) final { auto ret = next; next = segment_id_t{ diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc index 0186a59c405c1..eaeb8cd2d0cf2 100644 --- a/src/test/crimson/seastore/test_seastore_journal.cc +++ b/src/test/crimson/seastore/test_seastore_journal.cc @@ -82,6 +82,8 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider { journal_test_t() = default; + void update_segment_avail_bytes(paddr_t offset) final {} + get_segment_ret get_segment(device_id_t id) final { auto ret = next; next = segment_id_t{ -- 2.39.5