From: Yingxin Cheng Date: Tue, 19 Apr 2022 07:38:41 +0000 (+0800) Subject: crimson/os/seastore/segment_cleaner: cleanup segment_info_set_t X-Git-Tag: v18.0.0~1018^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=6383581e184131fa1aab8b526e001bf5a4b8d164;p=ceph-ci.git crimson/os/seastore/segment_cleaner: cleanup segment_info_set_t * better encapsulated interfaces to SegmentCleaner; * drop unused device related implementations; * improve related metrics and logs; Signed-off-by: Yingxin Cheng --- diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index ab3a7f1a12e..2a70f1fd34f 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -244,9 +244,10 @@ public: // are not yet present device_to_segments.resize(DEVICE_ID_MAX_VALID); } - void add_device(device_id_t device, size_t segments, const T& init) { - assert(device <= DEVICE_ID_MAX_VALID); - assert(device_to_segments[device].size() == 0); + void add_device(device_id_t device, std::size_t segments, const T& init) { + ceph_assert(device <= DEVICE_ID_MAX_VALID); + ceph_assert(device_to_segments[device].size() == 0); + ceph_assert(segments > 0); device_to_segments[device].resize(segments, init); total_segments += segments; } diff --git a/src/crimson/os/seastore/segment_cleaner.cc b/src/crimson/os/seastore/segment_cleaner.cc index 3eceee2bffc..4bac8394c9c 100644 --- a/src/crimson/os/seastore/segment_cleaner.cc +++ b/src/crimson/os/seastore/segment_cleaner.cc @@ -19,21 +19,218 @@ SET_SUBSYS(seastore_cleaner); namespace crimson::os::seastore { -void segment_info_set_t::segment_info_t::set_open(segment_seq_t seq) { - assert(state == Segment::segment_state_t::EMPTY); - assert(seq != NULL_SEG_SEQ); +void segment_info_t::set_open( + segment_seq_t _seq, segment_type_t _type, std::size_t seg_size) +{ + ceph_assert(_seq != NULL_SEG_SEQ); + ceph_assert(_type != segment_type_t::NULL_SEG); + ceph_assert(seg_size > 0); 
state = Segment::segment_state_t::OPEN; - journal_segment_seq = seq; + seq = _seq; + type = _type; + open_avail_bytes = seg_size; } -void segment_info_set_t::segment_info_t::set_empty() { - assert(state == Segment::segment_state_t::CLOSED); +void segment_info_t::set_empty() +{ state = Segment::segment_state_t::EMPTY; - journal_segment_seq = NULL_SEG_SEQ; + seq = NULL_SEG_SEQ; + type = segment_type_t::NULL_SEG; + last_modified = {}; + last_rewritten = {}; + open_avail_bytes = 0; +} + +void segment_info_t::set_closed() +{ + state = Segment::segment_state_t::CLOSED; + // the rest of information is unchanged } -void segment_info_set_t::segment_info_t::set_closed() { +void segment_info_t::init_closed( + segment_seq_t _seq, segment_type_t _type) +{ + ceph_assert(_seq != NULL_SEG_SEQ); + ceph_assert(_type != segment_type_t::NULL_SEG); state = Segment::segment_state_t::CLOSED; + seq = _seq; + type = _type; + open_avail_bytes = 0; +} + +std::ostream& operator<<(std::ostream &out, const segment_info_t &info) +{ + out << "seg_info_t(" + << "state=" << info.state; + if (info.is_empty()) { + // pass + } else { // open or closed + out << ", seq=" << segment_seq_printer_t{info.seq} + << ", type=" << info.type + << ", last_modified=" << info.last_modified.time_since_epoch() + << ", last_rewritten=" << info.last_rewritten.time_since_epoch() + << ", open_avail_bytes=" << info.open_avail_bytes; + } + return out << ")"; +} + +void segments_info_t::reset() +{ + segments.clear(); + + segment_size = 0; + + num_in_journal = 0; + num_open = 0; + num_empty = 0; + num_closed = 0; + + count_open = 0; + count_release = 0; + count_close = 0; + + total_bytes = 0; + avail_bytes = 0; +} + +void segments_info_t::add_segment_manager( + SegmentManager &segment_manager) +{ + LOG_PREFIX(segments_info_t::add_segment_manager); + device_id_t d_id = segment_manager.get_device_id(); + auto ssize = segment_manager.get_segment_size(); + auto nsegments = segment_manager.get_num_segments(); + auto sm_size = 
segment_manager.get_size(); + INFO("adding segment manager {}, size={}, ssize={}, segments={}", + device_id_printer_t{d_id}, sm_size, ssize, nsegments); + ceph_assert(ssize > 0); + ceph_assert(nsegments > 0); + ceph_assert(sm_size > 0); + + // also validate if the device is duplicated + segments.add_device(d_id, nsegments, segment_info_t{}); + + // assume all the segment managers share the same settings as follows. + if (segment_size == 0) { + ceph_assert(ssize > 0); + segment_size = ssize; + } else { + ceph_assert(segment_size == (std::size_t)ssize); + } + + // NOTE: by default the segments are empty + num_empty += nsegments; + + total_bytes += sm_size; + avail_bytes += sm_size; +} + +void segments_info_t::init_closed( + segment_id_t segment, segment_seq_t seq, segment_type_t type) +{ + LOG_PREFIX(segments_info_t::init_closed); + auto& segment_info = segments[segment]; + INFO("initiating {} {} {}, {}, num_segments(empty={}, opened={}, closed={})", + segment, segment_seq_printer_t{seq}, type, + segment_info, num_empty, num_open, num_closed); + ceph_assert(segment_info.is_empty()); + segment_info.init_closed(seq, type); + ceph_assert(num_empty > 0); + --num_empty; + ++num_closed; + if (type == segment_type_t::JOURNAL) { + ++num_in_journal; + } + ceph_assert(avail_bytes >= get_segment_size()); + avail_bytes -= get_segment_size(); + // do not increment count_close; +} + +void segments_info_t::mark_open( + segment_id_t segment, segment_seq_t seq, segment_type_t type) +{ + LOG_PREFIX(segments_info_t::mark_open); + auto& segment_info = segments[segment]; + INFO("opening {} {} {}, {}, num_segments(empty={}, opened={}, closed={})", + segment, segment_seq_printer_t{seq}, type, + segment_info, num_empty, num_open, num_closed); + ceph_assert(segment_info.is_empty()); + segment_info.set_open(seq, type, get_segment_size()); + ceph_assert(num_empty > 0); + --num_empty; + ++num_open; + if (type == segment_type_t::JOURNAL) { + ++num_in_journal; + } + ++count_open; +} + +void 
segments_info_t::mark_empty( + segment_id_t segment) +{ + LOG_PREFIX(segments_info_t::mark_empty); + auto& segment_info = segments[segment]; + INFO("releasing {}, {}, num_segments(empty={}, opened={}, closed={})", + segment, segment_info, + num_empty, num_open, num_closed); + ceph_assert(segment_info.is_closed()); + auto type = segment_info.type; + assert(type != segment_type_t::NULL_SEG); + segment_info.set_empty(); + ceph_assert(num_closed > 0); + --num_closed; + ++num_empty; + if (type == segment_type_t::JOURNAL) { + ceph_assert(num_in_journal > 0); + --num_in_journal; + } + avail_bytes += get_segment_size(); + ++count_release; +} + +void segments_info_t::mark_closed( + segment_id_t segment) +{ + LOG_PREFIX(segments_info_t::mark_closed); + auto& segment_info = segments[segment]; + INFO("closing {}, {}, num_segments(empty={}, opened={}, closed={})", + segment, segment_info, + num_empty, num_open, num_closed); + ceph_assert(segment_info.is_open()); + segment_info.set_closed(); + ceph_assert(num_open > 0); + --num_open; + ++num_closed; + ceph_assert(avail_bytes >= segment_info.open_avail_bytes); + avail_bytes -= segment_info.open_avail_bytes; + ++count_close; +} + +void segments_info_t::update_written_to( + paddr_t offset) +{ + LOG_PREFIX(segments_info_t::update_written_to); + auto& saddr = offset.as_seg_paddr(); + auto& segment_info = segments[saddr.get_segment_id()]; + if (!segment_info.is_open()) { + ERROR("segment is not open, not updating, offset={}, {}", + offset, segment_info); + // FIXME + return; + } + + auto new_avail = get_segment_size() - saddr.get_segment_off(); + if (segment_info.open_avail_bytes < new_avail) { + ERROR("open_avail_bytes should not increase! 
offset={}, {}", + offset, segment_info); + ceph_abort(); + } + + DEBUG("offset={}, {}", offset, segment_info); + auto avail_deduction = segment_info.open_avail_bytes - new_avail; + ceph_assert(avail_bytes >= avail_deduction); + avail_bytes -= avail_deduction; + segment_info.open_avail_bytes = new_avail; } bool SpaceTrackerSimple::equals(const SpaceTrackerI &_other) const @@ -195,40 +392,58 @@ void SegmentCleaner::register_metrics() for (int i = 0; i < 11; i++) { stats.segment_util.buckets[i].upper_bound = ((double)(i + 1)) / 10; if (!i) { - stats.segment_util.buckets[i].count = segments.num_segments(); + stats.segment_util.buckets[i].count = segments.get_num_segments(); } else { stats.segment_util.buckets[i].count = 0; } } metrics.add_group("segment_cleaner", { - sm::make_counter("segments_released", stats.segments_released, - sm::description("total number of extents released by SegmentCleaner")), + sm::make_derive("segments_number", + [this] { return segments.get_num_segments(); }, + sm::description("the number of segments")), + sm::make_derive("segment_size", + [this] { return segments.get_segment_size(); }, + sm::description("the bytes of a segment")), + sm::make_derive("segments_in_journal", + [this] { return segments.get_num_in_journal(); }, + sm::description("the number of segments in journal")), + sm::make_derive("segments_open", + [this] { return segments.get_num_open(); }, + sm::description("the number of open segments")), + sm::make_derive("segments_empty", + [this] { return segments.get_num_empty(); }, + sm::description("the number of empty segments")), + sm::make_derive("segments_closed", + [this] { return segments.get_num_closed(); }, + sm::description("the number of closed segments")), + sm::make_derive("segments_count_open", + [this] { return segments.get_count_open(); }, + sm::description("the count of open segment operations")), + sm::make_derive("segments_count_release", + [this] { return segments.get_count_release(); }, + sm::description("the 
count of release segment operations")), + sm::make_derive("segments_count_close", + [this] { return segments.get_count_close(); }, + sm::description("the count of close segment operations")), + sm::make_derive("total_bytes", + [this] { return segments.get_total_bytes(); }, + sm::description("the size of the space")), + sm::make_derive("available_bytes", + [this] { return segments.get_available_bytes(); }, + sm::description("the size of the space is available")), + sm::make_counter("accumulated_blocked_ios", stats.accumulated_blocked_ios, sm::description("accumulated total number of ios that were blocked by gc")), - sm::make_counter("reclaimed_segments", stats.reclaimed_segments, - sm::description("reclaimed segments")), sm::make_counter("reclaim_rewrite_bytes", stats.reclaim_rewrite_bytes, sm::description("rewritten bytes due to reclaim")), sm::make_counter("reclaiming_bytes", stats.reclaiming_bytes, sm::description("bytes being reclaimed")), - sm::make_derive("empty_segments", stats.empty_segments, - sm::description("current empty segments")), sm::make_derive("ios_blocking", stats.ios_blocking, sm::description("IOs that are blocking on space usage")), sm::make_derive("used_bytes", stats.used_bytes, sm::description("the size of the space occupied by live extents")), sm::make_derive("projected_used_bytes", stats.projected_used_bytes, sm::description("the size of the space going to be occupied by new extents")), - sm::make_derive("avail_bytes", - [this] { - return segments.get_available_bytes(); - }, - sm::description("the size of the space not occupied")), - sm::make_derive("opened_segments", - [this] { - return segments.get_opened_segments(); - }, - sm::description("the number of segments whose state is open")), sm::make_histogram("segment_utilization_distribution", [this]() -> seastar::metrics::histogram& { return stats.segment_util; @@ -249,8 +464,12 @@ segment_id_t SegmentCleaner::get_segment( auto seg_id = it->first; auto& segment_info = it->second; if 
(segment_info.is_empty()) { - DEBUG("returning segment {} {}", seg_id, segment_seq_printer_t{seq}); - mark_open(seg_id, seq, type); + segments.mark_open(seg_id, seq, type); + INFO("opened, should_block_on_gc {}, projected_avail_ratio {}, " + "projected_reclaim_ratio {}", + should_block_on_gc(), + get_projected_available_ratio(), + get_projected_reclaim_ratio()); return seg_id; } } @@ -296,8 +515,14 @@ void SegmentCleaner::update_journal_tail_committed(journal_seq_t committed) void SegmentCleaner::close_segment(segment_id_t segment) { - ceph_assert(segments[segment].journal_segment_seq != NULL_SEG_SEQ); - mark_closed(segment); + LOG_PREFIX(SegmentCleaner::close_segment); + ceph_assert(init_complete); + segments.mark_closed(segment); + INFO("closed, should_block_on_gc {}, projected_avail_ratio {}, " + "projected_reclaim_ratio {}", + should_block_on_gc(), + get_projected_available_ratio(), + get_projected_reclaim_ratio()); } SegmentCleaner::rewrite_dirty_ret SegmentCleaner::rewrite_dirty( @@ -448,20 +673,13 @@ SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space() assert(info.last_modified); assert(commit_type == record_commit_type_t::MODIFY || commit_type == record_commit_type_t::REWRITE); - if (ext->get_last_modified() == - seastar::lowres_system_clock::time_point()) { - assert(ext->get_last_rewritten() == - seastar::lowres_system_clock::time_point()); - ext->set_last_modified( - seastar::lowres_system_clock::duration( - info.last_modified)); + if (ext->get_last_modified() == time_point()) { + assert(ext->get_last_rewritten() == time_point()); + ext->set_last_modified(duration(info.last_modified)); } if (commit_type == record_commit_type_t::REWRITE - && ext->get_last_rewritten() == - seastar::lowres_system_clock::time_point()) { - ext->set_last_rewritten( - seastar::lowres_system_clock::duration( - commit_time)); + && ext->get_last_rewritten() == time_point()) { + ext->set_last_rewritten(duration(commit_time)); } assert( @@ -494,7 +712,6 @@ 
SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space() if (scan_cursor->is_complete()) { stats.reclaim_rewrite_bytes += stats.reclaiming_bytes; stats.reclaiming_bytes = 0; - stats.reclaimed_segments++; scan_cursor.reset(); } }); @@ -518,15 +735,14 @@ SegmentCleaner::mount_ret SegmentCleaner::mount() (SpaceTrackerI*)new SpaceTrackerSimple( sms)); - segments.clear(); + segments.reset(); for (auto sm : sms) { segments.add_segment_manager(*sm); - stats.empty_segments += sm->get_num_segments(); } metrics.clear(); register_metrics(); - logger().info("SegmentCleaner::mount: {} segments", segments.size()); + logger().info("SegmentCleaner::mount: {} segments", segments.get_num_segments()); return seastar::do_with( std::vector>(), [this](auto& segment_set) { @@ -555,16 +771,10 @@ SegmentCleaner::mount_ret SegmentCleaner::mount() if (tail.segment_nonce != header.segment_nonce) { return scan_nonfull_segment(header, segment_set, segment_id); } - seastar::lowres_system_clock::time_point last_modified( - seastar::lowres_system_clock::duration(tail.last_modified)); - seastar::lowres_system_clock::time_point last_rewritten( - seastar::lowres_system_clock::duration(tail.last_rewritten)); - if (segments[segment_id].last_modified < last_modified) { - segments[segment_id].last_modified = last_modified; - } - if (segments[segment_id].last_rewritten < last_rewritten) { - segments[segment_id].last_rewritten = last_rewritten; - } + time_point last_modified(duration(tail.last_modified)); + time_point last_rewritten(duration(tail.last_rewritten)); + segments.update_last_modified_rewritten( + segment_id, last_modified, last_rewritten); init_mark_segment_closed( segment_id, header.segment_seq, @@ -602,7 +812,7 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment( segment_id); return seastar::do_with( scan_valid_records_cursor({ - segments[segment_id].journal_segment_seq, + segments[segment_id].seq, paddr_t::make_seg_paddr(segment_id, 0)}), [this, 
segment_id, header](auto& cursor) { return seastar::do_with( @@ -628,17 +838,14 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment( ctime); ceph_abort("0 commit_time"); } - seastar::lowres_system_clock::time_point commit_time{ - seastar::lowres_system_clock::duration(ctime)}; + time_point commit_time{duration(ctime)}; assert(commit_type == record_commit_type_t::MODIFY || commit_type == record_commit_type_t::REWRITE); - if (commit_type == record_commit_type_t::MODIFY - && this->segments[segment_id].last_modified < commit_time) { - this->segments[segment_id].last_modified = commit_time; + if (commit_type == record_commit_type_t::MODIFY) { + segments.update_last_modified_rewritten(segment_id, commit_time, {}); } - if (commit_type == record_commit_type_t::REWRITE - && this->segments[segment_id].last_rewritten < commit_time) { - this->segments[segment_id].last_rewritten = commit_time; + if (commit_type == record_commit_type_t::REWRITE) { + segments.update_last_modified_rewritten(segment_id, {}, commit_time); } } return seastar::now(); @@ -660,7 +867,7 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment( }); } else if (header.get_type() == segment_type_t::JOURNAL) { logger().info( - "SEgmentCleaner::scan_nonfull_segment: journal segment {}", + "SegmentCleaner::scan_nonfull_segment: journal segment {}", segment_id); segment_set.emplace_back(std::make_pair(segment_id, std::move(header))); } else { @@ -681,9 +888,19 @@ SegmentCleaner::maybe_release_segment(Transaction &t) LOG_PREFIX(SegmentCleaner::maybe_release_segment); INFOT("releasing segment {}", t, to_release); return sm_group->release_segment(to_release - ).safe_then([this, to_release] { - stats.segments_released++; - mark_empty(to_release); + ).safe_then([this, FNAME, &t, to_release] { + segments.mark_empty(to_release); + INFOT("released, should_block_on_gc {}, projected_avail_ratio {}, " + "projected_reclaim_ratio {}", + t, + should_block_on_gc(), + 
get_projected_available_ratio(), + get_projected_reclaim_ratio()); + if (space_tracker->get_usage(to_release) != 0) { + space_tracker->dump_usage(to_release); + ceph_abort(); + } + maybe_wake_gc_blocked_io(); }); } else { return SegmentManager::release_ertr::now(); diff --git a/src/crimson/os/seastore/segment_cleaner.h b/src/crimson/os/seastore/segment_cleaner.h index fa7578cde0c..840a8e19dbd 100644 --- a/src/crimson/os/seastore/segment_cleaner.h +++ b/src/crimson/os/seastore/segment_cleaner.h @@ -20,274 +20,164 @@ namespace crimson::os::seastore { -class SegmentCleaner; - -// for keeping track of segment managers' various information, -// like empty segments, opened segments and so on. -class segment_info_set_t { - struct segment_manager_info_t { - segment_manager_info_t() = default; - segment_manager_info_t( - device_id_t device_id, - device_segment_id_t num_segments, - seastore_off_t block_size, - size_t empty_segments, - size_t size) - : device_id(device_id), - num_segments(num_segments), - block_size(block_size), - empty_segments(empty_segments), - size(size), - avail_bytes(size) - {} - - device_id_t device_id = 0; - device_segment_id_t num_segments = 0; - seastore_off_t block_size = 0; - size_t empty_segments = 0; - size_t size = 0; - size_t avail_bytes = 0; - std::map open_segment_avails; - }; - - struct segment_info_t { - Segment::segment_state_t state = Segment::segment_state_t::EMPTY; - - // Will be non-null for any segments in the current journal - segment_seq_t journal_segment_seq = NULL_SEG_SEQ; +/* + * segment_info_t + * + * Maintains the tracked information for a segment. + * It is read-only outside segments_info_t. 
+ */ +struct segment_info_t { + using time_point = seastar::lowres_system_clock::time_point; - segment_type_t type = segment_type_t::NULL_SEG; + // segment_info_t is initiated as set_empty() + Segment::segment_state_t state = Segment::segment_state_t::EMPTY; - seastar::lowres_system_clock::time_point last_modified; - seastar::lowres_system_clock::time_point last_rewritten; + // Will be non-null for any segments in the current journal + segment_seq_t seq = NULL_SEG_SEQ; - segment_type_t get_type() const { - return type; - } + segment_type_t type = segment_type_t::NULL_SEG; - void set_open(segment_seq_t); - void set_empty(); - void set_closed(); + time_point last_modified; + time_point last_rewritten; - bool is_in_journal(journal_seq_t tail_committed) const { - return get_type() == segment_type_t::JOURNAL && - tail_committed.segment_seq <= journal_segment_seq; - } + std::size_t open_avail_bytes = 0; - bool is_empty() const { - return state == Segment::segment_state_t::EMPTY; - } - - bool is_closed() const { - return state == Segment::segment_state_t::CLOSED; - } - - bool is_open() const { - return state == Segment::segment_state_t::OPEN; - } - }; -public: - segment_info_set_t() { - sm_infos.resize(DEVICE_ID_MAX); + bool is_in_journal(journal_seq_t tail_committed) const { + return type == segment_type_t::JOURNAL && + tail_committed.segment_seq <= seq; } - segment_info_t& operator[](segment_id_t id) { - return segments[id]; - } - const segment_info_t& operator[](segment_id_t id) const { - return segments[id]; + bool is_empty() const { + return state == Segment::segment_state_t::EMPTY; } - std::optional & - operator[](device_id_t id) { - auto& sm_info = sm_infos[id]; - assert(sm_info && sm_info->device_id == id); - return sm_info; - } - const std::optional & - operator[](device_id_t id) const { - auto& sm_info = sm_infos[id]; - assert(sm_info && sm_info->device_id == id); - return sm_info; + bool is_closed() const { + return state == Segment::segment_state_t::CLOSED; } 
- void clear() { - segments.clear(); - total_bytes = 0; - journal_segments = 0; - avail_bytes = 0; - opened_segments = 0; - segment_size = 0; + bool is_open() const { + return state == Segment::segment_state_t::OPEN; } - void add_segment_manager(SegmentManager& segment_manager) - { - device_id_t d_id = segment_manager.get_device_id(); - auto ssize = segment_manager.get_segment_size(); - ceph_assert(ssize != 0); - segments.add_device( - d_id, - segment_manager.get_num_segments(), - segment_info_t{}); - sm_infos[segment_manager.get_device_id()].emplace( - d_id, - segment_manager.get_num_segments(), - segment_manager.get_block_size(), - segment_manager.get_num_segments(), - segment_manager.get_size()); - - total_bytes += segment_manager.get_size(); - avail_bytes += segment_manager.get_size(); - - // assume all the segment managers share the same settings as follows. - if (segment_size == 0) { - segment_size = ssize; - } else { - ceph_assert(segment_size == ssize); + void init_closed(segment_seq_t, segment_type_t); + + void set_open(segment_seq_t, segment_type_t, std::size_t segment_size); + + void set_empty(); + + void set_closed(); + + void update_last_modified_rewritten( + time_point _last_modified, time_point _last_rewritten) { + if (_last_modified != time_point() && last_modified < _last_modified) { + last_modified = _last_modified; + } + if (_last_rewritten != time_point() && last_rewritten < _last_rewritten) { + last_rewritten = _last_rewritten; } } +}; - device_segment_id_t size() const { - return segments.size(); +std::ostream& operator<<(std::ostream&, const segment_info_t&); + +/* + * segments_info_t + * + * Keep track of all segments and related information. 
+ */ +class segments_info_t { +public: + using time_point = seastar::lowres_system_clock::time_point; + + segments_info_t() { + reset(); } - auto begin() { - return segments.begin(); + const segment_info_t& operator[](segment_id_t id) const { + return segments[id]; } + auto begin() const { return segments.begin(); } - auto end() { - return segments.end(); - } auto end() const { return segments.end(); } - // the following methods are used for keeping track of - // seastore disk space usage - void segment_opened(segment_id_t segment) { - auto& sm_info = sm_infos[segment.device_id()]; - sm_info->empty_segments--; - ceph_assert(segments[segment].is_empty()); - // must be opening a new segment - auto [iter, inserted] = sm_info->open_segment_avails.emplace( - segment, get_segment_size()); - opened_segments++; - ceph_assert(inserted); - } - void segment_emptied(segment_id_t segment) { - auto& sm_info = sm_infos[segment.device_id()]; - sm_info->empty_segments++; - sm_info->avail_bytes += get_segment_size(); - avail_bytes += get_segment_size(); - } - void segment_closed(segment_id_t segment) { - assert(segments.contains(segment)); - auto& segment_info = segments[segment]; - auto& sm_info = sm_infos[segment.device_id()]; - if (segment_info.is_open()) { - auto iter = sm_info->open_segment_avails.find(segment); - ceph_assert(iter != sm_info->open_segment_avails.end()); - assert(sm_info->avail_bytes >= (size_t)iter->second); - assert(avail_bytes >= (size_t)iter->second); - sm_info->avail_bytes -= iter->second; - avail_bytes -= iter->second; - sm_info->open_segment_avails.erase(iter); - opened_segments--; - } else { - ceph_assert(segment_info.is_empty()); - assert(sm_info->avail_bytes >= (std::size_t)get_segment_size()); - assert(avail_bytes >= (std::size_t)get_segment_size()); - assert(sm_info->empty_segments > 0); - sm_info->avail_bytes -= get_segment_size(); - avail_bytes -= get_segment_size(); - sm_info->empty_segments--; - } - segment_info.set_closed(); - } - void 
update_segment_avail_bytes(paddr_t offset) { - auto segment_id = offset.as_seg_paddr().get_segment_id(); - auto& sm_info = sm_infos[segment_id.device_id()]; - auto iter = sm_info->open_segment_avails.find(segment_id); - if (iter == sm_info->open_segment_avails.end()) { - crimson::get_logger(ceph_subsys_seastore_cleaner).error( - "SegmentCleaner::update_segment_avail_bytes:" - ":segment closed {}, not updating", - offset); - return; - } - auto new_avail_bytes = get_segment_size() - - offset.as_seg_paddr().get_segment_off(); - if (iter->second < new_avail_bytes) { - crimson::get_logger(ceph_subsys_seastore_cleaner).error( - "SegmentCleaner::update_segment_avail_bytes:" - " avail_bytes increased? , {}, {}", - iter->second, - new_avail_bytes); - ceph_assert(iter->second >= new_avail_bytes); - } - assert(sm_info->avail_bytes >= (size_t)(iter->second - new_avail_bytes)); - assert(avail_bytes >= (size_t)(iter->second - new_avail_bytes)); - sm_info->avail_bytes -= iter->second - new_avail_bytes; - avail_bytes -= iter->second - new_avail_bytes; - iter->second = new_avail_bytes; + std::size_t get_num_segments() const { + assert(segments.size() > 0); + return segments.size(); } - size_t get_empty_segments(device_id_t d_id) { - return sm_infos[d_id]->empty_segments; + std::size_t get_segment_size() const { + assert(segment_size > 0); + return segment_size; } - size_t get_opened_segments(device_id_t d_id) { - return sm_infos[d_id]->open_segment_avails.size(); + std::size_t get_num_in_journal() const { + return num_in_journal; } - size_t get_opened_segments() { - return opened_segments; + std::size_t get_num_open() const { + return num_open; } - size_t get_total_bytes() const { - return total_bytes; + std::size_t get_num_empty() const { + return num_empty; } - size_t get_available_bytes(device_id_t d_id) const { - auto& sm_info = sm_infos[d_id]; - return sm_info->avail_bytes; + std::size_t get_num_closed() const { + return num_closed; } - size_t get_available_bytes() const { - 
return avail_bytes; + std::size_t get_count_open() const { + return count_open; } - void new_journal_segment() { - ++journal_segments; + std::size_t get_count_release() const { + return count_release; } - void journal_segment_emptied() { - --journal_segments; + std::size_t get_count_close() const { + return count_close; } - device_segment_id_t get_journal_segments() const { - return journal_segments; + size_t get_total_bytes() const { + return total_bytes; } - device_segment_id_t num_segments() const { - device_segment_id_t num = 0; - for (auto& sm_info : sm_infos) { - if (!sm_info) { - continue; - } - num += sm_info->num_segments; - } - return num; + size_t get_available_bytes() const { + return avail_bytes; } - seastore_off_t get_segment_size() const { - assert(segment_size != 0); - return segment_size; + + void reset(); + + void add_segment_manager(SegmentManager &segment_manager); + + // initiate non-empty segments, the others are by default empty + void init_closed(segment_id_t, segment_seq_t, segment_type_t); + + void mark_open(segment_id_t, segment_seq_t, segment_type_t); + + void mark_empty(segment_id_t); + + void mark_closed(segment_id_t); + + void update_written_to(paddr_t offset); + + void update_last_modified_rewritten( + segment_id_t id, time_point last_modified, time_point last_rewritten) { + segments[id].update_last_modified_rewritten(last_modified, last_rewritten); } private: - std::vector> sm_infos; + // See reset() for member initialization segment_map_t segments; - device_segment_id_t journal_segments = 0; - size_t total_bytes = 0; - size_t avail_bytes = 0; - size_t opened_segments = 0; - seastore_off_t segment_size = 0; + std::size_t segment_size; + + std::size_t num_in_journal; + std::size_t num_open; + std::size_t num_empty; + std::size_t num_closed; - friend class SegmentCleaner; + std::size_t count_open; + std::size_t count_release; + std::size_t count_close; + + std::size_t total_bytes; + std::size_t avail_bytes; }; /** @@ -532,6 +422,9 @@ 
public: class SegmentCleaner : public SegmentProvider { public: + using time_point = seastar::lowres_system_clock::time_point; + using duration = seastar::lowres_system_clock::duration; + /// Config struct config_t { size_t target_journal_segments = 0; @@ -666,7 +559,7 @@ private: SegmentManagerGroupRef sm_group; SpaceTrackerIRef space_tracker; - segment_info_set_t segments; + segments_info_t segments; bool init_complete = false; struct { @@ -679,11 +572,8 @@ private: */ uint64_t projected_used_bytes = 0; - uint64_t segments_released = 0; uint64_t accumulated_blocked_ios = 0; - uint64_t empty_segments = 0; int64_t ios_blocking = 0; - uint64_t reclaimed_segments = 0; uint64_t reclaim_rewrite_bytes = 0; uint64_t reclaiming_bytes = 0; seastar::metrics::histogram segment_util; @@ -744,20 +634,20 @@ public: void set_journal_head(journal_seq_t head) { assert(journal_head == JOURNAL_SEQ_NULL || head >= journal_head); journal_head = head; - segments.update_segment_avail_bytes(head.offset); + segments.update_written_to(head.offset); gc_process.maybe_wake_on_space_used(); } void update_segment_avail_bytes(paddr_t offset) final { - segments.update_segment_avail_bytes(offset); + segments.update_written_to(offset); } segment_seq_t get_seq(segment_id_t id) final { - return segments[id].journal_segment_seq; + return segments[id].seq; } segment_type_t get_type(segment_id_t id) final { - return segments[id].get_type(); + return segments[id].type; } SegmentManagerGroup* get_segment_manager_group() final { @@ -776,16 +666,10 @@ public: void mark_space_used( paddr_t addr, extent_len_t len, - seastar::lowres_system_clock::time_point last_modified - = seastar::lowres_system_clock::time_point(), - seastar::lowres_system_clock::time_point last_rewritten - = seastar::lowres_system_clock::time_point(), + time_point last_modified = time_point(), + time_point last_rewritten = time_point(), bool init_scan = false) { auto& seg_addr = addr.as_seg_paddr(); - 
assert(seg_addr.get_segment_id().device_id() == - segments[seg_addr.get_segment_id().device_id()]->device_id); - assert(seg_addr.get_segment_id().device_segment_id() < - segments[seg_addr.get_segment_id().device_id()]->num_segments); if (!init_scan && !init_complete) return; @@ -803,11 +687,8 @@ public: // time the segments' live extents are to stay unmodified; this is an approximation // of the sprite lfs' segment "age". - if (last_modified > segments[seg_addr.get_segment_id()].last_modified) - segments[seg_addr.get_segment_id()].last_modified = last_modified; - - if (last_rewritten > segments[seg_addr.get_segment_id()].last_rewritten) - segments[seg_addr.get_segment_id()].last_rewritten = last_rewritten; + segments.update_last_modified_rewritten( + seg_addr.get_segment_id(), last_modified, last_rewritten); gc_process.maybe_wake_on_space_used(); assert(ret > 0); @@ -832,10 +713,6 @@ public: ceph_assert(stats.used_bytes >= len); stats.used_bytes -= len; auto& seg_addr = addr.as_seg_paddr(); - assert(seg_addr.get_segment_id().device_id() == - segments[seg_addr.get_segment_id().device_id()]->device_id); - assert(seg_addr.get_segment_id().device_segment_id() < - segments[seg_addr.get_segment_id().device_id()]->num_segments); auto old_usage = space_tracker->calc_utilization(seg_addr.get_segment_id()); [[maybe_unused]] auto ret = space_tracker->release( @@ -921,7 +798,7 @@ private: !segment_info.is_in_journal(journal_tail_committed) && benefit_cost > max_benefit_cost) { id = _id; - seq = segment_info.journal_segment_seq; + seq = segment_info.seq; max_benefit_cost = benefit_cost; } } @@ -1117,7 +994,7 @@ private: if (journal_head == JOURNAL_SEQ_NULL) { // this for calculating journal bytes in the journal // replay phase in which journal_head is not set - return segments.get_journal_segments() * segments.get_segment_size(); + return segments.get_num_in_journal() * segments.get_segment_size(); } else { assert(journal_head >= journal_tail_committed); auto segment_size = 
segments.get_segment_size(); @@ -1312,115 +1189,15 @@ private: } void init_mark_segment_closed( - segment_id_t segment, - segment_seq_t seq, - segment_type_t s_type) { - crimson::get_logger(ceph_subsys_seastore_cleaner).debug( - "SegmentCleaner::init_mark_segment_closed: segment {}, seq {}, s_type {}", - segment, - segment_seq_printer_t{seq}, - s_type); - mark_closed(segment); - segments[segment].journal_segment_seq = seq; - assert(s_type != segment_type_t::NULL_SEG); - segments[segment].type = s_type; - if (s_type == segment_type_t::JOURNAL) { - segments.new_journal_segment(); - } else { - assert(s_type == segment_type_t::OOL); + segment_id_t segment, + segment_seq_t seq, + segment_type_t s_type) { + ceph_assert(!init_complete); + segments.init_closed(segment, seq, s_type); + if (s_type == segment_type_t::OOL) { ool_segment_seq_allocator->set_next_segment_seq(seq); } } - - void mark_closed(segment_id_t segment) { - assert(segment.device_id() == - segments[segment.device_id()]->device_id); - assert(segment.device_segment_id() < - segments[segment.device_id()]->num_segments); - if (init_complete) { - assert(segments[segment].is_open()); - } else { - assert(segments[segment].is_empty()); - assert(segments.get_empty_segments(segment.device_id()) > 0); - assert(stats.empty_segments > 0); - stats.empty_segments--; - } - segments.segment_closed(segment); - crimson::get_logger(ceph_subsys_seastore_cleaner).info( - "mark closed: {} empty_segments: {}" - ", opened_segments {}, should_block_on_gc {}" - ", projected_avail_ratio {}, projected_reclaim_ratio {}", - segment, - segments.get_empty_segments(segment.device_id()), - segments.get_opened_segments(), - should_block_on_gc(), - get_projected_available_ratio(), - get_projected_reclaim_ratio()); - } - - void mark_empty(segment_id_t segment) { - auto& segment_info = segments[segment]; - assert(segment.device_id() == - segments[segment.device_id()]->device_id); - assert(segment.device_segment_id() < - 
segments[segment.device_id()]->num_segments); - assert(segment_info.is_closed()); - segments.segment_emptied(segment); - if (space_tracker->get_usage(segment) != 0) { - space_tracker->dump_usage(segment); - assert(space_tracker->get_usage(segment) == 0); - } - auto s_type = segment_info.get_type(); - segment_info.set_empty(); - stats.empty_segments++; - crimson::get_logger(ceph_subsys_seastore_cleaner - ).info("mark empty: {}, empty_segments {}" - ", opened_segments {}, should_block_on_gc {}" - ", projected_avail_ratio {}, projected_reclaim_ratio {}", - segment, - stats.empty_segments, - segments.get_opened_segments(), - should_block_on_gc(), - get_projected_available_ratio(), - get_projected_reclaim_ratio()); - ceph_assert(s_type != segment_type_t::NULL_SEG); - if (s_type == segment_type_t::JOURNAL) { - segments.journal_segment_emptied(); - } - maybe_wake_gc_blocked_io(); - } - - void mark_open(segment_id_t segment, segment_seq_t seq, segment_type_t s_type) { - assert(segment.device_id() == - segments[segment.device_id()]->device_id); - assert(segment.device_segment_id() < - segments[segment.device_id()]->num_segments); - assert(segments[segment].is_empty()); - assert(segments.get_empty_segments(segment.device_id()) > 0); - segments.segment_opened(segment); - auto& segment_info = segments[segment]; - segment_info.set_open(seq); - segment_info.type = s_type; - - ceph_assert(s_type != segment_type_t::NULL_SEG); - if (s_type == segment_type_t::JOURNAL) { - segments.new_journal_segment(); - } - assert(stats.empty_segments > 0); - stats.empty_segments--; - crimson::get_logger(ceph_subsys_seastore_cleaner - ).info("mark open: {} {} {}, empty_segments {}" - ", opened_segments {}, should_block_on_gc {}" - ", projected_avail_ratio {}, projected_reclaim_ratio {}", - segment, - segment_seq_printer_t{seq}, - segment_info.type, - stats.empty_segments, - segments.get_opened_segments(), - should_block_on_gc(), - get_projected_available_ratio(), - get_projected_reclaim_ratio()); 
- } }; using SegmentCleanerRef = std::unique_ptr<SegmentCleaner>;