From: Yingxin Cheng Date: Fri, 19 Aug 2022 07:09:41 +0000 (+0800) Subject: crimson/os/seastore: generalize journal tail calculations X-Git-Tag: v18.0.0~165^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=c2b9e770f7ee055eba154296137c10d9f22079b6;p=ceph-ci.git crimson/os/seastore: generalize journal tail calculations Signed-off-by: Yingxin Cheng Signed-off-by: Myoungwon Oh --- diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index 3ead6ff0127..6a5dfe7f049 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -467,16 +467,24 @@ AsyncCleaner::AsyncCleaner( config_t config, SegmentManagerGroupRef&& sm_group, BackrefManager &backref_manager, - bool detailed) + bool detailed, + journal_type_t type, + seastore_off_t roll_start, + seastore_off_t roll_size) : detailed(detailed), config(config), sm_group(std::move(sm_group)), backref_manager(backref_manager), + journal_type(type), + roll_start(roll_start), + roll_size(roll_size), ool_segment_seq_allocator( new SegmentSeqAllocator(segment_type_t::OOL)), gc_process(*this) { config.validate(); + ceph_assert(roll_start >= 0); + ceph_assert(roll_size > 0); } void AsyncCleaner::register_metrics() diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index 3d0cbf55ca9..2ca0ba97492 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ -539,14 +539,13 @@ class AsyncCleaner : public SegmentProvider, public JournalTrimmer { public: /// Config struct config_t { - /// Number of minimum journal segments to stop trimming dirty. - size_t target_journal_dirty_segments = 0; - /// Number of maximum journal segments to block user transactions. - size_t max_journal_segments = 0; - - /// Number of minimum journal segments to stop trimming allocation + /// Number of minimum bytes to stop trimming dirty. 
+ std::size_t target_journal_dirty_bytes = 0; + /// Number of minimum bytes to stop trimming allocation /// (having the corresponding backrefs unmerged) - size_t target_journal_alloc_segments = 0; + std::size_t target_journal_alloc_bytes = 0; + /// Number of maximum bytes to block user transactions. + std::size_t max_journal_bytes = 0; /// Ratio of maximum available space to disable reclaiming. double available_ratio_gc_max = 0; @@ -557,28 +556,44 @@ public: double reclaim_ratio_gc_threshold = 0; /// Number of bytes to reclaim per cycle - size_t reclaim_bytes_per_cycle = 0; + std::size_t reclaim_bytes_per_cycle = 0; /// Number of bytes to rewrite dirty per cycle - size_t rewrite_dirty_bytes_per_cycle = 0; + std::size_t rewrite_dirty_bytes_per_cycle = 0; /// Number of bytes to rewrite backref per cycle - size_t rewrite_backref_bytes_per_cycle = 0; + std::size_t rewrite_backref_bytes_per_cycle = 0; void validate() const { - ceph_assert(max_journal_segments > target_journal_dirty_segments); - ceph_assert(max_journal_segments > target_journal_alloc_segments); + ceph_assert(max_journal_bytes <= MAX_SEG_OFF); + ceph_assert(max_journal_bytes > target_journal_dirty_bytes); + ceph_assert(max_journal_bytes > target_journal_alloc_bytes); ceph_assert(available_ratio_gc_max > available_ratio_hard_limit); ceph_assert(reclaim_bytes_per_cycle > 0); ceph_assert(rewrite_dirty_bytes_per_cycle > 0); ceph_assert(rewrite_backref_bytes_per_cycle > 0); } - static config_t get_default() { + static config_t get_default( + std::size_t roll_size, journal_type_t type) { + assert(roll_size); + std::size_t target_dirty_bytes = 0; + std::size_t target_alloc_bytes = 0; + std::size_t max_journal_bytes = 0; + if (type == journal_type_t::SEGMENTED) { + target_dirty_bytes = 12 * roll_size; + target_alloc_bytes = 2 * roll_size; + max_journal_bytes = 16 * roll_size; + } else { + assert(type == journal_type_t::CIRCULAR); + target_dirty_bytes = roll_size / 4; + target_alloc_bytes = roll_size / 4; + 
max_journal_bytes = roll_size / 2; + } return config_t{ - 12, // target_journal_dirty_segments - 16, // max_journal_segments - 2, // target_journal_alloc_segments + target_dirty_bytes, + target_alloc_bytes, + max_journal_bytes, .15, // available_ratio_gc_max .1, // available_ratio_hard_limit .1, // reclaim_ratio_gc_threshold @@ -588,11 +603,26 @@ public: }; } - static config_t get_test() { + static config_t get_test( + std::size_t roll_size, journal_type_t type) { + assert(roll_size); + std::size_t target_dirty_bytes = 0; + std::size_t target_alloc_bytes = 0; + std::size_t max_journal_bytes = 0; + if (type == journal_type_t::SEGMENTED) { + target_dirty_bytes = 2 * roll_size; + target_alloc_bytes = 2 * roll_size; + max_journal_bytes = 4 * roll_size; + } else { + assert(type == journal_type_t::CIRCULAR); + target_dirty_bytes = roll_size / 4; + target_alloc_bytes = roll_size / 4; + max_journal_bytes = roll_size / 2; + } return config_t{ - 2, // target_journal_dirty_segments - 4, // max_journal_segments - 2, // target_journal_alloc_segments + target_dirty_bytes, + target_alloc_bytes, + max_journal_bytes, .99, // available_ratio_gc_max .2, // available_ratio_hard_limit .6, // reclaim_ratio_gc_threshold @@ -777,6 +807,11 @@ private: seastar::metrics::metric_group metrics; void register_metrics(); + journal_type_t journal_type; + + seastore_off_t roll_start; + seastore_off_t roll_size; + journal_seq_t journal_alloc_tail; journal_seq_t journal_dirty_tail; @@ -805,7 +840,10 @@ public: config_t config, SegmentManagerGroupRef&& sm_group, BackrefManager &backref_manager, - bool detailed = false); + bool detailed, + journal_type_t type, + seastore_off_t roll_start, + seastore_off_t roll_size); SegmentSeqAllocator& get_ool_segment_seq_allocator() { return *ool_segment_seq_allocator; @@ -977,40 +1015,34 @@ private: journal_seq_t get_dirty_tail_target() const { assert(is_ready()); - auto ret = journal_head; - ceph_assert(ret != JOURNAL_SEQ_NULL); - if (ret.segment_seq >= 
config.target_journal_dirty_segments) {
- ret.segment_seq -= config.target_journal_dirty_segments;
- } else {
- ret.segment_seq = 0;
- ret.offset = P_ADDR_MIN;
- }
+ ceph_assert(journal_head != JOURNAL_SEQ_NULL);
+ auto ret = journal_head.add_offset(
+ journal_type,
+ -static_cast<seastore_off_t>(config.target_journal_dirty_bytes),
+ roll_start,
+ roll_size);
return ret;
}
- journal_seq_t get_tail_limit() const {
+ journal_seq_t get_alloc_tail_target() const {
assert(is_ready());
- auto ret = journal_head;
- ceph_assert(ret != JOURNAL_SEQ_NULL);
- if (ret.segment_seq >= config.max_journal_segments) {
- ret.segment_seq -= config.max_journal_segments;
- } else {
- ret.segment_seq = 0;
- ret.offset = P_ADDR_MIN;
- }
+ ceph_assert(journal_head != JOURNAL_SEQ_NULL);
+ auto ret = journal_head.add_offset(
+ journal_type,
+ -static_cast<seastore_off_t>(config.target_journal_alloc_bytes),
+ roll_start,
+ roll_size);
return ret;
}
- journal_seq_t get_alloc_tail_target() const {
+ journal_seq_t get_tail_limit() const {
assert(is_ready());
- auto ret = journal_head;
- ceph_assert(ret != JOURNAL_SEQ_NULL);
- if (ret.segment_seq >= config.target_journal_alloc_segments) {
- ret.segment_seq -= config.target_journal_alloc_segments;
- } else {
- ret.segment_seq = 0;
- ret.offset = P_ADDR_MIN;
- }
+ ceph_assert(journal_head != JOURNAL_SEQ_NULL);
+ auto ret = journal_head.add_offset(
+ journal_type,
+ -static_cast<seastore_off_t>(config.max_journal_bytes),
+ roll_start,
+ roll_size);
return ret;
}
@@ -1251,11 +1283,13 @@ private:
journal_dirty_tail == JOURNAL_SEQ_NULL) {
return 0;
}
- return (journal_head.segment_seq - journal_dirty_tail.segment_seq) *
- segments.get_segment_size() +
- journal_head.offset.as_seg_paddr().get_segment_off() -
- segments.get_segment_size() -
- journal_dirty_tail.offset.as_seg_paddr().get_segment_off();
+ auto ret = journal_head.relative_to(
+ journal_type,
+ journal_dirty_tail,
+ roll_start,
+ roll_size);
+ ceph_assert(ret >= 0);
+ return static_cast<std::size_t>(ret);
}
std::size_t get_alloc_journal_size() const {
@@ -1263,11 +1297,13 @@ private:
journal_alloc_tail == JOURNAL_SEQ_NULL) {
return 0;
}
- return (journal_head.segment_seq - journal_alloc_tail.segment_seq) *
- segments.get_segment_size() +
- journal_head.offset.as_seg_paddr().get_segment_off() -
- segments.get_segment_size() -
- journal_alloc_tail.offset.as_seg_paddr().get_segment_off();
+ auto ret = journal_head.relative_to(
+ journal_type,
+ journal_alloc_tail,
+ roll_start,
+ roll_size);
+ ceph_assert(ret >= 0);
+ return static_cast<std::size_t>(ret);
}
/** diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index b97630cd9b9..e14cab14555 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -129,6 +129,81 @@ std::ostream &operator<<(std::ostream &out, const paddr_t &rhs) return out << ">"; }
+journal_seq_t journal_seq_t::add_offset(
+ journal_type_t type,
+ seastore_off_t off,
+ seastore_off_t roll_start,
+ seastore_off_t roll_size) const
+{
+ assert(offset.is_absolute());
+ assert(off != MIN_SEG_OFF);
+ assert(roll_start >= 0);
+ assert(roll_size > 0);
+
+ segment_seq_t jseq = segment_seq;
+ seastore_off_t joff;
+ if (type == journal_type_t::SEGMENTED) {
+ joff = offset.as_seg_paddr().get_segment_off();
+ } else {
+ assert(type == journal_type_t::CIRCULAR);
+ auto boff = offset.as_blk_paddr().get_block_off();
+ assert(boff <= MAX_SEG_OFF);
+ joff = boff;
+ }
+ auto roll_end = roll_start + roll_size;
+ assert(joff >= roll_start);
+ assert(joff <= roll_end);
+
+ if (off >= 0) {
+ jseq += (off / roll_size);
+ joff += (off % roll_size);
+ if (joff >= roll_end) {
+ ++jseq;
+ joff -= roll_size;
+ }
+ } else {
+ auto mod = static_cast<segment_seq_t>((-off) / roll_size);
+ joff -= ((-off) % roll_size);
+ if (joff < roll_start) {
+ ++mod;
+ joff += roll_size;
+ }
+ if (jseq >= mod) {
+ jseq -= mod;
+ } else {
+ return JOURNAL_SEQ_MIN;
+ }
+ }
+ assert(joff >= roll_start);
+ assert(joff < roll_end);
+ return journal_seq_t{jseq,
make_block_relative_paddr(joff)};
+}
+
+seastore_off_t journal_seq_t::relative_to(
+ journal_type_t type,
+ const journal_seq_t& r,
+ seastore_off_t roll_start,
+ seastore_off_t roll_size) const
+{
+ assert(offset.is_absolute());
+ assert(r.offset.is_absolute());
+ assert(roll_start >= 0);
+ assert(roll_size > 0);
+
+ int64_t ret = static_cast<int64_t>(segment_seq) - r.segment_seq;
+ ret *= roll_size;
+ if (type == journal_type_t::SEGMENTED) {
+ ret += (static_cast<int64_t>(offset.as_seg_paddr().get_segment_off()) -
+ static_cast<int64_t>(r.offset.as_seg_paddr().get_segment_off()));
+ } else {
+ assert(type == journal_type_t::CIRCULAR);
+ ret += (static_cast<int64_t>(offset.as_blk_paddr().get_block_off()) -
+ static_cast<int64_t>(r.offset.as_blk_paddr().get_block_off()));
+ }
+ assert(ret <= MAX_SEG_OFF && ret > MIN_SEG_OFF);
+ return static_cast<seastore_off_t>(ret);
+}
+
std::ostream &operator<<(std::ostream &out, const journal_seq_t &seq) { if (seq == JOURNAL_SEQ_NULL) { diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 3f8c0ce16a5..5962cdcfabf 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -420,6 +420,8 @@ using seastore_off_t = int32_t; using u_seastore_off_t = uint32_t; constexpr seastore_off_t MAX_SEG_OFF = std::numeric_limits<seastore_off_t>::max();
+constexpr seastore_off_t MIN_SEG_OFF =
+ std::numeric_limits<seastore_off_t>::min();
constexpr seastore_off_t NULL_SEG_OFF = MAX_SEG_OFF; constexpr auto SEGMENT_OFF_BITS = std::numeric_limits<u_seastore_off_t>::digits; @@ -877,9 +879,18 @@ struct journal_seq_t { segment_seq_t segment_seq = NULL_SEG_SEQ; paddr_t offset = P_ADDR_NULL;
- journal_seq_t add_offset(seastore_off_t o) const {
- return {segment_seq, offset.add_offset(o)};
- }
+ // produces a pseudo journal_seq_t relative to this by offset
+ journal_seq_t add_offset(
+ journal_type_t type,
+ seastore_off_t off,
+ seastore_off_t roll_start,
+ seastore_off_t roll_size) const;
+
+ seastore_off_t relative_to(
+ journal_type_t type,
+ const journal_seq_t& r,
+
seastore_off_t roll_start,
+ seastore_off_t roll_size) const;
DENC(journal_seq_t, v, p) { DENC_START(1, 1, p); @@ -1923,7 +1934,9 @@ struct write_result_t { seastore_off_t length; journal_seq_t get_end_seq() const {
- return start_seq.add_offset(length);
+ return journal_seq_t{
+ start_seq.segment_seq,
+ start_seq.offset.add_offset(length)};
}
};
std::ostream& operator<<(std::ostream&, const write_result_t&); diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index da95f5351a4..ef644a07966 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -7,6 +7,7 @@ #include "crimson/os/seastore/logging.h" #include "crimson/os/seastore/transaction_manager.h" #include "crimson/os/seastore/journal.h"
+#include "crimson/os/seastore/journal/circular_bounded_journal.h"
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" #include "crimson/os/seastore/random_block_manager/rbm_device.h" @@ -630,7 +631,11 @@ TransactionManagerRef make_transaction_manager( auto sms = std::make_unique<SegmentManagerGroup>(); auto backref_manager = create_backref_manager(*cache);
- if (primary_device->get_device_type() == device_type_t::SEGMENTED) {
+ auto p_device_type = primary_device->get_device_type();
+ ceph_assert(p_device_type == device_type_t::SEGMENTED ||
+ p_device_type == device_type_t::RANDOM_BLOCK);
+
+ if (p_device_type == device_type_t::SEGMENTED) {
sms->add_segment_manager(static_cast<SegmentManager*>(primary_device));
}
for (auto &p_dev : secondary_devices) {
sms->add_segment_manager(static_cast<SegmentManager*>(p_dev));
}
+ auto journal_type = (p_device_type == device_type_t::SEGMENTED ?
+ journal_type_t::SEGMENTED : journal_type_t::CIRCULAR);
+ seastore_off_t roll_size;
+ seastore_off_t roll_start;
+ if (journal_type == journal_type_t::SEGMENTED) {
+ roll_size = static_cast<SegmentManager*>(primary_device)->get_segment_size();
+ roll_start = 0;
+ } else {
+ // FIXME: get from runtime configuration instead of static defaults
+ roll_size = journal::CircularBoundedJournal::mkfs_config_t
+ ::get_default().total_size;
+ // see CircularBoundedJournal::get_start_addr()
+ roll_start = journal::CBJOURNAL_START_ADDRESS +
+ primary_device->get_block_size();
+ }
+ ceph_assert(roll_size % primary_device->get_block_size() == 0);
+ ceph_assert(roll_start % primary_device->get_block_size() == 0);
+
bool cleaner_is_detailed;
AsyncCleaner::config_t cleaner_config;
if (is_test) {
cleaner_is_detailed = true;
- cleaner_config = AsyncCleaner::config_t::get_test();
+ cleaner_config = AsyncCleaner::config_t::get_test(
+ roll_size, journal_type);
} else {
cleaner_is_detailed = false;
- cleaner_config = AsyncCleaner::config_t::get_default();
+ cleaner_config = AsyncCleaner::config_t::get_default(
+ roll_size, journal_type);
}
auto async_cleaner = std::make_unique<AsyncCleaner>(
cleaner_config,
std::move(sms),
*backref_manager,
- cleaner_is_detailed);
+ cleaner_is_detailed,
+ journal_type,
+ roll_start,
+ roll_size);
- if (primary_device->get_device_type() == device_type_t::SEGMENTED) {
+ if (journal_type == journal_type_t::SEGMENTED) {
cache->set_segment_provider(*async_cleaner);
}
- auto p_device_type = primary_device->get_device_type();
JournalRef journal;
- if (p_device_type == device_type_t::SEGMENTED) {
+ if (journal_type == journal_type_t::SEGMENTED) {
journal = journal::make_segmented(*async_cleaner, *async_cleaner);
} else {
- ceph_assert(p_device_type == device_type_t::RANDOM_BLOCK);
journal = journal::make_circularbounded(
*async_cleaner,
static_cast<random_block_device::RBMDevice*>(primary_device),