From c1fdb0ab481fd12294902f281b1b0922cc2ec041 Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Tue, 28 Sep 2021 15:38:52 +0800 Subject: [PATCH] crimson/os/seastore: introduce ool related metrics with misc improvements * The number of ool records written; * Write overhead from journal/ool records; * Wasted writes from invalidated ool records; * Wasted writes from erased inline extents; * Distinguish ool and inline extents from metrics; Signed-off-by: Yingxin Cheng --- src/crimson/os/seastore/cache.cc | 100 +++++++++++++++--- src/crimson/os/seastore/cache.h | 12 ++- .../os/seastore/extent_placement_manager.cc | 9 ++ .../os/seastore/extent_placement_manager.h | 6 +- src/crimson/os/seastore/seastore_types.h | 17 +++ src/crimson/os/seastore/transaction.h | 11 ++ 6 files changed, 138 insertions(+), 17 deletions(-) diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 1c6783fd41b..4f810003b90 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -169,15 +169,16 @@ void Cache::register_metrics() * efforts discarded/committed */ auto effort_label = sm::label("effort"); + + // invalidated efforts using namespace std::literals::string_view_literals; - const string_view effort_names[] = { + const string_view invalidated_effort_names[] = { "READ"sv, "MUTATE"sv, "RETIRE"sv, "FRESH"sv, + "FRESH_OOL_WRITTEN"sv, }; - - // invalidated efforts for (auto& [src, src_label] : labels_by_src) { auto& efforts = get_by_src(stats.invalidated_efforts_by_src, src); for (auto& [ext, ext_label] : labels_by_ext) { @@ -219,7 +220,7 @@ void Cache::register_metrics() } // non READ invalidated efforts - for (auto& effort_name : effort_names) { + for (auto& effort_name : invalidated_effort_names) { auto& effort = [&effort_name, &efforts]() -> effort_t& { if (effort_name == "READ") { return efforts.read; @@ -227,9 +228,11 @@ void Cache::register_metrics() return efforts.mutate; } else if (effort_name == "RETIRE") { return
efforts.retire; - } else { - assert(effort_name == "FRESH"); + } else if (effort_name == "FRESH") { return efforts.fresh; + } else { + assert(effort_name == "FRESH_OOL_WRITTEN"); + return efforts.fresh_ool_written; } }(); metrics.add_group( @@ -260,11 +263,31 @@ void Cache::register_metrics() sm::description("delta bytes of invalidated transactions"), {src_label} ), + sm::make_counter( + "invalidated_ool_records", + efforts.num_ool_records, + sm::description("number of ool-records from invalidated transactions"), + {src_label} + ), + sm::make_counter( + "invalidated_ool_record_overhead_bytes", + efforts.ool_record_overhead_bytes, + sm::description("bytes of ool-record overhead from invalidated transactions"), + {src_label} + ), } ); } // src // committed efforts + const string_view committed_effort_names[] = { + "READ"sv, + "MUTATE"sv, + "RETIRE"sv, + "FRESH_INVALID"sv, + "FRESH_INLINE"sv, + "FRESH_OOL"sv, + }; for (auto& [src, src_label] : labels_by_src) { if (src == src_t::READ) { // READ transaction won't commit @@ -280,9 +303,27 @@ void Cache::register_metrics() sm::description("total number of transaction committed"), {src_label} ), + sm::make_counter( + "committed_ool_records", + efforts.num_ool_records, + sm::description("number of ool-records from committed transactions"), + {src_label} + ), + sm::make_counter( + "committed_ool_record_overhead_bytes", + efforts.ool_record_overhead_bytes, + sm::description("bytes of ool-record overhead from committed transactions"), + {src_label} + ), + sm::make_counter( + "committed_inline_record_overhead_bytes", + efforts.inline_record_overhead_bytes, + sm::description("bytes of inline-record overhead from committed transactions"), + {src_label} + ), } ); - for (auto& effort_name : effort_names) { + for (auto& effort_name : committed_effort_names) { auto& effort_by_ext = [&efforts, &effort_name]() -> counter_by_extent_t& { if (effort_name == "READ") { @@ -291,9 +332,13 @@ void Cache::register_metrics() return 
efforts.mutate_by_ext; } else if (effort_name == "RETIRE") { return efforts.retire_by_ext; + } else if (effort_name == "FRESH_INVALID") { + return efforts.fresh_invalid_by_ext; + } else if (effort_name == "FRESH_INLINE") { + return efforts.fresh_inline_by_ext; } else { - assert(effort_name == "FRESH"); - return efforts.fresh_by_ext; + assert(effort_name == "FRESH_OOL"); + return efforts.fresh_ool_by_ext; } }(); for (auto& [ext, ext_label] : labels_by_ext) { @@ -627,6 +672,12 @@ void Cache::mark_transaction_conflicted( efforts.mutate_delta_bytes += i->get_delta().length(); } + auto& ool_stats = t.get_ool_write_stats(); + efforts.fresh_ool_written.extents += ool_stats.extents.num; + efforts.fresh_ool_written.bytes += ool_stats.extents.bytes; + efforts.num_ool_records += ool_stats.num_records; + efforts.ool_record_overhead_bytes += ool_stats.overhead_bytes; + if (t.get_src() == Transaction::src_t::CLEANER) { // CLEANER transaction won't contain any onode tree operations assert(t.onode_tree_stats.is_clear()); @@ -642,6 +693,7 @@ void Cache::mark_transaction_conflicted( assert(t.retired_set.empty()); assert(t.get_fresh_block_stats().num == 0); assert(t.mutated_block_list.empty()); + assert(t.get_ool_write_stats().num_records == 0); assert(t.onode_tree_stats.is_clear()); assert(t.lba_tree_stats.is_clear()); } @@ -845,8 +897,14 @@ record_t Cache::prepare_record(Transaction &t) record.extents.reserve(t.inline_block_list.size()); for (auto &i: t.inline_block_list) { - DEBUGT("fresh block {}", t, *i); - get_by_ext(efforts.fresh_by_ext, + if (!i->is_valid()) { + DEBUGT("fresh inline block (invalid) {}", t, *i); + get_by_ext(efforts.fresh_invalid_by_ext, + i->get_type()).increment(i->get_length()); + } else { + DEBUGT("fresh inline block {}", t, *i); + } + get_by_ext(efforts.fresh_inline_by_ext, i->get_type()).increment(i->get_length()); assert(i->is_inline()); @@ -867,11 +925,27 @@ record_t Cache::prepare_record(Transaction &t) }); } + for (auto &i: t.ool_block_list) { + 
ceph_assert(i->is_valid()); + DEBUGT("fresh ool block {}", t, *i); + get_by_ext(efforts.fresh_ool_by_ext, + i->get_type()).increment(i->get_length()); + } + ceph_assert(t.get_fresh_block_stats().num == t.inline_block_list.size() + t.ool_block_list.size() + t.num_delayed_invalid_extents); + auto& ool_stats = t.get_ool_write_stats(); + ceph_assert(ool_stats.extents.num == t.ool_block_list.size()); + efforts.num_ool_records += ool_stats.num_records; + efforts.ool_record_overhead_bytes += ool_stats.overhead_bytes; + auto record_size = get_encoded_record_length( + record, segment_manager.get_block_size()); + auto inline_overhead = + record_size.mdlength + record_size.dlength - record.get_raw_data_size(); + efforts.inline_record_overhead_bytes += inline_overhead; return record; } @@ -891,9 +965,7 @@ void Cache::complete_commit( i->last_committed_crc = i->get_crc32c(); i->on_initial_write(); - if (!i->is_valid()) { - DEBUGT("invalid {}", t, *i); - } else { + if (i->is_valid()) { i->state = CachedExtent::extent_state_t::CLEAN; DEBUGT("fresh {}", t, *i); add_extent(i); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index c8049ca29ca..f1449adb221 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -659,7 +659,10 @@ private: uint64_t mutate_delta_bytes = 0; effort_t retire; effort_t fresh; + effort_t fresh_ool_written; counter_by_extent_t num_trans_invalidated; + uint64_t num_ool_records = 0; + uint64_t ool_record_overhead_bytes = 0; }; struct commit_trans_efforts_t { @@ -667,8 +670,13 @@ private: counter_by_extent_t mutate_by_ext; counter_by_extent_t delta_bytes_by_ext; counter_by_extent_t retire_by_ext; - counter_by_extent_t fresh_by_ext; - uint64_t num_trans = 0; + counter_by_extent_t fresh_invalid_by_ext; + counter_by_extent_t fresh_inline_by_ext; + counter_by_extent_t fresh_ool_by_ext; + uint64_t num_trans = 0; // the number of inline records + uint64_t num_ool_records = 0; + uint64_t 
ool_record_overhead_bytes = 0; + uint64_t inline_record_overhead_bytes = 0; }; struct success_read_trans_efforts_t { diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index 9b069138d73..43e6235ddab 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -84,6 +84,15 @@ SegmentedAllocator::Writer::_write( current_segment->segment->get_segment_id(), record.get_base()); + // account transactional ool writes before write() + auto& stats = t.get_ool_write_stats(); + stats.extents.num += record.get_num_extents(); + auto extent_bytes = record.get_raw_data_size(); + stats.extents.bytes += extent_bytes; + assert(bl.length() > extent_bytes); + stats.overhead_bytes += (bl.length() - extent_bytes); + stats.num_records += 1; + return trans_intr::make_interruptible( current_segment->segment->write(record.get_base(), bl).safe_then( [this, pr=std::move(pr), &t, diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 8a1fa5b2309..5c55b9e1f72 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -95,9 +95,13 @@ public: extent_buf_len = 0; base = MAX_SEG_OFF; } - uint64_t get_num_extents() { + uint64_t get_num_extents() const { return extents.size(); } + uint64_t get_raw_data_size() const { + assert(extents.size() == record.extents.size()); + return record.get_raw_data_size(); + } private: std::vector extents; record_t record; diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index f5e05297591..b4184e01c69 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include "include/byteorder.h" @@ -434,6 +435,22 @@ std::ostream &operator<<(std::ostream &lhs, const 
delta_info_t &rhs); struct record_t { std::vector extents; std::vector deltas; + + /* Returns the summed bl.length() of every extent plus every delta in this record. Both sums are seeded with uint64_t{0}: std::accumulate keeps its running total in the type of the initial value, so a plain 0 would narrow each partial sum to int. */ std::size_t get_raw_data_size() const { + auto extent_size = std::accumulate( + extents.begin(), extents.end(), uint64_t{0}, + [](uint64_t sum, auto& extent) { + return sum + extent.bl.length(); + } + ); + auto delta_size = std::accumulate( + deltas.begin(), deltas.end(), uint64_t{0}, + [](uint64_t sum, auto& delta) { + return sum + delta.bl.length(); + } + ); + return extent_size + delta_size; + } }; class object_data_t { diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index 91ac1cd2869..af26ae63c58 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -264,6 +264,7 @@ public: retired_set.clear(); onode_tree_stats = {}; lba_tree_stats = {}; + ool_write_stats = {}; to_release = NULL_SEG_ID; conflicted = false; if (!has_reset) { @@ -293,6 +294,15 @@ public: return lba_tree_stats; } + struct ool_write_stats_t { + io_stat_t extents; + uint64_t overhead_bytes = 0; + uint64_t num_records = 0; + }; + ool_write_stats_t& get_ool_write_stats() { + return ool_write_stats; + } + void increment_delayed_invalid_extents() { ++num_delayed_invalid_extents; } @@ -356,6 +366,7 @@ private: /// stats to collect when commit or invalidate tree_stats_t onode_tree_stats; tree_stats_t lba_tree_stats; + ool_write_stats_t ool_write_stats; ///< if != NULL_SEG_ID, release this segment after completion segment_id_t to_release = NULL_SEG_ID; -- 2.39.5