From 78bf4744f6c205fb6990b82d14c2cff19775fe84 Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Tue, 27 Jul 2021 15:39:05 +0800 Subject: [PATCH] crimson/os/seastore: measure committed efforts by extent In order to cross-check the writes at segment manager level, and evaluate the write amplification from each sub-component. Signed-off-by: Yingxin Cheng --- src/crimson/os/seastore/cache.cc | 237 +++++++++++++++-------- src/crimson/os/seastore/cache.h | 26 ++- src/crimson/os/seastore/seastore_types.h | 19 ++ 3 files changed, 195 insertions(+), 87 deletions(-) diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 2f3021e701e..8966dc5a9e3 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -345,9 +345,6 @@ void Cache::register_metrics() { /* * efforts discarded/committed - * - * XXX: include ext_label if want to measure efforts at the granularity of - * sub-components. */ auto effort_label = sm::label("effort"); std::map labels_by_effort { @@ -357,65 +354,133 @@ void Cache::register_metrics() {"FRESH", effort_label("FRESH")}, }; - auto register_effort = - [this, &labels_by_src, &labels_by_effort, &labels_by_counter] - (const char* category, - src_t src, - const std::string& effort_name, - const char* counter_name, - uint64_t& value) { - std::ostringstream oss_desc; - oss_desc << "total number of " << category - << " transactional efforts labeled by source, effort and counter"; - std::ostringstream oss_metric; - oss_metric << category << "_efforts"; + // invalidated efforts (non READ) + for (auto& [src, src_label] : labels_by_src) { + if (src == src_t::READ) { + // register src_t::READ later + continue; + } + auto& efforts = get_counter(stats.invalidated_efforts_by_src, src); + efforts = {}; + for (auto& [effort_name, effort_label] : labels_by_effort) { + auto& effort = efforts.get_by_name(effort_name); metrics.add_group( "cache", { sm::make_counter( - oss_metric.str(), - value, - sm::description(oss_desc.str()), - {labels_by_src.find(src)->second, - labels_by_effort.find(effort_name)->second, - labels_by_counter.find(counter_name)->second} + "invalidated_extents", + effort.extents, + sm::description("extents of invalidated transactions"), + {src_label, effort_label} + ), + sm::make_counter( + "invalidated_extent_bytes", + effort.bytes, + sm::description("extent bytes of invalidated transactions"), + {src_label, effort_label} ), } ); - }; - - auto get_efforts_by_category = - [this](const char* category) -> auto& { - if (strcmp(category, "committed") == 0) { - return stats.committed_efforts_by_src; - } else { - assert(strcmp(category, "invalidated") == 0); - return stats.invalidated_efforts_by_src; - } - }; + } // effort_name - for (auto& category : {"committed", "invalidated"}) { - auto& efforts_by_src = get_efforts_by_category(category); - for (auto& [src, label] : labels_by_src) { - if (std::strcmp(category, "committed") == 0 && src == src_t::READ) { - // READ transaction won't commit - continue; + metrics.add_group( + "cache", + { + sm::make_counter( + "invalidated_delta_bytes", + efforts.mutate_delta_bytes, + sm::description("delta bytes of invalidated transactions"), + {src_label} + ), } + ); + } // src + + // invalidated efforts (READ) + // read transaction won't have non-read efforts + auto read_src_label = labels_by_src.find(src_t::READ)->second; + auto read_effort_label = labels_by_effort.find("READ")->second; + auto& read_efforts = get_counter(stats.invalidated_efforts_by_src, src_t::READ); + read_efforts = {}; + metrics.add_group( + "cache", + { + sm::make_counter( + "invalidated_extents", + read_efforts.read.extents, + sm::description("extents of invalidated transactions"), + {read_src_label, read_effort_label} + ), + sm::make_counter( + "invalidated_extent_bytes", + read_efforts.read.bytes, + sm::description("extent bytes of invalidated transactions"), + {read_src_label, read_effort_label} + ), + } + ); - auto& efforts = get_counter(efforts_by_src, src); - for (auto& [effort_name, _label] : labels_by_effort) { - auto& effort = efforts.get_by_name(effort_name); - for (auto& counter_name : {"EXTENTS", "BYTES"}) { - auto& value = effort.get_by_name(counter_name); - register_effort(category, src, effort_name, counter_name, value); + // by-extent committed efforts + for (auto& [src, src_label] : labels_by_src) { + if (src == src_t::READ) { + // READ transaction won't commit + continue; + } + auto& efforts = get_counter(stats.committed_efforts_by_src, src); + for (auto& [effort_name, effort_label] : labels_by_effort) { + auto& effort_by_ext = [&efforts, &effort_name]() + -> std::array& { + if (effort_name == "READ") { + return efforts.read_by_ext; + } else if (effort_name == "MUTATE") { + return efforts.mutate_by_ext; + } else if (effort_name == "RETIRE") { + return efforts.retire_by_ext; + } else { + assert(effort_name == "FRESH"); + return efforts.fresh_by_ext; } - if (effort_name == "MUTATE") { - register_effort(category, src, effort_name, "DELTA_BYTES", - efforts.mutate_delta_bytes); + }(); + effort_by_ext.fill({}); + for (auto& [ext, ext_label] : labels_by_ext) { + auto& effort = effort_by_ext[extent_type_to_index(ext)]; + metrics.add_group( + "cache", + { + sm::make_counter( + "committed_extents", + effort.extents, + sm::description("extents of committed transactions"), + {src_label, effort_label, ext_label} + ), + sm::make_counter( + "committed_extent_bytes", + effort.bytes, + sm::description("extent bytes of committed transactions"), + {src_label, effort_label, ext_label} + ), + } + ); + } // ext + } // effort_name + + auto& delta_by_ext = efforts.delta_bytes_by_ext; + delta_by_ext.fill(0); + for (auto& [ext, ext_label] : labels_by_ext) { + auto& value = delta_by_ext[extent_type_to_index(ext)]; + metrics.add_group( + "cache", + { + sm::make_counter( + "committed_delta_bytes", + value, + sm::description("delta bytes of committed transactions"), + {src_label, ext_label} + ), } - } // effort_name - } // src - } // category + ); + } // ext + } // src /** * read_effort_successful @@ -609,44 +674,42 @@ void Cache::invalidate(Transaction& t, CachedExtent& conflicting_extent) assert(!t.conflicted); DEBUGT("set conflict", t); t.conflicted = true; + auto m_key = std::make_pair( t.get_src(), conflicting_extent.get_type()); assert(stats.trans_invalidated.count(m_key)); ++(stats.trans_invalidated[m_key]); + auto& efforts = get_counter(stats.invalidated_efforts_by_src, t.get_src()); - measure_efforts(t, efforts); - for (auto &i: t.mutated_block_list) { - if (!i->is_valid()) { - continue; - } - ++efforts.mutate.extents; - efforts.mutate.bytes += i->get_length(); - efforts.mutate_delta_bytes += i->get_delta().length(); - } -} - -void Cache::measure_efforts(Transaction& t, trans_efforts_t& efforts) -{ efforts.read.extents += t.read_set.size(); for (auto &i: t.read_set) { efforts.read.bytes += i.ref->get_length(); } + if (t.get_src() != Transaction::src_t::READ) { + efforts.retire.extents += t.retired_set.size(); + for (auto &i: t.retired_set) { + efforts.retire.bytes += i->get_length(); + } - efforts.retire.extents += t.retired_set.size(); - for (auto &i: t.retired_set) { - efforts.retire.bytes += i->get_length(); - } + efforts.fresh.extents += t.fresh_block_list.size(); + for (auto &i: t.fresh_block_list) { + efforts.fresh.bytes += i->get_length(); + } - efforts.fresh.extents += t.fresh_block_list.size(); - for (auto &i: t.fresh_block_list) { - efforts.fresh.bytes += i->get_length(); + for (auto &i: t.mutated_block_list) { + if (!i->is_valid()) { + continue; + } + efforts.mutate.increment(i->get_length()); + efforts.mutate_delta_bytes += i->get_delta().length(); + } + } else { + // read transaction won't have non-read efforts + assert(t.retired_set.empty()); + assert(t.fresh_block_list.empty()); + assert(t.mutated_block_list.empty()); } - - /** - * Mutated blocks are special because CachedExtent::get_delta() is not - * idempotent, so they need to be dealt later. - */ } void Cache::on_transaction_destruct(Transaction& t) @@ -658,13 +721,15 @@ void Cache::on_transaction_destruct(Transaction& t) DEBUGT("read is successful", t); ++stats.read_transactions_successful; + auto& effort = stats.read_effort_successful; + effort.extents += t.read_set.size(); + for (auto &i: t.read_set) { + effort.bytes += i.ref->get_length(); + } + // read transaction won't have non-read efforts assert(t.retired_set.empty()); assert(t.fresh_block_list.empty()); assert(t.mutated_block_list.empty()); - stats.read_effort_successful.extents += t.read_set.size(); - for (auto &i: t.read_set) { - stats.read_effort_successful.bytes += i.ref->get_length(); - } } } @@ -741,11 +806,12 @@ record_t Cache::prepare_record(Transaction &t) ++(get_counter(stats.trans_committed_by_src, t.get_src())); auto& efforts = get_counter(stats.committed_efforts_by_src, t.get_src()); - measure_efforts(t, efforts); // Should be valid due to interruptible future for (auto &i: t.read_set) { assert(i.ref->is_valid()); + get_by_ext(efforts.read_by_ext, + i.ref->get_type()).increment(i.ref->get_length()); } DEBUGT("read_set validated", t); t.read_set.clear(); @@ -762,8 +828,8 @@ record_t Cache::prepare_record(Transaction &t) continue; } DEBUGT("mutating {}", t, *i); - ++efforts.mutate.extents; - efforts.mutate.bytes += i->get_length(); + get_by_ext(efforts.mutate_by_ext, + i->get_type()).increment(i->get_length()); assert(i->prior_instance); replace_extent(i, i->prior_instance); @@ -805,19 +871,24 @@ record_t Cache::prepare_record(Transaction &t) } auto delta_length = record.deltas.back().bl.length(); assert(delta_length); - efforts.mutate_delta_bytes += delta_length; + get_by_ext(efforts.delta_bytes_by_ext, + i->get_type()) += delta_length; } // Transaction is now a go, set up in-memory cache state // invalidate now invalid blocks for (auto &i: t.retired_set) { DEBUGT("retiring {}", t, *i); + get_by_ext(efforts.retire_by_ext, + i->get_type()).increment(i->get_length()); retire_extent(i); } record.extents.reserve(t.fresh_block_list.size()); for (auto &i: t.fresh_block_list) { DEBUGT("fresh block {}", t, *i); + get_by_ext(efforts.fresh_by_ext, + i->get_type()).increment(i->get_length()); bufferlist bl; i->prepare_write(); bl.append(i->get_bptr()); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 7d556a3d55f..35d175360b2 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -618,6 +618,11 @@ private: return bytes; } } + + void increment(uint64_t extent_len) { + ++extents; + bytes += extent_len; + } }; struct trans_efforts_t { @@ -641,10 +646,19 @@ private: } }; + struct trans_byextent_efforts_t { + std::array read_by_ext; + std::array mutate_by_ext; + std::array delta_bytes_by_ext; + std::array retire_by_ext; + std::array fresh_by_ext; + }; + struct { std::array trans_created_by_src; std::array trans_committed_by_src; - std::array committed_efforts_by_src; + std::array committed_efforts_by_src; std::unordered_map> trans_invalidated; std::array invalidated_efforts_by_src; @@ -663,6 +677,13 @@ private: return counters_by_src[static_cast(src)]; } + template + CounterT& get_by_ext( + std::array& counters_by_ext, + extent_types_t ext) { + return counters_by_ext[extent_type_to_index(ext)]; + } + seastar::metrics::metric_group metrics; void register_metrics(); @@ -702,9 +723,6 @@ private: /// Mark a valid transaction as conflicted void invalidate(Transaction& t, CachedExtent& conflicting_extent); - /// Measure efforts of a submitting/invalidating transaction - void measure_efforts(Transaction& t, trans_efforts_t& efforts); - /// Introspect transaction when it is being destructed void on_transaction_destruct(Transaction& t); diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 53316be75c8..372461a70e4 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -347,6 +347,25 @@ enum class extent_types_t : uint8_t { NONE = 0xFF }; +// FIXME: reassign extent_types_t values instead +inline uint8_t extent_type_to_index(extent_types_t type) { + auto value = static_cast(type); + if (value <= 9) { + return value; + } + switch (type) { + case extent_types_t::RBM_ALLOC_INFO: + return 10; + case extent_types_t::TEST_BLOCK: + return 11; + case extent_types_t::TEST_BLOCK_PHYSICAL: + return 12; + default: + ceph_abort("impossible path"); + }; +} +constexpr uint8_t EXTENT_TYPES_MAX = 13; + inline bool is_logical_type(extent_types_t type) { switch (type) { case extent_types_t::ROOT: -- 2.39.5