From c7cd7f92e9f29fdab81d4021ba42930efcc12e4b Mon Sep 17 00:00:00 2001 From: Milind Changire Date: Wed, 2 Nov 2022 15:12:17 +0530 Subject: [PATCH] mds: collect and propagate individual scrub stats Signed-off-by: Milind Changire --- src/mds/MDCache.cc | 8 +++++++- src/mds/ScrubHeader.h | 24 ++++++++++++++++++++++ src/mds/ScrubStack.cc | 35 ++++++++++++++++++++++++++++++++ src/mds/ScrubStack.h | 11 ++++++++++ src/messages/MMDSScrubStats.h | 38 +++++++++++++++++++++++++++++++++-- 5 files changed, 113 insertions(+), 3 deletions(-) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 72233b886b131..3a53bd2e1f515 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -13197,6 +13197,8 @@ class C_MDC_DataUninlinedSubmitted : public MDCacheLogContext { ceph_assert(r == 0); in->mdcache->logger->inc(l_mdc_uninline_succeeded); + auto h = in->get_scrub_header(); + h->record_uninline_passed(); in->uninline_finished(); mdr->apply(); mds->server->respond_to_request(mdr, r); @@ -13223,9 +13225,11 @@ struct C_IO_DataUninlined : public MDSIOContext { << " (" << cpp_strerror(r) << ") for " << *in << dendl; in->mdcache->logger->inc(l_mdc_uninline_write_failed); ceph_assert(in->get_scrub_header()); + auto h = in->get_scrub_header(); + h->record_uninline_failed(); std::string path; in->make_path_string(path); - in->get_scrub_header()->record_uninline_status(in->ino(), r, path); + h->record_uninline_status(in->ino(), r, path); in->uninline_finished(); mds->server->respond_to_request(mdr, r); return; @@ -13292,6 +13296,8 @@ void MDCache::uninline_data_work(MDRequestRef mdr) } logger->inc(l_mdc_uninline_started); + auto h = in->get_scrub_header(); + h->record_uninline_started(); in->uninline_initialize(); auto ino = [&]() { return in->ino(); }; diff --git a/src/mds/ScrubHeader.h b/src/mds/ScrubHeader.h index 6d255ca99e03d..34105b34503cb 100644 --- a/src/mds/ScrubHeader.h +++ b/src/mds/ScrubHeader.h @@ -82,6 +82,25 @@ public: return paths; } + void record_uninline_started() { + uninline_started++; + } + void record_uninline_passed() { + uninline_passed++; + } + void record_uninline_failed() { + uninline_failed++; + } + uint64_t get_uninline_started() const { + return uninline_started; + } + uint64_t get_uninline_passed() const { + return uninline_passed; + } + uint64_t get_uninline_failed() const { + return uninline_failed; + } + protected: const std::string tag; bool is_tag_internal; @@ -97,6 +116,11 @@ protected: // errno -> [ino1, ino2, ino3, ...] std::unordered_map> uninline_failed_info; std::unordered_map<_inodeno_t, std::string> paths; + + // scrub counters + uint64_t uninline_started = 0; + uint64_t uninline_passed = 0; + uint64_t uninline_failed = 0; }; typedef std::shared_ptr ScrubHeaderRef; diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc index bb586173d9f5b..63bf584b2bcf9 100644 --- a/src/mds/ScrubStack.cc +++ b/src/mds/ScrubStack.cc @@ -108,6 +108,14 @@ int ScrubStack::_enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, bool top) return 0; } +void ScrubStack::init_scrub_counters(std::string_view path, std::string_view tag) +{ + scrub_counters_t sc{real_clock::now(), std::string(path), 0, 0, 0}; + for (auto& stat : mds_scrub_stats) { + stat.counters[std::string(tag)] = sc; + } +} + int ScrubStack::enqueue(CInode *in, ScrubHeaderRef& header, bool top) { // abort in progress @@ -135,6 +143,10 @@ int ScrubStack::enqueue(CInode *in, ScrubHeaderRef& header, bool top) //to make sure mdsdir is always on the top top = false; } + + std::string path; + in->make_path_string(path); + init_scrub_counters(path, header->get_tag()); int r = _enqueue(in, header, top); if (r < 0) return r; @@ -940,6 +952,7 @@ void ScrubStack::handle_scrub(const cref_t &m) header->set_origin(m->get_origin()); scrubbing_map.emplace(header->get_tag(), header); } + for (auto dir : dfs) { queued.insert_raw(dir->get_frag()); _enqueue(dir, header, true); @@ -1059,6 +1072,7 @@ void ScrubStack::handle_scrub_stats(const cref_t &m) std::set scrubbing_tags; std::unordered_map>> uninline_failed_meta_info; std::unordered_map<_inodeno_t, std::string> paths; + std::unordered_map> counters; for (auto it = scrubbing_map.begin(); it != scrubbing_map.end(); ) { auto& header = it->second; @@ -1075,6 +1089,11 @@ void ScrubStack::handle_scrub_stats(const cref_t &m) ufi.clear(); paths.merge(header->get_paths()); ceph_assert(header->get_paths().size() == 0); + std::vector c{header->get_uninline_started(), + header->get_uninline_passed(), + header->get_uninline_failed() + }; + counters[header->get_tag()] = c; scrubbing_map.erase(it++); } else { ++it; @@ -1087,6 +1106,7 @@ void ScrubStack::handle_scrub_stats(const cref_t &m) std::move(scrubbing_tags), std::move(uninline_failed_meta_info), std::move(paths), + std::move(counters), clear_stack); mdcache->mds->send_message_mds(ack, 0); @@ -1105,6 +1125,11 @@ void ScrubStack::handle_scrub_stats(const cref_t &m) stat.uninline_failed_meta_info[scrub_tag] = errno_map; } stat.paths.insert(m->get_paths().begin(), m->get_paths().end());; + for (auto& [tag, v] : m->get_counters()) { + stat.counters[tag].uninline_started = v[0]; + stat.counters[tag].uninline_passed = v[1]; + stat.counters[tag].uninline_failed = v[2]; + } } } } @@ -1126,6 +1151,9 @@ void ScrubStack::move_uninline_failures_to_damage_table() } ufmi.clear(); paths.clear(); + // do not clear the counters map; we'll clear them later: + // - on user request or + // - after a grace period } } @@ -1196,6 +1224,13 @@ void ScrubStack::advance_scrub_status() ufmi[it->first] = header->get_uninline_failed_info(); mds_scrub_stats[0].paths.merge(header->get_paths()); move_uninline_failures_to_damage_table(); + + auto& c = mds_scrub_stats[0].counters; + auto& sc = c[header->get_tag()]; + sc.uninline_started = header->get_uninline_started(); + sc.uninline_passed = header->get_uninline_passed(); + sc.uninline_failed = header->get_uninline_failed(); + scrubbing_map.erase(it++); } else { ++it; diff --git a/src/mds/ScrubStack.h b/src/mds/ScrubStack.h index 97ec39d152df6..5030ae813491c 100644 --- a/src/mds/ScrubStack.h +++ b/src/mds/ScrubStack.h @@ -23,6 +23,7 @@ #include "common/LogClient.h" #include "common/Cond.h" +#include "common/ceph_time.h" #include "include/elist.h" #include "messages/MMDSScrub.h" #include "messages/MMDSScrubStats.h" @@ -106,6 +107,8 @@ public: void move_uninline_failures_to_damage_table(); + void init_scrub_counters(std::string_view path, std::string_view tag); + MDCache *mdcache; protected: @@ -135,12 +138,20 @@ protected: // check if any mds is aborting scrub after mds.0 starts bool scrub_any_peer_aborting = true; + struct scrub_counters_t { + ceph::real_clock::time_point start_time; + std::string origin_path; + uint64_t uninline_started; + uint64_t uninline_passed; + uint64_t uninline_failed; + }; struct scrub_stat_t { unsigned epoch_acked = 0; std::set scrubbing_tags; bool aborting = false; std::unordered_map>> uninline_failed_meta_info; std::unordered_map<_inodeno_t, std::string> paths; + std::unordered_map counters; // map(scrub_tag -> counters) }; std::vector mds_scrub_stats; diff --git a/src/messages/MMDSScrubStats.h b/src/messages/MMDSScrubStats.h index d25ea104d518f..1f3e70d384303 100644 --- a/src/messages/MMDSScrubStats.h +++ b/src/messages/MMDSScrubStats.h @@ -44,6 +44,9 @@ public: const std::unordered_map<_inodeno_t, std::string>& get_paths() const { return paths; } + const std::unordered_map>& get_counters() const { + return counters; + } void encode_payload(uint64_t features) override { using ceph::encode; @@ -84,6 +87,18 @@ public: encode(ino, payload); encode(path, payload); } + count = (int)counters.size(); + encode(count, payload); + for (auto& [tag, v] : counters) { + encode(tag, payload); + uint64_t started = v[0]; + uint64_t passed = v[1]; + uint64_t failed = v[2]; + + encode(started, payload); + encode(passed, payload); + encode(failed, payload); + } } void decode_uninline_failed_info(ceph::bufferlist::const_iterator& p) { using ceph::decode; @@ -113,6 +128,21 @@ public: decode(path, p); paths[ino] = path; } + count = 0; + decode(count, p); + while (count--) { + std::string tag; + decode(tag, p); + uint64_t started = 0; + uint64_t passed = 0; + uint64_t failed = 0; + + decode(started, p); + decode(passed, p); + decode(failed, p); + std::vector c{started, passed, failed}; + counters[tag] = c; + } } protected: @@ -127,10 +157,13 @@ protected: epoch(e), scrubbing_tags(tags), update_scrubbing(true), aborting(abrt) {} MMDSScrubStats(unsigned e, const std::set& tags, std::unordered_map>>&& ufmi, - std::unordered_map<_inodeno_t, std::string>&& paths_, bool abrt = false) : + std::unordered_map<_inodeno_t, std::string>&& paths_, + std::unordered_map>&& counters_, + bool abrt = false) : MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION), epoch(e), scrubbing_tags(tags), update_scrubbing(true), aborting(abrt), - uninline_failed_meta_info(std::move(ufmi)), paths(std::move(paths_)) {} + uninline_failed_meta_info(std::move(ufmi)), paths(std::move(paths_)), + counters(std::move(counters_)) {} ~MMDSScrubStats() override {} private: @@ -141,6 +174,7 @@ private: // > std::unordered_map>> uninline_failed_meta_info; std::unordered_map<_inodeno_t, std::string> paths; + std::unordered_map> counters; template friend boost::intrusive_ptr ceph::make_message(Args&&... args); -- 2.39.5