From 56caef2ceb5164e94a5afa8d0d2227eb56598c5a Mon Sep 17 00:00:00 2001 From: Milind Changire Date: Fri, 14 Oct 2022 13:48:30 +0530 Subject: [PATCH] mds: collect and propagate scrub status to rank 0 Signed-off-by: Milind Changire --- src/mds/CInode.cc | 3 ++ src/mds/CInode.h | 3 +- src/mds/MDCache.cc | 6 ++++ src/mds/ScrubHeader.h | 15 ++++++++++ src/mds/ScrubStack.cc | 7 +++-- src/mds/ScrubStack.h | 1 + src/messages/MMDSScrubStats.h | 52 ++++++++++++++++++++++++++++++++++- 7 files changed, 83 insertions(+), 4 deletions(-) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 2bb493b79fa..1207302d6f6 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -5284,6 +5284,7 @@ void CInode::scrub_maybe_delete_info() { if (scrub_infop && !scrub_infop->scrub_in_progress && + !scrub_infop->uninline_in_progress && !scrub_infop->last_scrub_dirty) { scrub_infop.reset(); } @@ -5295,6 +5296,7 @@ void CInode::scrub_initialize(ScrubHeaderRef& header) scrub_info(); scrub_infop->scrub_in_progress = true; + scrub_infop->uninline_in_progress = true; scrub_infop->queued_frags.clear(); scrub_infop->header = header; header->inc_num_pending(); @@ -5306,6 +5308,7 @@ void CInode::scrub_aborted() { ceph_assert(scrub_is_in_progress()); scrub_infop->scrub_in_progress = false; + scrub_infop->uninline_in_progress = false; scrub_infop->header->dec_num_pending(); scrub_maybe_delete_info(); } diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 92486d0f343..f5123ded6f2 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -305,6 +305,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counterscrub_in_progress); + return (scrub_infop && (scrub_infop->scrub_in_progress || scrub_infop->uninline_in_progress)); } /** * Start scrubbing on this inode. 
That could be very short if it's diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index ec4acae3303..3bd632abb44 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -13191,10 +13191,13 @@ class C_MDC_DataUninlinedSubmitted : public MDCacheLogContext { if (r) { dout(20) << "(uninline_data) log submission failed; r=" << r << " (" << cpp_strerror(r) << ") for " << *in << dendl; + ceph_assert(in->get_scrub_header()); + in->get_scrub_header()->record_uninline_status(in->ino(), r); } else { dout(20) << "(uninline_data) log submission succeeded for " << *in << dendl; in->mdcache->logger->inc(l_mdc_uninline_succeeded); } + const_cast(in->scrub_info())->uninline_in_progress = false; mdr->apply(); mds->server->respond_to_request(mdr, r); @@ -13220,6 +13223,8 @@ struct C_IO_DataUninlined : public MDSIOContext { derr << "(uninline_data) mutation failed: r=" << r << " (" << cpp_strerror(r) << ") for " << *in << dendl; in->mdcache->logger->inc(l_mdc_uninline_write_failed); + ceph_assert(in->get_scrub_header()); + in->get_scrub_header()->record_uninline_status(in->ino(), r); mds->server->respond_to_request(mdr, r); return; } @@ -13279,6 +13284,7 @@ void MDCache::uninline_data_work(MDRequestRef mdr) if (!in->has_inline_data()) { dout(20) << "(uninline_data) inode doesn't have inline data anymore " << *in << dendl; + const_cast(in->scrub_info())->uninline_in_progress = false; mds->server->respond_to_request(mdr, 0); return; } diff --git a/src/mds/ScrubHeader.h b/src/mds/ScrubHeader.h index a5d35f61ce4..d8b90f53f10 100644 --- a/src/mds/ScrubHeader.h +++ b/src/mds/ScrubHeader.h @@ -27,6 +27,7 @@ class Formatter; }; class CInode; +class MDCache; /** * Externally input parameters for a scrub, associated with the root @@ -64,6 +65,18 @@ public: } unsigned get_num_pending() const { return num_pending; } + void record_uninline_status(_inodeno_t ino, int e) { + if (uninline_failed_info.find(e) == uninline_failed_info.end()) { + uninline_failed_info[e] = 
std::vector<_inodeno_t>(); + } + auto& v = uninline_failed_info.at(e); + v.push_back(ino); + } + + std::unordered_map<int, std::vector<_inodeno_t>>& get_uninline_failed_info() { + return uninline_failed_info; + } + protected: const std::string tag; bool is_tag_internal; @@ -76,6 +89,8 @@ protected: bool repaired = false; // May be set during scrub if repairs happened unsigned epoch_last_forwarded = 0; unsigned num_pending = 0; + // errno -> [ino1, ino2, ino3, ...] + std::unordered_map<int, std::vector<_inodeno_t>> uninline_failed_info; }; typedef std::shared_ptr<ScrubHeader> ScrubHeaderRef; diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc index 5a3994dc688..8a4d724ed0a 100644 --- a/src/mds/ScrubStack.cc +++ b/src/mds/ScrubStack.cc @@ -1054,6 +1054,8 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m) bool any_finished = false; bool any_repaired = false; std::set<std::string> scrubbing_tags; + std::unordered_map<std::string, std::unordered_map<int, std::vector<_inodeno_t>>> uninline_failed_meta_info; + for (auto it = scrubbing_map.begin(); it != scrubbing_map.end(); ) { auto& header = it->second; if (header->get_num_pending() || @@ -1076,7 +1078,9 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m) scrub_epoch = m->get_epoch(); auto ack = make_message<MMDSScrubStats>(scrub_epoch, - std::move(scrubbing_tags), clear_stack); + std::move(scrubbing_tags), + std::move(uninline_failed_meta_info), + clear_stack); mdcache->mds->send_message_mds(ack, 0); if (any_finished) @@ -1192,7 +1196,6 @@ void ScrubStack::advance_scrub_status() } ++scrub_epoch; - for (auto& r : up_mds) { if (r == 0) continue; diff --git a/src/mds/ScrubStack.h b/src/mds/ScrubStack.h index 13df9d4a331..5f486a170f1 100644 --- a/src/mds/ScrubStack.h +++ b/src/mds/ScrubStack.h @@ -139,6 +139,7 @@ protected: unsigned epoch_acked = 0; std::set<std::string> scrubbing_tags; bool aborting = false; + std::unordered_map<std::string, std::unordered_map<int, std::vector<_inodeno_t>>> uninline_failed_meta_info; }; std::vector mds_scrub_stats; diff --git a/src/messages/MMDSScrubStats.h b/src/messages/MMDSScrubStats.h index 2cbb7f2f211..200c5cc10c8 100644 --- a/src/messages/MMDSScrubStats.h +++ b/src/messages/MMDSScrubStats.h @@ 
-18,7 +18,7 @@ #include "messages/MMDSOp.h" class MMDSScrubStats : public MMDSOp { - static constexpr int HEAD_VERSION = 1; + static constexpr int HEAD_VERSION = 2; static constexpr int COMPAT_VERSION = 1; public: @@ -38,6 +38,9 @@ public: bool is_finished(const std::string& tag) const { return update_scrubbing && !scrubbing_tags.count(tag); } + const std::unordered_map<std::string, std::unordered_map<int, std::vector<_inodeno_t>>>& get_uninline_failed_meta_info() const { + return uninline_failed_meta_info; + } void encode_payload(uint64_t features) override { using ceph::encode; @@ -45,6 +48,7 @@ public: encode(scrubbing_tags, payload); encode(update_scrubbing, payload); encode(aborting, payload); + encode_uninline_failed_info(); } void decode_payload() override { using ceph::decode; @@ -53,6 +57,44 @@ public: decode(scrubbing_tags, p); decode(update_scrubbing, p); decode(aborting, p); + if (header.version >= 2) { + decode_uninline_failed_info(p); + } + } + + void encode_uninline_failed_info() { + using ceph::encode; + int count = (int)uninline_failed_meta_info.size(); + encode(count, payload); + for (const auto& [tag, meta_info_map] : uninline_failed_meta_info) { + encode(tag, payload); + count = (int)meta_info_map.size(); + encode(count, payload); + for (const auto& [error_code, ino_vec] : meta_info_map) { + encode(error_code, payload); + encode(ino_vec, payload); + } + } + } + void decode_uninline_failed_info(ceph::bufferlist::const_iterator& p) { + using ceph::decode; + int tag_count = 0; + decode(tag_count, p); + while (tag_count--) { + std::string tag; + decode(tag, p); + int count = 0; + decode(count, p); + std::unordered_map<int, std::vector<_inodeno_t>> uninline_failed_info; + while (count--) { + int error_code; + std::vector<_inodeno_t> ino_vec; + decode(error_code, p); + decode(ino_vec, p); + uninline_failed_info[error_code] = std::move(ino_vec); + } + uninline_failed_meta_info[tag] = std::move(uninline_failed_info); + } } protected: @@ -65,6 +107,12 @@ protected: MMDSScrubStats(unsigned e, const std::set<std::string>& tags, bool abrt=false) : 
MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION), epoch(e), scrubbing_tags(tags), update_scrubbing(true), aborting(abrt) {} + MMDSScrubStats(unsigned e, const std::set<std::string>& tags, + std::unordered_map<std::string, std::unordered_map<int, std::vector<_inodeno_t>>>&& ufmi, + bool abrt = false) : + MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION), + epoch(e), scrubbing_tags(tags), update_scrubbing(true), aborting(abrt), + uninline_failed_meta_info(std::move(ufmi)) {} ~MMDSScrubStats() override {} private: @@ -72,6 +120,8 @@ private: std::set<std::string> scrubbing_tags; bool update_scrubbing = false; bool aborting = false; + // tag -> (errno -> [ino1, ino2, ino3, ...]) + std::unordered_map<std::string, std::unordered_map<int, std::vector<_inodeno_t>>> uninline_failed_meta_info; template<class T, typename... Args> friend boost::intrusive_ptr<T> ceph::make_message(Args&&... args); -- 2.39.5