From 73c4541da3bd78548b242478c08502f2a67faf5c Mon Sep 17 00:00:00 2001
From: Milind Changire
Date: Wed, 2 Nov 2022 17:09:20 +0530
Subject: [PATCH] mds: add manual and auto purging scrub stats feature

Signed-off-by: Milind Changire
---
Review notes (text between "---" and the first "diff --git" is ignored by
git-am and is not part of the commit message):

 * Adds the "scrub purge_status" admin socket command, which is rejected with
   "Not rank 0" on any other rank, and the mds_scrub_stats_review_period
   option (days; min 1, max 60, default 1).  MDSRankDispatcher::tick() on
   rank 0 now calls ScrubStack::purge_old_scrub_counters(), which erases
   per-tag counters whose start_time is at least that many days old.
 * Adds an uninline_skipped counter, incremented in
   MDCache::uninline_data_work() when MDS_INO_IS_MDSDIR() is true for the
   scrub origin, and carries it through ScrubHeader, ScrubStack and
   MMDSScrubStats.
 * NOTE(review): scrub_status() accumulates into `skipped`, but no hunk
   declares it -- confirm the base tree already does.
 * NOTE(review): MMDSScrubStats encodes/decodes a fourth counter and no
   version guard is visible in these hunks -- confirm mixed-version peers.
 * NOTE(review): the template arguments in std::shared_ptr<ScrubHeader> and
   cref_t<MMDSScrubStats> are inferred from the surrounding names, and
   `std::vector c{...}` is kept as written -- confirm against the base tree.
 * NOTE(review): context whitespace and blank-line positions are a best-effort
   reconstruction that satisfies every @@ line count; verify against the tree
   (or apply with --ignore-whitespace).  From:/Signed-off-by: carry no e-mail
   address.

 src/common/options/mds.yaml.in | 11 +++++++
 src/mds/MDCache.cc             |  5 +++
 src/mds/MDSDaemon.cc           |  5 +++
 src/mds/MDSRank.cc             | 18 ++++++++++-
 src/mds/MDSRank.h              |  1 +
 src/mds/ScrubHeader.h          |  7 ++++
 src/mds/ScrubStack.cc          | 59 ++++++++++++++++++++++++++++++++--
 src/mds/ScrubStack.h           | 12 ++++---
 src/messages/MMDSScrubStats.h  |  6 +++-
 9 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in
index dcf3eaac0d6..18efba561ed 100644
--- a/src/common/options/mds.yaml.in
+++ b/src/common/options/mds.yaml.in
@@ -1666,6 +1666,17 @@ options:
   - mds
   flags:
   - runtime
+- name: mds_scrub_stats_review_period
+  type: uint
+  level: advanced
+  desc: Period for which scrub stats will be available for review.
+  long_desc: Number of days for which scrub stats will be available for review since
+    start of scrub operation. After this period, the stats will be auto purged.
+    These stats will not be saved to the disk. So any restart or failover of mds
+    will cause stats to be lost forever.
+  default: 1
+  min: 1
+  max: 60
 - name: mds_session_metadata_threshold
   type: size
   level: advanced
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 3a53bd2e1f5..d1102c4138e 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -13294,6 +13294,11 @@ void MDCache::uninline_data_work(MDRequestRef mdr)
     mds->server->respond_to_request(mdr, 0);
     return;
   }
+  if (MDS_INO_IS_MDSDIR(in->get_scrub_header()->get_origin())) {
+    in->get_scrub_header()->record_uninline_skipped();
+    mds->server->respond_to_request(mdr, 0);
+    return;
+  }
 
   logger->inc(l_mdc_uninline_started);
   auto h = in->get_scrub_header();
diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc
index b31d9c95220..75b608ace77 100644
--- a/src/mds/MDSDaemon.cc
+++ b/src/mds/MDSDaemon.cc
@@ -334,6 +334,11 @@ void MDSDaemon::set_up_admin_socket()
                                      asok_hook,
                                      "Status of scrub operations(s)");
   ceph_assert(r == 0);
+  r = admin_socket->register_command("scrub purge_status "
+                                     "name=tag,type=CephString,req=true",
+                                     asok_hook,
+                                     "Purge status of scrub tag|all");
+  ceph_assert(r == 0);
   r = admin_socket->register_command("tag path name=path,type=CephString"
                                      " name=tag,type=CephString",
                                      asok_hook,
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 1cd742423e6..bb059ca5a7d 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -782,8 +782,10 @@ void MDSRankDispatcher::tick()
       }
     }
 
-    if (whoami == 0)
+    if (whoami == 0) {
       scrubstack->advance_scrub_status();
+      scrubstack->purge_old_scrub_counters();
+    }
   }
 
   if (is_active() || is_stopping()) {
@@ -2965,6 +2967,15 @@ void MDSRankDispatcher::handle_asok_command(
     command_scrub_resume(f);
   } else if (command == "scrub status") {
     command_scrub_status(f);
+  } else if (command == "scrub purge_status") {
+    if (whoami != 0) {
+      *css << "Not rank 0";
+      r = -CEPHFS_EXDEV;
+      goto out;
+    }
+    string tag;
+    cmd_getval(cmdmap, "tag", tag);
+    command_scrub_purge_status(tag);
   } else if (command == "tag path") {
     if (whoami != 0) {
       *css << "Not rank 0";
@@ -3225,6 +3236,11 @@ void MDSRank::command_scrub_status(Formatter *f) {
   scrubstack->scrub_status(f);
 }
 
+void MDSRank::command_scrub_purge_status(std::string_view tag) {
+  std::lock_guard l(mds_lock);
+  scrubstack->purge_scrub_counters(tag);
+}
+
 void MDSRank::command_get_subtrees(Formatter *f)
 {
   ceph_assert(f != NULL);
diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h
index c4a8809b6e1..9135db40c09 100644
--- a/src/mds/MDSRank.h
+++ b/src/mds/MDSRank.h
@@ -509,6 +509,7 @@ class MDSRank {
     // scrub control commands
     void command_scrub_resume(Formatter *f);
     void command_scrub_status(Formatter *f);
+    void command_scrub_purge_status(std::string_view tag);
 
     void command_get_subtrees(Formatter *f);
     void command_export_dir(Formatter *f,
diff --git a/src/mds/ScrubHeader.h b/src/mds/ScrubHeader.h
index 34105b34503..5f112387fec 100644
--- a/src/mds/ScrubHeader.h
+++ b/src/mds/ScrubHeader.h
@@ -91,6 +91,9 @@ public:
   void record_uninline_failed() {
     uninline_failed++;
   }
+  void record_uninline_skipped() {
+    uninline_skipped++;
+  }
   uint64_t get_uninline_started() const {
     return uninline_started;
   }
@@ -100,6 +103,9 @@ public:
   uint64_t get_uninline_failed() const {
     return uninline_failed;
   }
+  uint64_t get_uninline_skipped() const {
+    return uninline_skipped;
+  }
 
 protected:
   const std::string tag;
@@ -121,6 +127,7 @@ protected:
   uint64_t uninline_started = 0;
   uint64_t uninline_passed = 0;
   uint64_t uninline_failed = 0;
+  uint64_t uninline_skipped = 0;
 };
 
 typedef std::shared_ptr<ScrubHeader> ScrubHeaderRef;
diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc
index a4e7c4ee375..acca1d75561 100644
--- a/src/mds/ScrubStack.cc
+++ b/src/mds/ScrubStack.cc
@@ -108,9 +108,51 @@ int ScrubStack::_enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, bool top)
   return 0;
 }
 
+void ScrubStack::purge_scrub_counters(std::string_view tag)
+{
+  for (auto& stat : mds_scrub_stats) {
+    if (tag == "all") {
+      stat.counters.clear();
+    } else {
+      auto it = stat.counters.find(std::string(tag));
+      if (it != stat.counters.end()) {
+        stat.counters.erase(it);
+      }
+    }
+  }
+}
+
+// called from tick
+void ScrubStack::purge_old_scrub_counters()
+{
+  // "mds_scrub_stats_review_period" must be in number of days
+  uint64_t mds_scrub_stats_review_period = g_conf().get_val<uint64_t>("mds_scrub_stats_review_period");
+  auto review_period = ceph::make_timespan(mds_scrub_stats_review_period * 24 * 60 * 60);
+  auto now = coarse_real_clock::now();
+
+  dout(20) << __func__ << " review_period:" << review_period << dendl;
+
+  for (mds_rank_t rank = 0; rank < (mds_rank_t)mds_scrub_stats.size(); rank++) {
+    auto& counters = mds_scrub_stats[rank].counters;
+    for (auto it = counters.begin(); it != counters.end(); ) {
+      auto curr = it;
+      const auto& c = it->second;
+      auto elapsed = now - c.start_time;
+      dout(20) << __func__
+               << " rank(" << rank << ") :"
+               << " elapsed:" << elapsed
+               << dendl;
+      ++it;
+      if (elapsed >= review_period) {
+        counters.erase(curr);
+      }
+    }
+  }
+}
+
 void ScrubStack::init_scrub_counters(std::string_view path, std::string_view tag)
 {
-  scrub_counters_t sc{real_clock::now(), std::string(path), 0, 0, 0};
+  scrub_counters_t sc{coarse_real_clock::now(), std::string(path), 0, 0, 0};
   for (auto& stat : mds_scrub_stats) {
     stat.counters[std::string(tag)] = sc;
   }
@@ -736,15 +778,23 @@ void ScrubStack::scrub_status(Formatter *f) {
           started += c.uninline_started;
           passed += c.uninline_passed;
           failed += c.uninline_failed;
+          skipped += c.uninline_skipped;
         }
       }
       f->open_object_section(tag);
       {
         f->dump_stream("start_time") << ctrs.start_time;
-        f->dump_string("path", (ctrs.origin_path == "" ? "/"s : ctrs.origin_path));
+        std::string path = ctrs.origin_path;
+        if (path == "") {
+          path = "/";
+        } else if (path.starts_with("~mds")) {
+          path = "~mdsdir";
+        }
+        f->dump_string("path", path);
         f->dump_int("uninline_started", started);
         f->dump_int("uninline_passed", passed);
         f->dump_int("uninline_failed", failed);
+        f->dump_int("uninline_skipped", skipped);
       }
       f->close_section(); // tag
     }
@@ -1119,7 +1169,8 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)
           ceph_assert(header->get_paths().size() == 0);
           std::vector c{header->get_uninline_started(),
             header->get_uninline_passed(),
-            header->get_uninline_failed()
+            header->get_uninline_failed(),
+            header->get_uninline_skipped()
           };
           counters[header->get_tag()] = c;
           scrubbing_map.erase(it++);
@@ -1157,6 +1208,7 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)
             stat.counters[tag].uninline_started = v[0];
             stat.counters[tag].uninline_passed = v[1];
             stat.counters[tag].uninline_failed = v[2];
+            stat.counters[tag].uninline_skipped = v[3];
           }
         }
       }
@@ -1258,6 +1310,7 @@ void ScrubStack::advance_scrub_status()
         sc.uninline_started = header->get_uninline_started();
         sc.uninline_passed = header->get_uninline_passed();
         sc.uninline_failed = header->get_uninline_failed();
+        sc.uninline_skipped = header->get_uninline_skipped();
 
         scrubbing_map.erase(it++);
       } else {
diff --git a/src/mds/ScrubStack.h b/src/mds/ScrubStack.h
index 5030ae81349..c921804ba25 100644
--- a/src/mds/ScrubStack.h
+++ b/src/mds/ScrubStack.h
@@ -108,6 +108,9 @@ public:
   void move_uninline_failures_to_damage_table();
 
   void init_scrub_counters(std::string_view path, std::string_view tag);
+  void purge_scrub_counters(std::string_view tag);
+  void purge_old_scrub_counters(); // on tick
+
 
   MDCache *mdcache;
 
@@ -139,11 +142,12 @@ protected:
   bool scrub_any_peer_aborting = true;
 
   struct scrub_counters_t {
-    ceph::real_clock::time_point start_time;
+    ceph::coarse_real_clock::time_point start_time = ceph::coarse_real_clock::now();
     std::string origin_path;
-    uint64_t uninline_started;
-    uint64_t uninline_passed;
-    uint64_t uninline_failed;
+    uint64_t uninline_started = 0;
+    uint64_t uninline_passed = 0;
+    uint64_t uninline_failed = 0;
+    uint64_t uninline_skipped = 0;
   };
   struct scrub_stat_t {
     unsigned epoch_acked = 0;
diff --git a/src/messages/MMDSScrubStats.h b/src/messages/MMDSScrubStats.h
index 1f3e70d3843..9252f99ac8a 100644
--- a/src/messages/MMDSScrubStats.h
+++ b/src/messages/MMDSScrubStats.h
@@ -94,10 +94,12 @@ public:
       uint64_t started = v[0];
       uint64_t passed = v[1];
       uint64_t failed = v[2];
+      uint64_t skipped = v[3];
 
       encode(started, payload);
       encode(passed, payload);
       encode(failed, payload);
+      encode(skipped, payload);
     }
   }
   void decode_uninline_failed_info(ceph::bufferlist::const_iterator& p) {
@@ -136,11 +138,13 @@ public:
       uint64_t started = 0;
       uint64_t passed = 0;
       uint64_t failed = 0;
+      uint64_t skipped = 0;
 
       decode(started, p);
       decode(passed, p);
       decode(failed, p);
-      std::vector c{started, passed, failed};
+      decode(skipped, p);
+      std::vector c{started, passed, failed, skipped};
       counters[tag] = c;
     }
   }
-- 
2.39.5