git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: collect and propagate individual scrub stats
author Milind Changire <mchangir@redhat.com>
Wed, 2 Nov 2022 09:42:17 +0000 (15:12 +0530)
committer Milind Changire <mchangir@redhat.com>
Wed, 28 Aug 2024 07:49:39 +0000 (13:19 +0530)
Signed-off-by: Milind Changire <mchangir@redhat.com>
src/mds/MDCache.cc
src/mds/ScrubHeader.h
src/mds/ScrubStack.cc
src/mds/ScrubStack.h
src/messages/MMDSScrubStats.h

index 72233b886b1314a36c819060f8f7d617fe92b86c..3a53bd2e1f5156ecdf50c0f5c7870e34168a4975 100644 (file)
@@ -13197,6 +13197,8 @@ class C_MDC_DataUninlinedSubmitted : public MDCacheLogContext {
     ceph_assert(r == 0);
 
     in->mdcache->logger->inc(l_mdc_uninline_succeeded);
+    auto h = in->get_scrub_header();
+    h->record_uninline_passed();
     in->uninline_finished();
     mdr->apply();
     mds->server->respond_to_request(mdr, r);
@@ -13223,9 +13225,11 @@ struct C_IO_DataUninlined : public MDSIOContext {
           << " (" << cpp_strerror(r) << ") for " << *in << dendl;
       in->mdcache->logger->inc(l_mdc_uninline_write_failed);
       ceph_assert(in->get_scrub_header());
+      auto h = in->get_scrub_header();
+      h->record_uninline_failed();
       std::string path;
       in->make_path_string(path);
-      in->get_scrub_header()->record_uninline_status(in->ino(), r, path);
+      h->record_uninline_status(in->ino(), r, path);
       in->uninline_finished();
       mds->server->respond_to_request(mdr, r);
       return;
@@ -13292,6 +13296,8 @@ void MDCache::uninline_data_work(MDRequestRef mdr)
   }
 
   logger->inc(l_mdc_uninline_started);
+  auto h = in->get_scrub_header();
+  h->record_uninline_started();
   in->uninline_initialize();
 
   auto ino = [&]() { return in->ino(); };
index 6d255ca99e03d25230f87c7c37a8a76f6f02e6c2..34105b34503cb6708ac2560ad33730c2e0008065 100644 (file)
@@ -82,6 +82,25 @@ public:
     return paths;
   }
 
+  // Bump the count of uninline operations started under this scrub header.
+  void record_uninline_started() {
+    ++uninline_started;
+  }
+  // Bump the count of uninline operations that completed successfully.
+  void record_uninline_passed() {
+    ++uninline_passed;
+  }
+  // Bump the count of uninline operations that failed.
+  void record_uninline_failed() {
+    ++uninline_failed;
+  }
+  // Current value of the "uninline started" counter.
+  uint64_t get_uninline_started() const {
+    return this->uninline_started;
+  }
+  // Current value of the "uninline passed" counter.
+  uint64_t get_uninline_passed() const {
+    return this->uninline_passed;
+  }
+  // Current value of the "uninline failed" counter.
+  uint64_t get_uninline_failed() const {
+    return this->uninline_failed;
+  }
+
 protected:
   const std::string tag;
   bool is_tag_internal;
@@ -97,6 +116,11 @@ protected:
   // errno -> [ino1, ino2, ino3, ...]
   std::unordered_map<int, std::vector<_inodeno_t>> uninline_failed_info;
   std::unordered_map<_inodeno_t, std::string> paths;
+
+  // scrub counters
+  uint64_t uninline_started = 0;
+  uint64_t uninline_passed = 0;
+  uint64_t uninline_failed = 0;
 };
 
 typedef std::shared_ptr<ScrubHeader> ScrubHeaderRef;
index bb586173d9f5b7aca239a9d4e410340f0ff30693..63bf584b2bcf9af6d0702d7cb30777a3277e634a 100644 (file)
@@ -108,6 +108,14 @@ int ScrubStack::_enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, bool top)
   return 0;
 }
 
+// Reset the scrub counters stored under `tag` in every entry of
+// mds_scrub_stats.
+//
+// Each entry receives a fresh scrub_counters_t stamped with the current
+// real_clock time, `path` as the origin path and all uninline counters at
+// zero. Counters already stored under `tag` are overwritten.
+void ScrubStack::init_scrub_counters(std::string_view path, std::string_view tag)
+{
+  // The map key and the initial value are loop-invariant: build them once
+  // instead of allocating a new std::string per mds_scrub_stats entry.
+  const std::string key(tag);
+  const scrub_counters_t fresh{real_clock::now(), std::string(path), 0, 0, 0};
+  for (auto& stat : mds_scrub_stats) {
+    stat.counters[key] = fresh;
+  }
+}
+
 int ScrubStack::enqueue(CInode *in, ScrubHeaderRef& header, bool top)
 {
   // abort in progress
@@ -135,6 +143,10 @@ int ScrubStack::enqueue(CInode *in, ScrubHeaderRef& header, bool top)
     //to make sure mdsdir is always on the top
     top = false;
   }
+
+  std::string path;
+  in->make_path_string(path);
+  init_scrub_counters(path, header->get_tag());
   int r = _enqueue(in, header, top);
   if (r < 0)
     return r;
@@ -940,6 +952,7 @@ void ScrubStack::handle_scrub(const cref_t<MMDSScrub> &m)
          header->set_origin(m->get_origin());
          scrubbing_map.emplace(header->get_tag(), header);
        }
+
        for (auto dir : dfs) {
          queued.insert_raw(dir->get_frag());
          _enqueue(dir, header, true);
@@ -1059,6 +1072,7 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)
     std::set<std::string> scrubbing_tags;
     std::unordered_map<std::string, unordered_map<int, std::vector<_inodeno_t>>> uninline_failed_meta_info;
     std::unordered_map<_inodeno_t, std::string> paths;
+    std::unordered_map<std::string, std::vector<uint64_t>> counters;
 
     for (auto it = scrubbing_map.begin(); it != scrubbing_map.end(); ) {
       auto& header = it->second;
@@ -1075,6 +1089,11 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)
        ufi.clear();
        paths.merge(header->get_paths());
        ceph_assert(header->get_paths().size() == 0);
+       std::vector<uint64_t> c{header->get_uninline_started(),
+                               header->get_uninline_passed(),
+                               header->get_uninline_failed()
+       };
+       counters[header->get_tag()] = c;
        scrubbing_map.erase(it++);
       } else {
        ++it;
@@ -1087,6 +1106,7 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)
                                            std::move(scrubbing_tags),
                                            std::move(uninline_failed_meta_info),
                                            std::move(paths),
+                                           std::move(counters),
                                            clear_stack);
     mdcache->mds->send_message_mds(ack, 0);
 
@@ -1105,6 +1125,11 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)
        stat.uninline_failed_meta_info[scrub_tag] = errno_map;
       }
       stat.paths.insert(m->get_paths().begin(), m->get_paths().end());;
+      for (auto& [tag, v] : m->get_counters()) {
+       stat.counters[tag].uninline_started = v[0];
+       stat.counters[tag].uninline_passed = v[1];
+       stat.counters[tag].uninline_failed = v[2];
+      }
     }
   }
 }
@@ -1126,6 +1151,9 @@ void ScrubStack::move_uninline_failures_to_damage_table()
     }
     ufmi.clear();
     paths.clear();
+    // do not clear the counters map; we'll clear them later:
+    // - on user request or
+    // - after a grace period
   }
 }
 
@@ -1196,6 +1224,13 @@ void ScrubStack::advance_scrub_status()
       ufmi[it->first] = header->get_uninline_failed_info();
       mds_scrub_stats[0].paths.merge(header->get_paths());
       move_uninline_failures_to_damage_table();
+
+      auto& c = mds_scrub_stats[0].counters;
+      auto& sc = c[header->get_tag()];
+      sc.uninline_started = header->get_uninline_started();
+      sc.uninline_passed = header->get_uninline_passed();
+      sc.uninline_failed = header->get_uninline_failed();
+
       scrubbing_map.erase(it++);
     } else {
       ++it;
index 97ec39d152df659120e5c38c85bbd3c3caea4cc7..5030ae813491c2d963c92ed340394113f186cf93 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "common/LogClient.h"
 #include "common/Cond.h"
+#include "common/ceph_time.h"
 #include "include/elist.h"
 #include "messages/MMDSScrub.h"
 #include "messages/MMDSScrubStats.h"
@@ -106,6 +107,8 @@ public:
 
   void move_uninline_failures_to_damage_table();
 
+  void init_scrub_counters(std::string_view path, std::string_view tag);
+
   MDCache *mdcache;
 
 protected:
@@ -135,12 +138,20 @@ protected:
   // check if any mds is aborting scrub after mds.0 starts
   bool scrub_any_peer_aborting = true;
 
+  // Bookkeeping kept per scrub tag: the time and origin path recorded when
+  // the counters were initialized, plus the uninline counters.
+  struct scrub_counters_t {
+    ceph::real_clock::time_point start_time;
+    std::string origin_path;
+    // Zero-initialized, like the scalar members of scrub_stat_t, so that a
+    // default-initialized object never carries indeterminate counts.
+    uint64_t uninline_started = 0;
+    uint64_t uninline_passed = 0;
+    uint64_t uninline_failed = 0;
+  };
   struct scrub_stat_t { // element type of mds_scrub_stats
     unsigned epoch_acked = 0;
     std::set<std::string> scrubbing_tags;
     bool aborting = false;
     std::unordered_map<std::string, std::unordered_map<int, std::vector<_inodeno_t>>> uninline_failed_meta_info; // map(scrub_tag -> map(error code -> inode numbers))
     std::unordered_map<_inodeno_t, std::string> paths; // map(inode number -> path string)
+    std::unordered_map<std::string, scrub_counters_t> counters; // map(scrub_tag -> counters)
   };
   std::vector<scrub_stat_t> mds_scrub_stats;
 
index d25ea104d518f53220fa915ac941e6151643d83c..1f3e70d38430343dd8d1ad4f90bdf6bc2cea3bea 100644 (file)
@@ -44,6 +44,9 @@ public:
   const std::unordered_map<_inodeno_t, std::string>& get_paths() const {
     return paths; // read-only reference to this message's inode number -> path map
   }
+  // Read-only access to the counters carried by this message:
+  // scrub tag -> {uninline started, uninline passed, uninline failed}.
+  const std::unordered_map<std::string, std::vector<uint64_t>>& get_counters() const {
+    return this->counters;
+  }
 
   void encode_payload(uint64_t features) override {
     using ceph::encode;
@@ -84,6 +87,18 @@ public:
       encode(ino, payload);
       encode(path, payload);
     }
+    count = (int)counters.size();
+    encode(count, payload);
+    for (auto& [tag, v] : counters) {
+      encode(tag, payload);
+      uint64_t started = v[0];
+      uint64_t passed = v[1];
+      uint64_t failed = v[2];
+
+      encode(started, payload);
+      encode(passed, payload);
+      encode(failed, payload);
+    }
   }
   void decode_uninline_failed_info(ceph::bufferlist::const_iterator& p) {
     using ceph::decode;
@@ -113,6 +128,21 @@ public:
       decode(path, p);
       paths[ino] = path;
     }
+    count = 0;
+    decode(count, p);
+    while (count--) {
+      std::string tag;
+      decode(tag, p);
+      uint64_t started = 0;
+      uint64_t passed = 0;
+      uint64_t failed = 0;
+
+      decode(started, p);
+      decode(passed, p);
+      decode(failed, p);
+      std::vector<uint64_t> c{started, passed, failed};
+      counters[tag] = c;
+    }
   }
 
 protected:
@@ -127,10 +157,13 @@ protected:
     epoch(e), scrubbing_tags(tags), update_scrubbing(true), aborting(abrt) {}
   MMDSScrubStats(unsigned e, const std::set<std::string>& tags,
     std::unordered_map<std::string, std::unordered_map<int, std::vector<_inodeno_t>>>&& ufmi,
-    std::unordered_map<_inodeno_t, std::string>&& paths_, bool abrt = false) :
+    std::unordered_map<_inodeno_t, std::string>&& paths_,
+    std::unordered_map<std::string, std::vector<uint64_t>>&& counters_,
+    bool abrt = false) :
     MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION),
     epoch(e), scrubbing_tags(tags), update_scrubbing(true), aborting(abrt),
-    uninline_failed_meta_info(std::move(ufmi)), paths(std::move(paths_)) {}
+    uninline_failed_meta_info(std::move(ufmi)), paths(std::move(paths_)),
+    counters(std::move(counters_)) {}
   ~MMDSScrubStats() override {}
 
 private:
@@ -141,6 +174,7 @@ private:
   // <tag, <error_code, [ino1, ino2, ...]>>
   std::unordered_map<std::string, std::unordered_map<int, std::vector<_inodeno_t>>> uninline_failed_meta_info;
   std::unordered_map<_inodeno_t, std::string> paths;
+  std::unordered_map<std::string, std::vector<uint64_t>> counters;
 
   template<class T, typename... Args>
   friend boost::intrusive_ptr<T> ceph::make_message(Args&&... args);