]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mds: add manual and auto purging scrub stats feature
author Milind Changire <mchangir@redhat.com>
Wed, 2 Nov 2022 11:39:20 +0000 (17:09 +0530)
committer Milind Changire <mchangir@redhat.com>
Wed, 28 Aug 2024 07:58:19 +0000 (13:28 +0530)
Signed-off-by: Milind Changire <mchangir@redhat.com>
src/common/options/mds.yaml.in
src/mds/MDCache.cc
src/mds/MDSDaemon.cc
src/mds/MDSRank.cc
src/mds/MDSRank.h
src/mds/ScrubHeader.h
src/mds/ScrubStack.cc
src/mds/ScrubStack.h
src/messages/MMDSScrubStats.h

index dcf3eaac0d68372645712c923b67d06210f9b360..18efba561ed55e5b433ae671b1e8a287848c03d4 100644 (file)
@@ -1666,6 +1666,17 @@ options:
   - mds
   flags:
   - runtime
+- name: mds_scrub_stats_review_period
+  type: uint
+  level: advanced
+  desc: Number of days for which scrub stats are kept available for review.
+  long_desc: Number of days, counted from the start of a scrub operation, for which
+    its stats remain available for review. After this period the stats are purged
+    automatically. The stats are not saved to disk, so any restart or failover of
+    the MDS causes them to be lost permanently.
+  default: 1
+  min: 1
+  max: 60
 - name: mds_session_metadata_threshold
   type: size
   level: advanced
index 3a53bd2e1f5156ecdf50c0f5c7870e34168a4975..d1102c4138e28cdaf79e8672e5c8d574e3a467a5 100644 (file)
@@ -13294,6 +13294,11 @@ void MDCache::uninline_data_work(MDRequestRef mdr)
     mds->server->respond_to_request(mdr, 0);
     return;
   }
+  if (MDS_INO_IS_MDSDIR(in->get_scrub_header()->get_origin())) {
+    in->get_scrub_header()->record_uninline_skipped();
+    mds->server->respond_to_request(mdr, 0);
+    return;
+  }
 
   logger->inc(l_mdc_uninline_started);
   auto h = in->get_scrub_header();
index b31d9c95220cc2fbcd7ea55c3a8d36eeef2ac6be..75b608ace777130d5996cc4a60ef457665d45fdc 100644 (file)
@@ -334,6 +334,11 @@ void MDSDaemon::set_up_admin_socket()
                                      asok_hook,
                                      "Status of scrub operations(s)");
   ceph_assert(r == 0);
+  r = admin_socket->register_command("scrub purge_status "
+                                    "name=tag,type=CephString,req=true",
+                                     asok_hook,
+                                     "Purge status of scrub tag|all");
+  ceph_assert(r == 0);
   r = admin_socket->register_command("tag path name=path,type=CephString"
                                      " name=tag,type=CephString",
                                      asok_hook,
index 1cd742423e6411541b0195b869f7a6e705f142ba..bb059ca5a7d80306d60c0d57263bce6d534317ad 100644 (file)
@@ -782,8 +782,10 @@ void MDSRankDispatcher::tick()
       }
     }
 
-    if (whoami == 0)
+    if (whoami == 0) {
       scrubstack->advance_scrub_status();
+      scrubstack->purge_old_scrub_counters();
+    }
   }
 
   if (is_active() || is_stopping()) {
@@ -2965,6 +2967,15 @@ void MDSRankDispatcher::handle_asok_command(
     command_scrub_resume(f);
   } else if (command == "scrub status") {
     command_scrub_status(f);
+  } else if (command == "scrub purge_status") {
+    if (whoami != 0) {
+      *css << "Not rank 0";
+      r = -CEPHFS_EXDEV;
+      goto out;
+    }
+    string tag;
+    cmd_getval(cmdmap, "tag", tag);
+    command_scrub_purge_status(tag);
   } else if (command == "tag path") {
     if (whoami != 0) {
       *css << "Not rank 0";
@@ -3225,6 +3236,11 @@ void MDSRank::command_scrub_status(Formatter *f) {
   scrubstack->scrub_status(f);
 }
 
+// Purge the scrub stats recorded under `tag`; the ScrubStack call is made
+// with mds_lock held for the duration of this function.
+void MDSRank::command_scrub_purge_status(std::string_view tag) {
+  std::lock_guard locker(mds_lock);
+  scrubstack->purge_scrub_counters(tag);
+}
+
 void MDSRank::command_get_subtrees(Formatter *f)
 {
   ceph_assert(f != NULL);
index c4a8809b6e1a0e8e99697cf70399b5dc15c251a6..9135db40c0992fe0eb30c9c0792ce33374c93753 100644 (file)
@@ -509,6 +509,7 @@ class MDSRank {
     // scrub control commands
     void command_scrub_resume(Formatter *f);
     void command_scrub_status(Formatter *f);
+    void command_scrub_purge_status(std::string_view tag);
 
     void command_get_subtrees(Formatter *f);
     void command_export_dir(Formatter *f,
index 34105b34503cb6708ac2560ad33730c2e0008065..5f112387fec3a59fd1756e0005abdd830b43ed2f 100644 (file)
@@ -91,6 +91,9 @@ public:
   void record_uninline_failed() {
     uninline_failed++;
   }
+  // Bump the count of uninline operations that were skipped.
+  void record_uninline_skipped() {
+    ++uninline_skipped;
+  }
   uint64_t get_uninline_started() const {
     return uninline_started;
   }
@@ -100,6 +103,9 @@ public:
   uint64_t get_uninline_failed() const {
     return uninline_failed;
   }
+  // Number of uninline operations recorded as skipped so far.
+  uint64_t get_uninline_skipped() const {
+    return uninline_skipped;
+  }
 
 protected:
   const std::string tag;
@@ -121,6 +127,7 @@ protected:
   uint64_t uninline_started = 0;
   uint64_t uninline_passed = 0;
   uint64_t uninline_failed = 0;
+  uint64_t uninline_skipped = 0;
 };
 
 typedef std::shared_ptr<ScrubHeader> ScrubHeaderRef;
index a4e7c4ee3758ff04fe69160bb948f22825d5c380..acca1d75561c1c139aceb9ca417ce800f5095a16 100644 (file)
@@ -108,9 +108,51 @@ int ScrubStack::_enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, bool top)
   return 0;
 }
 
+// Purge scrub stats on demand.
+//
+// tag: the scrub tag whose stats are dropped from every entry of
+//      mds_scrub_stats; the special value "all" drops the stats of every tag.
+//      A tag that is not present is silently ignored.
+void ScrubStack::purge_scrub_counters(std::string_view tag)
+{
+  // Both values are loop invariant: decide once and build the map key once
+  // instead of once per rank.
+  const bool purge_all = (tag == "all");
+  const std::string key(tag);
+
+  for (auto& stat : mds_scrub_stats) {
+    if (purge_all) {
+      stat.counters.clear();
+    } else {
+      // erase-by-key is a no-op when the key is absent, so no find() needed
+      stat.counters.erase(key);
+    }
+  }
+}
+
+// Called from tick: for every rank, drop the stats of scrubs whose start_time
+// lies at least "mds_scrub_stats_review_period" days in the past.
+void ScrubStack::purge_old_scrub_counters()
+{
+  // "mds_scrub_stats_review_period" must be in number of days
+  const auto days = g_conf().get_val<uint64_t>("mds_scrub_stats_review_period");
+  const auto review_period = ceph::make_timespan(days * 24 * 60 * 60);
+  const auto now = coarse_real_clock::now();
+
+  dout(20) << __func__ << " review_period:" << review_period << dendl;
+
+  for (mds_rank_t rank = 0; rank < (mds_rank_t)mds_scrub_stats.size(); rank++) {
+    auto& counters = mds_scrub_stats[rank].counters;
+    for (auto it = counters.begin(); it != counters.end(); ) {
+      // Read start_time in place: copying the whole entry would also copy its
+      // origin_path string for every tag on every tick.
+      const auto elapsed = now - it->second.start_time;
+      dout(20) << __func__
+               << " rank(" << rank << ") :"
+               << " elapsed:" << elapsed
+               << dendl;
+      if (elapsed >= review_period) {
+        it = counters.erase(it); // erase() returns the next valid iterator
+      } else {
+        ++it;
+      }
+    }
+  }
+}
+
 void ScrubStack::init_scrub_counters(std::string_view path, std::string_view tag)
 {
-  scrub_counters_t sc{real_clock::now(), std::string(path), 0, 0, 0};
+  scrub_counters_t sc{coarse_real_clock::now(), std::string(path), 0, 0, 0};
   for (auto& stat : mds_scrub_stats) {
     stat.counters[std::string(tag)] = sc;
   }
@@ -736,15 +778,23 @@ void ScrubStack::scrub_status(Formatter *f) {
          started += c.uninline_started;
          passed += c.uninline_passed;
          failed += c.uninline_failed;
+         skipped += c.uninline_skipped;
        }
       }
       f->open_object_section(tag);
       {
        f->dump_stream("start_time") << ctrs.start_time;
-       f->dump_string("path", (ctrs.origin_path == "" ? "/"s : ctrs.origin_path));
+       std::string path = ctrs.origin_path;
+       if (path == "") {
+         path = "/";
+       } else if (path.starts_with("~mds")) {
+         path = "~mdsdir";
+       }
+       f->dump_string("path", path);
        f->dump_int("uninline_started", started);
        f->dump_int("uninline_passed", passed);
        f->dump_int("uninline_failed", failed);
+       f->dump_int("uninline_skipped", skipped);
       }
       f->close_section(); // tag
     }
@@ -1119,7 +1169,8 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)
        ceph_assert(header->get_paths().size() == 0);
        std::vector<uint64_t> c{header->get_uninline_started(),
                                header->get_uninline_passed(),
-                               header->get_uninline_failed()
+                               header->get_uninline_failed(),
+                               header->get_uninline_skipped()
        };
        counters[header->get_tag()] = c;
        scrubbing_map.erase(it++);
@@ -1157,6 +1208,7 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)
        stat.counters[tag].uninline_started = v[0];
        stat.counters[tag].uninline_passed = v[1];
        stat.counters[tag].uninline_failed = v[2];
+       stat.counters[tag].uninline_skipped = v[3];
       }
     }
   }
@@ -1258,6 +1310,7 @@ void ScrubStack::advance_scrub_status()
       sc.uninline_started = header->get_uninline_started();
       sc.uninline_passed = header->get_uninline_passed();
       sc.uninline_failed = header->get_uninline_failed();
+      sc.uninline_skipped = header->get_uninline_skipped();
 
       scrubbing_map.erase(it++);
     } else {
index 5030ae813491c2d963c92ed340394113f186cf93..c921804ba254411056db4d25d4b275bcffa2af0f 100644 (file)
@@ -108,6 +108,9 @@ public:
   void move_uninline_failures_to_damage_table();
 
   void init_scrub_counters(std::string_view path, std::string_view tag);
+  void purge_scrub_counters(std::string_view tag);
+  void purge_old_scrub_counters(); // on tick
+
 
   MDCache *mdcache;
 
@@ -139,11 +142,12 @@ protected:
   bool scrub_any_peer_aborting = true;
 
   struct scrub_counters_t {
-    ceph::real_clock::time_point start_time;
+    ceph::coarse_real_clock::time_point start_time = coarse_real_clock::now();
     std::string origin_path;
-    uint64_t uninline_started;
-    uint64_t uninline_passed;
-    uint64_t uninline_failed;
+    uint64_t uninline_started = 0;
+    uint64_t uninline_passed = 0;
+    uint64_t uninline_failed = 0;
+    uint64_t uninline_skipped = 0;
   };
   struct scrub_stat_t {
     unsigned epoch_acked = 0;
index 1f3e70d38430343dd8d1ad4f90bdf6bc2cea3bea..9252f99ac8ace22494818fba1bc09da9b60cf080 100644 (file)
@@ -94,10 +94,12 @@ public:
       uint64_t started = v[0];
       uint64_t passed = v[1];
       uint64_t failed = v[2];
+      uint64_t skipped = v[3];
 
       encode(started, payload);
       encode(passed, payload);
       encode(failed, payload);
+      encode(skipped, payload);
     }
   }
   void decode_uninline_failed_info(ceph::bufferlist::const_iterator& p) {
@@ -136,11 +138,13 @@ public:
       uint64_t started = 0;
       uint64_t passed = 0;
       uint64_t failed = 0;
+      uint64_t skipped = 0;
 
       decode(started, p);
       decode(passed, p);
       decode(failed, p);
-      std::vector<uint64_t> c{started, passed, failed};
+      decode(skipped, p);
+      std::vector<uint64_t> c{started, passed, failed, skipped};
       counters[tag] = c;
     }
   }